Worked on preprocessing TODOS; made progress with glossar entries and durations
This commit is contained in:
parent
e9120a2e4b
commit
2c4f48531a
69
README.md
69
README.md
@ -157,7 +157,37 @@ dat_all[735, ]
|
||||
# 1427 2016_11_15-12_12_57.log 2016-12-15 12:12:57 850 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997107 13.26223362
|
||||
```
|
||||
|
||||
## Events that only close (`date.start` is NA)
|
||||
`time_ms` does not increase from log file to log file
|
||||
|
||||
```{r}
|
||||
tmp1 <- dat[!duplicated(dat$fileid), c("fileid", "time_ms", "event")]
|
||||
tmp2 <- dat[!duplicated(dat$fileid, fromLast=T), c("fileid", "time_ms", "event")]
|
||||
tmp <- rbind(tmp1, tmp2)
|
||||
tmp <- tmp[order(tmp$fileid), ]
|
||||
head(tmp, 50)
|
||||
|
||||
plot(time_ms ~ as.factor(fileid), dat[1:2000, ], xlab = "fileid")
|
||||
```
|
||||
|
||||
## x,y-coordinates outside of display range
|
||||
|
||||
The display is a 4K-display with 3840 x 2160 pixels. When you plot the
|
||||
start and stop coordinates, the display is clearly to distinguish. However,
|
||||
a lot of points are outside of the display range. This can happen, when the
|
||||
art objects are scaled and then moved to the very edge of the table. Then
|
||||
it will record pixels outside of the table. These are actually valid data
|
||||
points and I will leave them as is.
|
||||
|
||||
```{r}
|
||||
par(mfrow = c(1, 2))
|
||||
plot(y.start ~ x.start, dat)
|
||||
abline(v = c(0, 3840), h = c(0, 2160), col = "blue", lwd = 2)
|
||||
plot(y.stop ~ x.stop, dat)
|
||||
abline(v = c(0, 3840), h = c(0, 2160), col = "blue", lwd = 2)
|
||||
|
||||
|
||||
aggregate(cbind(x.start, x.stop, y.start, y.stop) ~ 1, dat, mean)
|
||||
```
|
||||
|
||||
|
||||
## Timestamps repeat
|
||||
@ -173,7 +203,44 @@ dat_all[735, ]
|
||||
|
||||
## Add moves to `trace` variable
|
||||
|
||||
## openPopup does not close correctly
|
||||
|
||||
The sorting had to include `popup` otherwise nested events could not be
|
||||
closed correctly.
|
||||
|
||||
```{r}
|
||||
# TODO: Some correct entries are not closed:
|
||||
df[df$trace == 1843, ]
|
||||
# WHY???
|
||||
# --> Wrong eventid!
|
||||
dat5[dat5$trace == 1843, ]
|
||||
openPopup_wide[openPopup_wide$trace == 1843, ]
|
||||
```
|
||||
## Events that only close (`date.start` is NA)
|
||||
|
||||
It looks like there is some kind of log error for the events that do not
|
||||
have a start stop. I was able to get rid of most by sorting for `popup` for
|
||||
the openPopup events, but there are still some left (50 for the small data
|
||||
set, which corresponds to 0.2 per mill).
|
||||
|
||||
```{r}
|
||||
# remove all events that do not have a `date.start`
|
||||
dim(dat_all[is.na(dat_all$date.start), ])
|
||||
dat_all <- dat_all[!is.na(dat_all$date.start), ]
|
||||
# TODO: Find out how it can be that there is only a `date.stop`
|
||||
## --> happens, when event is not properly closed, see here:
|
||||
df[df$trace == 1843, ]
|
||||
dat_openPopup[dat_openPopup$trace == 1843, ]
|
||||
## --> still 50 (small data set) left, and some really do not seem to be
|
||||
## opened! Must be a log error
|
||||
# --> others should be closed!
|
||||
dat[31000:31019,] # this one e.g.
|
||||
# --> Actually NOT! card gets flipped before! Again - log error!
|
||||
```
|
||||
Will probably just get rid of them!
|
||||
|
||||
Think about if you want give warning messages about these deletions in the
|
||||
functions.
|
||||
|
||||
# Reading list
|
||||
|
||||
|
@ -30,6 +30,7 @@
|
||||
dat0 <- read.table("../data/rawdata_logfiles_small.csv", sep = ";",
|
||||
header = TRUE)
|
||||
dat0$date <- as.POSIXct(dat0$date) # create date object
|
||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||
|
||||
#' # Remove irrelevant events
|
||||
|
||||
@ -37,6 +38,7 @@ dat0$date <- as.POSIXct(dat0$date) # create date object
|
||||
|
||||
dat <- subset(dat0, !(dat0$event %in% c("Start Application",
|
||||
"Show Application")))
|
||||
rownames(dat) <- NULL
|
||||
|
||||
#' # Close events
|
||||
|
||||
@ -54,7 +56,7 @@ head(dat1[, c("event", "eventid")], 25)
|
||||
|
||||
table(table(dat1$eventid))
|
||||
# 1 2 3 4 5 6 7 8 10 11
|
||||
# 73 78429 5156 842 222 66 18 14 3 1
|
||||
# 70 78435 5153 842 222 66 18 14 3 1
|
||||
# --> compare to table(num_start)!
|
||||
|
||||
# Find out how often "Transform stop" follows each other
|
||||
@ -80,18 +82,27 @@ id_rm_stop <- id_stop[diff(id_stop) == 1]
|
||||
dat1 <- dat1[-(id_rm_stop + 1), ]
|
||||
|
||||
# transform to wide data format
|
||||
dat1$event <- ifelse(dat1$event == "Transform start", "start", "stop")
|
||||
dat1$time <- ifelse(dat1$event == "Transform start", "start", "stop")
|
||||
|
||||
trans_wide <- reshape(dat1, direction = "wide",
|
||||
idvar = c("eventid", "artwork"),
|
||||
timevar = "event", drop = c("fileid", "popup", "card")
|
||||
idvar = c("eventid", "artwork", "glossar"),
|
||||
timevar = "time",
|
||||
drop = c("popup", "card", "event")
|
||||
)
|
||||
# --> when fileid is part of the reshape, it does not work correctly, since
|
||||
# we sometimes have a start - stop event that is recorded in two separate
|
||||
# log files
|
||||
# TODO: This runs for quite some time
|
||||
# --> Is this more efficient with tidyr::pivot_wider?
|
||||
|
||||
# --> when fileid is part of the reshape, it does not work correctly, since
|
||||
# we sometimes have a start - stop event that is recorded in two separate
|
||||
# log files, BUT: after finding out, that `time_ms` changes for each log
|
||||
# file, I want to exclude those cases, so `fileid` has to be included!!!
|
||||
|
||||
# check how often an eventid is associated with two fileids
|
||||
nrow(subset(trans_wide, trans_wide$fileid.start != trans_wide$fileid.stop))
|
||||
|
||||
# exclude from data set ??
|
||||
# trans_wide <- subset(trans_wide, trans_wide$fileid.start != trans_wide$fileid.stop)
|
||||
|
||||
# which(is.na(trans_wide$date.start))
|
||||
|
||||
trans_wide$event <- "move"
|
||||
@ -116,38 +127,28 @@ trans_wide$popup <- NA
|
||||
dat_trans <- trans_wide[trans_wide$distance != 0 &
|
||||
trans_wide$rotationDegree != 0 &
|
||||
trans_wide$scaleSize != 1,
|
||||
c("event", "artwork", "trace", "date.start", "date.stop",
|
||||
c("fileid.start", "fileid.stop", "event", "artwork",
|
||||
"trace", "glossar", "date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop", "duration",
|
||||
"card", "popup",
|
||||
"x.start", "y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start", "scale.stop",
|
||||
"card", "popup", "x.start", "y.start", "x.stop",
|
||||
"y.stop", "distance", "scale.start", "scale.stop",
|
||||
"scaleSize", "rotation.start", "rotation.stop",
|
||||
"rotationDegree")]
|
||||
1 - nrow(dat_trans) / nrow(trans_wide)
|
||||
# removes almost 2/3 of the data (for small data set)
|
||||
|
||||
rm(id_rm_stop, id_stop, trans_wide, num_start, num_stop)
|
||||
|
||||
summary(dat_trans)
|
||||
|
||||
# TODO: Ask Phillip what is wrong with `time_ms`
|
||||
# --> Hat er eine Erklärung dafür?
|
||||
|
||||
#plot(time_ms.stop ~ time_ms.start, dat_trans, type = "b")
|
||||
plot(time_ms.stop ~ time_ms.start, dat_trans,
|
||||
col = rgb(red = 0, green = 0, blue = 0, alpha = 0.2))
|
||||
|
||||
plot(date.stop ~ date.start, dat_trans[1:1000,], type = "b")
|
||||
|
||||
|
||||
|
||||
|
||||
#' # Close other events
|
||||
|
||||
dat2 <- dat[!dat$event %in% c("Transform start", "Transform stop"), ]
|
||||
dat2$x <- NULL
|
||||
dat2$y <- NULL
|
||||
dat2$scale <- NULL
|
||||
dat2$rotation <- NULL
|
||||
# dat2$x <- NULL
|
||||
# dat2$y <- NULL
|
||||
# dat2$scale <- NULL
|
||||
# dat2$rotation <- NULL
|
||||
rownames(dat2) <- NULL
|
||||
|
||||
# Create event ID for closing events
|
||||
@ -202,8 +203,6 @@ lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
|
||||
head(dat2[, c("artwork", "event", "popup", "trace")], 20)
|
||||
|
||||
|
||||
#df <- NULL
|
||||
|
||||
for (file in lut$glossar_file) {
|
||||
|
||||
artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
||||
@ -231,16 +230,15 @@ for (file in lut$glossar_file) {
|
||||
(current_artwork %in% artwork_list) &
|
||||
dat2$popup[i] == file & (j-k == 0)) {
|
||||
|
||||
#df <- rbind(df, data.frame(file, current_artwork, i, j))
|
||||
dat2[i, "trace"] <- dat2[j, "trace"]
|
||||
dat2[i, "artwork"] <- current_artwork
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
# dim(dat2[is.na(dat2$trace), ])
|
||||
# --> finds about half of the glossar entries for the small data set...
|
||||
|
||||
# dat2[apply(df[, c("j", "i")], 1, c), c("artwork", "event", "popup", "trace")]
|
||||
# --> finds about half of the glossar entries for the small data set...
|
||||
table(is.na(dat2[dat2$glossar == 1, "trace"]))
|
||||
|
||||
# REMEMBER: It can never bo 100% correct, since it is always possible that
|
||||
# several cards are open and that they link to the same glossar entry
|
||||
@ -251,45 +249,42 @@ lut[sapply(lut$artwork, length) == 1, "glossar_file"]
|
||||
# TODO: Fill in the ones that are associated with one artwork
|
||||
# --> Can't come up with something -- maybe ask AK???
|
||||
|
||||
# TODO: How to check if one of the former "Show Infos" is correct
|
||||
# --> Can't come up with something -- maybe ask AK???
|
||||
single <- lut[sapply(lut$artwork, length) == 1, "glossar_file"]
|
||||
tmp <- subset(dat2, is.na(dat2$trace))$popup
|
||||
inside <- unique(tmp[tmp %in% lut[sapply(lut$artwork, length) == 1, "glossar_file"]])
|
||||
single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"])
|
||||
tmp_lut <- data.frame(glossar_file = sort(inside), artwork = single_art)
|
||||
|
||||
# for (file in lut$glossar_file) {
|
||||
#
|
||||
# artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
||||
#
|
||||
# for (i in seq_len(nrow(dat2))) {
|
||||
#
|
||||
# if (dat2$event[i] == "Show Info") {
|
||||
#
|
||||
# artworks <- NULL
|
||||
# current_artwork <- dat2[i, "artwork"]
|
||||
# j <- i
|
||||
#
|
||||
# } else {
|
||||
#
|
||||
# print(current_artwork)
|
||||
# artworks <- c(artworks, dat2[i, "artwork"])
|
||||
# print(artworks)
|
||||
#
|
||||
# }
|
||||
#
|
||||
# # if (dat2$artwork[i] == "glossar" &
|
||||
# # (current_artwork %in% artwork_list) &
|
||||
# # dat2$popup[i] == file) {
|
||||
# #
|
||||
# # #df <- rbind(df, data.frame(file, current_artwork, i, j))
|
||||
# # dat2[i, "trace"] <- dat2[j, "trace"]
|
||||
#
|
||||
# # }
|
||||
# }
|
||||
# }
|
||||
|
||||
# correct: 17940
|
||||
# incorrect: 17963
|
||||
dat2[dat2$glossar == 1, c("artwork", "popup", "glossar", "trace")]
|
||||
|
||||
for (file in tmp_lut$glossar_file) {
|
||||
|
||||
for (i in seq_len(nrow(dat2))) {
|
||||
|
||||
if (dat2$event[i] == "Artwork/OpenCard" & dat2$artwork[i] %in% tmp_lut$artwork) {
|
||||
|
||||
current_artwork <- dat2[i, "artwork"]
|
||||
j <- i
|
||||
|
||||
}
|
||||
|
||||
if (dat2$artwork[i] == "glossar" &
|
||||
dat2$popup[i] == file) {
|
||||
|
||||
dat2[i, "trace"] <- dat2[j, "trace"]
|
||||
dat2[i, "artwork"] <- current_artwork
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
dat2[14110:14130, ]
|
||||
|
||||
# TODO: Integrate for loop into for loop above
|
||||
|
||||
# TODO: "glossar" entry should be changed to the corresponding artwork
|
||||
# TODO: Add additional variable `glossar` with 0/1 or similar instead
|
||||
|
||||
# TODO: For now: Exclude not matched glossar entries
|
||||
|
||||
@ -304,12 +299,12 @@ rm(lut, current_artwork, file, glossar_dict, i, j, k, artwork_list,
|
||||
|
||||
dat3 <- subset(df, df$event %in% c("Show Info", "Show Front"))
|
||||
|
||||
dat3$event <- ifelse(dat3$event == "Show Info", "start", "stop")
|
||||
dat3$time <- ifelse(dat3$event == "Show Info", "start", "stop")
|
||||
|
||||
flipCard_wide <- reshape(dat3, direction = "wide",
|
||||
idvar = c("trace", "artwork"),
|
||||
timevar = "event",
|
||||
drop = c("fileid", "popup", "card"))
|
||||
idvar = c("trace", "artwork", "glossar"),
|
||||
timevar = "time",
|
||||
drop = c("popup", "card"))
|
||||
flipCard_wide$event <- "flipCard"
|
||||
flipCard_wide$duration <- flipCard_wide$time_ms.stop -
|
||||
flipCard_wide$time_ms.start
|
||||
@ -329,14 +324,15 @@ flipCard_wide$rotation.start <- NA
|
||||
flipCard_wide$rotation.stop <- NA
|
||||
flipCard_wide$rotationDegree <- NA
|
||||
|
||||
dat_flipCard <- flipCard_wide[, c("event", "artwork", "trace",
|
||||
"date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop",
|
||||
"duration", "card", "popup",
|
||||
"x.start", "y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start", "scale.stop",
|
||||
"scaleSize", "rotation.start",
|
||||
"rotation.stop", "rotationDegree")]
|
||||
dat_flipCard <- flipCard_wide[, c("fileid.start", "fileid.stop", "event",
|
||||
"artwork", "trace", "glossar",
|
||||
"date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop",
|
||||
"duration", "card", "popup", "x.start",
|
||||
"y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start", "scale.stop",
|
||||
"scaleSize", "rotation.start",
|
||||
"rotation.stop", "rotationDegree")]
|
||||
|
||||
rm(flipCard_wide)
|
||||
|
||||
@ -349,11 +345,11 @@ rownames(dat4) <- NULL
|
||||
num_start <- diff(c(0, which(dat4$event == "Artwork/CloseCard")))
|
||||
dat4$eventid <- rep(seq_along(num_start), num_start)
|
||||
|
||||
dat4$event <- ifelse(dat4$event == "Artwork/OpenCard", "start", "stop")
|
||||
dat4$time <- ifelse(dat4$event == "Artwork/OpenCard", "start", "stop")
|
||||
|
||||
openTopic_wide <- reshape(dat4, direction = "wide",
|
||||
idvar = c("eventid", "trace", "artwork", "card"),
|
||||
timevar = "event", drop = c("fileid", "popup"))
|
||||
idvar = c("eventid", "trace", "glossar", "artwork", "card"),
|
||||
timevar = "time", drop = "popup")
|
||||
openTopic_wide$event <- "openTopic"
|
||||
openTopic_wide$duration <- openTopic_wide$time_ms.stop -
|
||||
openTopic_wide$time_ms.start
|
||||
@ -372,47 +368,46 @@ openTopic_wide$rotation.start <- NA
|
||||
openTopic_wide$rotation.stop <- NA
|
||||
openTopic_wide$rotationDegree <- NA
|
||||
|
||||
dat_openTopic <- openTopic_wide[, c("event", "artwork", "trace",
|
||||
"date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop",
|
||||
"duration", "card", "popup", "x.start",
|
||||
"y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start", "scale.stop",
|
||||
"scaleSize", "rotation.start",
|
||||
"rotation.stop", "rotationDegree")]
|
||||
dat_openTopic <- openTopic_wide[, c("fileid.start", "fileid.stop", "event",
|
||||
"artwork", "trace", "glossar",
|
||||
"date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop",
|
||||
"duration", "card", "popup", "x.start",
|
||||
"y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start",
|
||||
"scale.stop", "scaleSize",
|
||||
"rotation.start", "rotation.stop",
|
||||
"rotationDegree")]
|
||||
# TODO: card should have a unique identifier for each artwork
|
||||
|
||||
rm(openTopic_wide, num_start)
|
||||
|
||||
#' ## close openPopup
|
||||
dat5 <- subset(df, df$event %in% c("ShowPopup", "HidePopup"))
|
||||
dat5 <- dat5[order(dat5$artwork, dat5$date), ]
|
||||
dat5 <- dat5[order(dat5$artwork, dat5$popup, dat5$date), ]
|
||||
rownames(dat5) <- NULL
|
||||
|
||||
num_start <- diff(c(0, which(dat5$event == "HidePopup")))
|
||||
# last event is "ShowPopup"! Needs to be fixed
|
||||
num_start <- c(num_start, 1)
|
||||
# TODO: Needs to be caught in a function
|
||||
# last event is "ShowPopup"! Needs to be fixed
|
||||
# num_start <- c(num_start, 1)
|
||||
# TODO: Needs to be caught in a function --> not anymore - still relevant???
|
||||
|
||||
dat5$eventid <- rep(seq_along(num_start), num_start)
|
||||
|
||||
dat5$event <- ifelse(dat5$event == "ShowPopup", "start", "stop")
|
||||
dat5$time <- ifelse(dat5$event == "ShowPopup", "start", "stop")
|
||||
|
||||
openPopup_wide <- reshape(dat5, direction = "wide",
|
||||
idvar = c("eventid", "trace", "artwork", "popup"),
|
||||
timevar = "event", drop = c("fileid", "card"))
|
||||
idvar = c("eventid", "trace", "glossar", "artwork", "popup"),
|
||||
timevar = "time", drop = "card")
|
||||
# there is a pathological entry which gets deleted...
|
||||
# df[df$trace == 4595, ]
|
||||
# --> artwork 046 popup selene.xml gets opened twice
|
||||
|
||||
# TODO: Some correct entries are not closed:
|
||||
df[df$trace == 1843, ]
|
||||
# WHY???
|
||||
|
||||
openPopup_wide$event <- "openPopup"
|
||||
openPopup_wide$duration <- openPopup_wide$time_ms.stop -
|
||||
openPopup_wide$time_ms.start
|
||||
|
||||
|
||||
openPopup_wide$card <- NA
|
||||
openPopup_wide$x.start <- NA
|
||||
openPopup_wide$x.stop <- NA
|
||||
@ -426,14 +421,16 @@ openPopup_wide$rotation.start <- NA
|
||||
openPopup_wide$rotation.stop <- NA
|
||||
openPopup_wide$rotationDegree <- NA
|
||||
|
||||
dat_openPopup <- openPopup_wide[, c("event", "artwork", "trace",
|
||||
"date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop",
|
||||
"duration", "card", "popup", "x.start",
|
||||
"y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start", "scale.stop",
|
||||
"scaleSize", "rotation.start",
|
||||
"rotation.stop", "rotationDegree")]
|
||||
dat_openPopup <- openPopup_wide[, c("fileid.start", "fileid.stop", "event",
|
||||
"artwork", "trace", "glossar",
|
||||
"date.start", "date.stop",
|
||||
"time_ms.start", "time_ms.stop",
|
||||
"duration", "card", "popup", "x.start",
|
||||
"y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start",
|
||||
"scale.stop", "scaleSize",
|
||||
"rotation.start", "rotation.stop",
|
||||
"rotationDegree")]
|
||||
rm(num_start, openPopup_wide)
|
||||
|
||||
|
||||
@ -443,14 +440,14 @@ rm(num_start, openPopup_wide)
|
||||
# dat_all <- merge(dat_all, dat_openTopic, all = TRUE)
|
||||
# dat_all <- merge(dat_all, dat_openPopup, all = TRUE)
|
||||
# })
|
||||
#
|
||||
#
|
||||
# # check
|
||||
# nrow(dat_all) == (nrow(dat_trans) + nrow(dat_flipCard) +
|
||||
# nrow(dat_openTopic) + nrow(dat_openPopup))
|
||||
#
|
||||
#
|
||||
# dat_all <- dat_all[order(dat_all$date.start), ]
|
||||
# rownames(dat_all) <- NULL
|
||||
#
|
||||
#
|
||||
|
||||
# TODO: from here on NA... WHY??
|
||||
# dat_all[19426:19435, ]
|
||||
@ -460,10 +457,10 @@ rm(num_start, openPopup_wide)
|
||||
# dat_all2 <- dplyr::full_join(dat_trans, dat_flipCard)
|
||||
# dat_all2 <- dplyr::full_join(dat_all, dat_openTopic)
|
||||
# dat_all2 <- dplyr::full_join(dat_all, dat_openPopup)
|
||||
#
|
||||
#
|
||||
# nrow(dat_all2) == (nrow(dat_trans) + nrow(dat_flipCard) +
|
||||
# nrow(dat_openTopic) + nrow(dat_openPopup))
|
||||
#
|
||||
#
|
||||
# dat_all2 <- dat_all2[order(dat_all2$date.start), ]
|
||||
# rownames(dat_all2) <- NULL
|
||||
# TODO: --> same result - but faster. Need it?
|
||||
@ -479,8 +476,22 @@ nrow(dat_all) == (nrow(dat_trans) + nrow(dat_flipCard) +
|
||||
nrow(dat_openTopic) + nrow(dat_openPopup))
|
||||
|
||||
# remove all events that do not have a `date.start`
|
||||
dim(dat_all[is.na(dat_all$date.start), ])
|
||||
dat_all <- dat_all[!is.na(dat_all$date.start), ]
|
||||
# TODO: Find out how it can be that there is only a `date.stop`
|
||||
# There is only a `date.stop`, when event is not properly closed, see here:
|
||||
df[df$trace == 1843, ]
|
||||
dat_openPopup[dat_openPopup$trace == 1843, ]
|
||||
## --> still 50 (small data set) left, and some really do not seem to be
|
||||
## opened! Must be a log error
|
||||
# --> others should be closed!
|
||||
dat[31000:31019,] # this one e.g.
|
||||
# --> Actually NOT! card gets flipped before! Again - log error!
|
||||
|
||||
|
||||
# Remove durations when event spans more than one log file, since they are
|
||||
# not interpretable
|
||||
dat_all[which(dat_all$fileid.start != dat_all$fileid.stop), "duration"] <- NA
|
||||
|
||||
|
||||
# sort by `start.date`
|
||||
dat_all <- dat_all[order(dat_all$date.start), ]
|
||||
@ -521,7 +532,8 @@ head(dat_all[, c("event", "artwork", "trace", "date.start", "timediff", "case")]
|
||||
|
||||
dat_all$eventid <- seq_len(nrow(dat_all))
|
||||
|
||||
dat_all <- dat_all[, c("eventid", "case", "trace", "event", "artwork",
|
||||
dat_all <- dat_all[, c("fileid.start", "fileid.stop", "eventid", "case",
|
||||
"trace", "glossar", "event", "artwork",
|
||||
"date.start", "date.stop", "time_ms.start",
|
||||
"time_ms.stop", "duration", "card", "popup",
|
||||
"x.start", "y.start", "x.stop", "y.stop",
|
||||
@ -574,6 +586,7 @@ for (case in cases) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# TODO: Get rid of the loops
|
||||
# --> This takes forever...
|
||||
|
||||
@ -587,25 +600,20 @@ for (case in cases) {
|
||||
|
||||
# put glossar events back in
|
||||
|
||||
dat_all <- rbind(out, dat_all[dat_all$artwork == "glossar", ])
|
||||
dat_all <- dat_all[order(dat_all$date.start), ]
|
||||
rownames(dat_all) <- NULL
|
||||
#dat_all <- rbind(out, dat_all[dat_all$artwork == "glossar", ])
|
||||
out <- out[order(out$date.start), ]
|
||||
rownames(out) <- NULL
|
||||
|
||||
# Make `trace` a consecutive number
|
||||
dat_all$trace <- as.numeric(as.factor(dat_all$trace))
|
||||
|
||||
|
||||
# TODO: How to handle duration < 0
|
||||
# --> Replace with NA for now...
|
||||
dat_all$duration <- ifelse(dat_all$duration < 0, NA, dat_all$duration)
|
||||
out$trace2 <- as.numeric(factor(out$trace, levels = unique(out$trace)))
|
||||
|
||||
#head(out[, c("trace", "trace2")], 50)
|
||||
|
||||
#' # Export data
|
||||
|
||||
write.table(dat_all, "../data/event_logfiles.csv",
|
||||
write.table(out, "../data/event_logfiles.csv",
|
||||
sep = ";", quote = FALSE, row.names = FALSE)
|
||||
|
||||
|
||||
# Is `artwork` my case? Or `artwork` per day? Or `artwork` per some other
|
||||
# unit??? Maybe look at differences between timestamps separately for
|
||||
# `artwork`? And identify "new observational unit" this way?
|
||||
@ -621,6 +629,3 @@ write.table(dat_all, "../data/event_logfiles.csv",
|
||||
|
||||
# TODO: Write function for closing events
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -237,3 +237,12 @@ counts <- table(as.Date(dat$date[dat$event %in% start_events]),
|
||||
lattice::barchart(counts, auto.key = TRUE)
|
||||
|
||||
|
||||
# TODO: Ask Phillip what is wrong with `time_ms`
|
||||
# --> Hat er eine Erklärung dafür?
|
||||
|
||||
#plot(time_ms.stop ~ time_ms.start, dat_trans, type = "b")
|
||||
plot(time_ms.stop ~ time_ms.start, dat_trans,
|
||||
col = rgb(red = 0, green = 0, blue = 0, alpha = 0.2))
|
||||
|
||||
plot(date.stop ~ date.start, dat_trans[1:1000,], type = "b")
|
||||
|
Loading…
Reference in New Issue
Block a user