Fixed closing events that span more than one log file and calculated correct durations

This commit is contained in:
Nora Wickelmaier 2023-10-19 18:17:44 +02:00
parent 67f4c70203
commit d21d52e84c
1 changed files with 87 additions and 69 deletions

View File

@ -7,32 +7,32 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
switch(event, switch(event,
"move" = { "move" = {
actions <- c("Transform start", "Transform stop") actions <- c("Transform start", "Transform stop")
idvar <- c("fileId", "folder", "eventId", "artwork", "glossar") idvar <- c("folder", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "trace", "event") drop <- c("popup", "topicNumber", "trace", "event")
ncol <- 17 ncol <- 18
}, },
"flipCard" = { "flipCard" = {
actions <- c("Show Info", "Show Front") actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "folder", "trace", "eventId", "artwork", "glossar") idvar <- c("folder", "trace", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "event") drop <- c("popup", "topicNumber", "event")
ncol <- 18 ncol <- 19
}, },
"openTopic" = { "openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard") actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("fileId", "folder", "eventId", "trace", "glossar", idvar <- c("folder", "eventId", "trace", "glossar",
"artwork", "topicNumber") "artwork", "topicNumber")
drop <- c("popup", "event") drop <- c("popup", "event")
ncol <- 19 ncol <- 20
}, },
"openPopup" = { "openPopup" = {
actions <- c("ShowPopup", "HidePopup") actions <- c("ShowPopup", "HidePopup")
idvar <- c("fileId", "folder", "eventId", "trace", "glossar", idvar <- c("folder", "eventId", "trace", "glossar",
"artwork", "popup") "artwork", "popup")
drop <- c("topicNumber", "event") drop <- c("topicNumber", "event")
ncol <- 19 ncol <- 20
# TODO: Should topicNumber maybe also be filled in for "openPopup"? # TODO: Should topicNumber maybe also be filled in for "openPopup"?
} }
) )
@ -46,13 +46,20 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
} }
subdata$eventId <- rep(seq_along(num_start), num_start) subdata$eventId <- rep(seq_along(num_start), num_start)
if (event == "move") { # remove start and stop events following directly each other
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ] subdata <- subdata[!duplicated(subdata[, c("event", "eventId")],
fromLast = TRUE), ]
id_stop <- which(subdata$event == actions[2]) id_stop <- which(subdata$event == actions[2])
id_rm_stop <- id_stop[diff(id_stop) == 1] id_rm_stop <- id_stop[diff(id_stop) == 1]
if (length(id_rm_stop) != 0) {
subdata <- subdata[-(id_rm_stop + 1), ] subdata <- subdata[-(id_rm_stop + 1), ]
} }
# remove eventIds associated with more than one trace, usually logging
# errors that I cannot resolve
corrupt_eventIds <- names(which(rowSums(xtabs( ~ eventId + trace, subdata) != 0) != 1))
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
if (event == "flipCard") { if (event == "flipCard") {
subdata$eventId <- subdata$trace subdata$eventId <- subdata$trace
} }
@ -75,6 +82,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
if (ncol(data_split_wide) != ncol) { if (ncol(data_split_wide) != ncol) {
if (!any(grepl("start", names(data_split_wide)))) { if (!any(grepl("start", names(data_split_wide)))) {
data_split_wide$fileId.start <- NA
data_split_wide$date.start <- NA data_split_wide$date.start <- NA
data_split_wide$timeMs.start <- NA data_split_wide$timeMs.start <- NA
data_split_wide$x.start <- NA data_split_wide$x.start <- NA
@ -86,21 +94,24 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
switch(event, switch(event,
"move" = { "move" = {
data_split_wide <- data_split_wide[, c("fileId", "folder", data_split_wide <- data_split_wide[, c("folder", "artwork",
"artwork", "glossar", "glossar", "eventId",
"eventId", "date.start", "fileId.start",
"timeMs.start", "x.start", "date.start",
"y.start", "scale.start", "timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start", "rotation.start",
"date.stop", "timeMs.stop", "date.stop",
"x.stop", "y.stop", "timeMs.stop", "x.stop",
"scale.stop", "y.stop", "scale.stop",
"rotation.stop")] "rotation.stop")]
}, },
"flipCard" = { "flipCard" = {
data_split_wide <- data_split_wide[, c("fileId", "folder", data_split_wide <- data_split_wide[, c("folder", "artwork",
"artwork", "glossar", "glossar", "trace",
"trace", "eventId", "eventId",
"fileId.start",
"date.start", "date.start",
"timeMs.start", "timeMs.start",
"x.start", "y.start", "x.start", "y.start",
@ -112,24 +123,28 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
"rotation.stop")] "rotation.stop")]
}, },
"openTopic" = { "openTopic" = {
data_split_wide <- data_split_wide[, c("fileId", "folder", data_split_wide <- data_split_wide[, c("folder", "artwork",
"artwork", "topicNumber", "topicNumber",
"glossar", "trace", "glossar", "trace",
"eventId", "date.start", "eventId",
"fileId.start",
"date.start",
"timeMs.start", "timeMs.start",
"x.start", "y.start", "x.start", "y.start",
"scale.start", "scale.start",
"rotation.start", "rotation.start",
"date.stop", "date.stop",
"timeMs.stop", "x.stop", "timeMs.stop",
"y.stop", "scale.stop", "x.stop", "y.stop",
"scale.stop",
"rotation.stop")] "rotation.stop")]
}, },
"openPopup" = { "openPopup" = {
data_split_wide <- data_split_wide[, c("fileId", "folder", data_split_wide <- data_split_wide[, c("folder", "artwork",
"artwork", "popup", "popup", "glossar",
"glossar", "trace", "trace", "eventId",
"eventId", "date.start", "fileId.start",
"date.start",
"timeMs.start", "timeMs.start",
"x.start", "y.start", "x.start", "y.start",
"scale.start", "scale.start",
@ -141,6 +156,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
} }
) )
} else if (!any(grepl("stop", names(data_split_wide)))) { } else if (!any(grepl("stop", names(data_split_wide)))) {
data_split_wide$fileId.stop <- NA
data_split_wide$date.stop <- NA data_split_wide$date.stop <- NA
data_split_wide$timeMs.stop <- NA data_split_wide$timeMs.stop <- NA
data_split_wide$x.stop <- NA data_split_wide$x.stop <- NA
@ -163,27 +179,15 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
corrupt <- data_wide[select, ] corrupt <- data_wide[select, ]
corrupt$identi <- "corrupt" corrupt$identi <- "corrupt"
if (event != "move") {
corrupt_ids <- aggregate(trace ~ eventId, corrupt, function(x) length(unique(x)))
d1 <- nrow(corrupt)
corrupt_ids <- corrupt_ids$eventId[corrupt_ids$trace != 1]
corrupt <- corrupt[!corrupt$eventId %in% corrupt_ids, ]
d2 <- nrow(corrupt)
if (d2 < d1) {
corrupt <- corrupt[!is.na(corrupt$date.start), ]
warning(paste0(d1 - d2, " events spanning two log files have been removed since it could not be resolved how to close them."))
}
}
close_open_eventIds <- function(df, eventId) { close_open_eventIds <- function(df, eventId) {
dfid <- df[df$eventId == eventId, ] dfid <- df[df$eventId == eventId, ]
dfid <- dfid[!is.na(dfid$eventId), ] dfid <- dfid[!is.na(dfid$eventId), ]
dfid <- dfid[order(dfid$fileId), ] dfid <- dfid[order(dfid$fileId.start), ]
if (nrow(dfid) == 2) { if (nrow(dfid) == 2) {
out <- dfid[1, ] out <- dfid[1, ]
out[, c("date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop", out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
"rotation.stop")] <- "rotation.stop")] <-
dfid[2, c("date.stop", "timeMs.stop", "x.stop", dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop",
"y.stop", "scale.stop", "rotation.stop")] "y.stop", "scale.stop", "rotation.stop")]
} else if (nrow(dfid) > 2) { } else if (nrow(dfid) > 2) {
stop("More than two rows for open eventIds. Something is wrong!") stop("More than two rows for open eventIds. Something is wrong!")
@ -195,10 +199,6 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId), fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
close_open_eventIds, df = corrupt)) close_open_eventIds, df = corrupt))
# FIXME: Something is wrong with the traces for `move`, `openTopic`,
# `openPopup` --> I have correct eventIds that have two different traces
# -- that cannot be! (I also have correct different traces, that have the
# same eventId...)
data_wide <- rbind(correct, fixed) data_wide <- rbind(correct, fixed)
@ -227,19 +227,37 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide <- move_wide data_wide <- move_wide
} }
# remove durations that span more than one log file data_wide <- data_wide[order(data_wide$fileId.start,
data_wide$duration[data_wide$identi == "corrupt"] <- NA data_wide$date.start,
# TODO: Maybe there is a better solution for this? data_wide$timeMs.start), ]
# fix durations that span more than one log file
if (event != "move") {
tab <- colSums(xtabs( ~ fileId + trace, subdata) != 0)
number_logfiles <- data.frame(trace = names(tab), nlogfile = tab)
data_wide <- merge(data_wide, number_logfiles, by = "trace", all.x = TRUE)
data_wide$duration[data_wide$identi == "corrupt"] <-
(data_wide$nlogfile[data_wide$identi == "corrupt"] - 1) * 600000 -
data_wide$timeMs.start[data_wide$identi == "corrupt"] +
data_wide$timeMs.stop[data_wide$identi == "corrupt"]
# TODO: This assumes that no log files are skipped
# --> Is this assumption really valid??
} else {
data_wide$duration[data_wide$identi == "corrupt"] <-
600000 -
data_wide$timeMs.start[data_wide$identi == "corrupt"] +
data_wide$timeMs.stop[data_wide$identi == "corrupt"]
} # there should be no movements spanning more than two log files!
out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))), out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))),
# remove all NA rows # remove all NA rows
c("fileId", "folder", "event", "artwork", "trace", c("fileId.start", "fileId.stop", "folder", "event",
"glossar", "date.start", "date.stop", "artwork", "trace", "glossar", "date.start",
"timeMs.start", "timeMs.stop", "duration", "date.stop", "timeMs.start", "timeMs.stop",
"topicNumber", "popup", "x.start", "y.start", "duration", "topicNumber", "popup", "x.start",
"x.stop", "y.stop", "distance", "scale.start", "y.start", "x.stop", "y.stop", "distance",
"scale.stop", "scaleSize", "rotation.start", "scale.start", "scale.stop", "scaleSize",
"rotation.stop", "rotationDegree")] "rotation.start", "rotation.stop", "rotationDegree")]
rownames(out) <- NULL rownames(out) <- NULL
out out
} }