From d21d52e84c6a4f3c923e2e3425ae49a5c0dfb7d4 Mon Sep 17 00:00:00 2001 From: nwickel Date: Thu, 19 Oct 2023 18:17:44 +0200 Subject: [PATCH] Fixed closing events that span more than one log file and calculated correct durations --- R/close_events.R | 156 ++++++++++++++++++++++++++--------------------- 1 file changed, 87 insertions(+), 69 deletions(-) diff --git a/R/close_events.R b/R/close_events.R index 3fff56a..a414b0b 100644 --- a/R/close_events.R +++ b/R/close_events.R @@ -7,32 +7,32 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP switch(event, "move" = { actions <- c("Transform start", "Transform stop") - idvar <- c("fileId", "folder", "eventId", "artwork", "glossar") + idvar <- c("folder", "eventId", "artwork", "glossar") drop <- c("popup", "topicNumber", "trace", "event") - ncol <- 17 + ncol <- 18 }, "flipCard" = { actions <- c("Show Info", "Show Front") - idvar <- c("fileId", "folder", "trace", "eventId", "artwork", "glossar") + idvar <- c("folder", "trace", "eventId", "artwork", "glossar") drop <- c("popup", "topicNumber", "event") - ncol <- 18 + ncol <- 19 }, "openTopic" = { actions <- c("Artwork/OpenCard", "Artwork/CloseCard") - idvar <- c("fileId", "folder", "eventId", "trace", "glossar", + idvar <- c("folder", "eventId", "trace", "glossar", "artwork", "topicNumber") drop <- c("popup", "event") - ncol <- 19 + ncol <- 20 }, "openPopup" = { actions <- c("ShowPopup", "HidePopup") - idvar <- c("fileId", "folder", "eventId", "trace", "glossar", + idvar <- c("folder", "eventId", "trace", "glossar", "artwork", "popup") drop <- c("topicNumber", "event") - ncol <- 19 + ncol <- 20 # TODO: Should topicNumber maybe also be filled in for "openPopup"? } ) @@ -46,13 +46,20 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP } subdata$eventId <- rep(seq_along(num_start), num_start) - if (event == "move") { - subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ] - id_stop <- which(subdata$event == actions[2]) - id_rm_stop <- id_stop[diff(id_stop) == 1] - subdata <- subdata[-(id_rm_stop + 1), ] + # remove start and stop events following directly each other + subdata <- subdata[!duplicated(subdata[, c("event", "eventId")], + fromLast = TRUE), ] + id_stop <- which(subdata$event == actions[2]) + id_rm_stop <- id_stop[diff(id_stop) == 1] + if (length(id_rm_stop) != 0) { + subdata <- subdata[-(id_rm_stop + 1), ] } + # remove eventIds associated with more than one trace, usually logging + # errors that I cannot resolve + corrupt_eventIds <- names(which(rowSums(xtabs( ~ eventId + trace, subdata) != 0) != 1)) + subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ] + if (event == "flipCard") { subdata$eventId <- subdata$trace } @@ -75,6 +82,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP if (ncol(data_split_wide) != ncol) { if (!any(grepl("start", names(data_split_wide)))) { + data_split_wide$fileId.start <- NA data_split_wide$date.start <- NA data_split_wide$timeMs.start <- NA data_split_wide$x.start <- NA @@ -86,21 +94,24 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP switch(event, "move" = { - data_split_wide <- data_split_wide[, c("fileId", "folder", - "artwork", "glossar", - "eventId", "date.start", - "timeMs.start", "x.start", - "y.start", "scale.start", + data_split_wide <- data_split_wide[, c("folder", "artwork", + "glossar", "eventId", + "fileId.start", + "date.start", + "timeMs.start", + "x.start", "y.start", + "scale.start", "rotation.start", - "date.stop", "timeMs.stop", - "x.stop", "y.stop", - "scale.stop", + "date.stop", + "timeMs.stop", "x.stop", + "y.stop", "scale.stop", "rotation.stop")] }, "flipCard" = { - data_split_wide <- data_split_wide[, c("fileId", "folder", - "artwork", "glossar", - "trace", "eventId", + data_split_wide <- data_split_wide[, c("folder", "artwork", + "glossar", "trace", + "eventId", + "fileId.start", "date.start", "timeMs.start", "x.start", "y.start", @@ -112,24 +123,28 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP "rotation.stop")] }, "openTopic" = { - data_split_wide <- data_split_wide[, c("fileId", "folder", - "artwork", "topicNumber", - "glossar", "trace", - "eventId", "date.start", - "timeMs.start", - "x.start", "y.start", - "scale.start", - "rotation.start", - "date.stop", - "timeMs.stop", "x.stop", - "y.stop", "scale.stop", - "rotation.stop")] + data_split_wide <- data_split_wide[, c("folder", "artwork", + "topicNumber", + "glossar", "trace", + "eventId", + "fileId.start", + "date.start", + "timeMs.start", + "x.start", "y.start", + "scale.start", + "rotation.start", + "date.stop", + "timeMs.stop", + "x.stop", "y.stop", + "scale.stop", + "rotation.stop")] }, "openPopup" = { - data_split_wide <- data_split_wide[, c("fileId", "folder", - "artwork", "popup", - "glossar", "trace", - "eventId", "date.start", + data_split_wide <- data_split_wide[, c("folder", "artwork", + "popup", "glossar", + "trace", "eventId", + "fileId.start", + "date.start", "timeMs.start", "x.start", "y.start", "scale.start", @@ -141,6 +156,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP } ) } else if (!any(grepl("stop", names(data_split_wide)))) { + data_split_wide$fileId.stop <- NA data_split_wide$date.stop <- NA data_split_wide$timeMs.stop <- NA data_split_wide$x.stop <- NA @@ -163,27 +179,15 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP corrupt <- data_wide[select, ] corrupt$identi <- "corrupt" - if (event != "move") { - corrupt_ids <- aggregate(trace ~ eventId, corrupt, function(x) length(unique(x))) - d1 <- nrow(corrupt) - corrupt_ids <- corrupt_ids$eventId[corrupt_ids$trace != 1] - corrupt <- corrupt[!corrupt$eventId %in% corrupt_ids, ] - d2 <- nrow(corrupt) - if (d2 < d1) { - corrupt <- corrupt[!is.na(corrupt$date.start), ] - warning(paste0(d1 - d2, " events spanning two log files have been removed since it could not be resolved how to close them.")) - } - } - close_open_eventIds <- function(df, eventId) { dfid <- df[df$eventId == eventId, ] dfid <- dfid[!is.na(dfid$eventId), ] - dfid <- dfid[order(dfid$fileId), ] + dfid <- dfid[order(dfid$fileId.start), ] if (nrow(dfid) == 2) { out <- dfid[1, ] - out[, c("date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop", + out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop", "rotation.stop")] <- - dfid[2, c("date.stop", "timeMs.stop", "x.stop", + dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop", "rotation.stop")] } else if (nrow(dfid) > 2) { stop("More than two rows for open eventIds. Something is wrong!") @@ -195,10 +199,6 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId), close_open_eventIds, df = corrupt)) -# FIXME: Something is wrong with the traces for `move`, `openTopic`, -# `openPopup` --> I have correct eventIds that have two different traces -# -- that cannot be! (I also have correct different traces, that have the -# same eventId...) data_wide <- rbind(correct, fixed) @@ -227,19 +227,37 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP data_wide <- move_wide } - # remove durations that span more than one log file - data_wide$duration[data_wide$identi == "corrupt"] <- NA -# TODO: Maybe there is a better solution for this? + data_wide <- data_wide[order(data_wide$fileId.start, + data_wide$date.start, + data_wide$timeMs.start), ] + + # fix durations that span more than one log file + if (event != "move") { + tab <- colSums(xtabs( ~ fileId + trace, subdata) != 0) + number_logfiles <- data.frame(trace = names(tab), nlogfile = tab) + data_wide <- merge(data_wide, number_logfiles, by = "trace", all.x = TRUE) + data_wide$duration[data_wide$identi == "corrupt"] <- + (data_wide$nlogfile[data_wide$identi == "corrupt"] - 1) * 600000 - + data_wide$timeMs.start[data_wide$identi == "corrupt"] + + data_wide$timeMs.stop[data_wide$identi == "corrupt"] +# TODO: This assumes that no log files are skipped +# --> Is this assumption really valid?? + } else { + data_wide$duration[data_wide$identi == "corrupt"] <- + 600000 - + data_wide$timeMs.start[data_wide$identi == "corrupt"] + + data_wide$timeMs.stop[data_wide$identi == "corrupt"] + } # there should be no movements spanning more than two log files! out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))), # remove all NA rows - c("fileId", "folder", "event", "artwork", "trace", - "glossar", "date.start", "date.stop", - "timeMs.start", "timeMs.stop", "duration", - "topicNumber", "popup", "x.start", "y.start", - "x.stop", "y.stop", "distance", "scale.start", - "scale.stop", "scaleSize", "rotation.start", - "rotation.stop", "rotationDegree")] + c("fileId.start", "fileId.stop", "folder", "event", + "artwork", "trace", "glossar", "date.start", + "date.stop", "timeMs.start", "timeMs.stop", + "duration", "topicNumber", "popup", "x.start", + "y.start", "x.stop", "y.stop", "distance", + "scale.start", "scale.stop", "scaleSize", + "rotation.start", "rotation.stop", "rotationDegree")] rownames(out) <- NULL out }