Fixed closing events that span more than one log file and calculated correct durations

This commit is contained in:
Nora Wickelmaier 2023-10-19 18:17:44 +02:00
parent 67f4c70203
commit d21d52e84c

View File

@ -7,32 +7,32 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
switch(event,
"move" = {
actions <- c("Transform start", "Transform stop")
idvar <- c("fileId", "folder", "eventId", "artwork", "glossar")
idvar <- c("folder", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "trace", "event")
ncol <- 17
ncol <- 18
},
"flipCard" = {
actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "folder", "trace", "eventId", "artwork", "glossar")
idvar <- c("folder", "trace", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "event")
ncol <- 18
ncol <- 19
},
"openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("fileId", "folder", "eventId", "trace", "glossar",
idvar <- c("folder", "eventId", "trace", "glossar",
"artwork", "topicNumber")
drop <- c("popup", "event")
ncol <- 19
ncol <- 20
},
"openPopup" = {
actions <- c("ShowPopup", "HidePopup")
idvar <- c("fileId", "folder", "eventId", "trace", "glossar",
idvar <- c("folder", "eventId", "trace", "glossar",
"artwork", "popup")
drop <- c("topicNumber", "event")
ncol <- 19
ncol <- 20
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
}
)
@ -46,13 +46,20 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
}
subdata$eventId <- rep(seq_along(num_start), num_start)
if (event == "move") {
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
id_stop <- which(subdata$event == actions[2])
id_rm_stop <- id_stop[diff(id_stop) == 1]
subdata <- subdata[-(id_rm_stop + 1), ]
# remove start and stop events following directly each other
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")],
fromLast = TRUE), ]
id_stop <- which(subdata$event == actions[2])
id_rm_stop <- id_stop[diff(id_stop) == 1]
if (length(id_rm_stop) != 0) {
subdata <- subdata[-(id_rm_stop + 1), ]
}
# remove eventIds associated with more than one trace, usually logging
# errors that I cannot resolve
corrupt_eventIds <- names(which(rowSums(xtabs( ~ eventId + trace, subdata) != 0) != 1))
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
if (event == "flipCard") {
subdata$eventId <- subdata$trace
}
@ -75,6 +82,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
if (ncol(data_split_wide) != ncol) {
if (!any(grepl("start", names(data_split_wide)))) {
data_split_wide$fileId.start <- NA
data_split_wide$date.start <- NA
data_split_wide$timeMs.start <- NA
data_split_wide$x.start <- NA
@ -86,21 +94,24 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
switch(event,
"move" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "glossar",
"eventId", "date.start",
"timeMs.start", "x.start",
"y.start", "scale.start",
data_split_wide <- data_split_wide[, c("folder", "artwork",
"glossar", "eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop", "timeMs.stop",
"x.stop", "y.stop",
"scale.stop",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"flipCard" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "glossar",
"trace", "eventId",
data_split_wide <- data_split_wide[, c("folder", "artwork",
"glossar", "trace",
"eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
@ -112,24 +123,28 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
"rotation.stop")]
},
"openTopic" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "topicNumber",
"glossar", "trace",
"eventId", "date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
data_split_wide <- data_split_wide[, c("folder", "artwork",
"topicNumber",
"glossar", "trace",
"eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop",
"x.stop", "y.stop",
"scale.stop",
"rotation.stop")]
},
"openPopup" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "popup",
"glossar", "trace",
"eventId", "date.start",
data_split_wide <- data_split_wide[, c("folder", "artwork",
"popup", "glossar",
"trace", "eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
@ -141,6 +156,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
}
)
} else if (!any(grepl("stop", names(data_split_wide)))) {
data_split_wide$fileId.stop <- NA
data_split_wide$date.stop <- NA
data_split_wide$timeMs.stop <- NA
data_split_wide$x.stop <- NA
@ -163,27 +179,15 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
corrupt <- data_wide[select, ]
corrupt$identi <- "corrupt"
if (event != "move") {
corrupt_ids <- aggregate(trace ~ eventId, corrupt, function(x) length(unique(x)))
d1 <- nrow(corrupt)
corrupt_ids <- corrupt_ids$eventId[corrupt_ids$trace != 1]
corrupt <- corrupt[!corrupt$eventId %in% corrupt_ids, ]
d2 <- nrow(corrupt)
if (d2 < d1) {
corrupt <- corrupt[!is.na(corrupt$date.start), ]
warning(paste0(d1 - d2, " events spanning two log files have been removed since it could not be resolved how to close them."))
}
}
close_open_eventIds <- function(df, eventId) {
dfid <- df[df$eventId == eventId, ]
dfid <- dfid[!is.na(dfid$eventId), ]
dfid <- dfid[order(dfid$fileId), ]
dfid <- dfid[order(dfid$fileId.start), ]
if (nrow(dfid) == 2) {
out <- dfid[1, ]
out[, c("date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
"rotation.stop")] <-
dfid[2, c("date.stop", "timeMs.stop", "x.stop",
dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop",
"y.stop", "scale.stop", "rotation.stop")]
} else if (nrow(dfid) > 2) {
stop("More than two rows for open eventIds. Something is wrong!")
@ -195,10 +199,6 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
close_open_eventIds, df = corrupt))
# FIXME: Something is wrong with the traces for `move`, `openTopic`,
# `openPopup` --> I have correct eventIds that have two different traces
# -- that cannot be! (I also have correct different traces, that have the
# same eventId...)
data_wide <- rbind(correct, fixed)
@ -227,19 +227,37 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide <- move_wide
}
# remove durations that span more than one log file
data_wide$duration[data_wide$identi == "corrupt"] <- NA
# TODO: Maybe there is a better solution for this?
data_wide <- data_wide[order(data_wide$fileId.start,
data_wide$date.start,
data_wide$timeMs.start), ]
# fix durations that span more than one log file
if (event != "move") {
tab <- colSums(xtabs( ~ fileId + trace, subdata) != 0)
number_logfiles <- data.frame(trace = names(tab), nlogfile = tab)
data_wide <- merge(data_wide, number_logfiles, by = "trace", all.x = TRUE)
data_wide$duration[data_wide$identi == "corrupt"] <-
(data_wide$nlogfile[data_wide$identi == "corrupt"] - 1) * 600000 -
data_wide$timeMs.start[data_wide$identi == "corrupt"] +
data_wide$timeMs.stop[data_wide$identi == "corrupt"]
# TODO: This assumes that no log files are skipped
# --> Is this assumption really valid??
} else {
data_wide$duration[data_wide$identi == "corrupt"] <-
600000 -
data_wide$timeMs.start[data_wide$identi == "corrupt"] +
data_wide$timeMs.stop[data_wide$identi == "corrupt"]
} # there should be no movements spanning more than two log files!
out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))),
# remove all NA rows
c("fileId", "folder", "event", "artwork", "trace",
"glossar", "date.start", "date.stop",
"timeMs.start", "timeMs.stop", "duration",
"topicNumber", "popup", "x.start", "y.start",
"x.stop", "y.stop", "distance", "scale.start",
"scale.stop", "scaleSize", "rotation.start",
"rotation.stop", "rotationDegree")]
c("fileId.start", "fileId.stop", "folder", "event",
"artwork", "trace", "glossar", "date.start",
"date.stop", "timeMs.start", "timeMs.stop",
"duration", "topicNumber", "popup", "x.start",
"y.start", "x.stop", "y.stop", "distance",
"scale.start", "scale.stop", "scaleSize",
"rotation.start", "rotation.stop", "rotationDegree")]
rownames(out) <- NULL
out
}