Worked on closing events that span more than one log file

This commit is contained in:
Nora Wickelmaier 2023-10-15 10:55:12 +02:00
parent 046d5b2f08
commit 67f4c70203
2 changed files with 147 additions and 16 deletions

View File

@ -14,9 +14,9 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
},
"flipCard" = {
actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "folder", "trace", "artwork", "glossar")
drop <- c("popup", "topicNumber", "eventId", "event")
ncol <- 17
idvar <- c("fileId", "folder", "trace", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "event")
ncol <- 18
},
"openTopic" = {
@ -34,7 +34,6 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
drop <- c("topicNumber", "event")
ncol <- 19
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
}
)
@ -54,6 +53,10 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
subdata <- subdata[-(id_rm_stop + 1), ]
}
if (event == "flipCard") {
subdata$eventId <- subdata$trace
}
subdata_split <- split(subdata, ~ fileId)
pbapply::pboptions(style = 3, char = "=")
@ -63,20 +66,142 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
idvar = idvar,
timevar = "time",
drop = drop)
# suppressWarnings(
# data_wide <- stats::reshape(subdata, direction = "wide",
# idvar = idvar,
# timevar = "time",
# drop = drop)
# )
# remove entries with only start or stop events since they do not have
# all columns
ids <- which(sapply(subdata_split_wide, ncol) != ncol)
if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
#which(sapply(subdata_split_wide, ncol) != ncol)
# fix log files with *only* start or *only* stop events
add_variables <- function(data_split_wide, ncol,
event = c("move", "flipCard", "openTopic", "openPopup")) {
if (ncol(data_split_wide) != ncol) {
if (!any(grepl("start", names(data_split_wide)))) {
data_split_wide$date.start <- NA
data_split_wide$timeMs.start <- NA
data_split_wide$x.start <- NA
data_split_wide$y.start <- NA
data_split_wide$scale.start <- NA
data_split_wide$rotation.start <- NA
event <- match.arg(event)
switch(event,
"move" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "glossar",
"eventId", "date.start",
"timeMs.start", "x.start",
"y.start", "scale.start",
"rotation.start",
"date.stop", "timeMs.stop",
"x.stop", "y.stop",
"scale.stop",
"rotation.stop")]
},
"flipCard" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "glossar",
"trace", "eventId",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"openTopic" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "topicNumber",
"glossar", "trace",
"eventId", "date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"openPopup" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "popup",
"glossar", "trace",
"eventId", "date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
}
)
} else if (!any(grepl("stop", names(data_split_wide)))) {
data_split_wide$date.stop <- NA
data_split_wide$timeMs.stop <- NA
data_split_wide$x.stop <- NA
data_split_wide$y.stop <- NA
data_split_wide$scale.stop <- NA
data_split_wide$rotation.stop <- NA
}
}
data_split_wide
}
subdata_split_wide <- lapply(subdata_split_wide, add_variables, ncol = ncol,
event = event)
data_wide <- dplyr::bind_rows(subdata_split_wide)
select <- is.na(data_wide$date.start) | is.na(data_wide$date.stop)
correct <- data_wide[!select, ]
correct$identi <- "correct"
corrupt <- data_wide[select, ]
corrupt$identi <- "corrupt"
if (event != "move") {
corrupt_ids <- aggregate(trace ~ eventId, corrupt, function(x) length(unique(x)))
d1 <- nrow(corrupt)
corrupt_ids <- corrupt_ids$eventId[corrupt_ids$trace != 1]
corrupt <- corrupt[!corrupt$eventId %in% corrupt_ids, ]
d2 <- nrow(corrupt)
if (d2 < d1) {
corrupt <- corrupt[!is.na(corrupt$date.start), ]
warning(paste0(d1 - d2, " events spanning two log files have been removed since it could not be resolved how to close them."))
}
}
close_open_eventIds <- function(df, eventId) {
dfid <- df[df$eventId == eventId, ]
dfid <- dfid[!is.na(dfid$eventId), ]
dfid <- dfid[order(dfid$fileId), ]
if (nrow(dfid) == 2) {
out <- dfid[1, ]
out[, c("date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
"rotation.stop")] <-
dfid[2, c("date.stop", "timeMs.stop", "x.stop",
"y.stop", "scale.stop", "rotation.stop")]
} else if (nrow(dfid) > 2) {
stop("More than two rows for open eventIds. Something is wrong!")
} else {
out <- dfid
}
out
}
fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
close_open_eventIds, df = corrupt))
# FIXME: Something is wrong with the traces for `move`, `openTopic`,
# `openPopup` --> I have correct eventIds that have two different traces
# -- that cannot be! (I also have correct different traces, that have the
# same eventId...)
data_wide <- rbind(correct, fixed)
for (d in drop) data_wide[d] <- NA
data_wide$distance <- NA
data_wide$scaleSize <- NA
@ -102,7 +227,13 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide <- move_wide
}
out <- data_wide[, c("fileId", "folder", "event", "artwork", "trace",
# remove durations that span more than one log file
data_wide$duration[data_wide$identi == "corrupt"] <- NA
# TODO: Maybe there is a better solution for this?
out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))),
# remove all NA rows
c("fileId", "folder", "event", "artwork", "trace",
"glossar", "date.start", "date.stop",
"timeMs.start", "timeMs.stop", "duration",
"topicNumber", "popup", "x.start", "y.start",

View File

@ -13,7 +13,7 @@
create_eventlogs <- function(data, xmlpath) {
if (!lubridate::is.POSIXt(data$date)){
cat("########## Convertion variable `date` to POSIXct ##########", "\n")
cat("########## Converting variable `date` to POSIXct ##########", "\n")
data$date <- as.POSIXct(data$date)
}
data$glossar <- ifelse(data$artwork == "glossar", 1, 0)