Worked on closing events that span more than one log file

This commit is contained in:
Nora Wickelmaier 2023-10-15 10:55:12 +02:00
parent 046d5b2f08
commit 67f4c70203
2 changed files with 147 additions and 16 deletions

View File

@ -14,9 +14,9 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
}, },
"flipCard" = { "flipCard" = {
actions <- c("Show Info", "Show Front") actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "folder", "trace", "artwork", "glossar") idvar <- c("fileId", "folder", "trace", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "eventId", "event") drop <- c("popup", "topicNumber", "event")
ncol <- 17 ncol <- 18
}, },
"openTopic" = { "openTopic" = {
@ -34,7 +34,6 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
drop <- c("topicNumber", "event") drop <- c("topicNumber", "event")
ncol <- 19 ncol <- 19
# TODO: Should topicNumber maybe also be filled in for "openPopup"? # TODO: Should topicNumber maybe also be filled in for "openPopup"?
} }
) )
@ -54,6 +53,10 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
subdata <- subdata[-(id_rm_stop + 1), ] subdata <- subdata[-(id_rm_stop + 1), ]
} }
if (event == "flipCard") {
subdata$eventId <- subdata$trace
}
subdata_split <- split(subdata, ~ fileId) subdata_split <- split(subdata, ~ fileId)
pbapply::pboptions(style = 3, char = "=") pbapply::pboptions(style = 3, char = "=")
@ -63,20 +66,142 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
idvar = idvar, idvar = idvar,
timevar = "time", timevar = "time",
drop = drop) drop = drop)
# suppressWarnings(
# data_wide <- stats::reshape(subdata, direction = "wide",
# idvar = idvar,
# timevar = "time",
# drop = drop)
# )
# remove entries with only start or stop events since they do not have #which(sapply(subdata_split_wide, ncol) != ncol)
# all columns
ids <- which(sapply(subdata_split_wide, ncol) != ncol) # fix log files with *only* start or *only* stop events
if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids] add_variables <- function(data_split_wide, ncol,
event = c("move", "flipCard", "openTopic", "openPopup")) {
if (ncol(data_split_wide) != ncol) {
if (!any(grepl("start", names(data_split_wide)))) {
data_split_wide$date.start <- NA
data_split_wide$timeMs.start <- NA
data_split_wide$x.start <- NA
data_split_wide$y.start <- NA
data_split_wide$scale.start <- NA
data_split_wide$rotation.start <- NA
event <- match.arg(event)
switch(event,
"move" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "glossar",
"eventId", "date.start",
"timeMs.start", "x.start",
"y.start", "scale.start",
"rotation.start",
"date.stop", "timeMs.stop",
"x.stop", "y.stop",
"scale.stop",
"rotation.stop")]
},
"flipCard" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "glossar",
"trace", "eventId",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"openTopic" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "topicNumber",
"glossar", "trace",
"eventId", "date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"openPopup" = {
data_split_wide <- data_split_wide[, c("fileId", "folder",
"artwork", "popup",
"glossar", "trace",
"eventId", "date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
}
)
} else if (!any(grepl("stop", names(data_split_wide)))) {
data_split_wide$date.stop <- NA
data_split_wide$timeMs.stop <- NA
data_split_wide$x.stop <- NA
data_split_wide$y.stop <- NA
data_split_wide$scale.stop <- NA
data_split_wide$rotation.stop <- NA
}
}
data_split_wide
}
subdata_split_wide <- lapply(subdata_split_wide, add_variables, ncol = ncol,
event = event)
data_wide <- dplyr::bind_rows(subdata_split_wide) data_wide <- dplyr::bind_rows(subdata_split_wide)
select <- is.na(data_wide$date.start) | is.na(data_wide$date.stop)
correct <- data_wide[!select, ]
correct$identi <- "correct"
corrupt <- data_wide[select, ]
corrupt$identi <- "corrupt"
if (event != "move") {
corrupt_ids <- aggregate(trace ~ eventId, corrupt, function(x) length(unique(x)))
d1 <- nrow(corrupt)
corrupt_ids <- corrupt_ids$eventId[corrupt_ids$trace != 1]
corrupt <- corrupt[!corrupt$eventId %in% corrupt_ids, ]
d2 <- nrow(corrupt)
if (d2 < d1) {
corrupt <- corrupt[!is.na(corrupt$date.start), ]
warning(paste0(d1 - d2, " events spanning two log files have been removed since it could not be resolved how to close them."))
}
}
close_open_eventIds <- function(df, eventId) {
dfid <- df[df$eventId == eventId, ]
dfid <- dfid[!is.na(dfid$eventId), ]
dfid <- dfid[order(dfid$fileId), ]
if (nrow(dfid) == 2) {
out <- dfid[1, ]
out[, c("date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
"rotation.stop")] <-
dfid[2, c("date.stop", "timeMs.stop", "x.stop",
"y.stop", "scale.stop", "rotation.stop")]
} else if (nrow(dfid) > 2) {
stop("More than two rows for open eventIds. Something is wrong!")
} else {
out <- dfid
}
out
}
fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
close_open_eventIds, df = corrupt))
# FIXME: Something is wrong with the traces for `move`, `openTopic`,
# `openPopup` --> I have correct eventIds that have two different traces
# -- that cannot be! (I also have correct different traces, that have the
# same eventId...)
data_wide <- rbind(correct, fixed)
for (d in drop) data_wide[d] <- NA for (d in drop) data_wide[d] <- NA
data_wide$distance <- NA data_wide$distance <- NA
data_wide$scaleSize <- NA data_wide$scaleSize <- NA
@ -102,7 +227,13 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide <- move_wide data_wide <- move_wide
} }
out <- data_wide[, c("fileId", "folder", "event", "artwork", "trace", # remove durations that span more than one log file
data_wide$duration[data_wide$identi == "corrupt"] <- NA
# TODO: Maybe there is a better solution for this?
out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))),
# remove all NA rows
c("fileId", "folder", "event", "artwork", "trace",
"glossar", "date.start", "date.stop", "glossar", "date.start", "date.stop",
"timeMs.start", "timeMs.stop", "duration", "timeMs.start", "timeMs.stop", "duration",
"topicNumber", "popup", "x.start", "y.start", "topicNumber", "popup", "x.start", "y.start",

View File

@ -13,7 +13,7 @@
create_eventlogs <- function(data, xmlpath) { create_eventlogs <- function(data, xmlpath) {
if (!lubridate::is.POSIXt(data$date)){ if (!lubridate::is.POSIXt(data$date)){
cat("########## Convertion variable `date` to POSIXct ##########", "\n") cat("########## Converting variable `date` to POSIXct ##########", "\n")
data$date <- as.POSIXct(data$date) data$date <- as.POSIXct(data$date)
} }
data$glossar <- ifelse(data$artwork == "glossar", 1, 0) data$glossar <- ifelse(data$artwork == "glossar", 1, 0)