mtt/R/close_events.R

308 lines
12 KiB
R

###########################################################################
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup"),
rm_nochange_moves) {
event <- match.arg(event)
switch(event,
"move" = {
actions <- c("Transform start", "Transform stop")
idvar <- c("folder", "eventId", "item", "glossar")
drop <- c("popup", "topic", "path", "event")
ncol <- 18
},
"flipCard" = {
actions <- c("Show Info", "Show Front")
idvar <- c("folder", "path", "eventId", "item", "glossar")
drop <- c("popup", "topic", "event")
ncol <- 19
},
"openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("folder", "eventId", "path", "glossar",
"item", "topic")
drop <- c("popup", "event")
ncol <- 20
},
"openPopup" = {
actions <- c("ShowPopup", "HidePopup")
idvar <- c("folder", "eventId", "path", "glossar",
"item", "popup")
drop <- c("topic", "event")
ncol <- 20
# TODO: Should topic maybe also be filled in for "openPopup"?
}
)
subdata <- subset(data, data$event %in% actions)
subdata <- subdata[order(subdata$item, subdata$popup, subdata$date,
subdata$timeMs), ]
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
num_start <- diff(c(0, which(subdata$event == actions[2])))
if (utils::tail(subdata, 1)$time == "start") {
num_start <- c(num_start, 1)
}
subdata$eventId <- as.character(rep(seq_along(num_start), num_start))
if (event == "move") {
# Remove start events following directly each other for move events
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")],
fromLast = TRUE), ]
} # there are so many that this is too time inefficient when done as below
# Remove stop events following directly each other
id_stop <- which(subdata$event == actions[2])
id_rm_stop <- id_stop[diff(id_stop) == 1]
if (length(id_rm_stop) != 0) {
subdata <- subdata[-(id_rm_stop + 1), ]
}
pbapply::pboptions(style = 3, char = "=")
if (event != "move") {
# Fix eventIds for start events following each other
cat("\n########## Checking unclosed start events. This can take awhile...",
"\n")
subdata_list <- pbapply::pblapply(unique(subdata$eventId),
fix_start_eventIds, df = subdata)
subdata <- dplyr::bind_rows(subdata_list)
}
# Remove eventIds associated with more than one path, usually logging
# errors that cannot be resolved for openTopic or openPopup
if (event %in% c("openTopic", "openPopup")) {
# corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + path,
# subdata) != 0) != 1))
# --> does not run on complete data set
subdata_eid <- split(subdata, ~ eventId)
tmp <- sapply(subdata_eid, function(x) length(stats::xtabs( ~ path, x)))
corrupt_eventIds <- names(tmp[tmp > 1])
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
}
# if (event == "flipCard") {
# subdata$eventId <- subdata$path
# }
subdata_split <- split(subdata, ~ fileId)
cat("\n########## Closing start/stop events. This can take awhile...",
"\n")
subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape,
direction = "wide",
idvar = idvar,
timevar = "time",
drop = drop)
#which(sapply(subdata_split_wide, ncol) != ncol)
# Add start and stop variables that get lost because events span more
# than one log file
data_wide <-
lapply(subdata_split_wide, add_variables, ncol = ncol, event = event) |>
dplyr::bind_rows()
select <- is.na(data_wide$date.start) | is.na(data_wide$date.stop)
correct <- data_wide[!select, ]
correct$identi <- "correct"
corrupt <- data_wide[select, ]
corrupt$identi <- "corrupt"
# Close events spanning more than one log file
cat("\n########## Closing start/stop events spanning several log files...",
"\n")
fixed <-
pbapply::pblapply(unique(corrupt$eventId), close_open_eventIds, df = corrupt) |>
dplyr::bind_rows()
data_wide <- rbind(correct, fixed)
for (d in drop) data_wide[d] <- NA
data_wide$distance <- NA
data_wide$scaleSize <- NA
data_wide$rotationDegree <- NA
data_wide$event <- event
data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
if (event == "move") {
data_wide$distance <- apply(
data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
function(x) stats::dist(matrix(x, 2, 2, byrow = TRUE)))
data_wide$rotationDegree <- data_wide$rotation.stop -
data_wide$rotation.start
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
# Remove moves without any change
if (rm_nochange_moves) {
d1 <- nrow(data_wide)
data_wide <- data_wide[data_wide$distance != 0 &
data_wide$rotationDegree != 0 &
data_wide$scaleSize != 1, ]
d2 <- nrow(data_wide)
cat(paste("\nINFORMATION:", d1 - d2,
"lines containing move events were removed since they did",
"\nnot contain any change"), fill = TRUE)
}
}
out <- data_wide[order(data_wide$fileId.start,
data_wide$date.start,
data_wide$timeMs.start),
c("fileId.start", "fileId.stop", "folder", "event",
"item", "path", "glossar", "date.start",
"date.stop", "timeMs.start", "timeMs.stop",
"duration", "topic", "popup", "x.start",
"y.start", "x.stop", "y.stop", "distance",
"scale.start", "scale.stop", "scaleSize",
"rotation.start", "rotation.stop", "rotationDegree")]
rownames(out) <- NULL
out
}
###########################################################################
# Fix eventIds for start events following each other
fix_start_eventIds <- function(df, eventId) {
dfid <- df[df$eventId == eventId, ]
if (nrow(dfid) > 2) {
dfid.start <- dfid[dfid$time == "start", ]
dfid$eventId[dfid$time == "start"] <-
c(paste(utils::head(dfid.start$eventId, nrow(dfid.start) - 1),
1:(nrow(dfid.start) - 1), sep = ":"),
utils::tail(dfid.start$eventId, 1))
}
dfid
}
###########################################################################
# Add start and stop variables that get lost because events span more
# than one log file
add_variables <- function(data_split_wide, ncol,
event = c("move", "flipCard", "openTopic",
"openPopup")) {
if (ncol(data_split_wide) != ncol) {
if (!any(grepl("start", names(data_split_wide)))) {
data_split_wide$fileId.start <- NA
data_split_wide$date.start <- NA
data_split_wide$timeMs.start <- NA
data_split_wide$x.start <- NA
data_split_wide$y.start <- NA
data_split_wide$scale.start <- NA
data_split_wide$rotation.start <- NA
event <- match.arg(event)
switch(event,
"move" = {
data_split_wide <- data_split_wide[, c("folder", "item",
"glossar", "eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"fileId.stop",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"flipCard" = {
data_split_wide <- data_split_wide[, c("folder", "item",
"glossar", "path",
"eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"fileId.stop",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
},
"openTopic" = {
data_split_wide <- data_split_wide[, c("folder", "item",
"topic",
"glossar", "path",
"eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"fileId.stop",
"date.stop",
"timeMs.stop",
"x.stop", "y.stop",
"scale.stop",
"rotation.stop")]
},
"openPopup" = {
data_split_wide <- data_split_wide[, c("folder", "item",
"popup", "glossar",
"path", "eventId",
"fileId.start",
"date.start",
"timeMs.start",
"x.start", "y.start",
"scale.start",
"rotation.start",
"fileId.stop",
"date.stop",
"timeMs.stop", "x.stop",
"y.stop", "scale.stop",
"rotation.stop")]
}
)
} else if (!any(grepl("stop", names(data_split_wide)))) {
data_split_wide$fileId.stop <- NA
data_split_wide$date.stop <- NA
data_split_wide$timeMs.stop <- NA
data_split_wide$x.stop <- NA
data_split_wide$y.stop <- NA
data_split_wide$scale.stop <- NA
data_split_wide$rotation.stop <- NA
}
}
data_split_wide
}
###########################################################################
# Close events spanning more than one log file
close_open_eventIds <- function(df, eventId) {
dfid <- df[df$eventId == eventId, ]
dfid <- dfid[!is.na(dfid$eventId), ]
dfid <- dfid[order(dfid$fileId.start), ]
if (nrow(dfid) == 2) {
out <- dfid[1, ]
out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
"rotation.stop")] <-
dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop",
"y.stop", "scale.stop", "rotation.stop")]
} else if (nrow(dfid) > 2) {
stop("More than two rows for open eventIds. Something is wrong!")
} else {
out <- dfid
}
out
}