From daadb7a691a4ba1d2f2718314ae4852eb09b04fc Mon Sep 17 00:00:00 2001 From: nwickel Date: Sun, 22 Oct 2023 15:13:11 +0200 Subject: [PATCH] Debugging; added rm_nochange_moves as argument --- R/add_trace.R | 2 +- R/close_events.R | 321 +++++++++++++++++++++++----------------- R/create_eventlogs.R | 46 +++--- R/extract_topics.R | 2 +- man/create_eventlogs.Rd | 9 +- 5 files changed, 223 insertions(+), 157 deletions(-) diff --git a/R/add_trace.R b/R/add_trace.R index 78937b7..304ab77 100644 --- a/R/add_trace.R +++ b/R/add_trace.R @@ -135,7 +135,7 @@ add_trace_moves <- function(data) { ) out <- dplyr::bind_rows(subdata_trace) - out <- out[order(out$fileId, out$date.start, out$timeMs.start), ] + out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] rownames(out) <- NULL # Make trace a consecutive number diff --git a/R/close_events.R b/R/close_events.R index cee2e3b..c5d3644 100644 --- a/R/close_events.R +++ b/R/close_events.R @@ -1,6 +1,7 @@ ########################################################################### -close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) { +close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup"), + rm_nochange_moves) { event <- match.arg(event) @@ -38,36 +39,53 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP ) subdata <- subset(data, data$event %in% actions) - subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ] + subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, + subdata$timeMs), ] subdata$time <- ifelse(subdata$event == actions[1], "start", "stop") num_start <- diff(c(0, which(subdata$event == actions[2]))) if (utils::tail(subdata, 1)$time == "start") { num_start <- c(num_start, 1) } - subdata$eventId <- rep(seq_along(num_start), num_start) + subdata$eventId <- as.character(rep(seq_along(num_start), num_start)) - # remove start and stop events following directly each other - subdata <- subdata[!duplicated(subdata[, c("event", "eventId")], - fromLast = TRUE), ] + if (event == "move") { + # Remove start events following directly each other for move events + subdata <- subdata[!duplicated(subdata[, c("event", "eventId")], + fromLast = TRUE), ] + } # there are so many that this is too time inefficient when done as below + + # Remove stop events following directly each other id_stop <- which(subdata$event == actions[2]) id_rm_stop <- id_stop[diff(id_stop) == 1] if (length(id_rm_stop) != 0) { subdata <- subdata[-(id_rm_stop + 1), ] } - # remove eventIds associated with more than one trace, usually logging - # errors that I cannot resolve - corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace, subdata) != 0) != 1)) + pbapply::pboptions(style = 3, char = "=") + + if (event != "move") { + # Fix eventIds for start events following each other + cat("\n########## Checking unclosed start events. This can take awhile...", + "\n") + subdata_list <- pbapply::pblapply(unique(subdata$eventId), + fix_start_eventIds, df = subdata) + subdata <- dplyr::bind_rows(subdata_list) + } + + # Remove eventIds associated with more than one trace, usually logging + # errors that cannot be resolved + corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace, + subdata) != 0) != 1)) subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ] - if (event == "flipCard") { - subdata$eventId <- subdata$trace - } + # if (event == "flipCard") { + # subdata$eventId <- subdata$trace + # } subdata_split <- split(subdata, ~ fileId) - pbapply::pboptions(style = 3, char = "=") - + cat("\n########## Closing start/stop events. This can take awhile...", + "\n") subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape, direction = "wide", idvar = idvar, @@ -76,98 +94,8 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP #which(sapply(subdata_split_wide, ncol) != ncol) - # fix log files with *only* start or *only* stop events - add_variables <- function(data_split_wide, ncol, - event = c("move", "flipCard", "openTopic", "openPopup")) { - - if (ncol(data_split_wide) != ncol) { - if (!any(grepl("start", names(data_split_wide)))) { - data_split_wide$fileId.start <- NA - data_split_wide$date.start <- NA - data_split_wide$timeMs.start <- NA - data_split_wide$x.start <- NA - data_split_wide$y.start <- NA - data_split_wide$scale.start <- NA - data_split_wide$rotation.start <- NA - - event <- match.arg(event) - - switch(event, - "move" = { - data_split_wide <- data_split_wide[, c("folder", "artwork", - "glossar", "eventId", - "fileId.start", - "date.start", - "timeMs.start", - "x.start", "y.start", - "scale.start", - "rotation.start", - "date.stop", - "timeMs.stop", "x.stop", - "y.stop", "scale.stop", - "rotation.stop")] - }, - "flipCard" = { - data_split_wide <- data_split_wide[, c("folder", "artwork", - "glossar", "trace", - "eventId", - "fileId.start", - "date.start", - "timeMs.start", - "x.start", "y.start", - "scale.start", - "rotation.start", - "date.stop", - "timeMs.stop", "x.stop", - "y.stop", "scale.stop", - "rotation.stop")] - }, - "openTopic" = { - data_split_wide <- data_split_wide[, c("folder", "artwork", - "topicNumber", - "glossar", "trace", - "eventId", - "fileId.start", - "date.start", - "timeMs.start", - "x.start", "y.start", - "scale.start", - "rotation.start", - "date.stop", - "timeMs.stop", - "x.stop", "y.stop", - "scale.stop", - "rotation.stop")] - }, - "openPopup" = { - data_split_wide <- data_split_wide[, c("folder", "artwork", - "popup", "glossar", - "trace", "eventId", - "fileId.start", - "date.start", - "timeMs.start", - "x.start", "y.start", - "scale.start", - "rotation.start", - "date.stop", - "timeMs.stop", "x.stop", - "y.stop", "scale.stop", - "rotation.stop")] - } - ) - } else if (!any(grepl("stop", names(data_split_wide)))) { - data_split_wide$fileId.stop <- NA - data_split_wide$date.stop <- NA - data_split_wide$timeMs.stop <- NA - data_split_wide$x.stop <- NA - data_split_wide$y.stop <- NA - data_split_wide$scale.stop <- NA - data_split_wide$rotation.stop <- NA - } - } - data_split_wide - } - + # Add start and stop variables that get lost because events span more + # than one log file subdata_split_wide <- lapply(subdata_split_wide, add_variables, ncol = ncol, event = event) @@ -179,24 +107,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP corrupt <- data_wide[select, ] corrupt$identi <- "corrupt" - close_open_eventIds <- function(df, eventId) { - dfid <- df[df$eventId == eventId, ] - dfid <- dfid[!is.na(dfid$eventId), ] - dfid <- dfid[order(dfid$fileId.start), ] - if (nrow(dfid) == 2) { - out <- dfid[1, ] - out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop", - "rotation.stop")] <- - dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", - "y.stop", "scale.stop", "rotation.stop")] - } else if (nrow(dfid) > 2) { - stop("More than two rows for open eventIds. Something is wrong!") - } else { - out <- dfid - } - out - } - + # Close events spanning more than one log file fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId), close_open_eventIds, df = corrupt)) @@ -217,21 +128,26 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP data_wide$rotationDegree <- data_wide$rotation.stop - data_wide$rotation.start data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start - # remove moves without any change - move_wide <- data_wide[data_wide$distance != 0 & - data_wide$rotationDegree != 0 & - data_wide$scaleSize != 1, ] - cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide), - "lines containing move events were removed since they did", - "\nnot contain any change"), fill = TRUE) - data_wide <- move_wide + + # Remove moves without any change + if (rm_nochange_moves) { + d1 <- nrow(data_wide) + data_wide <- data_wide[data_wide$distance != 0 & + data_wide$rotationDegree != 0 & + data_wide$scaleSize != 1, ] + d2 <- nrow(data_wide) + cat(paste("INFORMATION:", d1 - d2, + "lines containing move events were removed since they did", + "\nnot contain any change"), fill = TRUE) + } + data_wide } data_wide <- data_wide[order(data_wide$fileId.start, data_wide$date.start, data_wide$timeMs.start), ] - # fix durations that span more than one log file + # Fix durations that span more than one log file if (event != "move") { tab <- colSums(stats::xtabs( ~ fileId + trace, subdata) != 0) number_logfiles <- data.frame(trace = names(tab), nlogfile = tab) @@ -262,3 +178,142 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP out } +########################################################################### + +# Fix eventIds for start events following each other + +fix_start_eventIds <- function(df, eventId) { + dfid <- df[df$eventId == eventId, ] + if (nrow(dfid) > 2) { + dfid.start <- dfid[dfid$time == "start", ] + dfid$eventId[dfid$time == "start"] <- + c(paste(utils::head(dfid.start$eventId, nrow(dfid.start) - 1), + 1:(nrow(dfid.start) - 1), sep = ":"), + utils::tail(dfid.start$eventId, 1)) + } + dfid +} + +########################################################################### + +# Add start and stop variables that get lost because events span more +# than one log file + +add_variables <- function(data_split_wide, ncol, + event = c("move", "flipCard", "openTopic", + "openPopup")) { + + if (ncol(data_split_wide) != ncol) { + if (!any(grepl("start", names(data_split_wide)))) { + data_split_wide$fileId.start <- NA + data_split_wide$date.start <- NA + data_split_wide$timeMs.start <- NA + data_split_wide$x.start <- NA + data_split_wide$y.start <- NA + data_split_wide$scale.start <- NA + data_split_wide$rotation.start <- NA + + event <- match.arg(event) + + switch(event, + "move" = { + data_split_wide <- data_split_wide[, c("folder", "artwork", + "glossar", "eventId", + "fileId.start", + "date.start", + "timeMs.start", + "x.start", "y.start", + "scale.start", + "rotation.start", + "fileId.stop", + "date.stop", + "timeMs.stop", "x.stop", + "y.stop", "scale.stop", + "rotation.stop")] + }, + "flipCard" = { + data_split_wide <- data_split_wide[, c("folder", "artwork", + "glossar", "trace", + "eventId", + "fileId.start", + "date.start", + "timeMs.start", + "x.start", "y.start", + "scale.start", + "rotation.start", + "fileId.stop", + "date.stop", + "timeMs.stop", "x.stop", + "y.stop", "scale.stop", + "rotation.stop")] + }, + "openTopic" = { + data_split_wide <- data_split_wide[, c("folder", "artwork", + "topicNumber", + "glossar", "trace", + "eventId", + "fileId.start", + "date.start", + "timeMs.start", + "x.start", "y.start", + "scale.start", + "rotation.start", + "fileId.stop", + "date.stop", + "timeMs.stop", + "x.stop", "y.stop", + "scale.stop", + "rotation.stop")] + }, + "openPopup" = { + data_split_wide <- data_split_wide[, c("folder", "artwork", + "popup", "glossar", + "trace", "eventId", + "fileId.start", + "date.start", + "timeMs.start", + "x.start", "y.start", + "scale.start", + "rotation.start", + "fileId.stop", + "date.stop", + "timeMs.stop", "x.stop", + "y.stop", "scale.stop", + "rotation.stop")] + } + ) + } else if (!any(grepl("stop", names(data_split_wide)))) { + data_split_wide$fileId.stop <- NA + data_split_wide$date.stop <- NA + data_split_wide$timeMs.stop <- NA + data_split_wide$x.stop <- NA + data_split_wide$y.stop <- NA + data_split_wide$scale.stop <- NA + data_split_wide$rotation.stop <- NA + } + } + data_split_wide +} + +########################################################################### + +# Close events spanning more than one log file + +close_open_eventIds <- function(df, eventId) { + dfid <- df[df$eventId == eventId, ] + dfid <- dfid[!is.na(dfid$eventId), ] + dfid <- dfid[order(dfid$fileId.start), ] + if (nrow(dfid) == 2) { + out <- dfid[1, ] + out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop", + "rotation.stop")] <- + dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", + "y.stop", "scale.stop", "rotation.stop")] + } else if (nrow(dfid) > 2) { + stop("More than two rows for open eventIds. Something is wrong!") + } else { + out <- dfid + } + out +} + diff --git a/R/create_eventlogs.R b/R/create_eventlogs.R index 055971e..5eb4922 100644 --- a/R/create_eventlogs.R +++ b/R/create_eventlogs.R @@ -6,11 +6,16 @@ #' @param data Data frame of raw log files created with `parse_logfiles()`. #' See `?parse_logfiles` for more details. #' @param xmlpath Path to folder where XML definitions of artworks live. +#' @param case_cutoff Number in seconds how long time interval between +#' different cases should be. +#' @param rm_nochange_moves Logical. Should move events that record no +#' change, meaning distance and rotationDegree are 0 and scaleSize is 1, be +#' removed. Default is TRUE. #' @return Data frame. #' @export #' @examples #' # tbd -create_eventlogs <- function(data, xmlpath) { +create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves = TRUE) { if (!lubridate::is.POSIXt(data$date)){ cat("########## Converting variable `date` to POSIXct ##########", "\n") @@ -40,47 +45,46 @@ create_eventlogs <- function(data, xmlpath) { # Close events cat("\n\n########## Closing events... ##########", "\n") - c1 <- close_events(dat1, "move") + c1 <- close_events(dat1, "move", rm_nochange_moves = rm_nochange_moves) cat("## --> move events closed.", "\n") - c2 <- close_events(dat1, "flipCard") + c2 <- close_events(dat1, "flipCard", rm_nochange_moves = rm_nochange_moves) cat("## --> flipCard events closed.", "\n") - c3 <- close_events(dat1, "openTopic") + c3 <- close_events(dat1, "openTopic", rm_nochange_moves = rm_nochange_moves) cat("## --> openTopic events closed.", "\n") - c4 <- close_events(dat1, "openPopup") + c4 <- close_events(dat1, "openPopup", rm_nochange_moves = rm_nochange_moves) cat("## --> openPopup events closed.", "\n") dat2 <- rbind(c1, c2, c3, c4) - dat2 <- dat2[order(dat2$fileId, dat2$date.start, dat2$timeMs.start), ] + dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] # Remove all events that do not have a `date.start` - d1 <- nrow(dat2) - dat2 <- dat2[!is.na(dat2$date.start), ] - d2 <- nrow(dat2) - if(d1 > d2) { - warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n")) - } + # d1 <- nrow(dat2) + # dat2 <- dat2[!is.na(dat2$date.start), ] + # d2 <- nrow(dat2) + # if(d1 > d2) { + # warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n")) + # } - rownames(dat2) <- NULL + # rownames(dat2) <- NULL # Add case variable ###################################################### cat("\n########## Adding case and eventId variables... ##########", "\n\n") - dat3 <- add_case(dat2) + dat3 <- add_case(dat2, cutoff = case_cutoff) # Add event ID ########################################################### dat3$eventId <- seq_len(nrow(dat3)) - dat3 <- dat3[, c("fileId", "folder", "eventId", "case", - "trace", "glossar", "event", "artwork", + dat3 <- dat3[, c("folder", "eventId", "case", "trace", "glossar", + "event", "artwork", "fileId.start", "fileId.stop", "date.start", "date.stop", "timeMs.start", "timeMs.stop", "duration", "topicNumber", "popup", - "x.start", "y.start", "x.stop", "y.stop", - "distance", "scale.start", "scale.stop", - "scaleSize", "rotation.start", "rotation.stop", - "rotationDegree")] + "x.start", "y.start", "x.stop", "y.stop", "distance", + "scale.start", "scale.stop", "scaleSize", + "rotation.start", "rotation.stop", "rotationDegree")] # Add trace for move events ############################################## cat("\n\n########## Adding trace variable for move events... ##########", "\n") dat4 <- add_trace_moves(dat3) + dat4 } - diff --git a/R/extract_topics.R b/R/extract_topics.R index 7056ebf..05fa116 100644 --- a/R/extract_topics.R +++ b/R/extract_topics.R @@ -85,7 +85,7 @@ add_topic <- function(data, topics) { #out <- do.call(rbind, dat_topic) out <- dplyr::bind_rows(dat_topic) out$topicIndex <- as.numeric(out$topicIndex) - out <- out[order(out$fileId, out$date.start, out$timeMs.start), ] + out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] rownames(out) <- NULL out } diff --git a/man/create_eventlogs.Rd b/man/create_eventlogs.Rd index 7bd1f55..4fb935c 100644 --- a/man/create_eventlogs.Rd +++ b/man/create_eventlogs.Rd @@ -4,13 +4,20 @@ \alias{create_eventlogs} \title{Creating log events from raw log files.} \usage{ -create_eventlogs(data, xmlpath) +create_eventlogs(data, xmlpath, case_cutoff = 20, rm_nochange_moves = TRUE) } \arguments{ \item{data}{Data frame of raw log files created with \code{parse_logfiles()}. See \code{?parse_logfiles} for more details.} \item{xmlpath}{Path to folder where XML definitions of artworks live.} + +\item{case_cutoff}{Number in seconds how long time interval between +different cases should be.} + +\item{rm_nochange_moves}{Logical. Should move events that record no +change, meaning distance and rotationDegree are 0 and scaleSize is 1, be +removed. Default is TRUE.} } \value{ Data frame.