From 3786ae4b42ff31aaecd35e0b1bac9fee9897661a Mon Sep 17 00:00:00 2001 From: nwickel Date: Fri, 27 Oct 2023 15:10:01 +0200 Subject: [PATCH] Debugging and refactoring; glossar is an argument now; create glossar dictionary was moved to add_trace_glossar --- R/add_trace.R | 26 ++++++++-------- R/create_eventlogs.R | 73 ++++++++++++++++++++++---------------------- 2 files changed, 50 insertions(+), 49 deletions(-) diff --git a/R/add_trace.R b/R/add_trace.R index 304ab77..9e011f8 100644 --- a/R/add_trace.R +++ b/R/add_trace.R @@ -33,20 +33,18 @@ add_trace_artworks <- function(subdata) { } ########################################################################### -add_trace_glossar <- function(subdata, glossar_dict) { +add_trace_glossar <- function(subdata, xmlpath) { pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, style = 3) - # Fix glossar entries (find corresponding artworks and fill in trace) - glossar_files <- unique(subdata[subdata$artwork == "glossar", "popup"]) - - # load lookup table for artworks and glossar files - lut <- glossar_dict[names(glossar_dict) %in% glossar_files] - - inside <- glossar_files[glossar_files %in% - names(lut[sapply(lut, length) == 1])] - single_art <- unlist(lut[names(lut) %in% inside]) + cat("\n\n########## Creating glossar dictionary ##########", "\n") + artworks <- unique(subdata$artwork[subdata$artwork != "glossar"]) + glossar_files <- unique(dat[dat$artwork == "glossar", "popup"]) + lut <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath) + inside <- glossar_files[glossar_files %in% + names(lut[sapply(lut, length) == 1])] + single_art <- unlist(lut[names(lut) %in% inside]) m <- 1 @@ -90,7 +88,7 @@ add_trace_glossar <- function(subdata, glossar_dict) { } ########################################################################### -add_trace <- function(data, glossar_dict) { +add_trace <- function(data, xmlpath, glossar) { data$trace <- NA subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ] @@ -98,8 +96,10 @@ add_trace <- function(data, glossar_dict) { subdata2 <- add_trace_artworks(subdata2) - if ("glossar" %in% unique(subdata2$artwork)) { - subdata2 <- add_trace_glossar(subdata2, glossar_dict) + if (glossar) { + subdata2 <- add_trace_glossar(subdata2, xmlpath) + } else { + subdata2 <- subdata2[subdata2$glossar != 1, ] } out <- rbind(subdata1, subdata2) diff --git a/R/create_eventlogs.R b/R/create_eventlogs.R index d7002dd..032144a 100644 --- a/R/create_eventlogs.R +++ b/R/create_eventlogs.R @@ -11,42 +11,33 @@ #' @param rm_nochange_moves Logical. Should move events that record no #' change, meaning distance and rotationDegree are 0 and scaleSize is 1, be #' removed. Default is TRUE. -#' @param glossar Logical indicating of glossar folder is present and if it +#' @param glossar Logical indicating if glossar folder is present and if it #' should be taken into account when preprocessing raw log files. Default -#' is FALSE. +#' is TRUE. #' @return Data frame. #' @export #' @examples #' # tbd -create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves = TRUE, - glossar = FALSE) { +create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, + rm_nochange_moves = TRUE, glossar = FALSE) { if (!lubridate::is.POSIXt(data$date)){ cat("########## Converting variable `date` to POSIXct ##########", "\n") data$date <- as.POSIXct(data$date) } + if (!glossar & is.null(xmlpath)) { + stop("xmlpath is not specified and glossar = TRUE. Please enter the path to folder where XML definitions of artworks live.") + } + # Remove irrelevant events dat <- subset(data, !(data$event %in% c("Start Application", - "Show Application"))) - - artworks <- unique(stats::na.omit(dat$artwork)) - - # Create glossar dictionary ############################################## - if (glossar) { - dat$glossar <- ifelse(dat$artwork == "glossar", 1, 0) - - cat("\n########## Creating glossar dictionary ##########", "\n") - artworks <- artworks[artworks != "glossar"] - glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup) - glossar_dict <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath) - } else { - glossar_dict <- NULL - } + "Show Application"))) + dat$glossar <- ifelse(dat$artwork == "glossar", 1, 0) # Add trace variable ##################################################### cat("\n########## Adding trace variable... ##########", "\n") - dat1 <- add_trace(dat, glossar_dict) + dat1 <- add_trace(dat, xmlpath = xmlpath, glossar = glossar) # Close events cat("\n\n########## Closing events... ##########", "\n") @@ -62,16 +53,6 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] - # Remove all events that do not have a `date.start` - # d1 <- nrow(dat2) - # dat2 <- dat2[!is.na(dat2$date.start), ] - # d2 <- nrow(dat2) - # if(d1 > d2) { - # warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n")) - # } - - # rownames(dat2) <- NULL - # Add case variable ###################################################### cat("\n########## Adding case and eventId variables... ##########", "\n\n") dat3 <- add_case(dat2, cutoff = case_cutoff) @@ -100,11 +81,13 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves dat4$fIdDiff <- dat4$fIdNum.stop - dat4$fIdNum.start - # Remove moves where stop is before start - dat4 <- dat4[which(dat4$fIdDiff > 0), ] + # Remove moves where stop is before start (3) + dat4 <- dat4[dat4$fIdDiff >= 0 | is.na(dat4$fIdDiff), ] - dat4$duration[dat4$fIdDiff > 0] <- dat4$fIdDiff * 600000 - - dat4$timeMs.start + dat4$timeMs.stop + dat4$duration[which(dat4$fIdDiff > 0)] <- + dat4$fIdDiff[which(dat4$fIdDiff > 0)] * 600000 - + dat4$timeMs.start[which(dat4$fIdDiff > 0)] + + dat4$timeMs.stop[which(dat4$fIdDiff > 0)] # Remove fragmented traces ############################################### tab <- stats::xtabs( ~ trace + event, dat4) @@ -122,8 +105,26 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves } dat5 <- dat4[!dat4$trace %in% fragments, ] - if (!glossar) dat5$glossar <- NULL + if (glossar) { + # Check for wrong order of events: flipCard -> openPopup -> openTopic + dat5_split <- split(dat5[dat5$event != "move", ], ~ trace) + event_list <- lapply(dat5_split, function(x) unique(x$event)) - dat5 + ids <- sapply(event_list, length) == 3 + event_dat <- as.data.frame(do.call(rbind, event_list[ids])) + names(event_dat) <- c("flipCard", "openTopic", "openPopup") + + frag_ids <- which(event_dat$openTopic == "openPopup") + dat6 <- dat5[dat5$trace %in% rownames(event_dat)[frag_ids], ] + dat6b <- dat6[!dat6$glossar == 1, ] + dat7 <- rbind(dat5[!dat5$trace %in% rownames(event_dat)[frag_ids], ], + dat6b) + # TODO: Check me! + } else { + dat7 <- dat5 + dat7$glossar <- NULL + } + + dat7 }