From 10d7a44ff9721b5e5e7d3185aa0adfdc66dc00e5 Mon Sep 17 00:00:00 2001 From: nwickel Date: Tue, 16 Jan 2024 18:51:13 +0100 Subject: [PATCH] Changed how path variable for move events is created --- R/add_path.R | 107 ++++++++++++++++++++++++++++--------------- R/create_eventlogs.R | 44 +++++++++--------- 2 files changed, 93 insertions(+), 58 deletions(-) diff --git a/R/add_path.R b/R/add_path.R index e8eb98a..5b15cb5 100644 --- a/R/add_path.R +++ b/R/add_path.R @@ -1,5 +1,8 @@ ########################################################################### add_path_items <- function(subdata) { + + pbapply::pboptions(style = 3, char = "=") + subdata_glossar <- subdata[subdata$item == "glossar", ] subdata_glossar$path <- NA @@ -52,6 +55,7 @@ add_path_items <- function(subdata) { ########################################################################### add_path_glossar <- function(subdata, xmlpath) { +# TODO: I think this needs to be completely redone pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, style = 3) @@ -129,62 +133,93 @@ add_path <- function(data, xmlpath, glossar) { # Add path for moves -add_path_moves <- function(data) { +add_path_moves <- function(data, cutoff) { pbapply::pboptions(style = 3, char = "=") - path_max <- max(data$path, na.rm = TRUE) + subdata_item <- split(data, ~ item) - #subdata_art <- split(data, ~ item) - subdata_case <- split(data, ~ case) - - #subdata_list <- split(data, ~ item + case) - # --> does not work with complete data set - cat("Splitting data...", "\n") - subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item) - subdata_list <- unlist(subdata_list, recursive = FALSE) - - cat("Adding path...", "\n") - subdata_path <- pbapply::pblapply(subdata_list, - function(x) { - path_max <<- path_max + 1 - add_path_subdata(x, max_path = path_max) - } - ) + subdata_path <- pbapply::pblapply(subdata_item, + add_path_subdata, cutoff = cutoff) out <- dplyr::bind_rows(subdata_path) out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] + # Make path a consecutive number + out$path <- as.numeric(factor(out$path, levels = unique(out$path))) rownames(out) <- NULL out } -add_path_subdata <- function(subdata, max_path) { - - if (nrow(subdata) != 0) { - - if (length(stats::na.omit(unique(subdata$path))) == 1) { - subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path)) - } else if (length(stats::na.omit(unique(subdata$path))) > 1) { - for (i in 1:nrow(subdata)) { - if (subdata$event[i] == "move") { - if (i == 1) { - subdata$path[i] <- stats::na.omit(unique(subdata$path))[1] +add_path_subdata <- function(subdata, cutoff) { + index_flipCard <- which(subdata$event == "flipCard") + current_item <- unique(subdata$item) + for (j in seq_along(index_flipCard)) { + # forwards pass + if (j < max(seq_along(index_flipCard))) { + for (i in seq(index_flipCard[j], index_flipCard[j + 1])) { + if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) { + timediff <- difftime(subdata$date.start[i], + subdata$date.stop[index_flipCard[j]], + units = "secs") + if (timediff <= cutoff){ + subdata$path[i] <- subdata$path[index_flipCard[j]] } else { - subdata$path[i] <- subdata$path[i - 1] + subdata$path[i] <- paste(current_item, "mv", j, sep = "_") } } } - } else if (all(is.na(subdata$path))) { - for (i in 1:nrow(subdata)) { - subdata$path[i] <- max_path + } else { + for (i in seq(index_flipCard[j], nrow(subdata))) { + if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) { + timediff <- difftime(subdata$date.start[i], + subdata$date.stop[index_flipCard[j]], + units = "secs") + if (timediff <= cutoff) { + subdata$path[i] <- subdata$path[index_flipCard[j]] + } else { + subdata$path[i] <- paste(current_item, "mv", j, sep = "_") + } + } + } + } + # backwards pass + if (j > min(seq_along(index_flipCard))) { + for (i in seq(index_flipCard[j - 1], index_flipCard[j])) { + if (grepl("mv", subdata$path[i])) { + timediff <- difftime(subdata$date.start[index_flipCard[j]], + subdata$date.stop[i], + units = "secs") + if (timediff <= cutoff){ + subdata$path[i] <- subdata$path[index_flipCard[j]] + } else { + subdata$path[i] <- paste(current_item, "mv", j, sep = "_") + } + } } } - - } else { - warning("subdata has nrow = 0") } + # fix moves with same path and timediff > cutoff + subdata_moves <- split(subdata, ~ path) + + check_moves <- function(subsubdata, cutoff) { + if (any(grepl("mv", subsubdata$path))) { + for (i in seq_len(nrow(subsubdata) - 1)) { + timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i], + units = "secs") + if (timediff > cutoff) { + subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_") + } else { + subsubdata$path[i + 1] <- subsubdata$path[i] + } + } + } + subsubdata + } + + subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff) + subdata <- dplyr::bind_rows(subdata_path) subdata } diff --git a/R/create_eventlogs.R b/R/create_eventlogs.R index cf03160..906952d 100644 --- a/R/create_eventlogs.R +++ b/R/create_eventlogs.R @@ -54,10 +54,14 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] + # Add path for move events ############################################## + cat("\n\n########## Adding path variable for move events... ##########", "\n") + dat3 <- add_path_moves(dat2, cutoff = case_cutoff) + # Add case variable ###################################################### - cat("\n########## Adding case and eventId variables... ##########", "\n\n") - dat3 <- add_case(dat2, cutoff = case_cutoff) - dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start", + cat("\n########## Adding case variable... ##########", "\n\n") + dat4 <- add_case(dat3, cutoff = case_cutoff) + dat4 <- dat4[, c("fileId.start", "fileId.stop", "date.start", "date.stop", "folder", "case", "path", "glossar", "event", "item", "timeMs.start", "timeMs.stop", "duration", "topic", "popup", "x.start", "y.start", @@ -65,11 +69,6 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, "scale.stop", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] - # Add path for move events ############################################## - cat("\n\n########## Adding path variable for move events... ##########", "\n") - dat4 <- add_path_moves(dat3) - - # Fix durations that span more than one log file ######################### levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop))) dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId) @@ -92,20 +91,22 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, dat4$fIdDiff <- NULL # Remove fragmented paths ############################################### - tab <- stats::xtabs( ~ path + event, dat4) + # tab <- stats::xtabs( ~ path + event, dat4) - fragments <- NULL + # fragments <- NULL - for (i in seq_len(nrow(tab))) { - if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) { - fragments <- c(fragments, rownames(tab)[i]) - } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) { - fragments <- c(fragments, rownames(tab)[i]) - } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) { - fragments <- c(fragments, rownames(tab)[i]) - } - } - dat5 <- dat4[!dat4$path %in% fragments, ] + # for (i in seq_len(nrow(tab))) { + # if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) { + # fragments <- c(fragments, rownames(tab)[i]) + # } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) { + # fragments <- c(fragments, rownames(tab)[i]) + # } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) { + # fragments <- c(fragments, rownames(tab)[i]) + # } + # } + # dat5 <- dat4[!dat4$path %in% fragments, ] + # TODO: Decide if I want this or not - are all these log errors? + dat5 <- dat4 if (glossar) { # Check for wrong order of events: flipCard -> openPopup -> openTopic @@ -126,8 +127,7 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, dat7$glossar <- NULL } - if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "../data/tmp_intermediate-df.RData") - + if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "results/tmp_intermediate-df.RData") dat7 }