From 521b8aea68cbaf450c3dff89fea74f1395971b6d Mon Sep 17 00:00:00 2001 From: nwickel Date: Mon, 15 Jan 2024 15:58:39 +0100 Subject: [PATCH] Refactored add_path_items() so lonely popupevents are removed; also much faster now --- R/add_trace.R | 62 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/R/add_trace.R b/R/add_trace.R index 5f7d83d..e8eb98a 100644 --- a/R/add_trace.R +++ b/R/add_trace.R @@ -1,37 +1,55 @@ ########################################################################### add_path_items <- function(subdata) { - last_event <- subdata$event[1] + subdata_glossar <- subdata[subdata$item == "glossar", ] + subdata_glossar$path <- NA + items <- unique(subdata$item)[unique(subdata$item) != "glossar"] - n <- 1 # count items for progress - pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, - style = 3) + path_per_item <- function(subdata, item) { - for (item in items) { - - cat("\n\nAdding path variable for item", item, - paste0("(", n, "/", length(items), ")"), "\n") - - for (i in 1:nrow(subdata)) { - if (last_event == "Show Info" & subdata$item[i] == item) { - subdata$path[i] <- i + subsubdata <- subdata[subdata$item == item, ] + last_event <- subsubdata$event[1] + subsubdata$path <- NA + for (i in 1:nrow(subsubdata)) { + if (last_event == "Show Info") { + subsubdata$path[i] <- i j <- i - } else if (last_event == "Show Front" & subdata$item[i] == item) { - subdata$path[i] <- j + status <- "path_started" + } else if (last_event == "Show Front") { + subsubdata$path[i] <- j + status <- "path_ended" } else if (!(last_event %in% c("Show Info", "Show Front")) & - subdata$item[i] == item) { - subdata$path[i] <- j + status == "path_started") { + subsubdata$path[i] <- j } - if (i <= nrow(subdata)) { - last_event <- subdata$event[i + 1] + if (i <= nrow(subsubdata)) { + last_event <- subsubdata$event[i + 1] } - utils::setTxtProgressBar(pb, i) } - n <- n + 1 + subsubdata$path <- ifelse(is.na(subsubdata$path), NA, + paste0(item, "_", subsubdata$path)) + subsubdata } + + subdata_list <- pbapply::pblapply(items, path_per_item, subdata = subdata) + subdata <- dplyr::bind_rows(subdata_list) + + # Remove popup events that occur after item has been closed + subdata <- subdata[!is.na(subdata$path), ] + + # Add glossar events back in + subdata <- rbind(subdata, subdata_glossar) + + # Bring back in time order + subdata <- subdata[order(subdata$fileId, subdata$date), ] + + # Make path a consecutive number + subdata$path <- as.numeric(factor(subdata$path, levels = unique(subdata$path))) + rownames(subdata) <- NULL subdata } + ########################################################################### add_path_glossar <- function(subdata, xmlpath) { @@ -39,7 +57,7 @@ add_path_glossar <- function(subdata, xmlpath) { style = 3) cat("\n\n########## Creating glossar dictionary ##########", "\n") - items <- unique(subdata$item[subdata$item != "glossar"]) + items <- unique(subdata$item[subdata$item != "glossar"]) glossar_files <- unique(subdata[subdata$item == "glossar", "popup"]) lut <- create_glossardict(items, glossar_files, xmlpath = xmlpath) inside <- glossar_files[glossar_files %in% @@ -138,8 +156,6 @@ add_path_moves <- function(data) { out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] rownames(out) <- NULL - # Make path a consecutive number - out$path <- as.numeric(factor(out$path, levels = unique(out$path))) out }