From 5ab190a4d8034def5c8c712073d322a0ccd14d7f Mon Sep 17 00:00:00 2001 From: nwickel Date: Mon, 18 Sep 2023 13:37:35 +0200 Subject: [PATCH] Refactored add_trace_moves() so it is about 20(?) times faster now --- code/functions.R | 105 ++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/code/functions.R b/code/functions.R index 05a32b0..67de3fd 100644 --- a/code/functions.R +++ b/code/functions.R @@ -12,7 +12,8 @@ add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") { artworks <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"] n <- 1 # count artworks for progress - pb <- txtProgressBar(min = 0, max = nrow(subdata2), style = 3) + pb <- txtProgressBar(min = 0, max = nrow(subdata2), initial = NA, + style = 3) for (artwork in artworks) { @@ -117,6 +118,8 @@ add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") { ########################################################################### close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) { +# TODO: How do I set default vector and partial matching of arguments? + # --> `macht.arg()` and `pmatch()` if (event == "move") { actions <- c("Transform start", "Transform stop") @@ -166,10 +169,11 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP subdata <- subdata[-(id_rm_stop + 1), ] } - subdata_split <- split(subdata, subdata$fileId) + subdata_split <- split(subdata, ~ fileId) + pbapply::pboptions(style = 3, char = "=") suppressWarnings( - subdata_split_wide <- lapply(subdata_split, reshape, + subdata_split_wide <- pbapply::pblapply(subdata_split, reshape, direction = "wide", idvar = idvar, timevar = "time", @@ -244,7 +248,7 @@ add_case <- function(data, cutoff = 20) { data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start))) data$case <- NA j <- 1 - pb <- txtProgressBar(min = 0, max = nrow(data), style = 3) + pb <- txtProgressBar(min = 0, max = nrow(data), initial = NA, style = 3) for (i in seq_len(nrow(data))) { if (data$timediff[i] <= cutoff) { @@ -267,61 +271,60 @@ add_trace_moves <- function(data) { cases <- unique(data$case) artworks <- unique(data$artwork)[unique(data$artwork) != "glossar"] - max_trace <- max(data$trace, na.rm = TRUE) + 1 - out <- NULL + trace_max <- max(data$trace, na.rm = TRUE) - pb <- txtProgressBar(min = 0, max = length(artworks), style = 3) + subdata_list <- split(data, ~ artwork + case) + subdata_list <- subdata_list[which(sapply(subdata_list, nrow) != 0)] - n <- 1 # count cases for progress - - for (case in cases) { - - cat("\n\nAdding trace variable for move events per case", - paste0("(", n, "/", length(cases), ")"), "\n") - j <- 1 - - for (artwork in artworks) { - subdata <- data[data$case == case & data$artwork == artwork, ] - if (nrow(subdata) != 0) { - - if (length(na.omit(unique(subdata$trace))) == 1) { - subdata[subdata$event == "move", "trace"] <- na.omit(unique(subdata$trace)) - } else if (length(na.omit(unique(subdata$trace))) > 1) { - for (i in 1:nrow(subdata)) { - if (subdata$event[i] == "move") { - if (i == 1) { - subdata$trace[i] <- na.omit(unique(subdata$trace))[1] - } else { - subdata$trace[i] <- subdata$trace[i - 1] - } - } - } - } else if (all(is.na(subdata$trace))) { - for (i in 1:nrow(subdata)) { - if (subdata$event[i] == "move") { - subdata$trace[i] <- max_trace - } - } + pbapply::pboptions(style = 3, char = "=") + subdata_trace <- pbapply::pblapply(subdata_list, + function(x) { + trace_max <<- trace_max + 1 + add_trace_subdata(x, max_trace = trace_max) } - max_trace <- max_trace + 1 - } - if (nrow(subdata) > 0) { - out <- rbind(out, subdata) - } - setTxtProgressBar(pb, j) - j <- j + 1 - } - n <- n + 1 - } - out <- out[order(out$date.start, out$fileId.start), ] + ) + + out <- dplyr::bind_rows(subdata_trace) + out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] rownames(out) <- NULL # Make trace a consecutive number out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace))) out } -# TODO: Get rid of the loops -# --> This takes forever... + + +add_trace_subdata <- function(subdata, max_trace) { + + if (nrow(subdata) != 0) { + + if (length(na.omit(unique(subdata$trace))) == 1) { + subdata[subdata$event == "move", "trace"] <- na.omit(unique(subdata$trace)) + } else if (length(na.omit(unique(subdata$trace))) > 1) { + for (i in 1:nrow(subdata)) { + if (subdata$event[i] == "move") { + if (i == 1) { + subdata$trace[i] <- na.omit(unique(subdata$trace))[1] + } else { + subdata$trace[i] <- subdata$trace[i - 1] + } + } + } + } else if (all(is.na(subdata$trace))) { + for (i in 1:nrow(subdata)) { + subdata$trace[i] <- max_trace + } + } + + } else { + warning("`subdata` has nrow = 0") + } + subdata +} + +#system.time(dat4a <- add_trace_moves2(dat3)) +#system.time(dat4b <- add_trace_moves(dat3)) + ########################################################################### @@ -386,7 +389,7 @@ add_topic <- function(data, topics) { tab_index <- lapply(tab_art, seq_along) - dat_split <- split(data, data$artwork) + dat_split <- split(data, ~ artwork) set_label <- function(x) { artwork <- unique(x$artwork)