Refactored add_trace_moves() so it is about 20(?) times faster now

This commit is contained in:
Nora Wickelmaier 2023-09-18 13:37:35 +02:00
parent d28a2497dc
commit 5ab190a4d8

View File

@ -12,7 +12,8 @@ add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
artworks <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"]
n <- 1 # count artworks for progress
pb <- txtProgressBar(min = 0, max = nrow(subdata2), style = 3)
pb <- txtProgressBar(min = 0, max = nrow(subdata2), initial = NA,
style = 3)
for (artwork in artworks) {
@ -117,6 +118,8 @@ add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
###########################################################################
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
# TODO: How do I set default vector and partial matching of arguments?
# --> `macht.arg()` and `pmatch()`
if (event == "move") {
actions <- c("Transform start", "Transform stop")
@ -166,10 +169,11 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
subdata <- subdata[-(id_rm_stop + 1), ]
}
subdata_split <- split(subdata, subdata$fileId)
subdata_split <- split(subdata, ~ fileId)
pbapply::pboptions(style = 3, char = "=")
suppressWarnings(
subdata_split_wide <- lapply(subdata_split, reshape,
subdata_split_wide <- pbapply::pblapply(subdata_split, reshape,
direction = "wide",
idvar = idvar,
timevar = "time",
@ -244,7 +248,7 @@ add_case <- function(data, cutoff = 20) {
data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
data$case <- NA
j <- 1
pb <- txtProgressBar(min = 0, max = nrow(data), style = 3)
pb <- txtProgressBar(min = 0, max = nrow(data), initial = NA, style = 3)
for (i in seq_len(nrow(data))) {
if (data$timediff[i] <= cutoff) {
@ -267,21 +271,31 @@ add_trace_moves <- function(data) {
cases <- unique(data$case)
artworks <- unique(data$artwork)[unique(data$artwork) != "glossar"]
max_trace <- max(data$trace, na.rm = TRUE) + 1
out <- NULL
trace_max <- max(data$trace, na.rm = TRUE)
pb <- txtProgressBar(min = 0, max = length(artworks), style = 3)
subdata_list <- split(data, ~ artwork + case)
subdata_list <- subdata_list[which(sapply(subdata_list, nrow) != 0)]
n <- 1 # count cases for progress
pbapply::pboptions(style = 3, char = "=")
subdata_trace <- pbapply::pblapply(subdata_list,
function(x) {
trace_max <<- trace_max + 1
add_trace_subdata(x, max_trace = trace_max)
}
)
for (case in cases) {
out <- dplyr::bind_rows(subdata_trace)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL
cat("\n\nAdding trace variable for move events per case",
paste0("(", n, "/", length(cases), ")"), "\n")
j <- 1
# Make trace a consecutive number
out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace)))
out
}
add_trace_subdata <- function(subdata, max_trace) {
for (artwork in artworks) {
subdata <- data[data$case == case & data$artwork == artwork, ]
if (nrow(subdata) != 0) {
if (length(na.omit(unique(subdata$trace))) == 1) {
@ -298,30 +312,19 @@ add_trace_moves <- function(data) {
}
} else if (all(is.na(subdata$trace))) {
for (i in 1:nrow(subdata)) {
if (subdata$event[i] == "move") {
subdata$trace[i] <- max_trace
}
}
}
max_trace <- max_trace + 1
}
if (nrow(subdata) > 0) {
out <- rbind(out, subdata)
}
setTxtProgressBar(pb, j)
j <- j + 1
}
n <- n + 1
}
out <- out[order(out$date.start, out$fileId.start), ]
rownames(out) <- NULL
# Make trace a consecutive number
out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace)))
out
} else {
warning("`subdata` has nrow = 0")
}
# TODO: Get rid of the loops
# --> This takes forever...
subdata
}
#system.time(dat4a <- add_trace_moves2(dat3))
#system.time(dat4b <- add_trace_moves(dat3))
###########################################################################
@ -386,7 +389,7 @@ add_topic <- function(data, topics) {
tab_index <- lapply(tab_art, seq_along)
dat_split <- split(data, data$artwork)
dat_split <- split(data, ~ artwork)
set_label <- function(x) {
artwork <- unique(x$artwork)