Refactored add_path_items() so lonely popupevents are removed; also much faster now

This commit is contained in:
Nora Wickelmaier 2024-01-15 15:58:39 +01:00
parent c41ba718e9
commit 521b8aea68

View File

@ -1,37 +1,55 @@
########################################################################### ###########################################################################
add_path_items <- function(subdata) { add_path_items <- function(subdata) {
last_event <- subdata$event[1] subdata_glossar <- subdata[subdata$item == "glossar", ]
subdata_glossar$path <- NA
items <- unique(subdata$item)[unique(subdata$item) != "glossar"] items <- unique(subdata$item)[unique(subdata$item) != "glossar"]
n <- 1 # count items for progress
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, path_per_item <- function(subdata, item) {
style = 3)
for (item in items) { subsubdata <- subdata[subdata$item == item, ]
last_event <- subsubdata$event[1]
cat("\n\nAdding path variable for item", item, subsubdata$path <- NA
paste0("(", n, "/", length(items), ")"), "\n") for (i in 1:nrow(subsubdata)) {
if (last_event == "Show Info") {
for (i in 1:nrow(subdata)) { subsubdata$path[i] <- i
if (last_event == "Show Info" & subdata$item[i] == item) {
subdata$path[i] <- i
j <- i j <- i
} else if (last_event == "Show Front" & subdata$item[i] == item) { status <- "path_started"
subdata$path[i] <- j } else if (last_event == "Show Front") {
subsubdata$path[i] <- j
status <- "path_ended"
} else if (!(last_event %in% c("Show Info", "Show Front")) & } else if (!(last_event %in% c("Show Info", "Show Front")) &
subdata$item[i] == item) { status == "path_started") {
subdata$path[i] <- j subsubdata$path[i] <- j
} }
if (i <= nrow(subdata)) { if (i <= nrow(subsubdata)) {
last_event <- subdata$event[i + 1] last_event <- subsubdata$event[i + 1]
} }
utils::setTxtProgressBar(pb, i)
} }
n <- n + 1 subsubdata$path <- ifelse(is.na(subsubdata$path), NA,
paste0(item, "_", subsubdata$path))
subsubdata
} }
subdata_list <- pbapply::pblapply(items, path_per_item, subdata = subdata)
subdata <- dplyr::bind_rows(subdata_list)
# Remove popup events that occur after item has been closed
subdata <- subdata[!is.na(subdata$path), ]
# Add glossar events back in
subdata <- rbind(subdata, subdata_glossar)
# Bring back in time order
subdata <- subdata[order(subdata$fileId, subdata$date), ]
# Make path a consecutive number
subdata$path <- as.numeric(factor(subdata$path, levels = unique(subdata$path)))
rownames(subdata) <- NULL
subdata subdata
} }
########################################################################### ###########################################################################
add_path_glossar <- function(subdata, xmlpath) { add_path_glossar <- function(subdata, xmlpath) {
@ -39,7 +57,7 @@ add_path_glossar <- function(subdata, xmlpath) {
style = 3) style = 3)
cat("\n\n########## Creating glossar dictionary ##########", "\n") cat("\n\n########## Creating glossar dictionary ##########", "\n")
items <- unique(subdata$item[subdata$item != "glossar"]) items <- unique(subdata$item[subdata$item != "glossar"])
glossar_files <- unique(subdata[subdata$item == "glossar", "popup"]) glossar_files <- unique(subdata[subdata$item == "glossar", "popup"])
lut <- create_glossardict(items, glossar_files, xmlpath = xmlpath) lut <- create_glossardict(items, glossar_files, xmlpath = xmlpath)
inside <- glossar_files[glossar_files %in% inside <- glossar_files[glossar_files %in%
@ -138,8 +156,6 @@ add_path_moves <- function(data) {
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL rownames(out) <- NULL
# Make path a consecutive number
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
out out
} }