Refactored add_path_items() so lonely popupevents are removed; also much faster now

This commit is contained in:
Nora Wickelmaier 2024-01-15 15:58:39 +01:00
parent c41ba718e9
commit 521b8aea68

View File

@ -1,37 +1,55 @@
###########################################################################
add_path_items <- function(subdata) {
last_event <- subdata$event[1]
subdata_glossar <- subdata[subdata$item == "glossar", ]
subdata_glossar$path <- NA
items <- unique(subdata$item)[unique(subdata$item) != "glossar"]
n <- 1 # count items for progress
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3)
path_per_item <- function(subdata, item) {
for (item in items) {
cat("\n\nAdding path variable for item", item,
paste0("(", n, "/", length(items), ")"), "\n")
for (i in 1:nrow(subdata)) {
if (last_event == "Show Info" & subdata$item[i] == item) {
subdata$path[i] <- i
subsubdata <- subdata[subdata$item == item, ]
last_event <- subsubdata$event[1]
subsubdata$path <- NA
for (i in 1:nrow(subsubdata)) {
if (last_event == "Show Info") {
subsubdata$path[i] <- i
j <- i
} else if (last_event == "Show Front" & subdata$item[i] == item) {
subdata$path[i] <- j
status <- "path_started"
} else if (last_event == "Show Front") {
subsubdata$path[i] <- j
status <- "path_ended"
} else if (!(last_event %in% c("Show Info", "Show Front")) &
subdata$item[i] == item) {
subdata$path[i] <- j
status == "path_started") {
subsubdata$path[i] <- j
}
if (i <= nrow(subdata)) {
last_event <- subdata$event[i + 1]
if (i <= nrow(subsubdata)) {
last_event <- subsubdata$event[i + 1]
}
utils::setTxtProgressBar(pb, i)
}
n <- n + 1
subsubdata$path <- ifelse(is.na(subsubdata$path), NA,
paste0(item, "_", subsubdata$path))
subsubdata
}
subdata_list <- pbapply::pblapply(items, path_per_item, subdata = subdata)
subdata <- dplyr::bind_rows(subdata_list)
# Remove popup events that occur after item has been closed
subdata <- subdata[!is.na(subdata$path), ]
# Add glossar events back in
subdata <- rbind(subdata, subdata_glossar)
# Bring back in time order
subdata <- subdata[order(subdata$fileId, subdata$date), ]
# Make path a consecutive number
subdata$path <- as.numeric(factor(subdata$path, levels = unique(subdata$path)))
rownames(subdata) <- NULL
subdata
}
###########################################################################
add_path_glossar <- function(subdata, xmlpath) {
@ -138,8 +156,6 @@ add_path_moves <- function(data) {
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL
# Make path a consecutive number
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
out
}