From fb9db8b9087aa7c8c8c511be8de5f1a8cde5e618 Mon Sep 17 00:00:00 2001 From: nwickel Date: Tue, 19 Sep 2023 09:19:50 +0200 Subject: [PATCH] Fixed add_trace_moves() so it works with complete data set; first version that creates log event file for complete data set without crashing --- code/02_preprocessing.R | 16 +++++++++------- code/functions.R | 26 ++++++++++++++++---------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R index ab86df6..534a16d 100644 --- a/code/02_preprocessing.R +++ b/code/02_preprocessing.R @@ -13,13 +13,13 @@ dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) dat <- subset(dat0, !(dat0$event %in% c("Start Application", "Show Application"))) -#save(dat, file = "tmp/dat.RData") +save(dat, file = "tmp/dat.RData") # Add trace variable ##################################################### cat("########## Adding trace variable... ##########", "\n") dat1 <- add_trace(dat) -#save(dat1, file = "tmp/dat1.RData") +save(dat1, file = "tmp/dat1.RData") # Close events cat("########## Closing events... ##########", "\n") @@ -44,7 +44,7 @@ dat2 <- dat2[!is.na(dat2$date.start), ] rownames(dat2) <- NULL # TODO: Throw warning about this -#save(dat2, file = "tmp/dat2.RData") +save(dat2, file = "tmp/dat2.RData") # Add case variable ###################################################### cat("########## Adding case and eventId variables... ##########", "\n") @@ -61,23 +61,25 @@ dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] -#save(dat3, file = "tmp/dat3.RData") +save(dat3, file = "tmp/dat3.RData") # Add trace for move events ############################################## -cat("########## Adding trace variable for move events... ##########", "\n") +cat("\n########## Adding trace variable for move events... ##########", "\n") dat4 <- add_trace_moves(dat3) -#save(dat4, file = "tmp/dat4.RData") +save(dat4, file = "tmp/dat4.RData") # Add topics: file names and topics ###################################### cat("########## Adding information about topics... ##########", "\n") artworks <- unique(dat4$artwork) +# remove artworks without XML information +artworks <- artworks[!artworks %in% c("504", "505")] topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"), path = "../data/ContentEyevisit/eyevisit_cards_light/") dat5 <- add_topic(dat4, topics = topics) -#save(dat5, file = "tmp/dat5.RData") +save(dat5, file = "tmp/dat5.RData") # TODO: Replace artwork with informative strings diff --git a/code/functions.R b/code/functions.R index 67de3fd..0ae707a 100644 --- a/code/functions.R +++ b/code/functions.R @@ -262,6 +262,7 @@ add_case <- function(data, cutoff = 20) { data$timediff <- NULL data } +# TODO: Is this faster with lapply()? ########################################################################### @@ -269,14 +270,20 @@ add_case <- function(data, cutoff = 20) { add_trace_moves <- function(data) { - cases <- unique(data$case) - artworks <- unique(data$artwork)[unique(data$artwork) != "glossar"] + pbapply::pboptions(style = 3, char = "=") + trace_max <- max(data$trace, na.rm = TRUE) - subdata_list <- split(data, ~ artwork + case) - subdata_list <- subdata_list[which(sapply(subdata_list, nrow) != 0)] + #subdata_art <- split(data, ~ artwork) + subdata_case <- split(data, ~ case) - pbapply::pboptions(style = 3, char = "=") + #subdata_list <- split(data, ~ artwork + case) + # --> does not work with complete data set + cat("Splitting data...", "\n") + subdata_list <- pbapply::pblapply(subdata_case, split, f = ~artwork) + subdata_list <- unlist(subdata_list, recursive = FALSE) + + cat("Adding trace...", "\n") subdata_trace <- pbapply::pblapply(subdata_list, function(x) { trace_max <<- trace_max + 1 @@ -322,9 +329,6 @@ add_trace_subdata <- function(subdata, max_trace) { subdata } -#system.time(dat4a <- add_trace_moves2(dat3)) -#system.time(dat4b <- add_trace_moves(dat3)) - ########################################################################### @@ -383,6 +387,7 @@ extract_topics <- function(artworks, pattern, path) { add_topic <- function(data, topics) { + artworks <- unique(data$artwork) tab_art <- lapply(artworks, function(x) names(table(data$topicNumber[data$artwork == x]))) names(tab_art) <- artworks @@ -412,8 +417,9 @@ add_topic <- function(data, topics) { dat_topic <- lapply(dat_label, set_topic) - out <- do.call(rbind, dat_topic) - out <- out[order(out$date.start, out$fileId.start), ] + #out <- do.call(rbind, dat_topic) + out <- dplyr::bind_rows(dat_topic) + out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] rownames(out) <- NULL out }