From b9185a5645664a534f1916192aeeca6e5724dc03 Mon Sep 17 00:00:00 2001 From: nwickel Date: Thu, 21 Sep 2023 11:50:37 +0200 Subject: [PATCH] Added progress bars to parse_logfiles() and did some debugging --- R/create_eventlogs.R | 4 ++-- R/helper.R | 4 ++-- R/parse_logfiles.R | 42 +++++++++++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/R/create_eventlogs.R b/R/create_eventlogs.R index 628bc32..3aa8ee6 100644 --- a/R/create_eventlogs.R +++ b/R/create_eventlogs.R @@ -34,7 +34,7 @@ create_eventlogs <- function(data) { cat("## --> openPopup events closed.", "\n") dat2 <- rbind(c1, c2, c3, c4) - dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ] + dat2 <- dat2[order(dat2$fileId, dat2$date.start, dat2$timeMs.start), ] # Remove all events that do not have a `date.start` d1 <- nrow(dat2) @@ -52,7 +52,7 @@ create_eventlogs <- function(data) { # Add event ID ########################################################### dat3$eventId <- seq_len(nrow(dat3)) - dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case", + dat3 <- dat3[, c("fileId", "eventId", "case", "trace", "glossar", "event", "artwork", "date.start", "date.stop", "timeMs.start", "timeMs.stop", "duration", "topicNumber", "popup", diff --git a/R/helper.R b/R/helper.R index ecf7e8d..5c634c5 100644 --- a/R/helper.R +++ b/R/helper.R @@ -282,7 +282,7 @@ add_trace_moves <- function(data) { ) out <- dplyr::bind_rows(subdata_trace) - out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] + out <- out[order(out$fileId, out$date.start, out$timeMs.start), ] rownames(out) <- NULL # Make trace a consecutive number @@ -409,7 +409,7 @@ add_topic <- function(data, topics) { #out <- do.call(rbind, dat_topic) out <- dplyr::bind_rows(dat_topic) - out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] + out <- out[order(out$fileId, out$date.start, out$timeMs.start), ] rownames(out) <- NULL out } diff --git a/R/parse_logfiles.R b/R/parse_logfiles.R index 9b99bc8..caea45b 100644 --- a/R/parse_logfiles.R +++ b/R/parse_logfiles.R @@ -13,9 +13,10 @@ #' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders) #' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) #' # leftpad_fnames(fnames) -leftpad_fnames <- function(x, dirpaths) { +leftpad_fnames <- function(fnames) { - z <- gsub(paste0(dirpaths, "/"), "\\1", x) + z <- sapply(fnames, function(x) tail(strsplit(x, "/")[[1]], 1), + USE.NAMES = FALSE) ys <- strsplit(z, "_") res <- NULL @@ -28,7 +29,6 @@ leftpad_fnames <- function(x, dirpaths) { e4 <- sprintf("%02d", as.numeric(y2[2])) e5 <- sprintf("%02d", as.numeric(y[4])) e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) - e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) res <- c(res, paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log")) @@ -36,7 +36,7 @@ leftpad_fnames <- function(x, dirpaths) { res } -#' Creating data frame for raw log files. +#' Creating data frame from raw log files. #' #' Creates a data frame or CSV file from raw log files from a #' Multi-Touch-Table at the IWM. @@ -58,11 +58,12 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv", # TODO: This is not very intutitive fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) + cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n") suppressWarnings( - logs <- lapply(fnames, readLines) + logs <- pbapply::pblapply(fnames, readLines) ) nlog <- sapply(logs, length) - dat <- data.frame(fileId = rep(leftpad_fnames(fnames, dirpaths), nlog), + dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog), logs = unlist(logs)) # Remove corrupt lines @@ -75,40 +76,50 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv", } # Extract relevant infos - date <- sapply(dat$logs, gsub, + cat("\n########## Parsing individual data parts... ##########", "\n") + cat("\n Extract dates...", "\n\n") + date <- pbapply::pbsapply(dat$logs, gsub, pattern = "^\\[(.*)\\], \\[.*$", replacement = "\\1", USE.NAMES = FALSE) - timestamp <- sapply(dat$logs, gsub, + cat("\n Extract timestamps...", "\n\n") + timestamp <- pbapply::pbsapply(dat$logs, gsub, pattern = "^\\[.*\\], \\[(.*)\\].*$", replacement = "\\1", USE.NAMES = FALSE) - action <- sapply(dat$logs, gsub, + cat("\n Extract events...", "\n\n") + action <- pbapply::pbsapply(dat$logs, gsub, pattern = "^.*EyeVisit, (.*):*.*$", replacement = "\\1", USE.NAMES = FALSE) - events <- sapply(strsplit(action, ":"), function(x) x[1]) + cat("\n Parse separate events...", "\n\n") + events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1]) - topics <- sapply(strsplit(action, ":"), function(x) x[2]) + cat("\n Extract topics...", "\n\n") + topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2]) + cat("\n Extract move information...", "\n\n") suppressWarnings( - moves <- apply(do.call(rbind, + moves <- pbapply::pbapply(do.call(rbind, strsplit(sapply(strsplit(action, ":"), function(x) x[3]), ",")), 2, as.numeric) ) # ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard" - card_action <- trimws(sapply(strsplit(action, ":"), + cat("\n Extract popups...", "\n\n") + card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"), function(x) x[3])[grep("Artwork", events)]) - card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4])) + card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"), + function(x) x[4])) events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/") + cat("\n Transform timestamps to ms...", "\n\n") ts_elements <- strsplit(timestamp, ":") time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 + @@ -130,8 +141,9 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv", # Export data if (save) { + cat("Saving data...", "\n\n") utils::write.table(dat, file = file, sep = ";", row.names = FALSE) - cat(paste0("INFORMATION: Data file", file, "has been written to ", getwd(), "\n")) + cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n")) } else { return(dat) }