Added progress bars to parse_logfiles() and did some debugging

This commit is contained in:
Nora Wickelmaier 2023-09-21 11:50:37 +02:00
parent 17e8e39cc3
commit b9185a5645
3 changed files with 31 additions and 19 deletions

View File

@ -34,7 +34,7 @@ create_eventlogs <- function(data) {
cat("## --> openPopup events closed.", "\n")
dat2 <- rbind(c1, c2, c3, c4)
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
dat2 <- dat2[order(dat2$fileId, dat2$date.start, dat2$timeMs.start), ]
# Remove all events that do not have a `date.start`
d1 <- nrow(dat2)
@ -52,7 +52,7 @@ create_eventlogs <- function(data) {
# Add event ID ###########################################################
dat3$eventId <- seq_len(nrow(dat3))
dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
dat3 <- dat3[, c("fileId", "eventId", "case",
"trace", "glossar", "event", "artwork",
"date.start", "date.stop", "timeMs.start",
"timeMs.stop", "duration", "topicNumber", "popup",

View File

@ -282,7 +282,7 @@ add_trace_moves <- function(data) {
)
out <- dplyr::bind_rows(subdata_trace)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL
# Make trace a consecutive number
@ -409,7 +409,7 @@ add_topic <- function(data, topics) {
#out <- do.call(rbind, dat_topic)
out <- dplyr::bind_rows(dat_topic)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL
out
}

View File

@ -13,9 +13,10 @@
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
#' # leftpad_fnames(fnames)
leftpad_fnames <- function(x, dirpaths) {
leftpad_fnames <- function(fnames) {
z <- gsub(paste0(dirpaths, "/"), "\\1", x)
z <- sapply(fnames, function(x) tail(strsplit(x, "/")[[1]], 1),
USE.NAMES = FALSE)
ys <- strsplit(z, "_")
res <- NULL
@ -28,7 +29,6 @@ leftpad_fnames <- function(x, dirpaths) {
e4 <- sprintf("%02d", as.numeric(y2[2]))
e5 <- sprintf("%02d", as.numeric(y[4]))
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
res <- c(res,
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
@ -36,7 +36,7 @@ leftpad_fnames <- function(x, dirpaths) {
res
}
#' Creating data frame for raw log files.
#' Creating data frame from raw log files.
#'
#' Creates a data frame or CSV file from raw log files from a
#' Multi-Touch-Table at the IWM.
@ -58,11 +58,12 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
# TODO: This is not very intutitive
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
suppressWarnings(
logs <- lapply(fnames, readLines)
logs <- pbapply::pblapply(fnames, readLines)
)
nlog <- sapply(logs, length)
dat <- data.frame(fileId = rep(leftpad_fnames(fnames, dirpaths), nlog),
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
logs = unlist(logs))
# Remove corrupt lines
@ -75,40 +76,50 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
}
# Extract relevant infos
date <- sapply(dat$logs, gsub,
cat("\n########## Parsing individual data parts... ##########", "\n")
cat("\n Extract dates...", "\n\n")
date <- pbapply::pbsapply(dat$logs, gsub,
pattern = "^\\[(.*)\\], \\[.*$",
replacement = "\\1",
USE.NAMES = FALSE)
timestamp <- sapply(dat$logs, gsub,
cat("\n Extract timestamps...", "\n\n")
timestamp <- pbapply::pbsapply(dat$logs, gsub,
pattern = "^\\[.*\\], \\[(.*)\\].*$",
replacement = "\\1",
USE.NAMES = FALSE)
action <- sapply(dat$logs, gsub,
cat("\n Extract events...", "\n\n")
action <- pbapply::pbsapply(dat$logs, gsub,
pattern = "^.*EyeVisit, (.*):*.*$",
replacement = "\\1",
USE.NAMES = FALSE)
events <- sapply(strsplit(action, ":"), function(x) x[1])
cat("\n Parse separate events...", "\n\n")
events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1])
topics <- sapply(strsplit(action, ":"), function(x) x[2])
cat("\n Extract topics...", "\n\n")
topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2])
cat("\n Extract move information...", "\n\n")
suppressWarnings(
moves <- apply(do.call(rbind,
moves <- pbapply::pbapply(do.call(rbind,
strsplit(sapply(strsplit(action, ":"),
function(x) x[3]), ",")), 2,
as.numeric)
)
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
card_action <- trimws(sapply(strsplit(action, ":"),
cat("\n Extract popups...", "\n\n")
card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"),
function(x) x[3])[grep("Artwork", events)])
card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4]))
card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"),
function(x) x[4]))
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
cat("\n Transform timestamps to ms...", "\n\n")
ts_elements <- strsplit(timestamp, ":")
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
@ -130,8 +141,9 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
# Export data
if (save) {
cat("Saving data...", "\n\n")
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
cat(paste0("INFORMATION: Data file", file, "has been written to ", getwd(), "\n"))
cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n"))
} else {
return(dat)
}