Added progress bars to parse_logfiles() and did some debugging
This commit is contained in:
parent
17e8e39cc3
commit
b9185a5645
@ -34,7 +34,7 @@ create_eventlogs <- function(data) {
|
|||||||
cat("## --> openPopup events closed.", "\n")
|
cat("## --> openPopup events closed.", "\n")
|
||||||
dat2 <- rbind(c1, c2, c3, c4)
|
dat2 <- rbind(c1, c2, c3, c4)
|
||||||
|
|
||||||
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
|
dat2 <- dat2[order(dat2$fileId, dat2$date.start, dat2$timeMs.start), ]
|
||||||
|
|
||||||
# Remove all events that do not have a `date.start`
|
# Remove all events that do not have a `date.start`
|
||||||
d1 <- nrow(dat2)
|
d1 <- nrow(dat2)
|
||||||
@ -52,7 +52,7 @@ create_eventlogs <- function(data) {
|
|||||||
|
|
||||||
# Add event ID ###########################################################
|
# Add event ID ###########################################################
|
||||||
dat3$eventId <- seq_len(nrow(dat3))
|
dat3$eventId <- seq_len(nrow(dat3))
|
||||||
dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
|
dat3 <- dat3[, c("fileId", "eventId", "case",
|
||||||
"trace", "glossar", "event", "artwork",
|
"trace", "glossar", "event", "artwork",
|
||||||
"date.start", "date.stop", "timeMs.start",
|
"date.start", "date.stop", "timeMs.start",
|
||||||
"timeMs.stop", "duration", "topicNumber", "popup",
|
"timeMs.stop", "duration", "topicNumber", "popup",
|
||||||
|
@ -282,7 +282,7 @@ add_trace_moves <- function(data) {
|
|||||||
)
|
)
|
||||||
|
|
||||||
out <- dplyr::bind_rows(subdata_trace)
|
out <- dplyr::bind_rows(subdata_trace)
|
||||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
|
||||||
rownames(out) <- NULL
|
rownames(out) <- NULL
|
||||||
|
|
||||||
# Make trace a consecutive number
|
# Make trace a consecutive number
|
||||||
@ -409,7 +409,7 @@ add_topic <- function(data, topics) {
|
|||||||
|
|
||||||
#out <- do.call(rbind, dat_topic)
|
#out <- do.call(rbind, dat_topic)
|
||||||
out <- dplyr::bind_rows(dat_topic)
|
out <- dplyr::bind_rows(dat_topic)
|
||||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
|
||||||
rownames(out) <- NULL
|
rownames(out) <- NULL
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
@ -13,9 +13,10 @@
|
|||||||
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
||||||
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
||||||
#' # leftpad_fnames(fnames)
|
#' # leftpad_fnames(fnames)
|
||||||
leftpad_fnames <- function(x, dirpaths) {
|
leftpad_fnames <- function(fnames) {
|
||||||
|
|
||||||
z <- gsub(paste0(dirpaths, "/"), "\\1", x)
|
z <- sapply(fnames, function(x) tail(strsplit(x, "/")[[1]], 1),
|
||||||
|
USE.NAMES = FALSE)
|
||||||
ys <- strsplit(z, "_")
|
ys <- strsplit(z, "_")
|
||||||
|
|
||||||
res <- NULL
|
res <- NULL
|
||||||
@ -28,7 +29,6 @@ leftpad_fnames <- function(x, dirpaths) {
|
|||||||
e4 <- sprintf("%02d", as.numeric(y2[2]))
|
e4 <- sprintf("%02d", as.numeric(y2[2]))
|
||||||
e5 <- sprintf("%02d", as.numeric(y[4]))
|
e5 <- sprintf("%02d", as.numeric(y[4]))
|
||||||
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
||||||
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
|
||||||
|
|
||||||
res <- c(res,
|
res <- c(res,
|
||||||
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
|
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
|
||||||
@ -36,7 +36,7 @@ leftpad_fnames <- function(x, dirpaths) {
|
|||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Creating data frame for raw log files.
|
#' Creating data frame from raw log files.
|
||||||
#'
|
#'
|
||||||
#' Creates a data frame or CSV file from raw log files from a
|
#' Creates a data frame or CSV file from raw log files from a
|
||||||
#' Multi-Touch-Table at the IWM.
|
#' Multi-Touch-Table at the IWM.
|
||||||
@ -58,11 +58,12 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
|||||||
# TODO: This is not very intutitive
|
# TODO: This is not very intutitive
|
||||||
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
||||||
|
|
||||||
|
cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
|
||||||
suppressWarnings(
|
suppressWarnings(
|
||||||
logs <- lapply(fnames, readLines)
|
logs <- pbapply::pblapply(fnames, readLines)
|
||||||
)
|
)
|
||||||
nlog <- sapply(logs, length)
|
nlog <- sapply(logs, length)
|
||||||
dat <- data.frame(fileId = rep(leftpad_fnames(fnames, dirpaths), nlog),
|
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
|
||||||
logs = unlist(logs))
|
logs = unlist(logs))
|
||||||
|
|
||||||
# Remove corrupt lines
|
# Remove corrupt lines
|
||||||
@ -75,40 +76,50 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Extract relevant infos
|
# Extract relevant infos
|
||||||
date <- sapply(dat$logs, gsub,
|
cat("\n########## Parsing individual data parts... ##########", "\n")
|
||||||
|
cat("\n Extract dates...", "\n\n")
|
||||||
|
date <- pbapply::pbsapply(dat$logs, gsub,
|
||||||
pattern = "^\\[(.*)\\], \\[.*$",
|
pattern = "^\\[(.*)\\], \\[.*$",
|
||||||
replacement = "\\1",
|
replacement = "\\1",
|
||||||
USE.NAMES = FALSE)
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
timestamp <- sapply(dat$logs, gsub,
|
cat("\n Extract timestamps...", "\n\n")
|
||||||
|
timestamp <- pbapply::pbsapply(dat$logs, gsub,
|
||||||
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
||||||
replacement = "\\1",
|
replacement = "\\1",
|
||||||
USE.NAMES = FALSE)
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
action <- sapply(dat$logs, gsub,
|
cat("\n Extract events...", "\n\n")
|
||||||
|
action <- pbapply::pbsapply(dat$logs, gsub,
|
||||||
pattern = "^.*EyeVisit, (.*):*.*$",
|
pattern = "^.*EyeVisit, (.*):*.*$",
|
||||||
replacement = "\\1",
|
replacement = "\\1",
|
||||||
USE.NAMES = FALSE)
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
events <- sapply(strsplit(action, ":"), function(x) x[1])
|
cat("\n Parse separate events...", "\n\n")
|
||||||
|
events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1])
|
||||||
|
|
||||||
topics <- sapply(strsplit(action, ":"), function(x) x[2])
|
cat("\n Extract topics...", "\n\n")
|
||||||
|
topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2])
|
||||||
|
|
||||||
|
cat("\n Extract move information...", "\n\n")
|
||||||
suppressWarnings(
|
suppressWarnings(
|
||||||
moves <- apply(do.call(rbind,
|
moves <- pbapply::pbapply(do.call(rbind,
|
||||||
strsplit(sapply(strsplit(action, ":"),
|
strsplit(sapply(strsplit(action, ":"),
|
||||||
function(x) x[3]), ",")), 2,
|
function(x) x[3]), ",")), 2,
|
||||||
as.numeric)
|
as.numeric)
|
||||||
)
|
)
|
||||||
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
||||||
|
|
||||||
card_action <- trimws(sapply(strsplit(action, ":"),
|
cat("\n Extract popups...", "\n\n")
|
||||||
|
card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"),
|
||||||
function(x) x[3])[grep("Artwork", events)])
|
function(x) x[3])[grep("Artwork", events)])
|
||||||
|
|
||||||
card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4]))
|
card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"),
|
||||||
|
function(x) x[4]))
|
||||||
|
|
||||||
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
||||||
|
|
||||||
|
cat("\n Transform timestamps to ms...", "\n\n")
|
||||||
ts_elements <- strsplit(timestamp, ":")
|
ts_elements <- strsplit(timestamp, ":")
|
||||||
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
||||||
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
||||||
@ -130,8 +141,9 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
|||||||
|
|
||||||
# Export data
|
# Export data
|
||||||
if (save) {
|
if (save) {
|
||||||
|
cat("Saving data...", "\n\n")
|
||||||
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
|
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
|
||||||
cat(paste0("INFORMATION: Data file", file, "has been written to ", getwd(), "\n"))
|
cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n"))
|
||||||
} else {
|
} else {
|
||||||
return(dat)
|
return(dat)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user