158 lines
5.5 KiB
R
158 lines
5.5 KiB
R
#' Creating data frame from raw log files.
|
|
#'
|
|
#' Creates a data frame or CSV file from raw log files from a
|
|
#' Multi-Touch-Table at the IWM.
|
|
#'
|
|
#' @param folders A character vector of folder names that contain the raw
|
|
#' log files from the Multi-Touch-Table at the IWM.
|
|
#' @param path A path to the folder that contains the folders specified in
|
|
#' first argument. Needs to end in a "/"!
|
|
# TODO: How to catch this?
|
|
#' @param file Name of the file where parsed log files should be saved.
|
|
#' Default is "rawdata_logfiles.csv".
|
|
#' @param save Logical. If data frame should be returned by the function or
|
|
#' saved. Default is TRUE.
|
|
#' @return A data frame or NULL.
|
|
#' @export
|
|
#' @examples
|
|
#' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/")
|
|
parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
|
save = FALSE) {
|
|
|
|
pbapply::pboptions(style = 3, char = "=")
|
|
|
|
# TODO: This is not very intutitive
|
|
fnames <- dir(paste0(path, folders), pattern = "*.log", full.names = TRUE)
|
|
|
|
cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
|
|
suppressWarnings(
|
|
logs <- pbapply::pblapply(fnames, readLines)
|
|
)
|
|
nlog <- sapply(logs, length)
|
|
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
|
|
logs = unlist(logs))
|
|
dat$folder <- rep(sapply(strsplit(fnames, "/"),
|
|
function(x) utils::tail(x, 2)[1]), nlog)
|
|
|
|
# Remove corrupt lines
|
|
d1 <- nrow(dat)
|
|
dat <- subset(dat, dat$logs != "")
|
|
d2 <- nrow(dat)
|
|
|
|
if(d1 > d2) {
|
|
warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n"))
|
|
}
|
|
|
|
# Extract relevant infos
|
|
cat("\n########## Parsing individual data parts... ##########", "\n")
|
|
cat("\n Extract dates...", "\n\n")
|
|
date <- pbapply::pbsapply(dat$logs, gsub,
|
|
pattern = "^\\[(.*)\\], \\[.*$",
|
|
replacement = "\\1",
|
|
USE.NAMES = FALSE)
|
|
|
|
cat("\n Extract timestamps...", "\n\n")
|
|
timestamp <- pbapply::pbsapply(dat$logs, gsub,
|
|
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
|
replacement = "\\1",
|
|
USE.NAMES = FALSE)
|
|
|
|
cat("\n Extract events...", "\n\n")
|
|
action <- pbapply::pbsapply(dat$logs, gsub,
|
|
pattern = "^.*EyeVisit, (.*):*.*$",
|
|
replacement = "\\1",
|
|
USE.NAMES = FALSE)
|
|
|
|
cat("\n Parse separate events...", "\n\n")
|
|
events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1])
|
|
|
|
cat("\n Extract topics...", "\n\n")
|
|
topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2])
|
|
|
|
cat("\n Extract move information...", "\n\n")
|
|
suppressWarnings(
|
|
moves <- pbapply::pbapply(do.call(rbind,
|
|
strsplit(sapply(strsplit(action, ":"),
|
|
function(x) x[3]), ",")), 2,
|
|
as.numeric)
|
|
)
|
|
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
|
|
|
cat("\n Extract popups...", "\n\n")
|
|
card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"),
|
|
function(x) x[3])[grep("Artwork", events)])
|
|
|
|
card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"),
|
|
function(x) x[4]))
|
|
|
|
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
|
|
|
cat("\n Transform timestamps to ms...", "\n\n")
|
|
ts_elements <- strsplit(timestamp, ":")
|
|
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
|
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
|
as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60
|
|
|
|
dat$date <- lubridate::parse_date_time(date, "bdyHMSOp")
|
|
dat$timeMs <- time_ms
|
|
dat$event <- events
|
|
dat$item <- trimws(sapply(strsplit(topics, "/"), function(x) x[1]))
|
|
dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2])
|
|
dat$topic <- card
|
|
dat$x <- moves[,1]
|
|
dat$y <- moves[,2]
|
|
dat$scale <- moves[,3]
|
|
dat$rotation <- moves[,4]
|
|
dat$logs <- NULL
|
|
|
|
dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ]
|
|
|
|
# Export data
|
|
if (save) {
|
|
cat("Saving data...", "\n\n")
|
|
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
|
|
cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n"))
|
|
} else {
|
|
return(dat)
|
|
}
|
|
}
|
|
|
|
#' Left padding file names of raw log files from Multi-Touch-Table at the
|
|
#' IWM.
|
|
#'
|
|
#' File names need to be left padded since otherwise the sorting of the
|
|
#' timestamps will be off and one will get negative durations later on
|
|
#' since the wrong events get closed.
|
|
#'
|
|
#' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible
|
|
#' with missing zero left padding.
|
|
#' @return Left padded file names.
|
|
#' @examples
|
|
#' # folders <- "all"
|
|
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
|
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
|
#' # leftpad_fnames(fnames)
|
|
leftpad_fnames <- function(fnames) {
|
|
|
|
z <- sapply(fnames, function(x) utils::tail(strsplit(x, "/")[[1]], 1),
|
|
USE.NAMES = FALSE)
|
|
ys <- strsplit(z, "_")
|
|
|
|
res <- NULL
|
|
|
|
for (y in ys) {
|
|
y2 <- unlist(strsplit(y[3], "-"))
|
|
e1 <- y[1]
|
|
e2 <- sprintf("%02d", as.numeric(y[2]))
|
|
e3 <- sprintf("%02d", as.numeric(y2[1]))
|
|
e4 <- sprintf("%02d", as.numeric(y2[2]))
|
|
e5 <- sprintf("%02d", as.numeric(y[4]))
|
|
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
|
|
|
res <- c(res,
|
|
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
|
|
}
|
|
res
|
|
}
|
|
|