#' Creating data frame from raw log files. #' #' Creates a data frame or CSV file from raw log files from a #' Multi-Touch-Table at the IWM. #' #' @param folders A character vector of folder names that contain the raw #' log files from the Multi-Touch-Table at the IWM. #' @param path A path to the folder that contains the folders specified in #' first argument. Needs to end in a "/"! # TODO: How to catch this? #' @param file Name of the file where parsed log files should be saved. #' Default is "rawdata_logfiles.csv". #' @param save Logical. If data frame should be returned by the function or #' saved. Default is TRUE. #' @return A data frame or NULL. #' @export #' @examples #' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/") parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv", save = FALSE) { pbapply::pboptions(style = 3, char = "=") # TODO: This is not very intutitive fnames <- dir(paste0(path, folders), pattern = "*.log", full.names = TRUE) cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n") suppressWarnings( logs <- pbapply::pblapply(fnames, readLines) ) nlog <- sapply(logs, length) dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog), logs = unlist(logs)) dat$folder <- rep(sapply(strsplit(fnames, "/"), function(x) utils::tail(x, 2)[1]), nlog) # Remove corrupt lines d1 <- nrow(dat) dat <- subset(dat, dat$logs != "") d2 <- nrow(dat) if(d1 > d2) { warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n")) } # Extract relevant infos cat("\n########## Parsing individual data parts... ##########", "\n") cat("\n Extract dates...", "\n\n") date <- pbapply::pbsapply(dat$logs, gsub, pattern = "^\\[(.*)\\], \\[.*$", replacement = "\\1", USE.NAMES = FALSE) cat("\n Extract timestamps...", "\n\n") timestamp <- pbapply::pbsapply(dat$logs, gsub, pattern = "^\\[.*\\], \\[(.*)\\].*$", replacement = "\\1", USE.NAMES = FALSE) cat("\n Extract events...", "\n\n") action <- pbapply::pbsapply(dat$logs, gsub, pattern = "^.*EyeVisit, (.*):*.*$", replacement = "\\1", USE.NAMES = FALSE) cat("\n Parse separate events...", "\n\n") events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1]) cat("\n Extract topics...", "\n\n") topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2]) cat("\n Extract move information...", "\n\n") suppressWarnings( moves <- pbapply::pbapply(do.call(rbind, strsplit(sapply(strsplit(action, ":"), function(x) x[3]), ",")), 2, as.numeric) ) # ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard" cat("\n Extract popups...", "\n\n") card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"), function(x) x[3])[grep("Artwork", events)]) card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"), function(x) x[4])) events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/") cat("\n Transform timestamps to ms...", "\n\n") ts_elements <- strsplit(timestamp, ":") time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 + as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60 dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") dat$timeMs <- time_ms dat$event <- events dat$item <- trimws(sapply(strsplit(topics, "/"), function(x) x[1])) dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2]) dat$topic <- card dat$x <- moves[,1] dat$y <- moves[,2] dat$scale <- moves[,3] dat$rotation <- moves[,4] dat$logs <- NULL dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ] # Export data if (save) { cat("Saving data...", "\n\n") utils::write.table(dat, file = file, sep = ";", row.names = FALSE) cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n")) } else { return(dat) } } #' Left padding file names of raw log files from Multi-Touch-Table at the #' IWM. #' #' File names need to be left padded since otherwise the sorting of the #' timestamps will be off and one will get negative durations later on #' since the wrong events get closed. #' #' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible #' with missing zero left padding. #' @return Left padded file names. #' @examples #' # folders <- "all" #' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders) #' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) #' # leftpad_fnames(fnames) leftpad_fnames <- function(fnames) { z <- sapply(fnames, function(x) utils::tail(strsplit(x, "/")[[1]], 1), USE.NAMES = FALSE) ys <- strsplit(z, "_") res <- NULL for (y in ys) { y2 <- unlist(strsplit(y[3], "-")) e1 <- y[1] e2 <- sprintf("%02d", as.numeric(y[2])) e3 <- sprintf("%02d", as.numeric(y2[1])) e4 <- sprintf("%02d", as.numeric(y2[2])) e5 <- sprintf("%02d", as.numeric(y[4])) e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) res <- c(res, paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log")) } res }