# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") ###### HELPER ###### # Need to left pad file names. If I do not do this, the sorting of the # timestamps will be off and I get negative durations later on since the # wrong events get closed. leftpad_fnames <- function(x) { z <- gsub(paste0(dirpaths, "/"), "\\1", x) ys <- strsplit(z, "_") res <- NULL for (y in ys) { y2 <- unlist(strsplit(y[3], "-")) e1 <- y[1] e2 <- sprintf("%02d", as.numeric(y[2])) e3 <- sprintf("%02d", as.numeric(y2[1])) e4 <- sprintf("%02d", as.numeric(y2[2])) e5 <- sprintf("%02d", as.numeric(y[4])) e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) res <- c(res, paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log")) } res } ##### CONTENT ###### # Choose which folders with raw log files should be included folders <- "all" #folders <- "_2016b" dirpaths <- paste0("../data/haum_logs_2016-2023/", folders) fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) length(fnames) head(fnames) logs <- lapply(fnames, readLines) nlog <- sapply(logs, length) dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog), logs = unlist(logs)) head(dat$logs) # Remove corrupted lines # corrupt lines are "" and need to be removed d1 <- dim(dat)[1] dat <- subset(dat, dat$logs != "") d2 <- dim(dat)[1] # TODO: Catch this in a function and give back a meaningful warning # The files contain `r d1-d2` corrupt lines that were removed from the # data. # Extract relevant infos date <- sapply(dat$logs, gsub, pattern = "^\\[(.*)\\], \\[.*$", replacement = "\\1", USE.NAMES = FALSE) timestamp <- sapply(dat$logs, gsub, pattern = "^\\[.*\\], \\[(.*)\\].*$", replacement = "\\1", USE.NAMES = FALSE) action <- sapply(dat$logs, gsub, pattern = "^.*EyeVisit, (.*):*.*$", replacement = "\\1", USE.NAMES = FALSE) events <- sapply(strsplit(action, ":"), function(x) x[1]) topics <- sapply(strsplit(action, ":"), function(x) x[2]) moves <- apply(do.call(rbind, strsplit(sapply(strsplit(action, ":"), function(x) x[3]), ",")), 2, as.numeric) # ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard" card_action <- trimws(sapply(strsplit(action, ":"), function(x) x[3])[grep("Artwork", events)]) card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4])) events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/") ts_elements <- strsplit(timestamp, ":") time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 + as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60 dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") dat$timeMs <- time_ms dat$event <- events dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1])) dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2]) dat$topicNumber <- card dat$x <- moves[,1] dat$y <- moves[,2] dat$scale <- moves[,3] dat$rotation <- moves[,4] dat$logs <- NULL # remove original log files from data so file becomes smaller # sort by fileId, since reading in by file names does not make sense # because of missing left zero padding dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ] # Export data write.table(dat, "../data/rawdata_logfiles.csv", sep = ";", quote = FALSE, row.names = FALSE)