From 85d3cfc2fa93485f7a346a258d622054c442ae27 Mon Sep 17 00:00:00 2001 From: nwickel Date: Fri, 22 Sep 2023 16:00:57 +0200 Subject: [PATCH] Code now also works for 8o8m; adjusted handling of glossar folder, added folder variable, etc. --- R/add_trace.R | 4 +- R/close_events.R | 25 +++-- R/create_eventlogs.R | 27 +++-- R/create_glossardict.R | 6 +- R/helper.R | 42 +++---- R/parse_logfiles.R | 236 ++++++++++++++++++++-------------------- man/create_eventlogs.Rd | 5 +- man/parse_logfiles.Rd | 2 +- tests/tests.R | 6 + 9 files changed, 181 insertions(+), 172 deletions(-) create mode 100644 tests/tests.R diff --git a/R/add_trace.R b/R/add_trace.R index 43a84b8..4425373 100644 --- a/R/add_trace.R +++ b/R/add_trace.R @@ -98,7 +98,9 @@ add_trace <- function(data, glossar_dict) { subdata2 <- add_trace_artworks(subdata2) - subdata2 <- add_trace_glossar(subdata2, glossar_dict) + if ("glossar" %in% unique(subdata2$artwork)) { + subdata2 <- add_trace_glossar(subdata2, glossar_dict) + } out <- rbind(subdata1, subdata2) out <- out[order(out$fileId, out$date, out$timeMs), ] diff --git a/R/close_events.R b/R/close_events.R index e97f11c..38b4cec 100644 --- a/R/close_events.R +++ b/R/close_events.R @@ -7,29 +7,30 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP switch(event, "move" = { actions <- c("Transform start", "Transform stop") - idvar <- c("fileId", "eventId", "artwork", "glossar") + idvar <- c("fileId", "folder", "eventId", "artwork", "glossar") drop <- c("popup", "topicNumber", "trace", "event") ncol <- 16 }, "flipCard" = { actions <- c("Show Info", "Show Front") - idvar <- c("fileId", "trace", "artwork", "glossar") + idvar <- c("fileId", "folder", "trace", "artwork", "glossar") drop <- c("popup", "topicNumber", "eventId", "event") ncol <- 16 }, "openTopic" = { actions <- c("Artwork/OpenCard", "Artwork/CloseCard") - idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", - "topicNumber") + idvar <- c("fileId", "folder", "eventId", "trace", "glossar", + "artwork", "topicNumber") drop <- c("popup", "event") ncol <- 18 }, "openPopup" = { actions <- c("ShowPopup", "HidePopup") - idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup") + idvar <- c("fileId", "folder", "eventId", "trace", "glossar", + "artwork", "popup") drop <- c("topicNumber", "event") ncol <- 18 # TODO: Should topicNumber maybe also be filled in for "openPopup"? @@ -101,13 +102,13 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP data_wide <- move_wide } - out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar", - "date.start", "date.stop", "timeMs.start", - "timeMs.stop", "duration", "topicNumber", "popup", - "x.start", "y.start", "x.stop", "y.stop", - "distance", "scale.start", "scale.stop", - "scaleSize", "rotation.start", "rotation.stop", - "rotationDegree")] + out <- data_wide[, c("fileId", "folder", "event", "artwork", "trace", + "glossar", "date.start", "date.stop", + "timeMs.start", "timeMs.stop", "duration", + "topicNumber", "popup", "x.start", "y.start", + "x.stop", "y.stop", "distance", "scale.start", + "scale.stop", "scaleSize", "rotation.start", + "rotation.stop", "rotationDegree")] rownames(out) <- NULL out } diff --git a/R/create_eventlogs.R b/R/create_eventlogs.R index c0d570b..72c7578 100644 --- a/R/create_eventlogs.R +++ b/R/create_eventlogs.R @@ -5,12 +5,14 @@ #' #' @param data Data frame of raw log files created with `parse_logfiles()`. #' See `?parse_logfiles` for more details. +#' @param xmlfiles Vector of names of index files, often something like +#' `.xml`. #' @param xmlpath Path to folder where XML definitions of artworks live. #' @return Data frame. #' @export #' @examples #' # tbd -create_eventlogs <- function(data, xmlpath) { +create_eventlogs <- function(data, xmlfiles, xmlpath) { if (!lubridate::is.POSIXt(data$date)){ cat("########## Convertion variable `date` to POSIXct ##########", "\n") @@ -25,16 +27,21 @@ create_eventlogs <- function(data, xmlpath) { # Create glossar dictionary ############################################## cat("\n########## Creating glossar dictionary ##########", "\n") artworks <- unique(stats::na.omit(dat$artwork)) - artworks <- artworks[artworks != "glossar"] - glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup) - glossar_dict <- create_glossardict(artworks, glossar_files, path = xmlpath) + + if ("glossar" %in% artworks) { + artworks <- artworks[artworks != "glossar"] + glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup) + glossar_dict <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath) + } else { + glossar_dict <- NULL + } # Add trace variable ##################################################### cat("\n########## Adding trace variable... ##########", "\n") dat1 <- add_trace(dat, glossar_dict) # Close events - cat("\n########## Closing events... ##########", "\n") + cat("\n\n########## Closing events... ##########", "\n") c1 <- close_events(dat1, "move") cat("## --> move events closed.", "\n") c2 <- close_events(dat1, "flipCard") @@ -63,7 +70,7 @@ create_eventlogs <- function(data, xmlpath) { # Add event ID ########################################################### dat3$eventId <- seq_len(nrow(dat3)) - dat3 <- dat3[, c("fileId", "eventId", "case", + dat3 <- dat3[, c("fileId", "folder", "eventId", "case", "trace", "glossar", "event", "artwork", "date.start", "date.stop", "timeMs.start", "timeMs.stop", "duration", "topicNumber", "popup", @@ -78,13 +85,13 @@ create_eventlogs <- function(data, xmlpath) { # Add topics: file names and topics ###################################### cat("\n########## Adding information about topics... ##########", "\n\n") - artworks <- unique(dat4$artwork) # remove artworks without XML information - artworks <- artworks[!artworks %in% c("504", "505")] - topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"), - path = xmlpath) + #artworks <- artworks[!artworks %in% c("504", "505")] + # TODO: This is hardcoded! Remove it! + topics <- extract_topics(artworks, xmlfiles = xmlfiles, xmlpath = xmlpath) dat5 <- add_topic(dat4, topics = topics) dat5 } + diff --git a/R/create_glossardict.R b/R/create_glossardict.R index e3bec27..e6f2383 100644 --- a/R/create_glossardict.R +++ b/R/create_glossardict.R @@ -1,13 +1,13 @@ -create_glossardict <- function(artworks, glossar_files, path) { +create_glossardict <- function(artworks, glossar_files, xmlpath) { x <- NULL for (glossar_file in glossar_files) { for (artwork in artworks) { - fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(path, artwork)) + fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(xmlpath, artwork)) for (fname in fnames) { suppressWarnings( - lines <- readLines(paste0(path, artwork, "/", fname)) + lines <- readLines(paste0(xmlpath, artwork, "/", fname)) ) if (any(grepl(glossar_file, lines))) { x <- rbind(x, data.frame(glossar_file, artwork)) diff --git a/R/helper.R b/R/helper.R index a436697..999dfff 100644 --- a/R/helper.R +++ b/R/helper.R @@ -93,50 +93,37 @@ add_trace_subdata <- function(subdata, max_trace) { # Create data frame with file names and topics for each artwork -extract_topics <- function(artworks, pattern, path) { +extract_topics <- function(artworks, xmlfiles, xmlpath) { - dat <- NULL - file_order <- NULL + out <- NULL i <- 1 for (artwork in artworks) { - if (length(pattern) == 1) { - index_file <- pattern - } else { - index_file <- pattern[i] - } + index_file <- paste0(xmlpath, artwork, "/", xmlfiles[i]) + suppressWarnings( + fnames <- gsub("^$", "\\1", + grep("^$", "\\1", grep("^$", "\\1", - grep("^ d2) { + warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n")) + } + + # Extract relevant infos + cat("\n########## Parsing individual data parts... ##########", "\n") + cat("\n Extract dates...", "\n\n") + date <- pbapply::pbsapply(dat$logs, gsub, + pattern = "^\\[(.*)\\], \\[.*$", + replacement = "\\1", + USE.NAMES = FALSE) + + cat("\n Extract timestamps...", "\n\n") + timestamp <- pbapply::pbsapply(dat$logs, gsub, + pattern = "^\\[.*\\], \\[(.*)\\].*$", + replacement = "\\1", + USE.NAMES = FALSE) + + cat("\n Extract events...", "\n\n") + action <- pbapply::pbsapply(dat$logs, gsub, + pattern = "^.*EyeVisit, (.*):*.*$", + replacement = "\\1", + USE.NAMES = FALSE) + + cat("\n Parse separate events...", "\n\n") + events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1]) + + cat("\n Extract topics...", "\n\n") + topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2]) + + cat("\n Extract move information...", "\n\n") + suppressWarnings( + moves <- pbapply::pbapply(do.call(rbind, + strsplit(sapply(strsplit(action, ":"), + function(x) x[3]), ",")), 2, + as.numeric) + ) + # ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard" + + cat("\n Extract popups...", "\n\n") + card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"), + function(x) x[3])[grep("Artwork", events)]) + + card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"), + function(x) x[4])) + + events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/") + + cat("\n Transform timestamps to ms...", "\n\n") + ts_elements <- strsplit(timestamp, ":") + time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + + as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 + + as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60 + + dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") + dat$timeMs <- time_ms + dat$event <- events + dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1])) + dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2]) + dat$topicNumber <- card + dat$x <- moves[,1] + dat$y <- moves[,2] + dat$scale <- moves[,3] + dat$rotation <- moves[,4] + dat$logs <- NULL + + dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ] + + # Export data + if (save) { + cat("Saving data...", "\n\n") + utils::write.table(dat, file = file, sep = ";", row.names = FALSE) + cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n")) + } else { + return(dat) + } +} + #' Left padding file names of raw log files from Multi-Touch-Table at the #' IWM. #' @@ -8,7 +127,7 @@ #' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible #' with missing zero left padding. #' @return Left padded file names. -#' @examples +#' @examples #' # folders <- "all" #' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders) #' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) @@ -36,118 +155,3 @@ leftpad_fnames <- function(fnames) { res } -#' Creating data frame from raw log files. -#' -#' Creates a data frame or CSV file from raw log files from a -#' Multi-Touch-Table at the IWM. -#' -#' @param folders A character vector of folder names that contain the raw -#' log files from the Multi-Touch-Table at the IWM. -#' @param path A path to the folder that contains the folders specified in -#' first argument. Needs to end in a "/"! -# TODO: How to catch this? -#' @param file Name of the file where parsed log files should be saved. -#' Default is "rawdata_logfiles.csv". -#' @param save Logical. If data frame should be returned by the function or -#' saved. Default is TRUE. -#' @return A data frame or NULL. -#' @export -#' @examples -#' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/") -parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv", - save = TRUE) { - dirpaths <- paste0(path, folders) - # TODO: This is not very intutitive - fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) - - cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n") - suppressWarnings( - logs <- pbapply::pblapply(fnames, readLines) - ) - nlog <- sapply(logs, length) - dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog), - logs = unlist(logs)) - - # Remove corrupt lines - d1 <- nrow(dat) - dat <- subset(dat, dat$logs != "") - d2 <- nrow(dat) - - if(d1 > d2) { - warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n")) - } - - # Extract relevant infos - cat("\n########## Parsing individual data parts... ##########", "\n") - cat("\n Extract dates...", "\n\n") - date <- pbapply::pbsapply(dat$logs, gsub, - pattern = "^\\[(.*)\\], \\[.*$", - replacement = "\\1", - USE.NAMES = FALSE) - - cat("\n Extract timestamps...", "\n\n") - timestamp <- pbapply::pbsapply(dat$logs, gsub, - pattern = "^\\[.*\\], \\[(.*)\\].*$", - replacement = "\\1", - USE.NAMES = FALSE) - - cat("\n Extract events...", "\n\n") - action <- pbapply::pbsapply(dat$logs, gsub, - pattern = "^.*EyeVisit, (.*):*.*$", - replacement = "\\1", - USE.NAMES = FALSE) - - cat("\n Parse separate events...", "\n\n") - events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1]) - - cat("\n Extract topics...", "\n\n") - topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2]) - - cat("\n Extract move information...", "\n\n") - suppressWarnings( - moves <- pbapply::pbapply(do.call(rbind, - strsplit(sapply(strsplit(action, ":"), - function(x) x[3]), ",")), 2, - as.numeric) - ) - # ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard" - - cat("\n Extract popups...", "\n\n") - card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"), - function(x) x[3])[grep("Artwork", events)]) - - card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"), - function(x) x[4])) - - events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/") - - cat("\n Transform timestamps to ms...", "\n\n") - ts_elements <- strsplit(timestamp, ":") - time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + - as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 + - as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60 - - dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") - dat$timeMs <- time_ms - dat$event <- events - dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1])) - dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2]) - dat$topicNumber <- card - dat$x <- moves[,1] - dat$y <- moves[,2] - dat$scale <- moves[,3] - dat$rotation <- moves[,4] - dat$logs <- NULL - - dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ] - - # Export data - if (save) { - cat("Saving data...", "\n\n") - utils::write.table(dat, file = file, sep = ";", row.names = FALSE) - cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n")) - } else { - return(dat) - } -} - diff --git a/man/create_eventlogs.Rd b/man/create_eventlogs.Rd index 7bd1f55..de35622 100644 --- a/man/create_eventlogs.Rd +++ b/man/create_eventlogs.Rd @@ -4,12 +4,15 @@ \alias{create_eventlogs} \title{Creating log events from raw log files.} \usage{ -create_eventlogs(data, xmlpath) +create_eventlogs(data, xmlfiles, xmlpath) } \arguments{ \item{data}{Data frame of raw log files created with \code{parse_logfiles()}. See \code{?parse_logfiles} for more details.} +\item{xmlfiles}{Vector of names of index files, often something like +\verb{.xml}.} + \item{xmlpath}{Path to folder where XML definitions of artworks live.} } \value{ diff --git a/man/parse_logfiles.Rd b/man/parse_logfiles.Rd index 4e735bb..6446ed1 100644 --- a/man/parse_logfiles.Rd +++ b/man/parse_logfiles.Rd @@ -4,7 +4,7 @@ \alias{parse_logfiles} \title{Creating data frame from raw log files.} \usage{ -parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = TRUE) +parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = FALSE) } \arguments{ \item{folders}{A character vector of folder names that contain the raw diff --git a/tests/tests.R b/tests/tests.R new file mode 100644 index 0000000..e35fd9b --- /dev/null +++ b/tests/tests.R @@ -0,0 +1,6 @@ +# Check if traces start with `move` or `flipCard` + + + + +