Code now also works for 8o8m; adjusted handling of glossar folder, added folder variable, etc.

This commit is contained in:
Nora Wickelmaier 2023-09-22 16:00:57 +02:00
parent 7f6e967f7c
commit 85d3cfc2fa
9 changed files with 181 additions and 172 deletions

View File

@ -98,7 +98,9 @@ add_trace <- function(data, glossar_dict) {
subdata2 <- add_trace_artworks(subdata2) subdata2 <- add_trace_artworks(subdata2)
if ("glossar" %in% unique(subdata2$artwork)) {
subdata2 <- add_trace_glossar(subdata2, glossar_dict) subdata2 <- add_trace_glossar(subdata2, glossar_dict)
}
out <- rbind(subdata1, subdata2) out <- rbind(subdata1, subdata2)
out <- out[order(out$fileId, out$date, out$timeMs), ] out <- out[order(out$fileId, out$date, out$timeMs), ]

View File

@ -7,29 +7,30 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
switch(event, switch(event,
"move" = { "move" = {
actions <- c("Transform start", "Transform stop") actions <- c("Transform start", "Transform stop")
idvar <- c("fileId", "eventId", "artwork", "glossar") idvar <- c("fileId", "folder", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "trace", "event") drop <- c("popup", "topicNumber", "trace", "event")
ncol <- 16 ncol <- 16
}, },
"flipCard" = { "flipCard" = {
actions <- c("Show Info", "Show Front") actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "trace", "artwork", "glossar") idvar <- c("fileId", "folder", "trace", "artwork", "glossar")
drop <- c("popup", "topicNumber", "eventId", "event") drop <- c("popup", "topicNumber", "eventId", "event")
ncol <- 16 ncol <- 16
}, },
"openTopic" = { "openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard") actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", idvar <- c("fileId", "folder", "eventId", "trace", "glossar",
"topicNumber") "artwork", "topicNumber")
drop <- c("popup", "event") drop <- c("popup", "event")
ncol <- 18 ncol <- 18
}, },
"openPopup" = { "openPopup" = {
actions <- c("ShowPopup", "HidePopup") actions <- c("ShowPopup", "HidePopup")
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup") idvar <- c("fileId", "folder", "eventId", "trace", "glossar",
"artwork", "popup")
drop <- c("topicNumber", "event") drop <- c("topicNumber", "event")
ncol <- 18 ncol <- 18
# TODO: Should topicNumber maybe also be filled in for "openPopup"? # TODO: Should topicNumber maybe also be filled in for "openPopup"?
@ -101,13 +102,13 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide <- move_wide data_wide <- move_wide
} }
out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar", out <- data_wide[, c("fileId", "folder", "event", "artwork", "trace",
"date.start", "date.stop", "timeMs.start", "glossar", "date.start", "date.stop",
"timeMs.stop", "duration", "topicNumber", "popup", "timeMs.start", "timeMs.stop", "duration",
"x.start", "y.start", "x.stop", "y.stop", "topicNumber", "popup", "x.start", "y.start",
"distance", "scale.start", "scale.stop", "x.stop", "y.stop", "distance", "scale.start",
"scaleSize", "rotation.start", "rotation.stop", "scale.stop", "scaleSize", "rotation.start",
"rotationDegree")] "rotation.stop", "rotationDegree")]
rownames(out) <- NULL rownames(out) <- NULL
out out
} }

View File

@ -5,12 +5,14 @@
#' #'
#' @param data Data frame of raw log files created with `parse_logfiles()`. #' @param data Data frame of raw log files created with `parse_logfiles()`.
#' See `?parse_logfiles` for more details. #' See `?parse_logfiles` for more details.
#' @param xmlfiles Vector of names of index files, often something like
#' `<artwork>.xml`.
#' @param xmlpath Path to folder where XML definitions of artworks live. #' @param xmlpath Path to folder where XML definitions of artworks live.
#' @return Data frame. #' @return Data frame.
#' @export #' @export
#' @examples #' @examples
#' # tbd #' # tbd
create_eventlogs <- function(data, xmlpath) { create_eventlogs <- function(data, xmlfiles, xmlpath) {
if (!lubridate::is.POSIXt(data$date)){ if (!lubridate::is.POSIXt(data$date)){
cat("########## Convertion variable `date` to POSIXct ##########", "\n") cat("########## Convertion variable `date` to POSIXct ##########", "\n")
@ -25,16 +27,21 @@ create_eventlogs <- function(data, xmlpath) {
# Create glossar dictionary ############################################## # Create glossar dictionary ##############################################
cat("\n########## Creating glossar dictionary ##########", "\n") cat("\n########## Creating glossar dictionary ##########", "\n")
artworks <- unique(stats::na.omit(dat$artwork)) artworks <- unique(stats::na.omit(dat$artwork))
if ("glossar" %in% artworks) {
artworks <- artworks[artworks != "glossar"] artworks <- artworks[artworks != "glossar"]
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup) glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
glossar_dict <- create_glossardict(artworks, glossar_files, path = xmlpath) glossar_dict <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath)
} else {
glossar_dict <- NULL
}
# Add trace variable ##################################################### # Add trace variable #####################################################
cat("\n########## Adding trace variable... ##########", "\n") cat("\n########## Adding trace variable... ##########", "\n")
dat1 <- add_trace(dat, glossar_dict) dat1 <- add_trace(dat, glossar_dict)
# Close events # Close events
cat("\n########## Closing events... ##########", "\n") cat("\n\n########## Closing events... ##########", "\n")
c1 <- close_events(dat1, "move") c1 <- close_events(dat1, "move")
cat("## --> move events closed.", "\n") cat("## --> move events closed.", "\n")
c2 <- close_events(dat1, "flipCard") c2 <- close_events(dat1, "flipCard")
@ -63,7 +70,7 @@ create_eventlogs <- function(data, xmlpath) {
# Add event ID ########################################################### # Add event ID ###########################################################
dat3$eventId <- seq_len(nrow(dat3)) dat3$eventId <- seq_len(nrow(dat3))
dat3 <- dat3[, c("fileId", "eventId", "case", dat3 <- dat3[, c("fileId", "folder", "eventId", "case",
"trace", "glossar", "event", "artwork", "trace", "glossar", "event", "artwork",
"date.start", "date.stop", "timeMs.start", "date.start", "date.stop", "timeMs.start",
"timeMs.stop", "duration", "topicNumber", "popup", "timeMs.stop", "duration", "topicNumber", "popup",
@ -78,13 +85,13 @@ create_eventlogs <- function(data, xmlpath) {
# Add topics: file names and topics ###################################### # Add topics: file names and topics ######################################
cat("\n########## Adding information about topics... ##########", "\n\n") cat("\n########## Adding information about topics... ##########", "\n\n")
artworks <- unique(dat4$artwork)
# remove artworks without XML information # remove artworks without XML information
artworks <- artworks[!artworks %in% c("504", "505")] #artworks <- artworks[!artworks %in% c("504", "505")]
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"), # TODO: This is hardcoded! Remove it!
path = xmlpath) topics <- extract_topics(artworks, xmlfiles = xmlfiles, xmlpath = xmlpath)
dat5 <- add_topic(dat4, topics = topics) dat5 <- add_topic(dat4, topics = topics)
dat5 dat5
} }

View File

@ -1,13 +1,13 @@
create_glossardict <- function(artworks, glossar_files, path) { create_glossardict <- function(artworks, glossar_files, xmlpath) {
x <- NULL x <- NULL
for (glossar_file in glossar_files) { for (glossar_file in glossar_files) {
for (artwork in artworks) { for (artwork in artworks) {
fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(path, artwork)) fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(xmlpath, artwork))
for (fname in fnames) { for (fname in fnames) {
suppressWarnings( suppressWarnings(
lines <- readLines(paste0(path, artwork, "/", fname)) lines <- readLines(paste0(xmlpath, artwork, "/", fname))
) )
if (any(grepl(glossar_file, lines))) { if (any(grepl(glossar_file, lines))) {
x <- rbind(x, data.frame(glossar_file, artwork)) x <- rbind(x, data.frame(glossar_file, artwork))

View File

@ -93,50 +93,37 @@ add_trace_subdata <- function(subdata, max_trace) {
# Create data frame with file names and topics for each artwork # Create data frame with file names and topics for each artwork
extract_topics <- function(artworks, pattern, path) { extract_topics <- function(artworks, xmlfiles, xmlpath) {
dat <- NULL out <- NULL
file_order <- NULL
i <- 1 i <- 1
for (artwork in artworks) { for (artwork in artworks) {
if (length(pattern) == 1) { index_file <- paste0(xmlpath, artwork, "/", xmlfiles[i])
index_file <- pattern suppressWarnings(
} else { fnames <- gsub("^<card src=.*/(.*)./>$", "\\1",
index_file <- pattern[i] grep("^<card src=", trimws(readLines(index_file)),
} value = TRUE))
)
fnames <- dir(pattern = paste0(artwork, "_"),
path = paste(path, artwork, sep = "/"))
topic <- NULL topic <- NULL
for (fname in fnames) { for (fname in fnames) {
suppressWarnings( suppressWarnings(
topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1", topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
grep("^<card type=", grep("^<card type=",
trimws(readLines(paste(path, artwork, fname, sep = "/"))), trimws(readLines(paste(xmlpath, artwork, fname, sep = "/"))),
value = T))) value = T)))
) )
} }
index <- paste(path, artwork, index_file, sep = "/") out <- rbind(out, data.frame(artwork, file_name = fnames, topic))
suppressWarnings(
file_order <- c(file_order, gsub("^<card src=.*/(.*)./>$", "\\1",
grep("^<card src=", trimws(readLines(index)),
value = TRUE)))
)
in_index <- fnames %in% file_order
dat <- rbind(dat, data.frame(artwork, file_name = fnames, in_index, topic))
i <- i + 1 i <- i + 1
} }
# take only the ones that are actually displayed and sort in the same order out <- out[order(out$artwork), ]
# as indicated in index.html
out <- dat[dat$in_index, -3]
out <- out[order(file_order, out$file_name), ]
rownames(out) <- NULL rownames(out) <- NULL
out$index <- unlist(sapply(table(out$artwork), seq_len)) out$index <- unlist(lapply(table(out$artwork), seq_len))
out out
} }
@ -188,8 +175,7 @@ add_topic <- function(data, topics) {
# Create data frame with information on artworks # Create data frame with information on artworks
extract_artworks <- function(artworks, files = paste0(artworks, ".xml"), extract_artworks <- function(artworks, files, xmlpath) {
path = path) {
out <- NULL out <- NULL
i <- 1 i <- 1
@ -201,7 +187,7 @@ extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
index_file <- files[i] index_file <- files[i]
} }
index <- paste(path, artwork, index_file, sep = "/") index <- paste(xmlpath, artwork, index_file, sep = "/")
varnames <- c("artist", "title", "misc", "description") varnames <- c("artist", "title", "misc", "description")
xmllist <- XML::xmlToList(index)$header[varnames] xmllist <- XML::xmlToList(index)$header[varnames]

View File

@ -1,41 +1,3 @@
#' Left padding file names of raw log files from Multi-Touch-Table at the
#' IWM.
#'
#' File names need to be left padded since otherwise the sorting of the
#' timestamps will be off and one will get negative durations later on
#' since the wrong events get closed.
#'
#' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible
#' with missing zero left padding.
#' @return Left padded file names.
#' @examples
#' # folders <- "all"
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
#' # leftpad_fnames(fnames)
leftpad_fnames <- function(fnames) {
z <- sapply(fnames, function(x) utils::tail(strsplit(x, "/")[[1]], 1),
USE.NAMES = FALSE)
ys <- strsplit(z, "_")
res <- NULL
for (y in ys) {
y2 <- unlist(strsplit(y[3], "-"))
e1 <- y[1]
e2 <- sprintf("%02d", as.numeric(y[2]))
e3 <- sprintf("%02d", as.numeric(y2[1]))
e4 <- sprintf("%02d", as.numeric(y2[2]))
e5 <- sprintf("%02d", as.numeric(y[4]))
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
res <- c(res,
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
}
res
}
#' Creating data frame from raw log files. #' Creating data frame from raw log files.
#' #'
#' Creates a data frame or CSV file from raw log files from a #' Creates a data frame or CSV file from raw log files from a
@ -55,10 +17,12 @@ leftpad_fnames <- function(fnames) {
#' @examples #' @examples
#' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/") #' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/")
parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv", parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
save = TRUE) { save = FALSE) {
dirpaths <- paste0(path, folders)
pbapply::pboptions(style = 3, char = "=")
# TODO: This is not very intutitive # TODO: This is not very intutitive
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) fnames <- dir(paste0(path, folders), pattern = "*.log", full.names = TRUE)
cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n") cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
suppressWarnings( suppressWarnings(
@ -67,6 +31,8 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
nlog <- sapply(logs, length) nlog <- sapply(logs, length)
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog), dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
logs = unlist(logs)) logs = unlist(logs))
dat$folder <- rep(sapply(strsplit(fnames, "/"),
function(x) utils::tail(x, 2)[1]), nlog)
# Remove corrupt lines # Remove corrupt lines
d1 <- nrow(dat) d1 <- nrow(dat)
@ -151,3 +117,41 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
} }
} }
#' Left padding file names of raw log files from Multi-Touch-Table at the
#' IWM.
#'
#' File names need to be left padded since otherwise the sorting of the
#' timestamps will be off and one will get negative durations later on
#' since the wrong events get closed.
#'
#' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible
#' with missing zero left padding.
#' @return Left padded file names.
#' @examples
#' # folders <- "all"
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
#' # leftpad_fnames(fnames)
leftpad_fnames <- function(fnames) {
z <- sapply(fnames, function(x) utils::tail(strsplit(x, "/")[[1]], 1),
USE.NAMES = FALSE)
ys <- strsplit(z, "_")
res <- NULL
for (y in ys) {
y2 <- unlist(strsplit(y[3], "-"))
e1 <- y[1]
e2 <- sprintf("%02d", as.numeric(y[2]))
e3 <- sprintf("%02d", as.numeric(y2[1]))
e4 <- sprintf("%02d", as.numeric(y2[2]))
e5 <- sprintf("%02d", as.numeric(y[4]))
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
res <- c(res,
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
}
res
}

View File

@ -4,12 +4,15 @@
\alias{create_eventlogs} \alias{create_eventlogs}
\title{Creating log events from raw log files.} \title{Creating log events from raw log files.}
\usage{ \usage{
create_eventlogs(data, xmlpath) create_eventlogs(data, xmlfiles, xmlpath)
} }
\arguments{ \arguments{
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}. \item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
See \code{?parse_logfiles} for more details.} See \code{?parse_logfiles} for more details.}
\item{xmlfiles}{Vector of names of index files, often something like
\verb{<artwork>.xml}.}
\item{xmlpath}{Path to folder where XML definitions of artworks live.} \item{xmlpath}{Path to folder where XML definitions of artworks live.}
} }
\value{ \value{

View File

@ -4,7 +4,7 @@
\alias{parse_logfiles} \alias{parse_logfiles}
\title{Creating data frame from raw log files.} \title{Creating data frame from raw log files.}
\usage{ \usage{
parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = TRUE) parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = FALSE)
} }
\arguments{ \arguments{
\item{folders}{A character vector of folder names that contain the raw \item{folders}{A character vector of folder names that contain the raw

6
tests/tests.R Normal file
View File

@ -0,0 +1,6 @@
# Check if traces start with `move` or `flipCard`