Created R package mtt
This commit is contained in:
commit
17e8e39cc3
12
DESCRIPTION
Normal file
12
DESCRIPTION
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Package: mtt
|
||||||
|
Title: Log files from a Multi-Touch-Table
|
||||||
|
Version: 0.0.0.9000
|
||||||
|
Authors@R:
|
||||||
|
person("Nora", "Wickelmaier", , "n.wickelmaier@iwm-tuebingen.de", role = c("aut", "cre"))
|
||||||
|
Imports: stats, utils, dplyr, pbapply, XML, lubridate
|
||||||
|
Description: Creating event logs from raw log files from a Multi-Touch-Table at the IWM
|
||||||
|
License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
|
||||||
|
license
|
||||||
|
Encoding: UTF-8
|
||||||
|
Roxygen: list(markdown = TRUE)
|
||||||
|
RoxygenNote: 7.2.3
|
4
NAMESPACE
Normal file
4
NAMESPACE
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
export(create_eventlogs)
|
||||||
|
export(parse_logfiles)
|
79
R/create_eventlogs.R
Normal file
79
R/create_eventlogs.R
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
#' Creating log events from raw log files.
|
||||||
|
#'
|
||||||
|
#' Creating event logs from a data frame of raw log files from a
|
||||||
|
#' Multi-Touch-Table at the IWM.
|
||||||
|
#'
|
||||||
|
#' @param data Data frame of raw log files created with `parse_logfiles()`.
|
||||||
|
#' See `?parse_logfiles` for more details.
|
||||||
|
#' @return Data frame.
|
||||||
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' # tbd
|
||||||
|
create_eventlogs <- function(data) {
|
||||||
|
|
||||||
|
data$date <- as.POSIXct(data$date)
|
||||||
|
data$glossar <- ifelse(data$artwork == "glossar", 1, 0)
|
||||||
|
|
||||||
|
# Remove irrelevant events
|
||||||
|
dat <- subset(data, !(data$event %in% c("Start Application",
|
||||||
|
"Show Application")))
|
||||||
|
|
||||||
|
# Add trace variable #####################################################
|
||||||
|
cat("########## Adding trace variable... ##########", "\n")
|
||||||
|
dat1 <- add_trace(dat)
|
||||||
|
|
||||||
|
# Close events
|
||||||
|
cat("########## Closing events... ##########", "\n")
|
||||||
|
c1 <- close_events(dat1, "move")
|
||||||
|
cat("## --> move events closed.", "\n")
|
||||||
|
c2 <- close_events(dat1, "flipCard")
|
||||||
|
cat("## --> flipCard events closed.", "\n")
|
||||||
|
c3 <- close_events(dat1, "openTopic")
|
||||||
|
cat("## --> openTopic events closed.", "\n")
|
||||||
|
c4 <- close_events(dat1, "openPopup")
|
||||||
|
cat("## --> openPopup events closed.", "\n")
|
||||||
|
dat2 <- rbind(c1, c2, c3, c4)
|
||||||
|
|
||||||
|
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
|
||||||
|
|
||||||
|
# Remove all events that do not have a `date.start`
|
||||||
|
d1 <- nrow(dat2)
|
||||||
|
dat2 <- dat2[!is.na(dat2$date.start), ]
|
||||||
|
d2 <- nrow(dat2)
|
||||||
|
if(d1 > d2) {
|
||||||
|
warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
rownames(dat2) <- NULL
|
||||||
|
|
||||||
|
# Add case variable ######################################################
|
||||||
|
cat("########## Adding case and eventId variables... ##########", "\n")
|
||||||
|
dat3 <- add_case(dat2)
|
||||||
|
|
||||||
|
# Add event ID ###########################################################
|
||||||
|
dat3$eventId <- seq_len(nrow(dat3))
|
||||||
|
dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
|
||||||
|
"trace", "glossar", "event", "artwork",
|
||||||
|
"date.start", "date.stop", "timeMs.start",
|
||||||
|
"timeMs.stop", "duration", "topicNumber", "popup",
|
||||||
|
"x.start", "y.start", "x.stop", "y.stop",
|
||||||
|
"distance", "scale.start", "scale.stop",
|
||||||
|
"scaleSize", "rotation.start", "rotation.stop",
|
||||||
|
"rotationDegree")]
|
||||||
|
|
||||||
|
# Add trace for move events ##############################################
|
||||||
|
cat("\n########## Adding trace variable for move events... ##########", "\n")
|
||||||
|
dat4 <- add_trace_moves(dat3)
|
||||||
|
|
||||||
|
# Add topics: file names and topics ######################################
|
||||||
|
cat("########## Adding information about topics... ##########", "\n")
|
||||||
|
artworks <- unique(dat4$artwork)
|
||||||
|
# remove artworks without XML information
|
||||||
|
artworks <- artworks[!artworks %in% c("504", "505")]
|
||||||
|
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
|
||||||
|
path = "../data/ContentEyevisit/eyevisit_cards_light/")
|
||||||
|
|
||||||
|
dat5 <- add_topic(dat4, topics = topics)
|
||||||
|
dat5
|
||||||
|
}
|
||||||
|
|
458
R/helper.R
Normal file
458
R/helper.R
Normal file
@ -0,0 +1,458 @@
|
|||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Add trace variable
|
||||||
|
|
||||||
|
add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
|
||||||
|
|
||||||
|
data$trace <- NA
|
||||||
|
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
|
||||||
|
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
|
||||||
|
|
||||||
|
last_event <- subdata2$event[1]
|
||||||
|
artworks <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"]
|
||||||
|
n <- 1 # count artworks for progress
|
||||||
|
|
||||||
|
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata2), initial = NA,
|
||||||
|
style = 3)
|
||||||
|
|
||||||
|
for (artwork in artworks) {
|
||||||
|
|
||||||
|
cat("\n\nAdding trace variable for artwork", artwork,
|
||||||
|
paste0("(", n, "/", length(artworks), ")"), "\n")
|
||||||
|
|
||||||
|
for (i in 1:nrow(subdata2)) {
|
||||||
|
|
||||||
|
if (last_event == "Show Info" & subdata2$artwork[i] == artwork) {
|
||||||
|
subdata2$trace[i] <- i
|
||||||
|
j <- i
|
||||||
|
|
||||||
|
} else if (last_event == "Show Front" & subdata2$artwork[i] == artwork) {
|
||||||
|
subdata2$trace[i] <- j
|
||||||
|
|
||||||
|
} else if (!(last_event %in% c("Show Info", "Show Front")) &
|
||||||
|
subdata2$artwork[i] == artwork) {
|
||||||
|
subdata2$trace[i] <- j
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i <= nrow(subdata2)) {
|
||||||
|
last_event <- subdata2$event[i + 1]
|
||||||
|
}
|
||||||
|
utils::setTxtProgressBar(pb, i)
|
||||||
|
}
|
||||||
|
n <- n + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fix glossar entries (find corresponding artworks and fill in trace)
|
||||||
|
glossar_files <- unique(subdata2[subdata2$artwork == "glossar", "popup"])
|
||||||
|
|
||||||
|
# load lookup table for artworks and glossar files
|
||||||
|
load(glossar_dict)
|
||||||
|
lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
|
||||||
|
|
||||||
|
inside <- glossar_files[glossar_files %in%
|
||||||
|
lut[sapply(lut$artwork, length) == 1,
|
||||||
|
"glossar_file"]]
|
||||||
|
single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"])
|
||||||
|
|
||||||
|
m <- 1
|
||||||
|
|
||||||
|
for (file in lut$glossar_file) {
|
||||||
|
|
||||||
|
cat("\n\nAdding trace variable for glossar entry", file,
|
||||||
|
paste0("(", m, "/", length(lut$glossar_file), ")"), "\n")
|
||||||
|
|
||||||
|
artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
||||||
|
|
||||||
|
for (i in seq_len(nrow(subdata2))) {
|
||||||
|
|
||||||
|
if (subdata2$event[i] == "Show Info" |
|
||||||
|
(subdata2$event[i] == "Artwork/OpenCard" &
|
||||||
|
subdata2$artwork[i] %in% single_art)) {
|
||||||
|
|
||||||
|
current_artwork <- subdata2[i, "artwork"]
|
||||||
|
j <- i
|
||||||
|
k <- i
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
current_artwork <- current_artwork
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (subdata2$event[i] == "Show Front" & subdata2$artwork[i] == current_artwork) {
|
||||||
|
# make sure artwork has not been closed, yet!
|
||||||
|
k <- i
|
||||||
|
}
|
||||||
|
|
||||||
|
if (subdata2$artwork[i] == "glossar" &
|
||||||
|
(current_artwork %in% artwork_list) &
|
||||||
|
subdata2$popup[i] == file & (j - k == 0)) {
|
||||||
|
|
||||||
|
subdata2[i, "trace"] <- subdata2[j, "trace"]
|
||||||
|
subdata2[i, "artwork"] <- current_artwork
|
||||||
|
|
||||||
|
}
|
||||||
|
utils::setTxtProgressBar(pb, i)
|
||||||
|
}
|
||||||
|
m <- m + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Exclude not matched glossar entries
|
||||||
|
cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
|
||||||
|
sum(is.na(subdata2[subdata2$glossar == 1, "trace"])), "entries",
|
||||||
|
#proportions(table(is.na(subdata2[subdata2$glossar == 1, "trace"]))),
|
||||||
|
fill = TRUE)
|
||||||
|
subdata2 <- subset(subdata2, !is.na(subdata2$trace))
|
||||||
|
# REMEMBER: It can never be 100% correct, since it is always possible
|
||||||
|
# that several cards are open and that they link to the same glossar
|
||||||
|
# entry
|
||||||
|
|
||||||
|
# dat2[14110:14130, ]
|
||||||
|
# dat2[dat2$glossar == 1, ]
|
||||||
|
|
||||||
|
out <- rbind(subdata1, subdata2)
|
||||||
|
out <- out[order(out$fileId, out$date, out$timeMs), ]
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
|
||||||
|
|
||||||
|
event <- match.arg(event)
|
||||||
|
|
||||||
|
switch(event,
|
||||||
|
"move" = {
|
||||||
|
actions <- c("Transform start", "Transform stop")
|
||||||
|
idvar <- c("fileId", "eventId", "artwork", "glossar")
|
||||||
|
drop <- c("popup", "topicNumber", "trace", "event")
|
||||||
|
ncol <- 16
|
||||||
|
|
||||||
|
},
|
||||||
|
"flipCard" = {
|
||||||
|
actions <- c("Show Info", "Show Front")
|
||||||
|
idvar <- c("fileId", "trace", "artwork", "glossar")
|
||||||
|
drop <- c("popup", "topicNumber", "eventId", "event")
|
||||||
|
ncol <- 16
|
||||||
|
|
||||||
|
},
|
||||||
|
"openTopic" = {
|
||||||
|
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
|
||||||
|
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork",
|
||||||
|
"topicNumber")
|
||||||
|
drop <- c("popup", "event")
|
||||||
|
ncol <- 18
|
||||||
|
|
||||||
|
},
|
||||||
|
"openPopup" = {
|
||||||
|
actions <- c("ShowPopup", "HidePopup")
|
||||||
|
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
|
||||||
|
drop <- c("topicNumber", "event")
|
||||||
|
ncol <- 18
|
||||||
|
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
|
||||||
|
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
subdata <- subset(data, data$event %in% actions)
|
||||||
|
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
|
||||||
|
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
|
||||||
|
num_start <- diff(c(0, which(subdata$event == actions[2])))
|
||||||
|
if (utils::tail(subdata, 1)$time == "start") {
|
||||||
|
num_start <- c(num_start, 1)
|
||||||
|
}
|
||||||
|
subdata$eventId <- rep(seq_along(num_start), num_start)
|
||||||
|
|
||||||
|
if (event == "move") {
|
||||||
|
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
|
||||||
|
id_stop <- which(subdata$event == actions[2])
|
||||||
|
id_rm_stop <- id_stop[diff(id_stop) == 1]
|
||||||
|
subdata <- subdata[-(id_rm_stop + 1), ]
|
||||||
|
}
|
||||||
|
|
||||||
|
subdata_split <- split(subdata, ~ fileId)
|
||||||
|
|
||||||
|
pbapply::pboptions(style = 3, char = "=")
|
||||||
|
|
||||||
|
subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape,
|
||||||
|
direction = "wide",
|
||||||
|
idvar = idvar,
|
||||||
|
timevar = "time",
|
||||||
|
drop = drop)
|
||||||
|
# suppressWarnings(
|
||||||
|
# data_wide <- stats::reshape(subdata, direction = "wide",
|
||||||
|
# idvar = idvar,
|
||||||
|
# timevar = "time",
|
||||||
|
# drop = drop)
|
||||||
|
# )
|
||||||
|
|
||||||
|
# remove entries with only start or stop events since they do not have
|
||||||
|
# all columns
|
||||||
|
ids <- which(sapply(subdata_split_wide, ncol) != ncol)
|
||||||
|
if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
|
||||||
|
|
||||||
|
data_wide <- dplyr::bind_rows(subdata_split_wide)
|
||||||
|
|
||||||
|
for (d in drop) data_wide[d] <- NA
|
||||||
|
data_wide$distance <- NA
|
||||||
|
data_wide$scaleSize <- NA
|
||||||
|
data_wide$rotationDegree <- NA
|
||||||
|
|
||||||
|
data_wide$event <- event
|
||||||
|
data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
|
||||||
|
|
||||||
|
if (event == "move") {
|
||||||
|
data_wide$distance <- apply(
|
||||||
|
data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
|
||||||
|
function(x) stats::dist(matrix(x, 2, 2, byrow = TRUE)))
|
||||||
|
data_wide$rotationDegree <- data_wide$rotation.stop -
|
||||||
|
data_wide$rotation.start
|
||||||
|
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
|
||||||
|
# remove moves without any change
|
||||||
|
move_wide <- data_wide[data_wide$distance != 0 &
|
||||||
|
data_wide$rotationDegree != 0 &
|
||||||
|
data_wide$scaleSize != 1, ]
|
||||||
|
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
|
||||||
|
"lines containing move events were removed since they did",
|
||||||
|
"\nnot contain any change"), fill = TRUE)
|
||||||
|
data_wide <- move_wide
|
||||||
|
}
|
||||||
|
|
||||||
|
out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
|
||||||
|
"date.start", "date.stop", "timeMs.start",
|
||||||
|
"timeMs.stop", "duration", "topicNumber", "popup",
|
||||||
|
"x.start", "y.start", "x.stop", "y.stop",
|
||||||
|
"distance", "scale.start", "scale.stop",
|
||||||
|
"scaleSize", "rotation.start", "rotation.stop",
|
||||||
|
"rotationDegree")]
|
||||||
|
rownames(out) <- NULL
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Add case variable
|
||||||
|
|
||||||
|
add_case <- function(data, cutoff = 20) {
|
||||||
|
# TODO: What is the best choice for the cutoff here?
|
||||||
|
|
||||||
|
data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
|
||||||
|
data$case <- NA
|
||||||
|
j <- 1
|
||||||
|
pb <- utils::txtProgressBar(min = 0, max = nrow(data), initial = NA, style = 3)
|
||||||
|
|
||||||
|
for (i in seq_len(nrow(data))) {
|
||||||
|
if (data$timediff[i] <= cutoff) {
|
||||||
|
data$case[i] <- j
|
||||||
|
} else {
|
||||||
|
j <- j + 1
|
||||||
|
data$case[i] <- j
|
||||||
|
}
|
||||||
|
utils::setTxtProgressBar(pb, i)
|
||||||
|
}
|
||||||
|
data$timediff <- NULL
|
||||||
|
data
|
||||||
|
}
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Add trace for moves
|
||||||
|
|
||||||
|
add_trace_moves <- function(data) {
|
||||||
|
|
||||||
|
pbapply::pboptions(style = 3, char = "=")
|
||||||
|
|
||||||
|
trace_max <- max(data$trace, na.rm = TRUE)
|
||||||
|
|
||||||
|
#subdata_art <- split(data, ~ artwork)
|
||||||
|
subdata_case <- split(data, ~ case)
|
||||||
|
|
||||||
|
#subdata_list <- split(data, ~ artwork + case)
|
||||||
|
# --> does not work with complete data set
|
||||||
|
cat("Splitting data...", "\n")
|
||||||
|
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~artwork)
|
||||||
|
subdata_list <- unlist(subdata_list, recursive = FALSE)
|
||||||
|
|
||||||
|
cat("Adding trace...", "\n")
|
||||||
|
subdata_trace <- pbapply::pblapply(subdata_list,
|
||||||
|
function(x) {
|
||||||
|
trace_max <<- trace_max + 1
|
||||||
|
add_trace_subdata(x, max_trace = trace_max)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
out <- dplyr::bind_rows(subdata_trace)
|
||||||
|
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
||||||
|
rownames(out) <- NULL
|
||||||
|
|
||||||
|
# Make trace a consecutive number
|
||||||
|
out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace)))
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
add_trace_subdata <- function(subdata, max_trace) {
|
||||||
|
|
||||||
|
if (nrow(subdata) != 0) {
|
||||||
|
|
||||||
|
if (length(stats::na.omit(unique(subdata$trace))) == 1) {
|
||||||
|
subdata[subdata$event == "move", "trace"] <- stats::na.omit(unique(subdata$trace))
|
||||||
|
} else if (length(stats::na.omit(unique(subdata$trace))) > 1) {
|
||||||
|
for (i in 1:nrow(subdata)) {
|
||||||
|
if (subdata$event[i] == "move") {
|
||||||
|
if (i == 1) {
|
||||||
|
subdata$trace[i] <- stats::na.omit(unique(subdata$trace))[1]
|
||||||
|
} else {
|
||||||
|
subdata$trace[i] <- subdata$trace[i - 1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (all(is.na(subdata$trace))) {
|
||||||
|
for (i in 1:nrow(subdata)) {
|
||||||
|
subdata$trace[i] <- max_trace
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
warning("subdata has nrow = 0")
|
||||||
|
}
|
||||||
|
subdata
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Create data frame with file names and topics for each artwork
|
||||||
|
|
||||||
|
extract_topics <- function(artworks, pattern, path) {
|
||||||
|
|
||||||
|
dat <- NULL
|
||||||
|
file_order <- NULL
|
||||||
|
i <- 1
|
||||||
|
|
||||||
|
for (artwork in artworks) {
|
||||||
|
|
||||||
|
if (length(pattern) == 1) {
|
||||||
|
index_file <- pattern
|
||||||
|
} else {
|
||||||
|
index_file <- pattern[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
fnames <- dir(pattern = paste0(artwork, "_"),
|
||||||
|
path = paste(path, artwork, sep = "/"))
|
||||||
|
topic <- NULL
|
||||||
|
for (fname in fnames) {
|
||||||
|
suppressWarnings(
|
||||||
|
topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
|
||||||
|
grep("^<card type=",
|
||||||
|
trimws(readLines(paste(path, artwork, fname, sep = "/"))),
|
||||||
|
value = T)))
|
||||||
|
)
|
||||||
|
|
||||||
|
}
|
||||||
|
index <- paste(path, artwork, index_file, sep = "/")
|
||||||
|
suppressWarnings(
|
||||||
|
file_order <- c(file_order, gsub("^<card src=.*/(.*)./>$", "\\1",
|
||||||
|
grep("^<card src=", trimws(readLines(index)),
|
||||||
|
value = TRUE)))
|
||||||
|
)
|
||||||
|
in_index <- fnames %in% file_order
|
||||||
|
dat <- rbind(dat, data.frame(artwork, file_name = fnames, in_index, topic))
|
||||||
|
i <- i + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# take only the ones that are actually displayed and sort in the same order
|
||||||
|
# as indicated in index.html
|
||||||
|
out <- dat[dat$in_index, -3]
|
||||||
|
out <- out[order(file_order, out$file_name), ]
|
||||||
|
rownames(out) <- NULL
|
||||||
|
|
||||||
|
out$index <- unlist(sapply(table(out$artwork), seq_len))
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Add topics: file names and topics
|
||||||
|
|
||||||
|
add_topic <- function(data, topics) {
|
||||||
|
|
||||||
|
artworks <- unique(data$artwork)
|
||||||
|
tab_art <- lapply(artworks,
|
||||||
|
function(x) names(table(data$topicNumber[data$artwork == x])))
|
||||||
|
names(tab_art) <- artworks
|
||||||
|
|
||||||
|
tab_index <- lapply(tab_art, seq_along)
|
||||||
|
|
||||||
|
dat_split <- split(data, ~ artwork)
|
||||||
|
|
||||||
|
set_label <- function(x) {
|
||||||
|
artwork <- unique(x$artwork)
|
||||||
|
x$topicIndex <- factor(x$topicNumber, labels = tab_index[[artwork]])
|
||||||
|
x
|
||||||
|
}
|
||||||
|
|
||||||
|
dat_label <- lapply(dat_split, set_label)
|
||||||
|
|
||||||
|
set_topic <- function(x) {
|
||||||
|
artwork <- unique(x$artwork)
|
||||||
|
labels_file <- topics[topics$artwork == artwork,
|
||||||
|
"file_name"][as.numeric(levels(x$topicIndex))]
|
||||||
|
x$topicFile <- as.character(factor(x$topicIndex, labels = labels_file))
|
||||||
|
labels_topic <- topics[topics$artwork == artwork,
|
||||||
|
"topic"][as.numeric(levels(x$topicIndex))]
|
||||||
|
x$topic <- as.character(factor(x$topicIndex, labels = labels_topic))
|
||||||
|
x
|
||||||
|
}
|
||||||
|
|
||||||
|
dat_topic <- lapply(dat_label, set_topic)
|
||||||
|
|
||||||
|
#out <- do.call(rbind, dat_topic)
|
||||||
|
out <- dplyr::bind_rows(dat_topic)
|
||||||
|
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
||||||
|
rownames(out) <- NULL
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Create data frame with information on artworks
|
||||||
|
|
||||||
|
extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
|
||||||
|
path = path) {
|
||||||
|
out <- NULL
|
||||||
|
i <- 1
|
||||||
|
|
||||||
|
for (artwork in artworks) {
|
||||||
|
|
||||||
|
if (length(files) == 1) {
|
||||||
|
index_file <- files
|
||||||
|
} else {
|
||||||
|
index_file <- files[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
index <- paste(path, artwork, index_file, sep = "/")
|
||||||
|
varnames <- c("artist", "title", "misc", "description")
|
||||||
|
xmllist <- XML::xmlToList(index)$header[varnames]
|
||||||
|
|
||||||
|
if (any(sapply(xmllist, is.null))) {# necessary for missing entries
|
||||||
|
names(xmllist) <- varnames
|
||||||
|
xmllist[which(sapply(xmllist, is.null))] <- NA
|
||||||
|
}
|
||||||
|
# remove German quotes
|
||||||
|
xmllist <- lapply(xmllist, function(x) gsub("\u201e|\u201c", "", x))
|
||||||
|
# remove HTML tags
|
||||||
|
xmllist <- lapply(xmllist, function(x) gsub("<br/>", " ", x))
|
||||||
|
xmldat <- as.data.frame(xmllist)
|
||||||
|
xmldat$artwork <- artwork
|
||||||
|
# trim white space from strings
|
||||||
|
xmldat$artist <- trimws(xmldat$artist)
|
||||||
|
xmldat$title <- trimws(xmldat$title)
|
||||||
|
xmldat$misc <- trimws(xmldat$misc)
|
||||||
|
xmldat$description <- trimws(xmldat$description)
|
||||||
|
out <- rbind(out, xmldat)
|
||||||
|
i <- i + 1
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
139
R/parse_logfiles.R
Normal file
139
R/parse_logfiles.R
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
#' Left padding file names of raw log files from Multi-Touch-Table at the
|
||||||
|
#' IWM.
|
||||||
|
#'
|
||||||
|
#' File names need to be left padded since otherwise the sorting of the
|
||||||
|
#' timestamps will be off and one will get negative durations later on
|
||||||
|
#' since the wrong events get closed.
|
||||||
|
#'
|
||||||
|
#' @param x file name in the form of `yyyy_mm_dd-hh_mm_ss.
|
||||||
|
#' @param dirpaths Paths on system where files live that should be renamed.
|
||||||
|
#' @return Left padded file names.
|
||||||
|
#' @examples
|
||||||
|
#' # folders <- "all"
|
||||||
|
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
||||||
|
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
||||||
|
#' # leftpad_fnames(fnames)
|
||||||
|
leftpad_fnames <- function(x, dirpaths) {
|
||||||
|
|
||||||
|
z <- gsub(paste0(dirpaths, "/"), "\\1", x)
|
||||||
|
ys <- strsplit(z, "_")
|
||||||
|
|
||||||
|
res <- NULL
|
||||||
|
|
||||||
|
for (y in ys) {
|
||||||
|
y2 <- unlist(strsplit(y[3], "-"))
|
||||||
|
e1 <- y[1]
|
||||||
|
e2 <- sprintf("%02d", as.numeric(y[2]))
|
||||||
|
e3 <- sprintf("%02d", as.numeric(y2[1]))
|
||||||
|
e4 <- sprintf("%02d", as.numeric(y2[2]))
|
||||||
|
e5 <- sprintf("%02d", as.numeric(y[4]))
|
||||||
|
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
||||||
|
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
||||||
|
|
||||||
|
res <- c(res,
|
||||||
|
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
|
||||||
|
}
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Creating data frame for raw log files.
|
||||||
|
#'
|
||||||
|
#' Creates a data frame or CSV file from raw log files from a
|
||||||
|
#' Multi-Touch-Table at the IWM.
|
||||||
|
#'
|
||||||
|
#' @param folders A character vector of folder names that contain the raw
|
||||||
|
#' log files from the Multi-Touch-Table at the IWM.
|
||||||
|
#' @param path A path to the folders.
|
||||||
|
#' @param file Name of the file where parsed log files should be saved.
|
||||||
|
#' Default is "rawdata_logfiles.csv".
|
||||||
|
#' @param save Logical. If data frame should be returned by the function or
|
||||||
|
#' saved. Default is TRUE.
|
||||||
|
#' @return A data frame or NULL.
|
||||||
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' # parse_logfiles("all", path = "../data/haum_logs_2016-2023/")
|
||||||
|
parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
||||||
|
save = TRUE) {
|
||||||
|
dirpaths <- paste0(path, folders)
|
||||||
|
# TODO: This is not very intutitive
|
||||||
|
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
||||||
|
|
||||||
|
suppressWarnings(
|
||||||
|
logs <- lapply(fnames, readLines)
|
||||||
|
)
|
||||||
|
nlog <- sapply(logs, length)
|
||||||
|
dat <- data.frame(fileId = rep(leftpad_fnames(fnames, dirpaths), nlog),
|
||||||
|
logs = unlist(logs))
|
||||||
|
|
||||||
|
# Remove corrupt lines
|
||||||
|
d1 <- nrow(dat)
|
||||||
|
dat <- subset(dat, dat$logs != "")
|
||||||
|
d2 <- nrow(dat)
|
||||||
|
|
||||||
|
if(d1 > d2) {
|
||||||
|
warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract relevant infos
|
||||||
|
date <- sapply(dat$logs, gsub,
|
||||||
|
pattern = "^\\[(.*)\\], \\[.*$",
|
||||||
|
replacement = "\\1",
|
||||||
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
timestamp <- sapply(dat$logs, gsub,
|
||||||
|
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
||||||
|
replacement = "\\1",
|
||||||
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
action <- sapply(dat$logs, gsub,
|
||||||
|
pattern = "^.*EyeVisit, (.*):*.*$",
|
||||||
|
replacement = "\\1",
|
||||||
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
events <- sapply(strsplit(action, ":"), function(x) x[1])
|
||||||
|
|
||||||
|
topics <- sapply(strsplit(action, ":"), function(x) x[2])
|
||||||
|
|
||||||
|
suppressWarnings(
|
||||||
|
moves <- apply(do.call(rbind,
|
||||||
|
strsplit(sapply(strsplit(action, ":"),
|
||||||
|
function(x) x[3]), ",")), 2,
|
||||||
|
as.numeric)
|
||||||
|
)
|
||||||
|
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
||||||
|
|
||||||
|
card_action <- trimws(sapply(strsplit(action, ":"),
|
||||||
|
function(x) x[3])[grep("Artwork", events)])
|
||||||
|
|
||||||
|
card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4]))
|
||||||
|
|
||||||
|
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
||||||
|
|
||||||
|
ts_elements <- strsplit(timestamp, ":")
|
||||||
|
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
||||||
|
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
||||||
|
as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60
|
||||||
|
|
||||||
|
dat$date <- lubridate::parse_date_time(date, "bdyHMSOp")
|
||||||
|
dat$timeMs <- time_ms
|
||||||
|
dat$event <- events
|
||||||
|
dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1]))
|
||||||
|
dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2])
|
||||||
|
dat$topicNumber <- card
|
||||||
|
dat$x <- moves[,1]
|
||||||
|
dat$y <- moves[,2]
|
||||||
|
dat$scale <- moves[,3]
|
||||||
|
dat$rotation <- moves[,4]
|
||||||
|
dat$logs <- NULL
|
||||||
|
|
||||||
|
dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ]
|
||||||
|
|
||||||
|
# Export data
|
||||||
|
if (save) {
|
||||||
|
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
|
||||||
|
cat(paste0("INFORMATION: Data file", file, "has been written to ", getwd(), "\n"))
|
||||||
|
} else {
|
||||||
|
return(dat)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
22
man/create_eventlogs.Rd
Normal file
22
man/create_eventlogs.Rd
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/create_eventlogs.R
|
||||||
|
\name{create_eventlogs}
|
||||||
|
\alias{create_eventlogs}
|
||||||
|
\title{Creating log events from raw log files.}
|
||||||
|
\usage{
|
||||||
|
create_eventlogs(data)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
|
||||||
|
See \code{?parse_logfiles} for more details.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Data frame.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creating event logs from a data frame of raw log files from a
|
||||||
|
Multi-Touch-Table at the IWM.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
# tbd
|
||||||
|
}
|
28
man/leftpad_fnames.Rd
Normal file
28
man/leftpad_fnames.Rd
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/parse_logfiles.R
|
||||||
|
\name{leftpad_fnames}
|
||||||
|
\alias{leftpad_fnames}
|
||||||
|
\title{Left padding file names of raw log files from Multi-Touch-Table at the
|
||||||
|
IWM.}
|
||||||
|
\usage{
|
||||||
|
leftpad_fnames(x, dirpaths)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{x}{file name in the form of `yyyy_mm_dd-hh_mm_ss.}
|
||||||
|
|
||||||
|
\item{dirpaths}{Paths on system where files live that should be renamed.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Left padded file names.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
File names need to be left padded since otherwise the sorting of the
|
||||||
|
timestamps will be off and one will get negative durations later on
|
||||||
|
since the wrong events get closed.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
# folders <- "all"
|
||||||
|
# dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
||||||
|
# fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
||||||
|
# leftpad_fnames(fnames)
|
||||||
|
}
|
30
man/parse_logfiles.Rd
Normal file
30
man/parse_logfiles.Rd
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/parse_logfiles.R
|
||||||
|
\name{parse_logfiles}
|
||||||
|
\alias{parse_logfiles}
|
||||||
|
\title{Creating data frame for raw log files.}
|
||||||
|
\usage{
|
||||||
|
parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = TRUE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{folders}{A character vector of folder names that contain the raw
|
||||||
|
log files from the Multi-Touch-Table at the IWM.}
|
||||||
|
|
||||||
|
\item{path}{A path to the folders.}
|
||||||
|
|
||||||
|
\item{file}{Name of the file where parsed log files should be saved.
|
||||||
|
Default is "rawdata_logfiles.csv".}
|
||||||
|
|
||||||
|
\item{save}{Logical. If data frame should be returned by the function or
|
||||||
|
saved. Default is TRUE.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A data frame or NULL.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creates a data frame or CSV file from raw log files from a
|
||||||
|
Multi-Touch-Table at the IWM.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
# parse_logfiles("all", path = "../data/haum_logs_2016-2023/")
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user