Refactoring and debugging; especially add_trace() and create_glossardict()

This commit is contained in:
Nora Wickelmaier 2023-09-21 16:47:23 +02:00
parent b9185a5645
commit 7f6e967f7c
9 changed files with 282 additions and 253 deletions

107
R/add_trace.R Normal file
View File

@ -0,0 +1,107 @@
###########################################################################
add_trace_artworks <- function(subdata) {
last_event <- subdata$event[1]
artworks <- unique(subdata$artwork)[unique(subdata$artwork) != "glossar"]
n <- 1 # count artworks for progress
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3)
for (artwork in artworks) {
cat("\n\nAdding trace variable for artwork", artwork,
paste0("(", n, "/", length(artworks), ")"), "\n")
for (i in 1:nrow(subdata)) {
if (last_event == "Show Info" & subdata$artwork[i] == artwork) {
subdata$trace[i] <- i
j <- i
} else if (last_event == "Show Front" & subdata$artwork[i] == artwork) {
subdata$trace[i] <- j
} else if (!(last_event %in% c("Show Info", "Show Front")) &
subdata$artwork[i] == artwork) {
subdata$trace[i] <- j
}
if (i <= nrow(subdata)) {
last_event <- subdata$event[i + 1]
}
utils::setTxtProgressBar(pb, i)
}
n <- n + 1
}
subdata
}
###########################################################################
add_trace_glossar <- function(subdata, glossar_dict) {
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3)
# Fix glossar entries (find corresponding artworks and fill in trace)
glossar_files <- unique(subdata[subdata$artwork == "glossar", "popup"])
# load lookup table for artworks and glossar files
lut <- glossar_dict[names(glossar_dict) %in% glossar_files]
inside <- glossar_files[glossar_files %in%
names(lut[sapply(lut, length) == 1])]
single_art <- unlist(lut[names(lut) %in% inside])
m <- 1
for (file in names(lut)) {
cat("\n\nAdding trace variable for glossar entry", file,
paste0("(", m, "/", length(lut), ")"), "\n")
artwork_list <- unlist(lut[names(lut) == file])
for (i in seq_len(nrow(subdata))) {
if (subdata$event[i] == "Show Info" |
(subdata$event[i] == "Artwork/OpenCard" &
subdata$artwork[i] %in% single_art)) {
current_artwork <- subdata[i, "artwork"]
j <- i
k <- i
} else {
current_artwork <- current_artwork
}
if (subdata$event[i] == "Show Front" & subdata$artwork[i] == current_artwork) {
# make sure artwork has not been closed, yet!
k <- i
}
if (subdata$artwork[i] == "glossar" &
(current_artwork %in% artwork_list) &
subdata$popup[i] == file & (j - k == 0)) {
subdata[i, "trace"] <- subdata[j, "trace"]
subdata[i, "artwork"] <- current_artwork
}
utils::setTxtProgressBar(pb, i)
}
m <- m + 1
}
# Exclude not matched glossar entries
cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
sum(is.na(subdata[subdata$glossar == 1, "trace"])), "entries",
fill = TRUE)
subset(subdata, !is.na(subdata$trace))
}
###########################################################################
add_trace <- function(data, glossar_dict) {
data$trace <- NA
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- add_trace_artworks(subdata2)
subdata2 <- add_trace_glossar(subdata2, glossar_dict)
out <- rbind(subdata1, subdata2)
out <- out[order(out$fileId, out$date, out$timeMs), ]
out
}

114
R/close_events.R Normal file
View File

@ -0,0 +1,114 @@
###########################################################################
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
event <- match.arg(event)
switch(event,
"move" = {
actions <- c("Transform start", "Transform stop")
idvar <- c("fileId", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "trace", "event")
ncol <- 16
},
"flipCard" = {
actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "trace", "artwork", "glossar")
drop <- c("popup", "topicNumber", "eventId", "event")
ncol <- 16
},
"openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork",
"topicNumber")
drop <- c("popup", "event")
ncol <- 18
},
"openPopup" = {
actions <- c("ShowPopup", "HidePopup")
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
drop <- c("topicNumber", "event")
ncol <- 18
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
}
)
subdata <- subset(data, data$event %in% actions)
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
num_start <- diff(c(0, which(subdata$event == actions[2])))
if (utils::tail(subdata, 1)$time == "start") {
num_start <- c(num_start, 1)
}
subdata$eventId <- rep(seq_along(num_start), num_start)
if (event == "move") {
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
id_stop <- which(subdata$event == actions[2])
id_rm_stop <- id_stop[diff(id_stop) == 1]
subdata <- subdata[-(id_rm_stop + 1), ]
}
subdata_split <- split(subdata, ~ fileId)
pbapply::pboptions(style = 3, char = "=")
subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape,
direction = "wide",
idvar = idvar,
timevar = "time",
drop = drop)
# suppressWarnings(
# data_wide <- stats::reshape(subdata, direction = "wide",
# idvar = idvar,
# timevar = "time",
# drop = drop)
# )
# remove entries with only start or stop events since they do not have
# all columns
ids <- which(sapply(subdata_split_wide, ncol) != ncol)
if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
data_wide <- dplyr::bind_rows(subdata_split_wide)
for (d in drop) data_wide[d] <- NA
data_wide$distance <- NA
data_wide$scaleSize <- NA
data_wide$rotationDegree <- NA
data_wide$event <- event
data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
if (event == "move") {
data_wide$distance <- apply(
data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
function(x) stats::dist(matrix(x, 2, 2, byrow = TRUE)))
data_wide$rotationDegree <- data_wide$rotation.stop -
data_wide$rotation.start
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
# remove moves without any change
move_wide <- data_wide[data_wide$distance != 0 &
data_wide$rotationDegree != 0 &
data_wide$scaleSize != 1, ]
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
"lines containing move events were removed since they did",
"\nnot contain any change"), fill = TRUE)
data_wide <- move_wide
}
out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
"date.start", "date.stop", "timeMs.start",
"timeMs.stop", "duration", "topicNumber", "popup",
"x.start", "y.start", "x.stop", "y.stop",
"distance", "scale.start", "scale.stop",
"scaleSize", "rotation.start", "rotation.stop",
"rotationDegree")]
rownames(out) <- NULL
out
}

View File

@ -5,25 +5,36 @@
#'
#' @param data Data frame of raw log files created with `parse_logfiles()`.
#' See `?parse_logfiles` for more details.
#' @param xmlpath Path to folder where XML definitions of artworks live.
#' @return Data frame.
#' @export
#' @examples
#' # tbd
create_eventlogs <- function(data) {
create_eventlogs <- function(data, xmlpath) {
if (!lubridate::is.POSIXt(data$date)){
cat("########## Convertion variable `date` to POSIXct ##########", "\n")
data$date <- as.POSIXct(data$date)
}
data$glossar <- ifelse(data$artwork == "glossar", 1, 0)
# Remove irrelevant events
dat <- subset(data, !(data$event %in% c("Start Application",
"Show Application")))
# Create glossar dictionary ##############################################
cat("\n########## Creating glossar dictionary ##########", "\n")
artworks <- unique(stats::na.omit(dat$artwork))
artworks <- artworks[artworks != "glossar"]
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
glossar_dict <- create_glossardict(artworks, glossar_files, path = xmlpath)
# Add trace variable #####################################################
cat("########## Adding trace variable... ##########", "\n")
dat1 <- add_trace(dat)
cat("\n########## Adding trace variable... ##########", "\n")
dat1 <- add_trace(dat, glossar_dict)
# Close events
cat("########## Closing events... ##########", "\n")
cat("\n########## Closing events... ##########", "\n")
c1 <- close_events(dat1, "move")
cat("## --> move events closed.", "\n")
c2 <- close_events(dat1, "flipCard")
@ -47,7 +58,7 @@ create_eventlogs <- function(data) {
rownames(dat2) <- NULL
# Add case variable ######################################################
cat("########## Adding case and eventId variables... ##########", "\n")
cat("\n########## Adding case and eventId variables... ##########", "\n\n")
dat3 <- add_case(dat2)
# Add event ID ###########################################################
@ -62,16 +73,16 @@ create_eventlogs <- function(data) {
"rotationDegree")]
# Add trace for move events ##############################################
cat("\n########## Adding trace variable for move events... ##########", "\n")
cat("\n\n########## Adding trace variable for move events... ##########", "\n")
dat4 <- add_trace_moves(dat3)
# Add topics: file names and topics ######################################
cat("########## Adding information about topics... ##########", "\n")
cat("\n########## Adding information about topics... ##########", "\n\n")
artworks <- unique(dat4$artwork)
# remove artworks without XML information
artworks <- artworks[!artworks %in% c("504", "505")]
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
path = "../data/ContentEyevisit/eyevisit_cards_light/")
path = xmlpath)
dat5 <- add_topic(dat4, topics = topics)
dat5

23
R/create_glossardict.R Normal file
View File

@ -0,0 +1,23 @@
create_glossardict <- function(artworks, glossar_files, path) {
x <- NULL
for (glossar_file in glossar_files) {
for (artwork in artworks) {
fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(path, artwork))
for (fname in fnames) {
suppressWarnings(
lines <- readLines(paste0(path, artwork, "/", fname))
)
if (any(grepl(glossar_file, lines))) {
x <- rbind(x, data.frame(glossar_file, artwork))
break # if one match is found, we are done
}
}
}
}
tapply(x$artwork, x$glossar_file, FUN = c)
}
# TODO: Get rid of (at least 2) loops
# TODO: Add progress bars

View File

@ -1,236 +1,5 @@
###########################################################################
# Add trace variable
add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
data$trace <- NA
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
last_event <- subdata2$event[1]
artworks <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"]
n <- 1 # count artworks for progress
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata2), initial = NA,
style = 3)
for (artwork in artworks) {
cat("\n\nAdding trace variable for artwork", artwork,
paste0("(", n, "/", length(artworks), ")"), "\n")
for (i in 1:nrow(subdata2)) {
if (last_event == "Show Info" & subdata2$artwork[i] == artwork) {
subdata2$trace[i] <- i
j <- i
} else if (last_event == "Show Front" & subdata2$artwork[i] == artwork) {
subdata2$trace[i] <- j
} else if (!(last_event %in% c("Show Info", "Show Front")) &
subdata2$artwork[i] == artwork) {
subdata2$trace[i] <- j
}
if (i <= nrow(subdata2)) {
last_event <- subdata2$event[i + 1]
}
utils::setTxtProgressBar(pb, i)
}
n <- n + 1
}
# Fix glossar entries (find corresponding artworks and fill in trace)
glossar_files <- unique(subdata2[subdata2$artwork == "glossar", "popup"])
# load lookup table for artworks and glossar files
load(glossar_dict)
lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
inside <- glossar_files[glossar_files %in%
lut[sapply(lut$artwork, length) == 1,
"glossar_file"]]
single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"])
m <- 1
for (file in lut$glossar_file) {
cat("\n\nAdding trace variable for glossar entry", file,
paste0("(", m, "/", length(lut$glossar_file), ")"), "\n")
artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
for (i in seq_len(nrow(subdata2))) {
if (subdata2$event[i] == "Show Info" |
(subdata2$event[i] == "Artwork/OpenCard" &
subdata2$artwork[i] %in% single_art)) {
current_artwork <- subdata2[i, "artwork"]
j <- i
k <- i
} else {
current_artwork <- current_artwork
}
if (subdata2$event[i] == "Show Front" & subdata2$artwork[i] == current_artwork) {
# make sure artwork has not been closed, yet!
k <- i
}
if (subdata2$artwork[i] == "glossar" &
(current_artwork %in% artwork_list) &
subdata2$popup[i] == file & (j - k == 0)) {
subdata2[i, "trace"] <- subdata2[j, "trace"]
subdata2[i, "artwork"] <- current_artwork
}
utils::setTxtProgressBar(pb, i)
}
m <- m + 1
}
# Exclude not matched glossar entries
cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
sum(is.na(subdata2[subdata2$glossar == 1, "trace"])), "entries",
#proportions(table(is.na(subdata2[subdata2$glossar == 1, "trace"]))),
fill = TRUE)
subdata2 <- subset(subdata2, !is.na(subdata2$trace))
# REMEMBER: It can never be 100% correct, since it is always possible
# that several cards are open and that they link to the same glossar
# entry
# dat2[14110:14130, ]
# dat2[dat2$glossar == 1, ]
out <- rbind(subdata1, subdata2)
out <- out[order(out$fileId, out$date, out$timeMs), ]
out
}
###########################################################################
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
event <- match.arg(event)
switch(event,
"move" = {
actions <- c("Transform start", "Transform stop")
idvar <- c("fileId", "eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "trace", "event")
ncol <- 16
},
"flipCard" = {
actions <- c("Show Info", "Show Front")
idvar <- c("fileId", "trace", "artwork", "glossar")
drop <- c("popup", "topicNumber", "eventId", "event")
ncol <- 16
},
"openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork",
"topicNumber")
drop <- c("popup", "event")
ncol <- 18
},
"openPopup" = {
actions <- c("ShowPopup", "HidePopup")
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
drop <- c("topicNumber", "event")
ncol <- 18
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
}
)
subdata <- subset(data, data$event %in% actions)
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
num_start <- diff(c(0, which(subdata$event == actions[2])))
if (utils::tail(subdata, 1)$time == "start") {
num_start <- c(num_start, 1)
}
subdata$eventId <- rep(seq_along(num_start), num_start)
if (event == "move") {
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
id_stop <- which(subdata$event == actions[2])
id_rm_stop <- id_stop[diff(id_stop) == 1]
subdata <- subdata[-(id_rm_stop + 1), ]
}
subdata_split <- split(subdata, ~ fileId)
pbapply::pboptions(style = 3, char = "=")
subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape,
direction = "wide",
idvar = idvar,
timevar = "time",
drop = drop)
# suppressWarnings(
# data_wide <- stats::reshape(subdata, direction = "wide",
# idvar = idvar,
# timevar = "time",
# drop = drop)
# )
# remove entries with only start or stop events since they do not have
# all columns
ids <- which(sapply(subdata_split_wide, ncol) != ncol)
if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
data_wide <- dplyr::bind_rows(subdata_split_wide)
for (d in drop) data_wide[d] <- NA
data_wide$distance <- NA
data_wide$scaleSize <- NA
data_wide$rotationDegree <- NA
data_wide$event <- event
data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
if (event == "move") {
data_wide$distance <- apply(
data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
function(x) stats::dist(matrix(x, 2, 2, byrow = TRUE)))
data_wide$rotationDegree <- data_wide$rotation.stop -
data_wide$rotation.start
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
# remove moves without any change
move_wide <- data_wide[data_wide$distance != 0 &
data_wide$rotationDegree != 0 &
data_wide$scaleSize != 1, ]
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
"lines containing move events were removed since they did",
"\nnot contain any change"), fill = TRUE)
data_wide <- move_wide
}
out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
"date.start", "date.stop", "timeMs.start",
"timeMs.stop", "duration", "topicNumber", "popup",
"x.start", "y.start", "x.stop", "y.stop",
"distance", "scale.start", "scale.stop",
"scaleSize", "rotation.start", "rotation.stop",
"rotationDegree")]
rownames(out) <- NULL
out
}
###########################################################################
# Add case variable
add_case <- function(data, cutoff = 20) {
@ -409,6 +178,7 @@ add_topic <- function(data, topics) {
#out <- do.call(rbind, dat_topic)
out <- dplyr::bind_rows(dat_topic)
out$topicIndex <- as.numeric(out$topicIndex)
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL
out

View File

@ -5,8 +5,8 @@
#' timestamps will be off and one will get negative durations later on
#' since the wrong events get closed.
#'
#' @param x file name in the form of `yyyy_mm_dd-hh_mm_ss.
#' @param dirpaths Paths on system where files live that should be renamed.
#' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible
#' with missing zero left padding.
#' @return Left padded file names.
#' @examples
#' # folders <- "all"
@ -15,7 +15,7 @@
#' # leftpad_fnames(fnames)
leftpad_fnames <- function(fnames) {
z <- sapply(fnames, function(x) tail(strsplit(x, "/")[[1]], 1),
z <- sapply(fnames, function(x) utils::tail(strsplit(x, "/")[[1]], 1),
USE.NAMES = FALSE)
ys <- strsplit(z, "_")
@ -43,7 +43,9 @@ leftpad_fnames <- function(fnames) {
#'
#' @param folders A character vector of folder names that contain the raw
#' log files from the Multi-Touch-Table at the IWM.
#' @param path A path to the folders.
#' @param path A path to the folder that contains the folders specified in
#' first argument. Needs to end in a "/"!
# TODO: How to catch this?
#' @param file Name of the file where parsed log files should be saved.
#' Default is "rawdata_logfiles.csv".
#' @param save Logical. If data frame should be returned by the function or
@ -51,7 +53,7 @@ leftpad_fnames <- function(fnames) {
#' @return A data frame or NULL.
#' @export
#' @examples
#' # parse_logfiles("all", path = "../data/haum_logs_2016-2023/")
#' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/")
parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
save = TRUE) {
dirpaths <- paste0(path, folders)

View File

@ -4,11 +4,13 @@
\alias{create_eventlogs}
\title{Creating log events from raw log files.}
\usage{
create_eventlogs(data)
create_eventlogs(data, xmlpath)
}
\arguments{
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
See \code{?parse_logfiles} for more details.}
\item{xmlpath}{Path to folder where XML definitions of artworks live.}
}
\value{
Data frame.

View File

@ -5,12 +5,11 @@
\title{Left padding file names of raw log files from Multi-Touch-Table at the
IWM.}
\usage{
leftpad_fnames(x, dirpaths)
leftpad_fnames(fnames)
}
\arguments{
\item{x}{file name in the form of `yyyy_mm_dd-hh_mm_ss.}
\item{dirpaths}{Paths on system where files live that should be renamed.}
\item{fnames}{File name in the form of \code{yyyy_mm_dd-hh_mm_ss}, possible
with missing zero left padding.}
}
\value{
Left padded file names.

View File

@ -2,7 +2,7 @@
% Please edit documentation in R/parse_logfiles.R
\name{parse_logfiles}
\alias{parse_logfiles}
\title{Creating data frame for raw log files.}
\title{Creating data frame from raw log files.}
\usage{
parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = TRUE)
}
@ -10,7 +10,8 @@ parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = TRUE)
\item{folders}{A character vector of folder names that contain the raw
log files from the Multi-Touch-Table at the IWM.}
\item{path}{A path to the folders.}
\item{path}{A path to the folder that contains the folders specified in
first argument. Needs to end in a "/"!}
\item{file}{Name of the file where parsed log files should be saved.
Default is "rawdata_logfiles.csv".}
@ -26,5 +27,5 @@ Creates a data frame or CSV file from raw log files from a
Multi-Touch-Table at the IWM.
}
\examples{
# parse_logfiles("all", path = "../data/haum_logs_2016-2023/")
# parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/")
}