Renamed trace into path

This commit is contained in:
Nora Wickelmaier 2024-01-02 14:17:50 +01:00
parent 46a2ec6e79
commit ec45877229
6 changed files with 71 additions and 74 deletions

View File

@ -1,5 +1,5 @@
########################################################################### ###########################################################################
add_trace_items <- function(subdata) { add_path_items <- function(subdata) {
last_event <- subdata$event[1] last_event <- subdata$event[1]
items <- unique(subdata$item)[unique(subdata$item) != "glossar"] items <- unique(subdata$item)[unique(subdata$item) != "glossar"]
n <- 1 # count items for progress n <- 1 # count items for progress
@ -9,18 +9,18 @@ add_trace_items <- function(subdata) {
for (item in items) { for (item in items) {
cat("\n\nAdding trace variable for item", item, cat("\n\nAdding path variable for item", item,
paste0("(", n, "/", length(items), ")"), "\n") paste0("(", n, "/", length(items), ")"), "\n")
for (i in 1:nrow(subdata)) { for (i in 1:nrow(subdata)) {
if (last_event == "Show Info" & subdata$item[i] == item) { if (last_event == "Show Info" & subdata$item[i] == item) {
subdata$trace[i] <- i subdata$path[i] <- i
j <- i j <- i
} else if (last_event == "Show Front" & subdata$item[i] == item) { } else if (last_event == "Show Front" & subdata$item[i] == item) {
subdata$trace[i] <- j subdata$path[i] <- j
} else if (!(last_event %in% c("Show Info", "Show Front")) & } else if (!(last_event %in% c("Show Info", "Show Front")) &
subdata$item[i] == item) { subdata$item[i] == item) {
subdata$trace[i] <- j subdata$path[i] <- j
} }
if (i <= nrow(subdata)) { if (i <= nrow(subdata)) {
last_event <- subdata$event[i + 1] last_event <- subdata$event[i + 1]
@ -33,7 +33,7 @@ add_trace_items <- function(subdata) {
} }
########################################################################### ###########################################################################
add_trace_glossar <- function(subdata, xmlpath) { add_path_glossar <- function(subdata, xmlpath) {
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3) style = 3)
@ -50,7 +50,7 @@ add_trace_glossar <- function(subdata, xmlpath) {
for (file in names(lut)) { for (file in names(lut)) {
cat("\n\nAdding trace variable for glossar entry", file, cat("\n\nAdding path variable for glossar entry", file,
paste0("(", m, "/", length(lut), ")"), "\n") paste0("(", m, "/", length(lut), ")"), "\n")
item_list <- unlist(lut[names(lut) == file]) item_list <- unlist(lut[names(lut) == file])
@ -72,7 +72,7 @@ add_trace_glossar <- function(subdata, xmlpath) {
if (subdata$item[i] == "glossar" & if (subdata$item[i] == "glossar" &
(current_item %in% item_list) & (current_item %in% item_list) &
subdata$popup[i] == file & (j - k == 0)) { subdata$popup[i] == file & (j - k == 0)) {
subdata[i, "trace"] <- subdata[j, "trace"] subdata[i, "path"] <- subdata[j, "path"]
subdata[i, "item"] <- current_item subdata[i, "item"] <- current_item
} }
utils::setTxtProgressBar(pb, i) utils::setTxtProgressBar(pb, i)
@ -82,22 +82,22 @@ add_trace_glossar <- function(subdata, xmlpath) {
# Exclude not matched glossar entries # Exclude not matched glossar entries
cat("\n\nINFORMATION: glossar entries that are not matched will be removed:", cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
sum(is.na(subdata[subdata$glossar == 1, "trace"])), "entries", sum(is.na(subdata[subdata$glossar == 1, "path"])), "entries",
fill = TRUE) fill = TRUE)
subset(subdata, !is.na(subdata$trace)) subset(subdata, !is.na(subdata$path))
} }
########################################################################### ###########################################################################
add_trace <- function(data, xmlpath, glossar) { add_path <- function(data, xmlpath, glossar) {
data$trace <- NA data$path <- NA
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ] subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ] subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- add_trace_items(subdata2) subdata2 <- add_path_items(subdata2)
if (glossar) { if (glossar) {
subdata2 <- add_trace_glossar(subdata2, xmlpath) subdata2 <- add_path_glossar(subdata2, xmlpath)
} else { } else {
subdata2 <- subdata2[subdata2$glossar != 1, ] subdata2 <- subdata2[subdata2$glossar != 1, ]
} }
@ -109,13 +109,13 @@ add_trace <- function(data, xmlpath, glossar) {
########################################################################### ###########################################################################
# Add trace for moves # Add path for moves
add_trace_moves <- function(data) { add_path_moves <- function(data) {
pbapply::pboptions(style = 3, char = "=") pbapply::pboptions(style = 3, char = "=")
trace_max <- max(data$trace, na.rm = TRUE) path_max <- max(data$path, na.rm = TRUE)
#subdata_art <- split(data, ~ item) #subdata_art <- split(data, ~ item)
subdata_case <- split(data, ~ case) subdata_case <- split(data, ~ case)
@ -126,43 +126,43 @@ add_trace_moves <- function(data) {
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item) subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item)
subdata_list <- unlist(subdata_list, recursive = FALSE) subdata_list <- unlist(subdata_list, recursive = FALSE)
cat("Adding trace...", "\n") cat("Adding path...", "\n")
subdata_trace <- pbapply::pblapply(subdata_list, subdata_path <- pbapply::pblapply(subdata_list,
function(x) { function(x) {
trace_max <<- trace_max + 1 path_max <<- path_max + 1
add_trace_subdata(x, max_trace = trace_max) add_path_subdata(x, max_path = path_max)
} }
) )
out <- dplyr::bind_rows(subdata_trace) out <- dplyr::bind_rows(subdata_path)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL rownames(out) <- NULL
# Make trace a consecutive number # Make path a consecutive number
out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace))) out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
out out
} }
add_trace_subdata <- function(subdata, max_trace) { add_path_subdata <- function(subdata, max_path) {
if (nrow(subdata) != 0) { if (nrow(subdata) != 0) {
if (length(stats::na.omit(unique(subdata$trace))) == 1) { if (length(stats::na.omit(unique(subdata$path))) == 1) {
subdata[subdata$event == "move", "trace"] <- stats::na.omit(unique(subdata$trace)) subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path))
} else if (length(stats::na.omit(unique(subdata$trace))) > 1) { } else if (length(stats::na.omit(unique(subdata$path))) > 1) {
for (i in 1:nrow(subdata)) { for (i in 1:nrow(subdata)) {
if (subdata$event[i] == "move") { if (subdata$event[i] == "move") {
if (i == 1) { if (i == 1) {
subdata$trace[i] <- stats::na.omit(unique(subdata$trace))[1] subdata$path[i] <- stats::na.omit(unique(subdata$path))[1]
} else { } else {
subdata$trace[i] <- subdata$trace[i - 1] subdata$path[i] <- subdata$path[i - 1]
} }
} }
} }
} else if (all(is.na(subdata$trace))) { } else if (all(is.na(subdata$path))) {
for (i in 1:nrow(subdata)) { for (i in 1:nrow(subdata)) {
subdata$trace[i] <- max_trace subdata$path[i] <- max_path
} }
} }

View File

@ -9,20 +9,20 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
"move" = { "move" = {
actions <- c("Transform start", "Transform stop") actions <- c("Transform start", "Transform stop")
idvar <- c("folder", "eventId", "item", "glossar") idvar <- c("folder", "eventId", "item", "glossar")
drop <- c("popup", "topic", "trace", "event") drop <- c("popup", "topic", "path", "event")
ncol <- 18 ncol <- 18
}, },
"flipCard" = { "flipCard" = {
actions <- c("Show Info", "Show Front") actions <- c("Show Info", "Show Front")
idvar <- c("folder", "trace", "eventId", "item", "glossar") idvar <- c("folder", "path", "eventId", "item", "glossar")
drop <- c("popup", "topic", "event") drop <- c("popup", "topic", "event")
ncol <- 19 ncol <- 19
}, },
"openTopic" = { "openTopic" = {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard") actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("folder", "eventId", "trace", "glossar", idvar <- c("folder", "eventId", "path", "glossar",
"item", "topic") "item", "topic")
drop <- c("popup", "event") drop <- c("popup", "event")
ncol <- 20 ncol <- 20
@ -30,7 +30,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
}, },
"openPopup" = { "openPopup" = {
actions <- c("ShowPopup", "HidePopup") actions <- c("ShowPopup", "HidePopup")
idvar <- c("folder", "eventId", "trace", "glossar", idvar <- c("folder", "eventId", "path", "glossar",
"item", "popup") "item", "popup")
drop <- c("topic", "event") drop <- c("topic", "event")
ncol <- 20 ncol <- 20
@ -72,22 +72,22 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
subdata <- dplyr::bind_rows(subdata_list) subdata <- dplyr::bind_rows(subdata_list)
} }
# Remove eventIds associated with more than one trace, usually logging # Remove eventIds associated with more than one path, usually logging
# errors that cannot be resolved for openTopic or openPopup # errors that cannot be resolved for openTopic or openPopup
if (event %in% c("openTopic", "openPopup")) { if (event %in% c("openTopic", "openPopup")) {
# corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace, # corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + path,
# subdata) != 0) != 1)) # subdata) != 0) != 1))
# --> does not run on complete data set # --> does not run on complete data set
subdata_eid <- split(subdata, ~ eventId) subdata_eid <- split(subdata, ~ eventId)
tmp <- sapply(subdata_eid, function(x) length(stats::xtabs( ~ trace, x))) tmp <- sapply(subdata_eid, function(x) length(stats::xtabs( ~ path, x)))
corrupt_eventIds <- names(tmp[tmp > 1]) corrupt_eventIds <- names(tmp[tmp > 1])
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ] subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
} }
# if (event == "flipCard") { # if (event == "flipCard") {
# subdata$eventId <- subdata$trace # subdata$eventId <- subdata$path
# } # }
subdata_split <- split(subdata, ~ fileId) subdata_split <- split(subdata, ~ fileId)
@ -156,7 +156,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide$date.start, data_wide$date.start,
data_wide$timeMs.start), data_wide$timeMs.start),
c("fileId.start", "fileId.stop", "folder", "event", c("fileId.start", "fileId.stop", "folder", "event",
"item", "trace", "glossar", "date.start", "item", "path", "glossar", "date.start",
"date.stop", "timeMs.start", "timeMs.stop", "date.stop", "timeMs.start", "timeMs.stop",
"duration", "topic", "popup", "x.start", "duration", "topic", "popup", "x.start",
"y.start", "x.stop", "y.stop", "distance", "y.start", "x.stop", "y.stop", "distance",
@ -221,7 +221,7 @@ add_variables <- function(data_split_wide, ncol,
}, },
"flipCard" = { "flipCard" = {
data_split_wide <- data_split_wide[, c("folder", "item", data_split_wide <- data_split_wide[, c("folder", "item",
"glossar", "trace", "glossar", "path",
"eventId", "eventId",
"fileId.start", "fileId.start",
"date.start", "date.start",
@ -238,7 +238,7 @@ add_variables <- function(data_split_wide, ncol,
"openTopic" = { "openTopic" = {
data_split_wide <- data_split_wide[, c("folder", "item", data_split_wide <- data_split_wide[, c("folder", "item",
"topic", "topic",
"glossar", "trace", "glossar", "path",
"eventId", "eventId",
"fileId.start", "fileId.start",
"date.start", "date.start",
@ -256,7 +256,7 @@ add_variables <- function(data_split_wide, ncol,
"openPopup" = { "openPopup" = {
data_split_wide <- data_split_wide[, c("folder", "item", data_split_wide <- data_split_wide[, c("folder", "item",
"popup", "glossar", "popup", "glossar",
"trace", "eventId", "path", "eventId",
"fileId.start", "fileId.start",
"date.start", "date.start",
"timeMs.start", "timeMs.start",

View File

@ -36,9 +36,9 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
"Show Application"))) "Show Application")))
dat$glossar <- ifelse(dat$item == "glossar", 1, 0) dat$glossar <- ifelse(dat$item == "glossar", 1, 0)
# Add trace variable ##################################################### # Add path variable #####################################################
cat("\n########## Adding trace variable... ##########", "\n") cat("\n########## Adding path variable... ##########", "\n")
dat1 <- add_trace(dat, xmlpath = xmlpath, glossar = glossar) dat1 <- add_path(dat, xmlpath = xmlpath, glossar = glossar)
# Close events # Close events
cat("\n\n########## Closing events... ##########", "\n") cat("\n\n########## Closing events... ##########", "\n")
@ -57,20 +57,17 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
# Add case variable ###################################################### # Add case variable ######################################################
cat("\n########## Adding case and eventId variables... ##########", "\n\n") cat("\n########## Adding case and eventId variables... ##########", "\n\n")
dat3 <- add_case(dat2, cutoff = case_cutoff) dat3 <- add_case(dat2, cutoff = case_cutoff)
dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start",
"date.stop", "folder", "case", "path", "glossar",
"event", "item", "timeMs.start", "timeMs.stop",
"duration", "topic", "popup", "x.start", "y.start",
"x.stop", "y.stop", "distance", "scale.start",
"scale.stop", "scaleSize", "rotation.start",
"rotation.stop", "rotationDegree")]
# Add event ID ########################################################### # Add path for move events ##############################################
dat3$eventId <- seq_len(nrow(dat3)) cat("\n\n########## Adding path variable for move events... ##########", "\n")
dat3 <- dat3[, c("folder", "eventId", "case", "trace", "glossar", dat4 <- add_path_moves(dat3)
"event", "item", "fileId.start", "fileId.stop",
"date.start", "date.stop", "timeMs.start",
"timeMs.stop", "duration", "topic", "popup",
"x.start", "y.start", "x.stop", "y.stop", "distance",
"scale.start", "scale.stop", "scaleSize",
"rotation.start", "rotation.stop", "rotationDegree")]
# Add trace for move events ##############################################
cat("\n\n########## Adding trace variable for move events... ##########", "\n")
dat4 <- add_trace_moves(dat3)
# Fix durations that span more than one log file ######################### # Fix durations that span more than one log file #########################
@ -94,8 +91,8 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat4$fIdNum.stop <- NULL dat4$fIdNum.stop <- NULL
dat4$fIdDiff <- NULL dat4$fIdDiff <- NULL
# Remove fragmented traces ############################################### # Remove fragmented paths ###############################################
tab <- stats::xtabs( ~ trace + event, dat4) tab <- stats::xtabs( ~ path + event, dat4)
fragments <- NULL fragments <- NULL
@ -108,11 +105,11 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
fragments <- c(fragments, rownames(tab)[i]) fragments <- c(fragments, rownames(tab)[i])
} }
} }
dat5 <- dat4[!dat4$trace %in% fragments, ] dat5 <- dat4[!dat4$path %in% fragments, ]
if (glossar) { if (glossar) {
# Check for wrong order of events: flipCard -> openPopup -> openTopic # Check for wrong order of events: flipCard -> openPopup -> openTopic
dat5_split <- split(dat5[dat5$event != "move", ], ~ trace) dat5_split <- split(dat5[dat5$event != "move", ], ~ path)
event_list <- lapply(dat5_split, function(x) unique(x$event)) event_list <- lapply(dat5_split, function(x) unique(x$event))
ids <- sapply(event_list, length) == 3 ids <- sapply(event_list, length) == 3
@ -120,9 +117,9 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
names(event_dat) <- c("flipCard", "openTopic", "openPopup") names(event_dat) <- c("flipCard", "openTopic", "openPopup")
frag_ids <- which(event_dat$openTopic == "openPopup") frag_ids <- which(event_dat$openTopic == "openPopup")
dat6 <- dat5[dat5$trace %in% rownames(event_dat)[frag_ids], ] dat6 <- dat5[dat5$path %in% rownames(event_dat)[frag_ids], ]
dat6b <- dat6[!dat6$glossar == 1, ] dat6b <- dat6[!dat6$glossar == 1, ]
dat7 <- rbind(dat5[!dat5$trace %in% rownames(event_dat)[frag_ids], ], dat7 <- rbind(dat5[!dat5$path %in% rownames(event_dat)[frag_ids], ],
dat6b) dat6b)
} else { } else {
dat7 <- dat5 dat7 <- dat5

View File

@ -17,7 +17,7 @@ create_eventlogs(
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}. \item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
See \code{?parse_logfiles} for more details.} See \code{?parse_logfiles} for more details.}
\item{xmlpath}{Path to folder where XML definitions of artworks live.} \item{xmlpath}{Path to folder where XML definitions of items live.}
\item{case_cutoff}{Number in seconds how long time interval between \item{case_cutoff}{Number in seconds how long time interval between
different cases should be.} different cases should be.}

View File

@ -19,7 +19,7 @@ correspond to the folder names which contain the XML files.}
Data frame. Data frame.
} }
\description{ \description{
Information about artowrks are extracted from XML files and written to a Information about artworks are extracted from XML files and written to a
data frame that contains \code{artist}, \code{title}, \code{misc}, and \code{description}. data frame that contains \code{artist}, \code{title}, \code{misc}, and \code{description}.
} }
\examples{ \examples{

View File

@ -2,25 +2,25 @@
% Please edit documentation in R/extract_topics.R % Please edit documentation in R/extract_topics.R
\name{extract_topics} \name{extract_topics}
\alias{extract_topics} \alias{extract_topics}
\title{Creating data frame with artworks and topics} \title{Creating data frame with items and topics}
\usage{ \usage{
extract_topics(artworks, xmlfiles, xmlpath) extract_topics(items, xmlfiles, xmlpath)
} }
\arguments{ \arguments{
\item{artworks}{A character vector with names of the artworks. Needs to \item{items}{A character vector with names of the items. Needs to
correspond to the folder names which contain the XML files.} correspond to the folder names which contain the XML files.}
\item{xmlfiles}{Vector of names of index files, often something like \item{xmlfiles}{Vector of names of index files, often something like
\verb{<artwork>.xml}. Need to be in the same order as artworks!} \verb{<item>.xml}. Need to be in the same order as items!}
\item{xmlpath}{Path to folder where XML definitions of artworks live.} \item{xmlpath}{Path to folder where XML definitions of items live.}
} }
\value{ \value{
Data frame. Data frame.
} }
\description{ \description{
Topics are extracted from XML files and written to a data frame that Topics are extracted from XML files and written to a data frame that
shows which artworks belong to which topics. shows which items belong to which topics.
} }
\examples{ \examples{
# tbd # tbd