Moved most stuff into package folder mtt; updated README so it works with new code
This commit is contained in:
parent
55adcf03d7
commit
3dd13a6c6e
37
README.Rmd
37
README.Rmd
@ -8,6 +8,11 @@ output:
|
|||||||
toc: true
|
toc: true
|
||||||
---
|
---
|
||||||
|
|
||||||
|
```{r, include = FALSE}
|
||||||
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis")
|
||||||
|
devtools::load_all("../../../software/mtt")
|
||||||
|
```
|
||||||
|
|
||||||
# Log data from the Multi-Touch Table at the HAUM
|
# Log data from the Multi-Touch Table at the HAUM
|
||||||
|
|
||||||
The Multi Touch Table at the Herzog-Anton-Ulrich-Museum (HAUM) in
|
The Multi Touch Table at the Herzog-Anton-Ulrich-Museum (HAUM) in
|
||||||
@ -117,11 +122,6 @@ files have been affected.
|
|||||||
|
|
||||||
# Problems and how I handled them
|
# Problems and how I handled them
|
||||||
|
|
||||||
```{r, include = FALSE}
|
|
||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis")
|
|
||||||
source("code/functions.R")
|
|
||||||
```
|
|
||||||
|
|
||||||
This lists some problems with the log data that required decisions. These
|
This lists some problems with the log data that required decisions. These
|
||||||
decisions influence the outcome and maybe even the data quality. Hence, I
|
decisions influence the outcome and maybe even the data quality. Hence, I
|
||||||
tried to document how I handled these problems and explain the decisions I
|
tried to document how I handled these problems and explain the decisions I
|
||||||
@ -136,7 +136,7 @@ continuous within one log file but not over several log files.
|
|||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
# Read data
|
# Read data
|
||||||
dat0 <- read.table("data/rawdata_logfiles_small.csv", sep = ";",
|
dat0 <- read.table("data/haum/rawdata_logfiles_small.csv", sep = ";",
|
||||||
header = TRUE)
|
header = TRUE)
|
||||||
dat0$date <- as.POSIXct(dat0$date)
|
dat0$date <- as.POSIXct(dat0$date)
|
||||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||||
@ -146,26 +146,16 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application",
|
|||||||
"Show Application")))
|
"Show Application")))
|
||||||
|
|
||||||
# Add trace variable
|
# Add trace variable
|
||||||
dat1 <- add_trace(dat, glossar_dict = "data/glossar_dict.RData")
|
dat1 <- add_trace(dat, glossar_dict = "data/haum/glossar_dict.RData")
|
||||||
|
|
||||||
# Close events
|
# Close events
|
||||||
dat2 <- rbind(close_events(dat1, "move"),
|
dat2 <- rbind(close_events(dat1, "move"),
|
||||||
close_events(dat1, "flipCard"),
|
close_events(dat1, "flipCard"),
|
||||||
close_events(dat1, "openTopic"),
|
close_events(dat1, "openTopic"),
|
||||||
close_events(dat1, "openPopup"))
|
close_events(dat1, "openPopup"))
|
||||||
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
|
dat2 <- dat2[order(dat2$date.start, dat2$fileId), ]
|
||||||
|
|
||||||
head(dat2[which(dat2$duration < 0),
|
|
||||||
c("fileId.start", "fileId.stop", "event", "artwork", "duration")], 20)
|
|
||||||
|
|
||||||
head(dat2[which(dat2$fileId.start != dat2$fileId.stop),
|
|
||||||
c("fileId.start", "fileId.stop", "event", "artwork", "duration")], 20)
|
|
||||||
|
|
||||||
plot(timeMs ~ as.factor(fileId), dat[1:5000,], xlab = "fileId")
|
plot(timeMs ~ as.factor(fileId), dat[1:5000,], xlab = "fileId")
|
||||||
|
|
||||||
# Remove durations when event spans more than one log file, since they are
|
|
||||||
# not interpretable
|
|
||||||
#dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA
|
|
||||||
```
|
```
|
||||||
|
|
||||||
The boxplot shows that we have a continuous range of values within one log
|
The boxplot shows that we have a continuous range of values within one log
|
||||||
@ -183,6 +173,9 @@ exactly fixed. Unfortunately, only three `move` events were fixed, since it
|
|||||||
only fixed irregularities *within* one log file. See below for more
|
only fixed irregularities *within* one log file. See below for more
|
||||||
details.
|
details.
|
||||||
|
|
||||||
|
UPDATE: By now I remove all events that span more than one log file. This
|
||||||
|
lets me improve speed considerably.
|
||||||
|
|
||||||
## Left padding of file IDs
|
## Left padding of file IDs
|
||||||
|
|
||||||
The file names of the raw log files are automatically generated and contain
|
The file names of the raw log files are automatically generated and contain
|
||||||
@ -196,7 +189,7 @@ will sort these files in the order shown below. In order to preprocess the
|
|||||||
data and close events that belong together, the data need to be sorted by
|
data and close events that belong together, the data need to be sorted by
|
||||||
events and artworks repeatedly. In order to get them back in the correct
|
events and artworks repeatedly. In order to get them back in the correct
|
||||||
time order, it is necessary to order them based on three variables:
|
time order, it is necessary to order them based on three variables:
|
||||||
`fileId.start`, `date.start` and `timeMs`. The file IDs therefore need to
|
`fileId`, `date.start` and `timeMs`. The file IDs therefore need to
|
||||||
sort in the correct order (again see below for example). I zero left padded
|
sort in the correct order (again see below for example). I zero left padded
|
||||||
the log file names within the data frame using it as an identifier. These
|
the log file names within the data frame using it as an identifier. These
|
||||||
"file names" do not correspond exactly to the original raw log file names.
|
"file names" do not correspond exactly to the original raw log file names.
|
||||||
@ -406,7 +399,7 @@ assign topics and file names to the according pop-ups. This needs to be
|
|||||||
cross checked with the programming, but seems the most plausible approach
|
cross checked with the programming, but seems the most plausible approach
|
||||||
with my current knowledge.
|
with my current knowledge.
|
||||||
|
|
||||||
## Extracting topics from `index.xml` vs. `<artwork_number>.xml
|
## Extracting topics from `index.xml` vs. `<artwork_number>.xml`
|
||||||
|
|
||||||
When I extract the topics from `index.html` I get different topics, than
|
When I extract the topics from `index.html` I get different topics, than
|
||||||
when I get them from `<artwork>.html`. At first glance, it looks like using
|
when I get them from `<artwork>.html`. At first glance, it looks like using
|
||||||
@ -414,7 +407,7 @@ when I get them from `<artwork>.html`. At first glance, it looks like using
|
|||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
artworks <- unique(dat2$artwork)
|
artworks <- unique(dat2$artwork)
|
||||||
path <- "data/ContentEyevisit/eyevisit_cards_light/"
|
path <- "data/haum/ContentEyevisit/eyevisit_cards_light/"
|
||||||
topics <- extract_topics(artworks, "index.xml", path)
|
topics <- extract_topics(artworks, "index.xml", path)
|
||||||
topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path)
|
topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path)
|
||||||
|
|
||||||
@ -434,7 +427,7 @@ sudden there were 72 instead of 70 artworks. It seems like these two
|
|||||||
artworks appear on October 21, 2022.
|
artworks appear on October 21, 2022.
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
dat0 <- read.table("data/rawdata_logfiles.csv", sep = ";", header = TRUE)
|
dat0 <- read.table("data/haum/rawdata_logfiles.csv", sep = ";", header = TRUE)
|
||||||
dat0$date <- as.POSIXct(dat0$date)
|
dat0$date <- as.POSIXct(dat0$date)
|
||||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||||
|
|
||||||
|
@ -1,125 +0,0 @@
|
|||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
|
||||||
|
|
||||||
###### HELPER ######
|
|
||||||
|
|
||||||
# Need to left pad file names. If I do not do this, the sorting of the
|
|
||||||
# timestamps will be off and I get negative durations later on since the
|
|
||||||
# wrong events get closed.
|
|
||||||
|
|
||||||
|
|
||||||
leftpad_fnames <- function(x) {
|
|
||||||
|
|
||||||
z <- gsub(paste0(dirpaths, "/"), "\\1", x)
|
|
||||||
ys <- strsplit(z, "_")
|
|
||||||
|
|
||||||
res <- NULL
|
|
||||||
|
|
||||||
for (y in ys) {
|
|
||||||
y2 <- unlist(strsplit(y[3], "-"))
|
|
||||||
e1 <- y[1]
|
|
||||||
e2 <- sprintf("%02d", as.numeric(y[2]))
|
|
||||||
e3 <- sprintf("%02d", as.numeric(y2[1]))
|
|
||||||
e4 <- sprintf("%02d", as.numeric(y2[2]))
|
|
||||||
e5 <- sprintf("%02d", as.numeric(y[4]))
|
|
||||||
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
|
||||||
e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
|
|
||||||
|
|
||||||
res <- c(res,
|
|
||||||
paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
|
|
||||||
}
|
|
||||||
res
|
|
||||||
}
|
|
||||||
|
|
||||||
##### CONTENT ######
|
|
||||||
|
|
||||||
# Choose which folders with raw log files should be included
|
|
||||||
|
|
||||||
folders <- "all"
|
|
||||||
#folders <- "_2016b"
|
|
||||||
|
|
||||||
dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
|
||||||
|
|
||||||
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
|
||||||
length(fnames)
|
|
||||||
head(fnames)
|
|
||||||
|
|
||||||
logs <- lapply(fnames, readLines)
|
|
||||||
nlog <- sapply(logs, length)
|
|
||||||
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
|
|
||||||
logs = unlist(logs))
|
|
||||||
head(dat$logs)
|
|
||||||
|
|
||||||
# Remove corrupted lines
|
|
||||||
|
|
||||||
|
|
||||||
# corrupt lines are "" and need to be removed
|
|
||||||
d1 <- dim(dat)[1]
|
|
||||||
dat <- subset(dat, dat$logs != "")
|
|
||||||
d2 <- dim(dat)[1]
|
|
||||||
|
|
||||||
# TODO: Catch this in a function and give back a meaningful warning
|
|
||||||
# The files contain `r d1-d2` corrupt lines that were removed from the
|
|
||||||
# data.
|
|
||||||
|
|
||||||
# Extract relevant infos
|
|
||||||
|
|
||||||
date <- sapply(dat$logs, gsub,
|
|
||||||
pattern = "^\\[(.*)\\], \\[.*$",
|
|
||||||
replacement = "\\1",
|
|
||||||
USE.NAMES = FALSE)
|
|
||||||
|
|
||||||
timestamp <- sapply(dat$logs, gsub,
|
|
||||||
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
|
||||||
replacement = "\\1",
|
|
||||||
USE.NAMES = FALSE)
|
|
||||||
|
|
||||||
action <- sapply(dat$logs, gsub,
|
|
||||||
pattern = "^.*EyeVisit, (.*):*.*$",
|
|
||||||
replacement = "\\1",
|
|
||||||
USE.NAMES = FALSE)
|
|
||||||
|
|
||||||
events <- sapply(strsplit(action, ":"), function(x) x[1])
|
|
||||||
|
|
||||||
topics <- sapply(strsplit(action, ":"), function(x) x[2])
|
|
||||||
|
|
||||||
moves <- apply(do.call(rbind,
|
|
||||||
strsplit(sapply(strsplit(action, ":"), function(x) x[3]),
|
|
||||||
",")),
|
|
||||||
2, as.numeric)
|
|
||||||
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
|
||||||
|
|
||||||
card_action <- trimws(sapply(strsplit(action, ":"),
|
|
||||||
function(x) x[3])[grep("Artwork", events)])
|
|
||||||
|
|
||||||
card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4]))
|
|
||||||
|
|
||||||
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
|
||||||
|
|
||||||
ts_elements <- strsplit(timestamp, ":")
|
|
||||||
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
|
||||||
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
|
||||||
as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60
|
|
||||||
|
|
||||||
dat$date <- lubridate::parse_date_time(date, "bdyHMSOp")
|
|
||||||
dat$timeMs <- time_ms
|
|
||||||
dat$event <- events
|
|
||||||
dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1]))
|
|
||||||
dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2])
|
|
||||||
dat$topicNumber <- card
|
|
||||||
dat$x <- moves[,1]
|
|
||||||
dat$y <- moves[,2]
|
|
||||||
dat$scale <- moves[,3]
|
|
||||||
dat$rotation <- moves[,4]
|
|
||||||
|
|
||||||
dat$logs <- NULL
|
|
||||||
# remove original log files from data so file becomes smaller
|
|
||||||
|
|
||||||
# sort by fileId, since reading in by file names does not make sense
|
|
||||||
# because of missing left zero padding
|
|
||||||
dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ]
|
|
||||||
|
|
||||||
# Export data
|
|
||||||
|
|
||||||
write.table(dat, "../data/rawdata_logfiles.csv",
|
|
||||||
sep = ";", quote = FALSE, row.names = FALSE)
|
|
||||||
|
|
@ -1,37 +0,0 @@
|
|||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light")
|
|
||||||
|
|
||||||
dat0 <- read.table("../../rawdata_logfiles.csv", sep = ";",
|
|
||||||
header = TRUE)
|
|
||||||
# artwork names
|
|
||||||
artworks <- unique(na.omit(dat0$artwork))[unique(na.omit(dat0$artwork)) != "glossar"]
|
|
||||||
|
|
||||||
dat <- subset(dat0, dat0$artwork == "glossar")
|
|
||||||
|
|
||||||
glossar_files <- unique(dat$popup)
|
|
||||||
|
|
||||||
x <- NULL
|
|
||||||
|
|
||||||
for (glossar_file in glossar_files) {
|
|
||||||
for (artwork in artworks) {
|
|
||||||
fnames <- dir(pattern = paste0(artwork, "_"), path = artwork)
|
|
||||||
for (fname in fnames) {
|
|
||||||
lines <- readLines(paste0(artwork, "/", fname))
|
|
||||||
if (any(grepl(glossar_file, lines))) {
|
|
||||||
x <- rbind(x, data.frame(glossar_file, artwork))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
head(x, 20)
|
|
||||||
|
|
||||||
glossar_dict <- as.data.frame(tapply(x$artwork, x$glossar_file, FUN = c))
|
|
||||||
names(glossar_dict) <- "artwork"
|
|
||||||
glossar_dict$glossar_file <- rownames(glossar_dict)
|
|
||||||
rownames(glossar_dict) <- NULL
|
|
||||||
glossar_dict <- glossar_dict[, c("glossar_file", "artwork")]
|
|
||||||
|
|
||||||
save(glossar_dict, file = "../../glossar_dict.RData")
|
|
||||||
# TODO: Save in interoperable format
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
|||||||
|
# TODO: This script is obsolete and needs to be updated!
|
||||||
|
|
||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
||||||
|
|
||||||
source("functions.R")
|
#source("functions.R")
|
||||||
|
devtools::load_all("../../../../software/mtt")
|
||||||
|
|
||||||
small <- TRUE
|
small <- TRUE
|
||||||
|
|
||||||
@ -10,10 +13,10 @@ now <- Sys.time()
|
|||||||
cat("########## Reading in data... ##########", "\n")
|
cat("########## Reading in data... ##########", "\n")
|
||||||
|
|
||||||
if (small) {
|
if (small) {
|
||||||
dat0 <- read.table("../data/rawdata_logfiles_small.csv", sep = ";",
|
dat0 <- read.table("../data/haum/rawdata_logfiles_small.csv", sep = ";",
|
||||||
header = TRUE)
|
header = TRUE)
|
||||||
} else {
|
} else {
|
||||||
dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";",
|
dat0 <- read.table("../data/haum/rawdata_logfiles.csv", sep = ";",
|
||||||
header = TRUE)
|
header = TRUE)
|
||||||
}
|
}
|
||||||
dat0$date <- as.POSIXct(dat0$date)
|
dat0$date <- as.POSIXct(dat0$date)
|
||||||
@ -54,7 +57,6 @@ dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
|
|||||||
# Remove all events that do not have a `date.start`
|
# Remove all events that do not have a `date.start`
|
||||||
dat2 <- dat2[!is.na(dat2$date.start), ]
|
dat2 <- dat2[!is.na(dat2$date.start), ]
|
||||||
rownames(dat2) <- NULL
|
rownames(dat2) <- NULL
|
||||||
# TODO: Throw warning about this
|
|
||||||
|
|
||||||
save(dat2, file = paste("tmp/dat2", ifelse(small, "small_", "full_"),
|
save(dat2, file = paste("tmp/dat2", ifelse(small, "small_", "full_"),
|
||||||
format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
||||||
@ -90,7 +92,7 @@ artworks <- unique(dat4$artwork)
|
|||||||
# remove artworks without XML information
|
# remove artworks without XML information
|
||||||
artworks <- artworks[!artworks %in% c("504", "505")]
|
artworks <- artworks[!artworks %in% c("504", "505")]
|
||||||
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
|
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
|
||||||
path = "../data/ContentEyevisit/eyevisit_cards_light/")
|
path = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||||
|
|
||||||
dat5 <- add_topic(dat4, topics = topics)
|
dat5 <- add_topic(dat4, topics = topics)
|
||||||
|
|
||||||
@ -101,6 +103,6 @@ save(dat5, file = paste("tmp/dat5", ifelse(small, "small_", "full_"),
|
|||||||
|
|
||||||
# Export data ############################################################
|
# Export data ############################################################
|
||||||
cat("########## Exporting data frame with event logs... ##########", "\n")
|
cat("########## Exporting data frame with event logs... ##########", "\n")
|
||||||
write.table(dat5, "../data/event_logfiles.csv", sep = ";",
|
write.table(dat5, "../data/haum/event_logfiles.csv", sep = ";",
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
#' # Read data
|
#' # Read data
|
||||||
|
|
||||||
dat <- read.table("../data/event_logfiles.csv", sep = ";", header = TRUE)
|
dat <- read.table("../data/haum/event_logfiles.csv", sep = ";", header = TRUE)
|
||||||
dat$date.start <- as.POSIXct(dat$date.start)
|
dat$date.start <- as.POSIXct(dat$date.start)
|
||||||
dat$date.stop <- as.POSIXct(dat$date.stop)
|
dat$date.stop <- as.POSIXct(dat$date.stop)
|
||||||
|
|
||||||
|
@ -1,16 +0,0 @@
|
|||||||
path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light"
|
|
||||||
|
|
||||||
setwd(path)
|
|
||||||
|
|
||||||
# artwork names
|
|
||||||
dat0 <- read.table("../../event_logfiles.csv", sep = ";", header = TRUE)
|
|
||||||
dat0$artwork <- sprintf("%03d", dat0$artwork)
|
|
||||||
artworks <- sort(unique(dat0$artwork))
|
|
||||||
|
|
||||||
# extract topics
|
|
||||||
topics <- extract_topics(artworks, paste0(artworks, ".xml"), path)
|
|
||||||
|
|
||||||
write.table(topics, file = "../../topics.csv", sep = ";", row.names = FALSE)
|
|
||||||
|
|
||||||
# TODO: Keep this file?
|
|
||||||
|
|
458
code/functions.R
458
code/functions.R
@ -1,458 +0,0 @@
|
|||||||
###########################################################################
|
|
||||||
|
|
||||||
# Add trace variable
|
|
||||||
|
|
||||||
add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
|
|
||||||
|
|
||||||
data$trace <- NA
|
|
||||||
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
|
|
||||||
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
|
|
||||||
|
|
||||||
last_event <- subdata2$event[1]
|
|
||||||
artworks <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"]
|
|
||||||
n <- 1 # count artworks for progress
|
|
||||||
|
|
||||||
pb <- txtProgressBar(min = 0, max = nrow(subdata2), initial = NA,
|
|
||||||
style = 3)
|
|
||||||
|
|
||||||
for (artwork in artworks) {
|
|
||||||
|
|
||||||
cat("\n\nAdding trace variable for artwork", artwork,
|
|
||||||
paste0("(", n, "/", length(artworks), ")"), "\n")
|
|
||||||
|
|
||||||
for (i in 1:nrow(subdata2)) {
|
|
||||||
|
|
||||||
if (last_event == "Show Info" & subdata2$artwork[i] == artwork) {
|
|
||||||
subdata2$trace[i] <- i
|
|
||||||
j <- i
|
|
||||||
|
|
||||||
} else if (last_event == "Show Front" & subdata2$artwork[i] == artwork) {
|
|
||||||
subdata2$trace[i] <- j
|
|
||||||
|
|
||||||
} else if (!(last_event %in% c("Show Info", "Show Front")) &
|
|
||||||
subdata2$artwork[i] == artwork) {
|
|
||||||
subdata2$trace[i] <- j
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i <= nrow(subdata2)) {
|
|
||||||
last_event <- subdata2$event[i + 1]
|
|
||||||
}
|
|
||||||
setTxtProgressBar(pb, i)
|
|
||||||
}
|
|
||||||
n <- n + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Fix glossar entries (find corresponding artworks and fill in trace)
|
|
||||||
glossar_files <- unique(subdata2[subdata2$artwork == "glossar", "popup"])
|
|
||||||
|
|
||||||
# load lookup table for artworks and glossar files
|
|
||||||
load(glossar_dict)
|
|
||||||
lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
|
|
||||||
|
|
||||||
inside <- glossar_files[glossar_files %in%
|
|
||||||
lut[sapply(lut$artwork, length) == 1,
|
|
||||||
"glossar_file"]]
|
|
||||||
single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"])
|
|
||||||
|
|
||||||
m <- 1
|
|
||||||
|
|
||||||
for (file in lut$glossar_file) {
|
|
||||||
|
|
||||||
cat("\n\nAdding trace variable for glossar entry", file,
|
|
||||||
paste0("(", m, "/", length(lut$glossar_file), ")"), "\n")
|
|
||||||
|
|
||||||
artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
|
||||||
|
|
||||||
for (i in seq_len(nrow(subdata2))) {
|
|
||||||
|
|
||||||
if (subdata2$event[i] == "Show Info" |
|
|
||||||
(subdata2$event[i] == "Artwork/OpenCard" &
|
|
||||||
subdata2$artwork[i] %in% single_art)) {
|
|
||||||
|
|
||||||
current_artwork <- subdata2[i, "artwork"]
|
|
||||||
j <- i
|
|
||||||
k <- i
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
current_artwork <- current_artwork
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subdata2$event[i] == "Show Front" & subdata2$artwork[i] == current_artwork) {
|
|
||||||
# make sure artwork has not been closed, yet!
|
|
||||||
k <- i
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subdata2$artwork[i] == "glossar" &
|
|
||||||
(current_artwork %in% artwork_list) &
|
|
||||||
subdata2$popup[i] == file & (j - k == 0)) {
|
|
||||||
|
|
||||||
subdata2[i, "trace"] <- subdata2[j, "trace"]
|
|
||||||
subdata2[i, "artwork"] <- current_artwork
|
|
||||||
|
|
||||||
}
|
|
||||||
setTxtProgressBar(pb, i)
|
|
||||||
}
|
|
||||||
m <- m + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Exclude not matched glossar entries
|
|
||||||
cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
|
|
||||||
sum(is.na(subdata2[subdata2$glossar == 1, "trace"])), "entries",
|
|
||||||
#proportions(table(is.na(subdata2[subdata2$glossar == 1, "trace"]))),
|
|
||||||
fill = TRUE)
|
|
||||||
subdata2 <- subset(subdata2, !is.na(subdata2$trace))
|
|
||||||
# REMEMBER: It can never be 100% correct, since it is always possible
|
|
||||||
# that several cards are open and that they link to the same glossar
|
|
||||||
# entry
|
|
||||||
|
|
||||||
# dat2[14110:14130, ]
|
|
||||||
# dat2[dat2$glossar == 1, ]
|
|
||||||
|
|
||||||
out <- rbind(subdata1, subdata2)
|
|
||||||
out <- out[order(out$fileId, out$date, out$timeMs), ]
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
|
|
||||||
|
|
||||||
event <- match.arg(event)
|
|
||||||
|
|
||||||
switch(event,
|
|
||||||
"move" = {
|
|
||||||
actions <- c("Transform start", "Transform stop")
|
|
||||||
idvar <- c("fileId", "eventId", "artwork", "glossar")
|
|
||||||
drop <- c("popup", "topicNumber", "trace", "event")
|
|
||||||
ncol <- 16
|
|
||||||
|
|
||||||
},
|
|
||||||
"flipCard" = {
|
|
||||||
actions <- c("Show Info", "Show Front")
|
|
||||||
idvar <- c("fileId", "trace", "artwork", "glossar")
|
|
||||||
drop <- c("popup", "topicNumber", "eventId", "event")
|
|
||||||
ncol <- 16
|
|
||||||
|
|
||||||
},
|
|
||||||
"openTopic" = {
|
|
||||||
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
|
|
||||||
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork",
|
|
||||||
"topicNumber")
|
|
||||||
drop <- c("popup", "event")
|
|
||||||
ncol <- 18
|
|
||||||
|
|
||||||
},
|
|
||||||
"openPopup" = {
|
|
||||||
actions <- c("ShowPopup", "HidePopup")
|
|
||||||
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
|
|
||||||
drop <- c("topicNumber", "event")
|
|
||||||
ncol <- 18
|
|
||||||
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
|
|
||||||
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
subdata <- subset(data, data$event %in% actions)
|
|
||||||
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
|
|
||||||
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
|
|
||||||
num_start <- diff(c(0, which(subdata$event == actions[2])))
|
|
||||||
if (tail(subdata, 1)$time == "start") {
|
|
||||||
num_start <- c(num_start, 1)
|
|
||||||
}
|
|
||||||
subdata$eventId <- rep(seq_along(num_start), num_start)
|
|
||||||
|
|
||||||
if (event == "move") {
|
|
||||||
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
|
|
||||||
id_stop <- which(subdata$event == actions[2])
|
|
||||||
id_rm_stop <- id_stop[diff(id_stop) == 1]
|
|
||||||
subdata <- subdata[-(id_rm_stop + 1), ]
|
|
||||||
}
|
|
||||||
|
|
||||||
subdata_split <- split(subdata, ~ fileId)
|
|
||||||
|
|
||||||
pbapply::pboptions(style = 3, char = "=")
|
|
||||||
|
|
||||||
subdata_split_wide <- pbapply::pblapply(subdata_split, reshape,
|
|
||||||
direction = "wide",
|
|
||||||
idvar = idvar,
|
|
||||||
timevar = "time",
|
|
||||||
drop = drop)
|
|
||||||
# suppressWarnings(
|
|
||||||
# data_wide <- reshape(subdata, direction = "wide",
|
|
||||||
# idvar = idvar,
|
|
||||||
# timevar = "time",
|
|
||||||
# drop = drop)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# remove entries with only start or stop events since they do not have
|
|
||||||
# all columns
|
|
||||||
ids <- which(sapply(subdata_split_wide, ncol) != ncol)
|
|
||||||
if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
|
|
||||||
|
|
||||||
data_wide <- dplyr::bind_rows(subdata_split_wide)
|
|
||||||
|
|
||||||
for (d in drop) data_wide[d] <- NA
|
|
||||||
data_wide$distance <- NA
|
|
||||||
data_wide$scaleSize <- NA
|
|
||||||
data_wide$rotationDegree <- NA
|
|
||||||
|
|
||||||
data_wide$event <- event
|
|
||||||
data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
|
|
||||||
|
|
||||||
if (event == "move") {
|
|
||||||
data_wide$distance <- apply(
|
|
||||||
data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
|
|
||||||
function(x) dist(matrix(x, 2, 2, byrow = TRUE)))
|
|
||||||
data_wide$rotationDegree <- data_wide$rotation.stop -
|
|
||||||
data_wide$rotation.start
|
|
||||||
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
|
|
||||||
# remove moves without any change
|
|
||||||
move_wide <- data_wide[data_wide$distance != 0 &
|
|
||||||
data_wide$rotationDegree != 0 &
|
|
||||||
data_wide$scaleSize != 1, ]
|
|
||||||
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
|
|
||||||
"lines containing move events were removed since they did",
|
|
||||||
"\nnot contain any change"), fill = TRUE)
|
|
||||||
data_wide <- move_wide
|
|
||||||
}
|
|
||||||
|
|
||||||
out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
|
|
||||||
"date.start", "date.stop", "timeMs.start",
|
|
||||||
"timeMs.stop", "duration", "topicNumber", "popup",
|
|
||||||
"x.start", "y.start", "x.stop", "y.stop",
|
|
||||||
"distance", "scale.start", "scale.stop",
|
|
||||||
"scaleSize", "rotation.start", "rotation.stop",
|
|
||||||
"rotationDegree")]
|
|
||||||
rownames(out) <- NULL
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# Add case variable
|
|
||||||
|
|
||||||
add_case <- function(data, cutoff = 20) {
|
|
||||||
# TODO: What is the best choice for the cutoff here?
|
|
||||||
|
|
||||||
data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
|
|
||||||
data$case <- NA
|
|
||||||
j <- 1
|
|
||||||
pb <- txtProgressBar(min = 0, max = nrow(data), initial = NA, style = 3)
|
|
||||||
|
|
||||||
for (i in seq_len(nrow(data))) {
|
|
||||||
if (data$timediff[i] <= cutoff) {
|
|
||||||
data$case[i] <- j
|
|
||||||
} else {
|
|
||||||
j <- j + 1
|
|
||||||
data$case[i] <- j
|
|
||||||
}
|
|
||||||
setTxtProgressBar(pb, i)
|
|
||||||
}
|
|
||||||
data$timediff <- NULL
|
|
||||||
data
|
|
||||||
}
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# Add trace for moves
|
|
||||||
|
|
||||||
add_trace_moves <- function(data) {
|
|
||||||
|
|
||||||
pbapply::pboptions(style = 3, char = "=")
|
|
||||||
|
|
||||||
trace_max <- max(data$trace, na.rm = TRUE)
|
|
||||||
|
|
||||||
#subdata_art <- split(data, ~ artwork)
|
|
||||||
subdata_case <- split(data, ~ case)
|
|
||||||
|
|
||||||
#subdata_list <- split(data, ~ artwork + case)
|
|
||||||
# --> does not work with complete data set
|
|
||||||
cat("Splitting data...", "\n")
|
|
||||||
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~artwork)
|
|
||||||
subdata_list <- unlist(subdata_list, recursive = FALSE)
|
|
||||||
|
|
||||||
cat("Adding trace...", "\n")
|
|
||||||
subdata_trace <- pbapply::pblapply(subdata_list,
|
|
||||||
function(x) {
|
|
||||||
trace_max <<- trace_max + 1
|
|
||||||
add_trace_subdata(x, max_trace = trace_max)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
out <- dplyr::bind_rows(subdata_trace)
|
|
||||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
|
||||||
rownames(out) <- NULL
|
|
||||||
|
|
||||||
# Make trace a consecutive number
|
|
||||||
out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace)))
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
add_trace_subdata <- function(subdata, max_trace) {
|
|
||||||
|
|
||||||
if (nrow(subdata) != 0) {
|
|
||||||
|
|
||||||
if (length(na.omit(unique(subdata$trace))) == 1) {
|
|
||||||
subdata[subdata$event == "move", "trace"] <- na.omit(unique(subdata$trace))
|
|
||||||
} else if (length(na.omit(unique(subdata$trace))) > 1) {
|
|
||||||
for (i in 1:nrow(subdata)) {
|
|
||||||
if (subdata$event[i] == "move") {
|
|
||||||
if (i == 1) {
|
|
||||||
subdata$trace[i] <- na.omit(unique(subdata$trace))[1]
|
|
||||||
} else {
|
|
||||||
subdata$trace[i] <- subdata$trace[i - 1]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (all(is.na(subdata$trace))) {
|
|
||||||
for (i in 1:nrow(subdata)) {
|
|
||||||
subdata$trace[i] <- max_trace
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
warning("`subdata` has nrow = 0")
|
|
||||||
}
|
|
||||||
subdata
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# Create data frame with file names and topics for each artwork
|
|
||||||
|
|
||||||
extract_topics <- function(artworks, pattern, path) {
|
|
||||||
|
|
||||||
dat <- NULL
|
|
||||||
file_order <- NULL
|
|
||||||
i <- 1
|
|
||||||
|
|
||||||
for (artwork in artworks) {
|
|
||||||
|
|
||||||
if (length(pattern) == 1) {
|
|
||||||
index_file <- pattern
|
|
||||||
} else {
|
|
||||||
index_file <- pattern[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
fnames <- dir(pattern = paste0(artwork, "_"),
|
|
||||||
path = paste(path, artwork, sep = "/"))
|
|
||||||
topic <- NULL
|
|
||||||
for (fname in fnames) {
|
|
||||||
suppressWarnings(
|
|
||||||
topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
|
|
||||||
grep("^<card type=",
|
|
||||||
trimws(readLines(paste(path, artwork, fname, sep = "/"))),
|
|
||||||
value = T)))
|
|
||||||
)
|
|
||||||
|
|
||||||
}
|
|
||||||
index <- paste(path, artwork, index_file, sep = "/")
|
|
||||||
suppressWarnings(
|
|
||||||
file_order <- c(file_order, gsub("^<card src=.*/(.*)./>$", "\\1",
|
|
||||||
grep("^<card src=", trimws(readLines(index)),
|
|
||||||
value = TRUE)))
|
|
||||||
)
|
|
||||||
in_index <- fnames %in% file_order
|
|
||||||
dat <- rbind(dat, data.frame(artwork, file_name = fnames, in_index, topic))
|
|
||||||
i <- i + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# take only the ones that are actually displayed and sort in the same order
|
|
||||||
# as indicated in index.html
|
|
||||||
out <- dat[dat$in_index, -3]
|
|
||||||
out <- out[order(file_order, out$file_name), ]
|
|
||||||
rownames(out) <- NULL
|
|
||||||
|
|
||||||
out$index <- unlist(sapply(table(out$artwork), seq_len))
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# Add topics: file names and topics
|
|
||||||
|
|
||||||
add_topic <- function(data, topics) {
|
|
||||||
|
|
||||||
artworks <- unique(data$artwork)
|
|
||||||
tab_art <- lapply(artworks,
|
|
||||||
function(x) names(table(data$topicNumber[data$artwork == x])))
|
|
||||||
names(tab_art) <- artworks
|
|
||||||
|
|
||||||
tab_index <- lapply(tab_art, seq_along)
|
|
||||||
|
|
||||||
dat_split <- split(data, ~ artwork)
|
|
||||||
|
|
||||||
set_label <- function(x) {
|
|
||||||
artwork <- unique(x$artwork)
|
|
||||||
x$topicIndex <- factor(x$topicNumber, labels = tab_index[[artwork]])
|
|
||||||
x
|
|
||||||
}
|
|
||||||
|
|
||||||
dat_label <- lapply(dat_split, set_label)
|
|
||||||
|
|
||||||
set_topic <- function(x) {
|
|
||||||
artwork <- unique(x$artwork)
|
|
||||||
labels_file <- topics[topics$artwork == artwork,
|
|
||||||
"file_name"][as.numeric(levels(x$topicIndex))]
|
|
||||||
x$topicFile <- as.character(factor(x$topicIndex, labels = labels_file))
|
|
||||||
labels_topic <- topics[topics$artwork == artwork,
|
|
||||||
"topic"][as.numeric(levels(x$topicIndex))]
|
|
||||||
x$topic <- as.character(factor(x$topicIndex, labels = labels_topic))
|
|
||||||
x
|
|
||||||
}
|
|
||||||
|
|
||||||
dat_topic <- lapply(dat_label, set_topic)
|
|
||||||
|
|
||||||
#out <- do.call(rbind, dat_topic)
|
|
||||||
out <- dplyr::bind_rows(dat_topic)
|
|
||||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
|
||||||
rownames(out) <- NULL
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# Create data frame with information on artworks
|
|
||||||
|
|
||||||
extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
|
|
||||||
path = path) {
|
|
||||||
out <- NULL
|
|
||||||
i <- 1
|
|
||||||
|
|
||||||
for (artwork in artworks) {
|
|
||||||
|
|
||||||
if (length(files) == 1) {
|
|
||||||
index_file <- files
|
|
||||||
} else {
|
|
||||||
index_file <- files[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
index <- paste(path, artwork, index_file, sep = "/")
|
|
||||||
varnames <- c("artist", "title", "misc", "description")
|
|
||||||
xmllist <- XML::xmlToList(index)$header[varnames]
|
|
||||||
|
|
||||||
if (any(sapply(xmllist, is.null))) {# necessary for missing entries
|
|
||||||
names(xmllist) <- varnames
|
|
||||||
xmllist[which(sapply(xmllist, is.null))] <- NA
|
|
||||||
}
|
|
||||||
# remove ugly quotes
|
|
||||||
xmllist <- lapply(xmllist, function(x) gsub("„|“", "", x))
|
|
||||||
# remove HTML tags
|
|
||||||
xmllist <- lapply(xmllist, function(x) gsub("<br/>", " ", x))
|
|
||||||
xmldat <- as.data.frame(xmllist)
|
|
||||||
xmldat$artwork <- artwork
|
|
||||||
# trim white space from strings
|
|
||||||
xmldat$artist <- trimws(xmldat$artist)
|
|
||||||
xmldat$title <- trimws(xmldat$title)
|
|
||||||
xmldat$misc <- trimws(xmldat$misc)
|
|
||||||
xmldat$description <- trimws(xmldat$description)
|
|
||||||
out <- rbind(out, xmldat)
|
|
||||||
i <- i + 1
|
|
||||||
}
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
library(lubridate)
|
library(lubridate)
|
||||||
|
|
||||||
dat <- read.table("../data/rawdata_logfiles.csv", header = TRUE, sep = ";")
|
dat <- read.table("../data/haum/rawdata_logfiles.csv", header = TRUE, sep = ";")
|
||||||
# dat$event <- factor(dat$event, levels = c("Start Application",
|
# dat$event <- factor(dat$event, levels = c("Start Application",
|
||||||
# "Show Application",
|
# "Show Application",
|
||||||
# "Transform start",
|
# "Transform start",
|
26
code/visualization.R
Normal file
26
code/visualization.R
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
|
||||||
|
|
||||||
|
devtools::load_all("../../../../software/mtt")
|
||||||
|
#library(mtt)
|
||||||
|
|
||||||
|
dat <- parse_logfiles("2016", path = "../data/haum/LogFiles/",
|
||||||
|
save = FALSE)
|
||||||
|
datlogs <- create_eventlogs(dat, "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||||
|
|
||||||
|
dat001 <- datlogs[which(datlogs$artwork == "001"), ]
|
||||||
|
|
||||||
|
index <- as.numeric(as.factor(dat001$trace))
|
||||||
|
cc <- sample(colors(), length(unique(dat001$trace)))
|
||||||
|
|
||||||
|
plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
|
||||||
|
xlim = c(0, 3840), ylim = c(0, 2160))
|
||||||
|
with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop,
|
||||||
|
length = .07, col = cc[index]))
|
||||||
|
|
||||||
|
plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
|
||||||
|
xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray")
|
||||||
|
points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
|
||||||
|
xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize,
|
||||||
|
col = "blue")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user