106 lines
2.7 KiB
R
106 lines
2.7 KiB
R
|
#' ---
#' title: "Programming input"
#' author: "Nora Wickelmaier"
#' date: "`r Sys.Date()`"
#' output: html_document
#' ---
|
||
|
|
||
|
#+ include = FALSE
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

#+
# Read the semicolon-separated raw log file; the first row holds column names
dat0 <- read.table(
  file   = "../data/rawdata_logfiles_small.csv",
  header = TRUE,
  sep    = ";"
)
# Convert the date column to date-time (POSIXct) values
dat0$date <- as.POSIXct(dat0$date)

# Drop the application start/show events, which are not analysed further
dat <- dat0[!(dat0$event %in% c("Start Application", "Show Application")), ]
str(dat)
|
||
|
|
||
|
# Working copy without the transform events; row names are reset so that
# they run consecutively from 1 again
tmp <- subset(dat, !(event %in% c("Transform start", "Transform stop")))
row.names(tmp) <- NULL
|
||
|
|
||
|
#' # Add `trace` variable for closing events

# Every row of a non-glossar artwork gets the row index of the most recent
# "Show Info" event of the same artwork as its `trace` id, so all events that
# follow one "Show Info" of an artwork share the same id. Rows that occur
# before the first "Show Info" of their artwork keep `NA`. Glossar rows are
# left untouched here.
tmp$trace <- rep(NA_integer_, nrow(tmp))
aws <- unique(tmp$artwork)[unique(tmp$artwork) != "glossar"]

for (art in aws) { # select artwork

  # Row index of the last "Show Info" seen for this artwork. It is reset for
  # every artwork so that an id can never carry over from the artwork that
  # was processed before.
  j <- NA_integer_

  # which() visits only the rows of this artwork (and skips NA artworks)
  for (i in which(tmp$artwork == art)) {

    # %in% instead of == so that a missing event does not abort the loop
    if (tmp$event[i] %in% "Show Info") {
      j <- i
    }
    tmp$trace[i] <- j
  }
}

head(tmp[, c("artwork", "event", "trace")], 50)
|
||
|
|
||
|
#' # Find artwork for glossar entry

# All distinct popup files that were shown as glossar entries
glossar_files <- unique(tmp$popup[tmp$artwork == "glossar"])

# The RData file provides `glossar_dict`, the lookup table between glossar
# files and artworks; keep only the entries that occur in the log data
load("../data/glossar_dict.RData")
lut <- glossar_dict[which(glossar_dict$glossar_file %in% glossar_files), ]
|
||
|
|
||
|
# Fill in trace variable based on last `Show Info`
#
# The log is walked through once while remembering the most recent
# "Show Info" row (`open_row`) and its artwork (`open_artwork`). A
# "Show Front" event on that artwork closes the card again; from then on
# glossar rows are not attributed to it until the next "Show Info". A glossar
# row inherits the trace of the open card if the lookup table lists the open
# artwork for the glossar file shown in `popup`.
#
# The state starts out as "nothing open", so a log that does not begin with
# "Show Info" no longer fails on undefined variables. Because the state does
# not depend on the glossar file, one pass over the rows is enough.
open_row <- NA_integer_
open_artwork <- NA_character_

for (i in seq_len(nrow(tmp))) {

  if (tmp$event[i] %in% "Show Info") {

    # A card is opened: remember where and for which artwork
    open_row <- i
    open_artwork <- tmp$artwork[i]

  } else if (tmp$event[i] %in% "Show Front" &&
             !is.na(open_row) &&
             tmp$artwork[i] %in% open_artwork) {

    # make sure artwork has not been closed, yet!
    open_row <- NA_integer_

  }

  if (tmp$artwork[i] %in% "glossar" &&
      !is.na(open_row) &&
      !is.na(tmp$popup[i])) {

    # Artworks that link to the glossar file of this row
    artwork_list <- unlist(lut[lut$glossar_file %in% tmp$popup[i], "artwork"])

    if (open_artwork %in% artwork_list) {
      tmp[i, "trace"] <- tmp[open_row, "trace"]
    }
  }
}
|
||
|
|
||
|
# Show all glossar rows together with the trace assigned to them
tmp[tmp$artwork == "glossar", c("artwork", "event", "popup", "trace")]

# Share of glossar rows without (TRUE) and with (FALSE) an assigned trace
prop.table(table(is.na(tmp[tmp$artwork == "glossar", "trace"])))
# --> about half of the glossar entries are found for the small data set

# REMEMBER: This can never be 100% correct, because several cards can be open
# at the same time and link to the same glossar entry

# Which glossar files are associated with exactly one artwork?
lut[lengths(lut$artwork) == 1, "glossar_file"]
|
||
|
|