mtt_haum/code/questions/questions_programming-input.R

106 lines
2.7 KiB
R

#' ---
#' title: "Programming input"
#' author: "Nora Wickelmaier"
#' date: "`r Sys.Date()`"
#' output: html_document
#' ---
#+ include = FALSE
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
#+
# Read the semicolon-separated raw log file; the first line holds the column
# names. The path is relative to the current working directory.
dat0 <- read.table(
  file = "../data/rawdata_logfiles_small.csv",
  header = TRUE,
  sep = ";"
)
# Turn the `date` column into date-time (POSIXct) values
dat0$date <- as.POSIXct(dat0$date)

# Drop the application-level events, keeping everything else
app_events <- c("Start Application", "Show Application")
dat <- dat0[!(dat0$event %in% app_events), ]
str(dat)

# Working copy without the transform events, with row names reset so that
# they run from 1 to nrow(tmp)
tmp <- subset(dat, !(dat$event %in% c("Transform start", "Transform stop")))
rownames(tmp) <- NULL
#' # Add `trace` variable for closing events
# Give every non-glossar row a `trace` id: the row number of the most recent
# "Show Info" event of the same artwork. A "Show Info" row traces to itself;
# all following rows of that artwork (including "Show Front") inherit the id
# until the artwork's next "Show Info".
#
# Rows of an artwork that occur before its first "Show Info" stay NA instead
# of inheriting a row number left over from a previously processed artwork.
# Glossar rows are not touched here and stay NA.
tmp$trace <- NA_integer_
aws <- setdiff(unique(tmp$artwork), "glossar")
for (art in aws) { # select artwork
  rows <- which(tmp$artwork == art)
  # %in% never yields NA, so a missing event simply counts as "not Show Info"
  is_info <- tmp$event[rows] %in% "Show Info"
  # Number of "Show Info" events seen so far (0 = none yet for this artwork)
  n_info <- cumsum(is_info)
  # Position 1 of the lookup vector is the NA used while n_info is 0;
  # position k + 1 is the row number of the k-th "Show Info"
  tmp$trace[rows] <- c(NA_integer_, rows[is_info])[n_info + 1L]
}
head(tmp[, c("artwork", "event", "trace")], 50)
#' # Find artwork for glossar entry
# Distinct popup files that occur on rows whose artwork is "glossar"
glossar_files <- unique(tmp$popup[tmp$artwork == "glossar"])
# Load `glossar_dict`, the lookup table for artworks and glossar files
load("../data/glossar_dict.RData")
# Keep only the lookup rows for glossar files that actually occur in the data
lut <- subset(glossar_dict, glossar_dict$glossar_file %in% glossar_files)
# Fill in the trace variable of glossar rows based on the last `Show Info`.
#
# The rows are walked once while remembering the most recent `Show Info`
# (its artwork and row number) and whether that artwork has been flipped back
# with `Show Front` since then. A glossar row inherits the trace of that
# `Show Info` row when
#   * the artwork is still open (no `Show Front` for it yet), and
#   * the lookup table lists that artwork for the glossar row's popup file.
#
# The state is initialised explicitly, so glossar rows that occur before the
# first `Show Info` stay NA instead of raising an "object not found" error or
# picking up values left over from earlier code.
current_artwork <- NA_character_ # artwork of the most recent `Show Info`
info_row <- NA_integer_          # row number of the most recent `Show Info`
card_open <- FALSE               # TRUE until that artwork gets a `Show Front`
for (i in seq_len(nrow(tmp))) {
  if (tmp$event[i] %in% "Show Info") {
    current_artwork <- tmp$artwork[i]
    info_row <- i
    card_open <- TRUE
  } else if (card_open && tmp$event[i] %in% "Show Front" &&
             tmp$artwork[i] %in% current_artwork) {
    # make sure artwork has not been closed, yet!
    card_open <- FALSE
  }
  if (card_open && tmp$artwork[i] %in% "glossar" && !is.na(tmp$popup[i])) {
    # All artworks the lookup table associates with this popup file
    # (NULL when the file is not in the lookup table)
    artwork_list <- unlist(lut[lut$glossar_file %in% tmp$popup[i], "artwork"])
    if (current_artwork %in% artwork_list) {
      tmp[i, "trace"] <- tmp[info_row, "trace"]
    }
  }
}
tmp[tmp$artwork == "glossar", c("artwork", "event", "popup", "trace")]
# Share of glossar rows without (TRUE) and with (FALSE) an assigned trace
proportions(table(is.na(tmp$trace[tmp$artwork == "glossar"])))
# --> finds about half of the glossar entries for small data set...
# REMEMBER: It can never be 100% correct, since it is always possible that
# several cards are open and that they link to the same glossar entry
# How many glossar_files are only associated with one artwork?
# lengths() always returns an integer vector, also for an empty lookup table
lut[lengths(lut$artwork) == 1, "glossar_file"]