#' ---
#' title: "Programming input"
#' author: "Nora Wickelmaier"
#' date: "`r Sys.Date()`"
#' output: html_document
#' ---

#+ include = FALSE
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

#+
dat0 <- read.table(
  "../data/rawdata_logfiles_small.csv",
  sep = ";",
  header = TRUE
)
dat0$date <- as.POSIXct(dat0$date)  # convert to date-time object

# Remove irrelevant events
dat <- dat0[!dat0$event %in% c("Start Application", "Show Application"), ]
str(dat)

# Make the data more manageable: drop transform events and renumber the rows
# so that row names coincide with the row indices used for `trace` below.
tmp <- dat[!dat$event %in% c("Transform start", "Transform stop"), ]
rownames(tmp) <- NULL

#' # Add `trace` variable for closing events

# `trace` links every event of an artwork to the row index of the most recent
# "Show Info" event of that same artwork. Glossar rows are handled in the
# next section.
tmp$trace <- NA
aws <- setdiff(unique(tmp$artwork), "glossar")

for (art in aws) {
  # Reset for every artwork: rows that occur before the first "Show Info" of
  # this artwork have no known trace and must not inherit one from another
  # artwork.
  open_row <- NA_integer_
  for (i in which(tmp$artwork == art)) {
    if (tmp$event[i] == "Show Info") {
      open_row <- i
    }
    tmp$trace[i] <- open_row
  }
}

head(tmp[, c("artwork", "event", "trace")], 50)

#' # Find artwork for glossar entry

glossar_files <- unique(tmp[tmp$artwork == "glossar", "popup"])

# Load lookup table for artworks and glossar files
load("../data/glossar_dict.RData")
lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]

# Fill in trace variable based on last `Show Info`.
# A glossar row gets the trace of the most recent "Show Info" row if
#   (1) no "Show Front" for that artwork has occurred since (the artwork has
#       not been closed yet), and
#   (2) the lookup table associates the glossar file with that artwork.
# The tracking state is initialised explicitly, so rows before the first
# "Show Info" are never matched against leftover values.
current_artwork <- NA_character_
info_row <- NA_integer_
card_open <- FALSE

for (i in seq_len(nrow(tmp))) {
  if (tmp$event[i] == "Show Info") {
    current_artwork <- tmp$artwork[i]
    info_row <- i
    card_open <- TRUE
  } else if (card_open && tmp$event[i] == "Show Front" &&
               tmp$artwork[i] == current_artwork) {
    # Artwork has been closed; later glossar rows cannot belong to it
    card_open <- FALSE
  }

  if (card_open && tmp$artwork[i] == "glossar") {
    # `%in%` keeps the row selection free of NA even if `popup` is missing
    artwork_list <- unlist(lut[lut$glossar_file %in% tmp$popup[i], "artwork"])
    if (current_artwork %in% artwork_list) {
      tmp$trace[i] <- tmp$trace[info_row]
    }
  }
}

tmp[tmp$artwork == "glossar", c("artwork", "event", "popup", "trace")]

proportions(table(is.na(tmp$trace[tmp$artwork == "glossar"])))
# --> finds about half of the glossar entries for small data set...

# REMEMBER: It can never be 100% correct, since it is always possible that
# several cards are open and that they link to the same glossar entry

# How many glossar_files are only associated with one artwork?
lut[lengths(lut$artwork) == 1, "glossar_file"]