diff --git a/code/questions/questions_data-inconsistencies.R b/code/questions/questions_data-inconsistencies.R
deleted file mode 100644
index 6ebdf04..0000000
--- a/code/questions/questions_data-inconsistencies.R
+++ /dev/null
@@ -1,125 +0,0 @@
-#' ---
-#' title: "Open Questions"
-#' author: "Nora Wickelmaier"
-#' date: "`r Sys.Date()`"
-#' output: 
-#'   html_document:
-#'     number_sections: true
-#'     toc: true
-#' ---
-
-#+ include = FALSE
-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
-dat <- read.table("../data/event_logfiles.csv", sep = ";", header = TRUE)
-dat$date.start <- as.POSIXct(dat$date.start)
-dat$date.stop <- as.POSIXct(dat$date.stop)
-
-#' This is what the data look like after preprocessing right now
-
-#+ include = FALSE
-mat <- as.data.frame(t(sapply(dat, range, na.rm = TRUE)))
-names(mat) <- c("min", "max")
-mat$min <- round(as.numeric(mat$min), 1)
-mat$max <- round(as.numeric(mat$max), 1)
-mat$mean <- round(sapply(dat, function(x) mean(x, na.rm = TRUE)), 1)
-mat$missings <- sapply(dat, function(x) sum(is.na(x)))
-mat <- mat[!(rownames(mat) %in% c("eventid", "case", "trace", "event", "artwork", "card", "popup", "date.start", "date.stop")), ]
-
-#+ echo = FALSE
-knitr::kable(mat)
-
-#' This is only the data for 2016! So only about 2 weeks in December.
-
-# Date ranges
-range(dat$date.start)
-range(dat$date.stop, na.rm = TRUE)
-
-#' # Units of x and y
-#' I assume that x and y are pixel $\to$ correct?
-
-#' But they look weird, when plotted. Is it possible that there are
-#' outliers? If yes, how? Do we have the true ranges of the display?
-
-par(mfrow = c(1, 2))
-plot(y.start ~ x.start, dat)
-abline(v = c(0, 3800), h = c(0, 2150), col = "blue", lwd = 2)
-plot(y.stop ~ x.stop, dat)
-abline(v = c(0, 3800), h = c(0, 2150), col = "blue", lwd = 2)
-
-aggregate(cbind(x.start, x.stop, y.start, y.stop) ~ 1, dat, mean)
-
-#' Looks like the range should be something like $x = [0, 3800]$ and
-#' $y = [0, 2150]$. Do we have the starting coordinates for each artwork?
-#'
-
-#' # Unit of scale
-
-summary(dat$scaleSize)
-
-#' I thought it would be some kind of scaling factor, but then I would
-#' have expected that `scale.start` is always 1 or something.
-#'
-
-#' # Unit of rotation
-
-summary(dat$rotationDegree)
-
-#' This looks pretty clear. Should be degree. Anything else to consider
-#' here? I am assuming negative means left, but maybe not?
-#'
-
-#' # Meaningful unit for "case"
-
-#' I pretty randomly chose `20 sec` based on this plot. I would love a
-#' second opinion. `:)`
-
-timediff <- as.numeric(diff(c(dat$date.start[1], dat$date.start)))
-hist(timediff[timediff < 40], breaks = 50)
-abline(v = 20, col = "red", lwd = 2)
-
-#' This actually works pretty well and lets me assign `trace` values to the
-#' moves. But maybe there are other ideas on how to define this?
-
-dat[1:40, c("date.start", "case", "trace", "event", "artwork")]
-
-
-#' # Problems with `time_ms`
-
-#' What exactly happens, when `time_ms` goes down again? Why does it not go
-#' down to 0?
-
-par(mfrow = c(1, 2))
-
-plot(dat$time_ms.start[1:100], type = "b", ylab = "time_ms", xlab = "")
-points(dat$time_ms.stop[1:100], type = "b", col = rgb(1, 0, 0, .5))
-legend("topleft", c("start", "stop"), lty = 1, col = c("black", "red"))
-
-plot(dat$time_ms.stop[1:100] - dat$time_ms.start[1:100], type = "b",
-     ylab = "duration", col = rgb(0, 0, 1, .5))
-abline(h = 0, lty = 2)
-
-#' For the regular timestamps everything looks fine.
-
-par(mfrow = c(1, 2))
-
-plot(dat$date.stop[1:100], type = "b", ylab = "timestamp", xlab = "",
-     col = rgb(1, 0, 0, .5))
-points(dat$date.start[1:100], type = "b")
-legend("topleft", c("start", "stop"), lty = 1, col = c("black", "red"))
-
-plot(dat$date.stop[1:100] - dat$date.start[1:100], type = "b",
-     ylab = "duration", col = rgb(0, 0, 1, .5))
-abline(h = 0, lty = 2)
-
-#+
-plot(time_ms.start ~ date.start, dat[1:1000, ], type = "b")
-points(time_ms.stop ~ date.stop, dat[1:1000, ], type = "b", col = rgb(1, 0, 0, .3))
-
-#' For `time_ms.stop` this looks even weirder.
-#'
-
-#' # After which time interval does the table reset?
-
-#' I cannot see this in the data at all. Or can I? Has this something to do
-#' with the weird behavior of `time_ms`?
-
diff --git a/code/questions/questions_number-of-cards.R b/code/questions/questions_number-of-cards.R
deleted file mode 100644
index 19f7a2b..0000000
--- a/code/questions/questions_number-of-cards.R
+++ /dev/null
@@ -1,58 +0,0 @@
-#' ---
-#' title: "Open Questions -- Card indices"
-#' author: "Nora Wickelmaier"
-#' date: "`r Sys.Date()`"
-#' output: 
-#'   html_document:
-#'     number_sections: true
-#'     toc: true
-#' ---
-
-#+ include = FALSE
-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
-dat <- read.table("../data/event_logfiles.csv", sep = ";", header = TRUE)
-dat$date.start <- as.POSIXct(dat$date.start)
-dat$date.stop <- as.POSIXct(dat$date.stop)
-dat$artwork <- sprintf("%03d", dat$artwork)
-
-#' The following table shows an overview of the card indices. The indices
-#' should have values between 0 and 5. It is unclear what the numbers mean.
-
-table(dat$card)
-
-#' Number of cards for each artwork in the data set (subset from 2016)
-
-artworks <- sort(unique(dat$artwork))
-
-count <- function(x) length(table(dat[which(dat$artwork == x), "card"]))
-max_index <- function(x) max(dat[which(dat$artwork == x), "card"], na.rm = TRUE)
-num_cards <- sapply(artworks, count)
-highest_index <- sapply(artworks, max_index)
-
-#' Check how many XML-files for cards are present
-
-path <- "../data/ContentEyevisit/eyevisit_cards_light"
-
-num_files <- NULL
-for (artwork in artworks) {
-  fnames <- dir(pattern = paste0(artwork, "_"), path = paste(path, artwork, sep = "/"))
-  num_files <- c(num_files, length(fnames))
-}
-
-#' The table shows that each artwork has 6 cards the most (as expected).
-#' This is a subset of the data, so not all cards have been opened.
-
-cards <- data.frame(artwork = artworks, num_cards, highest_index,
-                    num_files, diff = num_files - highest_index)
-cards
-
-#' There are more than 8 files for a couple of artworks:
-
-subset(cards, cards$num_files >= 8)
-
-#' It might be possible, that the number indicates the index of the file
-#' and not the actual card that was displayed. BUT: In many cases, there
-#' are only 6 (or less) files, but a higher index is present...
-
-subset(cards, cards$diff < 0)
-
diff --git a/code/questions/questions_programming-input.R b/code/questions/questions_programming-input.R
deleted file mode 100644
index 9fc5afb..0000000
--- a/code/questions/questions_programming-input.R
+++ /dev/null
@@ -1,105 +0,0 @@
-#' ---
-#' title: "Programming input"
-#' author: "Nora Wickelmaier"
-#' date: "`r Sys.Date()`"
-#' output: html_document
-#' ---
-
-#+ include = FALSE
-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
-
-#+
-dat0 <- read.table("../data/rawdata_logfiles_small.csv", sep = ";", header = TRUE)
-dat0$date <- as.POSIXct(dat0$date)  # create date object
-
-# Remove irrelevant events
-dat <- subset(dat0, !(dat0$event %in% c("Start Application", "Show Application")))
-str(dat)
-
-# make data better manageable
-tmp <- dat[!dat$event %in% c("Transform start", "Transform stop"), ]
-rownames(tmp) <- NULL
-
-#' # Add `trace` variable for closing events
-
-tmp$trace <- NA
-last_event <- tmp$event[1]
-aws <- unique(tmp$artwork)[unique(tmp$artwork) != "glossar"]
-
-for (art in aws) {              # select artwork
-
-  for (i in 1:nrow(tmp)) {      # go through rows
-
-    if (last_event == "Show Info" & tmp$artwork[i] == art) {
-      tmp$trace[i] <- i
-      j <- i
-
-    } else if (last_event == "Show Front" & tmp$artwork[i] == art) {
-      tmp$trace[i] <- j
-
-    } else if (!(last_event %in% c("Show Info", "Show Front")) &
-               tmp$artwork[i] == art) {
-      tmp$trace[i] <- j
-    }
-
-    if (i <= nrow(tmp)) {
-      last_event <- tmp$event[i + 1]
-    }
-  }
-}
-
-head(tmp[, c("artwork", "event", "trace")], 50)
-
-#' # Find artwork for glossar entry
-
-glossar_files <- unique(tmp[tmp$artwork == "glossar", "popup"])
-
-# Load lookup table for artworks and glossar files
-load("../data/glossar_dict.RData")
-lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
-
-# Fill in trace variable based on last `Show Info`
-for (file in lut$glossar_file) {
-
-  artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
-
-  for (i in seq_len(nrow(tmp))) {
-
-    if (tmp$event[i] == "Show Info") {
-
-      current_artwork <- tmp[i, "artwork"]
-      j <- i
-      k <- i
-
-    } else {
-
-      current_artwork <- current_artwork
-
-    }
-
-    if (tmp$event[i] == "Show Front" & tmp$artwork[i] == current_artwork) {
-    # make sure artwork has not been closed, yet!
-      k <- i
-    }
-
-    if (tmp$artwork[i] == "glossar" &
-        (current_artwork %in% artwork_list) &
-        tmp$popup[i] == file & (j-k == 0)) {
-
-      tmp[i, "trace"] <- tmp[j, "trace"]
-
-    }
-  }
-}
-
-tmp[tmp$artwork == "glossar", c("artwork", "event", "popup", "trace")]
-
-proportions(table(is.na(tmp$trace[tmp$artwork == "glossar"])))
-# --> finds about half of the glossar entries for small data set...
-
-# REMEMBER: It can never be 100% correct, since it is always possible that
-# several cards are open and that they link to the same glossar entry
-
-# How many glossar_files are only associated with one artwork?
-lut[sapply(lut$artwork, length) == 1, "glossar_file"]
-