#' ---
#' title: "Open Questions"
#' author: "Nora Wickelmaier"
#' date: "`r Sys.Date()`"
#' output:
#'   html_document:
#'     number_sections: true
#'     toc: true
#' ---

#+ include = FALSE
# knitr::spin() script: `#'` lines are rendered as markdown text, `#+` lines
# set chunk options, everything else is R code. Each of these markers must
# start its own line, otherwise R treats the rest of the line as a comment.

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

# Path is relative to the working directory the report is rendered from.
dat <- read.table("../data/event_logfiles.csv", sep = ";", header = TRUE)

# NOTE(review): without `tz =` the timestamps are interpreted in the session's
# local time zone -- confirm this matches how the log files were written.
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop <- as.POSIXct(dat$date.stop)

#' This is what the data look like after preprocessing right now

#+ include = FALSE
# Identifier and timestamp columns are left out of the summary table. They
# are removed *before* computing the statistics so that character columns do
# not coerce the whole range matrix to character (and trigger NA warnings).
id_cols <- c("eventid", "case", "trace", "event", "artwork", "card", "popup",
             "date.start", "date.stop")
num_dat <- dat[, !(names(dat) %in% id_cols), drop = FALSE]

# One row per remaining variable: min, max, mean (rounded to one decimal)
# and the number of missing values.
mat <- as.data.frame(t(sapply(num_dat, range, na.rm = TRUE)))
names(mat) <- c("min", "max")
mat$min <- round(as.numeric(mat$min), 1)
mat$max <- round(as.numeric(mat$max), 1)
mat$mean <- round(sapply(num_dat, function(x) mean(x, na.rm = TRUE)), 1)
mat$missings <- vapply(num_dat, function(x) sum(is.na(x)), integer(1))

#+ echo = FALSE
knitr::kable(mat)

#' This is only the data for 2016! So only about 2 weeks in December.

# Date ranges
range(dat$date.start)
range(dat$date.stop, na.rm = TRUE)

#' # Units of x and y
#'
#' I assume that x and y are pixels $\to$ correct?
#' But they look weird when plotted. Is it possible that there are
#' outliers? If yes, how? Do we have the true ranges of the display?

# Guessed display bounds, drawn as reference lines in both panels.
display_x <- c(0, 3800)
display_y <- c(0, 2150)

par(mfrow = c(1, 2))
plot(y.start ~ x.start, dat)
abline(v = display_x, h = display_y, col = "blue", lwd = 2)
plot(y.stop ~ x.stop, dat)
abline(v = display_x, h = display_y, col = "blue", lwd = 2)

aggregate(cbind(x.start, x.stop, y.start, y.stop) ~ 1, dat, mean)

#' Looks like the range should be something like $x = [0, 3800]$ and
#' $y = [0, 2150]$. Do we have the starting coordinates for each artwork?
#'
#' # Unit of scale

summary(dat$scaleSize)

#' I thought it would be some kind of scaling factor, but then I would
#' have expected that `scale.start` is always 1 or something.
#'
#' # Unit of rotation

summary(dat$rotationDegree)

#' This looks pretty clear. Should be degrees. Anything else to consider
#' here? I am assuming negative means left, but maybe not?
#'
#' # Meaningful unit for "case"
#'
#' I pretty randomly chose `20 sec` based on this plot. I would love a
#' second opinion. `:)`

# Time between consecutive start timestamps. The first timestamp is
# duplicated so the result has one value per row (the first one being 0).
# Units are pinned to seconds: diff() on POSIXct otherwise picks them
# automatically, and the thresholds below are meant in seconds.
timediff <- as.numeric(diff(c(dat$date.start[1], dat$date.start)),
                       units = "secs")
hist(timediff[timediff < 40], breaks = 50)
abline(v = 20, col = "red", lwd = 2)

#' This actually works pretty well and lets me assign `trace` values to the
#' moves. But maybe there are other ideas on how to define this?

dat[1:40, c("date.start", "case", "trace", "event", "artwork")]

#' # Problems with `time_ms`
#'
#' What exactly happens when `time_ms` goes down again? Why does it not go
#' down to 0?

# head() instead of dat[1:100, ] so that short data sets are not padded
# with NA rows.
first100 <- head(dat, 100)

par(mfrow = c(1, 2))
plot(first100$time_ms.start, type = "b", ylab = "time_ms", xlab = "")
points(first100$time_ms.stop, type = "b", col = rgb(1, 0, 0, .5))
legend("topleft", c("start", "stop"), lty = 1, col = c("black", "red"))

plot(first100$time_ms.stop - first100$time_ms.start, type = "b",
     ylab = "duration", col = rgb(0, 0, 1, .5))
abline(h = 0, lty = 2)

#' For the regular timestamps everything looks fine.

first100 <- head(dat, 100)

par(mfrow = c(1, 2))
plot(first100$date.stop, type = "b", ylab = "timestamp", xlab = "",
     col = rgb(1, 0, 0, .5))
points(first100$date.start, type = "b")
legend("topleft", c("start", "stop"), lty = 1, col = c("black", "red"))

# Duration in seconds; explicit units keep the y axis comparable across
# data sets instead of depending on difftime's automatic unit choice.
duration_secs <- as.numeric(
  difftime(first100$date.stop, first100$date.start, units = "secs")
)
plot(duration_secs, type = "b", ylab = "duration", col = rgb(0, 0, 1, .5))
abline(h = 0, lty = 2)

#+
first1000 <- head(dat, 1000)
plot(time_ms.start ~ date.start, first1000, type = "b")
points(time_ms.stop ~ date.stop, first1000, type = "b",
       col = rgb(1, 0, 0, .3))

#' For `time_ms.stop` this looks even weirder.
#'
#' # After which time interval does the table reset?
#'
#' I cannot see this in the data at all. Or can I? Has this something to do
#' with the weird behavior of `time_ms`?