126 lines
3.8 KiB
R
126 lines
3.8 KiB
R
#' ---
|
|
#' title: "Open Questions"
|
|
#' author: "Nora Wickelmaier"
|
|
#' date: "`r Sys.Date()`"
|
|
#' output:
#'   html_document:
#'     number_sections: true
#'     toc: true
|
|
#' ---
|
|
|
|
#+ include = FALSE
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
|
# Read the semicolon-separated event log (first row holds the column names)
# and convert both timestamp columns to POSIXct in a single step.
dat <- read.table("../data/event_logfiles.csv", header = TRUE, sep = ";")
dat[c("date.start", "date.stop")] <- lapply(
  dat[c("date.start", "date.stop")],
  as.POSIXct
)
|
|
|
|
#' This is what the data look like after preprocessing right now
|
|
|
|
#+ include = FALSE
|
|
# Per-column summary table (min, max, mean, number of missings).
# Identifier, label and timestamp columns are dropped *before* summarising:
# previously range()/mean() were also run over them and the resulting rows
# thrown away afterwards, which is wasted work and (for any character
# column) a source of coercion warnings.
skip_cols <- c("eventid", "case", "trace", "event", "artwork", "card",
               "popup", "date.start", "date.stop")
keep <- dat[, !(names(dat) %in% skip_cols), drop = FALSE]

# vapply() pins the result shape: a 2 x ncol matrix, row 1 = min, row 2 = max.
rng <- vapply(keep, function(x) as.numeric(range(x, na.rm = TRUE)),
              numeric(2))

mat <- data.frame(
  min       = round(rng[1, ], 1),
  max       = round(rng[2, ], 1),
  mean      = round(vapply(keep, function(x) mean(x, na.rm = TRUE),
                           numeric(1)), 1),
  missings  = vapply(keep, function(x) sum(is.na(x)), integer(1)),
  row.names = names(keep)
)

#+ echo = FALSE
knitr::kable(mat)
|
|
|
|
#' This is only the data for 2016! So only about 2 weeks in December.
|
|
|
|
# Date ranges: earliest and latest value of each timestamp column
# (missing values are skipped for the stop column only)
range(dat[["date.start"]])
range(dat[["date.stop"]], na.rm = TRUE)
|
|
|
|
#' # Units of x and y
|
|
#' I assume that x and y are in pixels $\to$ correct?
|
|
|
|
#' But they look weird when plotted. Is it possible that there are
|
|
#' outliers? If yes, how? Do we have the true ranges of the display?
|
|
|
|
# Scatter the start and the stop coordinates side by side; the blue lines
# mark the presumed display boundaries (x from 0 to 3800, y from 0 to 2150).
par(mfrow = c(1, 2))
for (phase in c("start", "stop")) {
  # Builds y.<phase> ~ x.<phase>, so the axis labels match the column names.
  plot(reformulate(paste0("x.", phase), response = paste0("y.", phase)), dat)
  abline(v = c(0, 3800), h = c(0, 2150), col = "blue", lwd = 2)
}

# Mean of each coordinate column
aggregate(cbind(x.start, x.stop, y.start, y.stop) ~ 1, data = dat, FUN = mean)
|
|
|
|
#' Looks like the range should be something like $x = [0, 3800]$ and
|
|
#' $y = [0, 2150]$. Do we have the starting coordinates for each artwork?
|
|
#'
|
|
|
|
#' # Unit of scale
|
|
|
|
# Print summary() of the `scaleSize` column.
# NOTE(review): the accompanying text mentions `scale.start`, but the column
# read here is `scaleSize` -- confirm which one is meant.
summary(dat$scaleSize)
|
|
|
|
#' I thought it would be some kind of scaling factor, but then I would
|
|
#' have expected that `scale.start` is always 1 or something.
|
|
#'
|
|
|
|
#' # Unit of rotation
|
|
|
|
# Print summary() of the `rotationDegree` column to judge its unit and sign.
summary(dat$rotationDegree)
|
|
|
|
#' This looks pretty clear. Should be degrees. Anything else to consider
|
|
#' here? I am assuming negative means left, but maybe not?
|
|
#'
|
|
|
|
#' # Meaningful unit for "case"
|
|
|
|
#' I pretty randomly chose `20 sec` based on this plot. I would love a
|
|
#' second opinion. `:)`
|
|
|
|
# Time elapsed between consecutive event starts. The first timestamp is
# duplicated up front so that `timediff` has one entry per row of `dat`
# (the first entry is therefore 0).
# The unit is requested explicitly: diff() on POSIXct yields a difftime whose
# unit is otherwise picked automatically from the data, while the thresholds
# 40 and 20 below are meant as seconds.
timediff <- as.numeric(diff(c(dat$date.start[1], dat$date.start)),
                       units = "secs")
# Only gaps below 40 s are shown; the red line marks the 20 s cut-off.
hist(timediff[timediff < 40], breaks = 50)
abline(v = 20, col = "red", lwd = 2)
|
|
|
|
#' This actually works pretty well and lets me assign `trace` values to the
|
|
#' moves. But maybe there are other ideas on how to define this?
|
|
|
|
# Show the first 40 events with their assigned case/trace labels.
# head() is used instead of `1:40` so that a log with fewer than 40 rows is
# not padded with all-NA rows.
head(dat[, c("date.start", "case", "trace", "event", "artwork")], 40)
|
|
|
|
|
|
#' # Problems with `time_ms`
|
|
|
|
#' What exactly happens when `time_ms` goes down again? Why does it not go
|
|
#' down to 0?
|
|
|
|
# Left panel: `time_ms` start (black) and stop (semi-transparent red) values
# of the first 100 rows. Right panel: stop minus start per row, with a dashed
# reference line at zero.
par(mfrow = c(1, 2))

with(dat[1:100, ], {
  plot(time_ms.start, type = "b", ylab = "time_ms", xlab = "")
  points(time_ms.stop, type = "b", col = rgb(1, 0, 0, .5))
  legend("topleft", c("start", "stop"), lty = 1, col = c("black", "red"))

  plot(time_ms.stop - time_ms.start, type = "b", ylab = "duration",
       col = rgb(0, 0, 1, .5))
  abline(h = 0, lty = 2)
})
|
|
|
|
#' For the regular timestamps everything looks fine.
|
|
|
|
# Same two panels as for `time_ms`, but based on the regular timestamps of
# the first 100 rows: raw values on the left, stop minus start on the right.
par(mfrow = c(1, 2))

local({
  started <- dat$date.start[1:100]
  stopped <- dat$date.stop[1:100]

  plot(stopped, type = "b", ylab = "timestamp", xlab = "",
       col = rgb(1, 0, 0, .5))
  points(started, type = "b")
  legend("topleft", c("start", "stop"), lty = 1, col = c("black", "red"))

  plot(stopped - started, type = "b", ylab = "duration",
       col = rgb(0, 0, 1, .5))
  abline(h = 0, lty = 2)
})
|
|
|
|
#+
|
|
# `time_ms` against the regular timestamp for the first 1000 rows: start
# values in black, stop values overlaid in semi-transparent red.
plot(time_ms.start ~ date.start, data = dat[seq_len(1000), ], type = "b")
points(time_ms.stop ~ date.stop, data = dat[seq_len(1000), ], type = "b",
       col = rgb(1, 0, 0, .3))
|
|
|
|
#' For `time_ms.stop` this looks even weirder.
|
|
#'
|
|
|
|
#' # After which time interval does the table reset?
|
|
|
|
#' I cannot see this in the data at all. Or can I? Has this something to do
|
|
#' with the weird behavior of `time_ms`?
|
|
|