126 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			126 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| #' ---
 | |
| #' title: "Open Questions"
 | |
| #' author: "Nora Wickelmaier"
 | |
| #' date: "`r Sys.Date()`"
 | |
| #' output: 
 | |
| #'   html_document:
 | |
| #'     number_sections: true
 | |
| #'     toc: true
 | |
| #' ---
 | |
| 
 | |
#+ include = FALSE
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

# Read the semicolon-separated event log. The path is relative, so the
# working directory has to be the folder this script lives in.
dat <- read.table(file = "../data/event_logfiles.csv", header = TRUE, sep = ";")

# Convert both timestamp columns to POSIXct in one step.
dat[c("date.start", "date.stop")] <-
  lapply(dat[c("date.start", "date.stop")], as.POSIXct)
 | |
| 
 | |
#' This is what the data currently look like after preprocessing:
 | |
| 
 | |
#+ include = FALSE
# Summary table (min, max, mean, number of missing values) for the measurement
# columns of `dat`. Identifier, label, and timestamp columns are removed
# *before* the statistics are computed: summarising every column first lets
# any character column coerce the whole range matrix to character and makes
# mean() warn about non-numeric input.
id_cols <- c("eventid", "case", "trace", "event", "artwork", "card", "popup",
             "date.start", "date.stop")
stat_cols <- dat[!(names(dat) %in% id_cols)]

# Apply `stat` with NAs removed. Columns that are neither numeric nor logical
# yield NA instead of triggering a coercion warning.
num_stat <- function(x, stat) {
  if (is.numeric(x) || is.logical(x)) stat(x, na.rm = TRUE) else NA_real_
}

# vapply() keeps the result type fixed even if `stat_cols` has no columns.
mat <- data.frame(
  min = round(vapply(stat_cols, num_stat, numeric(1), stat = min), 1),
  max = round(vapply(stat_cols, num_stat, numeric(1), stat = max), 1),
  mean = round(vapply(stat_cols, num_stat, numeric(1), stat = mean), 1),
  missings = vapply(stat_cols, function(x) sum(is.na(x)), integer(1)),
  row.names = names(stat_cols)
)

#+ echo = FALSE
knitr::kable(mat)
 | |
| 
 | |
#' Note that this is only the data for 2016, i.e., only about 2 weeks in December.
 | |
| 
 | |
# Earliest and latest start/stop timestamps in the data
# (NAs are removed for the stop times only)
range(dat[["date.start"]])
range(dat[["date.stop"]], na.rm = TRUE)
 | |
| 
 | |
| #' # Units of x and y
 | |
#' I assume that x and y are given in pixels $\to$ correct?
 | |
| 
 | |
#' But they look weird when plotted. Is it possible that there are
#' outliers? If yes, how? Do we have the true ranges of the display?
 | |
| 
 | |
# Start and stop positions side by side. The blue lines mark the box
# x = [0, 3800], y = [0, 2150].
x_bounds <- c(0, 3800)
y_bounds <- c(0, 2150)

par(mfrow = c(1, 2))
with(dat, plot(x.start, y.start))
abline(v = x_bounds, h = y_bounds, col = "blue", lwd = 2)
with(dat, plot(x.stop, y.stop))
abline(v = x_bounds, h = y_bounds, col = "blue", lwd = 2)

# Mean of each coordinate column. The formula interface of aggregate() drops
# rows with a missing value in any of the four columns before averaging.
aggregate(cbind(x.start, x.stop, y.start, y.stop) ~ 1, data = dat, FUN = mean)
 | |
| 
 | |
| #' Looks like the range should be something like $x = [0, 3800]$ and
 | |
| #' $y = [0, 2150]$. Do we have the starting coordinates for each artwork?
 | |
| #'
 | |
| 
 | |
| #' # Unit of scale
 | |
| 
 | |
# Distribution summary of the scale column.
# NOTE(review): the accompanying text refers to `scale.start`, and other
# columns in this script carry `.start`/`.stop` suffixes -- confirm that
# `scaleSize` is the actual column name (a missing column makes `$` return
# NULL rather than fail).
summary(dat$scaleSize)
 | |
| 
 | |
| #' I thought it would be some kind of scaling factor, but then I would
 | |
| #' have expected that `scale.start` is always 1 or something.
 | |
| #'
 | |
| 
 | |
| #' # Unit of rotation
 | |
| 
 | |
# Distribution summary of the rotation column.
# NOTE(review): other columns in this script carry `.start`/`.stop`
# suffixes -- confirm that `rotationDegree` exists under exactly this name
# (a missing column makes `$` return NULL rather than fail).
summary(dat$rotationDegree)
 | |
| 
 | |
#' This looks pretty clear. It should be degrees. Anything else to consider
#' here? I am assuming negative means left, but maybe not?
 | |
| #'
 | |
| 
 | |
| #' # Meaningful unit for "case"
 | |
| 
 | |
| #' I pretty randomly chose `20 sec` based on this plot. I would love a
 | |
| #' second opinion. `:)`
 | |
| 
 | |
# Gap between consecutive event start times. The first timestamp is prepended
# so that the result has one entry per row (the first gap is 0).
# diff() on POSIXct picks the difftime unit automatically; the unit is
# requested explicitly because the 20 and 40 thresholds below are seconds.
timediff <- as.numeric(diff(c(dat$date.start[1], dat$date.start)),
                       units = "secs")

# Distribution of gaps shorter than 40 s, with the 20 s cut-off marked in red
hist(timediff[timediff < 40], breaks = 50)
abline(v = 20, col = "red", lwd = 2)
 | |
| 
 | |
| #' This actually works pretty well and lets me assign `trace` values to the
 | |
| #' moves. But maybe there are other ideas on how to define this?
 | |
| 
 | |
# First 40 events with their case/trace assignment. head() is used instead of
# indexing with `1:40` so that a data set with fewer than 40 rows is not
# padded with all-NA rows.
head(dat[, c("date.start", "case", "trace", "event", "artwork")], n = 40)
 | |
| 
 | |
| 
 | |
| #' # Problems with `time_ms`
 | |
| 
 | |
#' What exactly happens when `time_ms` goes down again? Why does it not go
#' down to 0?
 | |
| 
 | |
# Left panel: `time_ms` of the first 100 events (start in black, stop in
# transparent red). Right panel: stop minus start for the same events.
first_100 <- 1:100
ms_start <- dat$time_ms.start[first_100]
ms_stop <- dat$time_ms.stop[first_100]

par(mfrow = c(1, 2))

plot(ms_start, type = "b", xlab = "", ylab = "time_ms")
points(ms_stop, type = "b", col = rgb(1, 0, 0, .5))
legend("topleft", legend = c("start", "stop"), lty = 1,
       col = c("black", "red"))

plot(ms_stop - ms_start, type = "b", col = rgb(0, 0, 1, .5),
     ylab = "duration")
abline(h = 0, lty = 2)  # reference line at zero
 | |
| 
 | |
| #' For the regular timestamps everything looks fine.
 | |
| 
 | |
# Same two panels for the regular timestamps of the first 100 events:
# left, stop (transparent red) and start (black); right, stop minus start.
first_100 <- 1:100
t_start <- dat$date.start[first_100]
t_stop <- dat$date.stop[first_100]

par(mfrow = c(1, 2))

plot(t_stop, type = "b", xlab = "", ylab = "timestamp",
     col = rgb(1, 0, 0, .5))
points(t_start, type = "b")
legend("topleft", legend = c("start", "stop"), lty = 1,
       col = c("black", "red"))

plot(t_stop - t_start, type = "b", col = rgb(0, 0, 1, .5),
     ylab = "duration")
abline(h = 0, lty = 2)  # reference line at zero
 | |
| 
 | |
#+
# `time_ms` against the timestamp for the first 1000 rows
# (start in black, stop in transparent red)
first_1000 <- dat[1:1000, ]
plot(time_ms.start ~ date.start, data = first_1000, type = "b")
points(time_ms.stop ~ date.stop, data = first_1000, type = "b",
       col = rgb(1, 0, 0, .3))
 | |
| 
 | |
| #' For `time_ms.stop` this looks even weirder.
 | |
| #'
 | |
| 
 | |
| #' # After which time interval does the table reset?
 | |
| 
 | |
#' I cannot see this in the data at all. Or can I? Does this have something
#' to do with the weird behavior of `time_ms`?
 | |
| 
 |