# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/") library(lattice) #library(mtt) devtools::load_all("../../../../software/mtt") # Achims colors (used by lattice) #cc <- palette.colors(palette = "Okabe-Ito") #plot(1:10, col = cc, pch = 16, cex = 2) # Read data datlogs <- read.table("results/haum/event_logfiles_2024-01-02_19-44-50.csv", colClasses = c("character", "character", "POSIXct", "POSIXct", "character", "integer", "numeric", "character", "character", rep("numeric", 3), "character", "character", rep("numeric", 11), "character", "character"), sep = ";", header = TRUE) datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard", "openTopic", "openPopup")) ### Number of log files length(unique(datlogs$fileId.start)) length(unique(datlogs$fileId.stop)) length(unique(c(datlogs$fileId.start, datlogs$fileId.stop))) # 22803 ### Number of activities nrow(datlogs) table(datlogs$event) proportions(table(datlogs$event)) proportions(table(datlogs$event[datlogs$event != "move"])) ### Time range range(as.Date(datlogs$date.start)) ### Topics per item print(xtabs( ~ item + topic, datlogs), zero = "-") lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) ### Which item gets touched most often? counts_item <- table(datlogs$item) barchart(counts_item) items <- unique(datlogs$item) #items <- items[!items %in% c("504", "505")] datart <- extract_artworks(items, paste0(items, ".xml"), "../data/haum/ContentEyevisit/eyevisit_cards_light/") datart <- datart[order(datart$artwork), ] names(counts_item) <- datart$title pdf("../figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), border = "white", col = "#3CB4DC") text(tmp, counts_item + 1000, datart$artwork) dev.off() ### Which item gets touched most often first? datcase <- datlogs[!duplicated(datlogs$case), ] counts_case <- table(datcase$item) names(counts_case) <- datart$title tmp <- barplot(counts_case, las = 2, border = "white") text(tmp, counts_case + 100, datart$item) counts <- rbind(counts_item, counts_case) barplot(counts, las = 2, border = "white", col = c("gray", "darkorange")) ### Which teasers seem to work well? barplot(table(datlogs$topic), las = 2) ### Dwell times/duration datagg <- aggregate(duration ~ event + item, datlogs, mean) datagg$ds <- datagg$duration / 1000 bwplot(ds ~ event, datagg) bwplot(ds ~ event | item, datagg) xyplot(ds ~ event, datagg, groups = item) # without aggregation bwplot(duration ~ event, datlogs) # in min set.seed(1027) pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10) bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min") dev.off() ### Move events datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, y.start, x.stop, y.stop) ~ item, datlogs, mean) hist(log(datlogs$scaleSize)) # --> better interpretable on logscale plot(y.start ~ x.start, datmove, pch = 16, col = "gray") points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) with(datmove, arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) ) abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) datscale <- aggregate(scaleSize ~ item, datlogs, max) plot(y.start ~ x.start, datmove, pch = 16, col = "gray") points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) #with(datmove, text(x.stop, y.stop, item)) with(datmove, text(x.start, y.start, item)) ### Are there certain areas of the table that are touched most often? # heatmap cuts <- 100 datlogs$x.start.cat <- cut(datlogs$x.start, cuts) datlogs$y.start.cat <- cut(datlogs$y.start, cuts) tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) colnames(tab) <- paste0("c", 1:cuts) rownames(tab) <- paste0("c", 1:cuts) heatmap(tab, Rowv = NA, Colv = NA) library(ggplot2) ggplot(as.data.frame(tab)) + geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) + scale_fill_gradient(low = "gray40", high = "orange") dattrim <- datlogs[datlogs$x.start < 3840 & datlogs$x.start > 0 & datlogs$y.start < 2160 & datlogs$y.start > 0 & datlogs$x.stop < 3840 & datlogs$x.stop > 0 & datlogs$y.stop < 2160 & datlogs$y.stop > 0, ] cuts <- 100 # 200, 100, 70, ... # start dattrim$x.start.cat <- cut(dattrim$x.start, cuts) dattrim$y.start.cat <- cut(dattrim$y.start, cuts) tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) colnames(tab.start) <- NULL rownames(tab.start) <- NULL pdf("../figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.start, Rowv = NA, Colv = NA) dev.off() my_colors <- colorRampPalette(c("#009E73", "#E69F00")) heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000)) ggplot(as.data.frame(tab.start)) + geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) + scale_fill_gradient(low = "#009E73", high = "#E69F00") # stop dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) colnames(tab.stop) <- NULL rownames(tab.stop) <- NULL pdf("../figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.stop, Rowv = NA, Colv = NA) dev.off() heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10)) ### How many visitors per day datlogs$date <- as.Date(datlogs$date.start) # Interactions per day datint <- aggregate(case ~ date, datlogs, length) plot(datint, type = "h") # Cases per day datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) plot(datcase, type = "h") # Paths per day datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) plot(datpath, type = "h") plot(path ~ date, datpath, type = "h", col = "#3CB4DC") points(case ~ date, datcase, type = "h") pdf("../figures/cases_per_day2.pdf", width = 9, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) plot(case ~ date, datcase, type = "h", col = "#3CB4DC") abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], col = "#FF6900", lty = 2) text(datcase$date[datcase$date == "2020-03-13"]+470, 80, "Corona gap from 2020-03-13 to 2022-10-25", col = "#D55E00") dev.off() ### Other stuff ## function dependencies of mtt devtools::load_all("../../../../software/mtt") #library(mtt) library(mvbutils) foodweb(where = "package:mtt") pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) foodweb(where = "package:mtt", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", "extract_topics"), #expand.ybox = 1.8, #cex = .6, #border = TRUE, #boxcolor = "gray", color.lines = FALSE, lwd = 2, mai = c(0, 0, 0, 0)) dev.off() ## weird behavior of timeMs pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", scales = list(x = list(rot = 90), y = list(rot = 90))) dev.off() ## x,y-coordinates out of range set.seed(1522) pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mfrow = c(1, 2)) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) #plot(y.stop ~ x.stop, datlogs) #abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) legend("bottomleft", "Random sample of 10,000", bg = "white") legend("topleft", "4K-Display: 3840 x 2160", bg = "white") dev.off() ## moves dat001 <- datlogs[which(datlogs$item == "001"), ] index <- as.numeric(as.factor(dat001$path)) cc <- sample(colors(), 100) plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc[index])) plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, col = "blue") cc <- sample(colors(), 70) dat1 <- datlogs[!duplicated(datlogs$item), ] dat1 <- dat1[order(dat1$item), ] plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") with(dat1, points(x.start, y.start, col = cc, pch = 16)) with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))