# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/") library(lattice) # Achims colors (used by lattice) #cc <- palette.colors(palette = "Okabe-Ito") #plot(1:10, col = cc, pch = 16, cex = 2) # Read data datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv", sep = ";", header = TRUE) datlogs$date <- as.Date(datlogs$date) datlogs$date.start <- as.POSIXct(datlogs$date.start) datlogs$date.stop <- as.POSIXct(datlogs$date.stop) datlogs$artwork <- sprintf("%03d", datlogs$artwork) ### Which artwork gets touched most often? counts_artwork <- table(datlogs$artwork) barchart(counts_artwork) artworks <- unique(datlogs$artwork) artworks <- artworks[!artworks %in% c("504", "505")] datart <- extract_artworks(artworks, paste0(artworks, ".xml"), "../data/haum/ContentEyevisit/eyevisit_cards_light/") datart <- datart[order(datart$artwork), ] names(counts_artwork) <- datart$title pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white", col = "#0072B2") text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505")) dev.off() ### Which artwork gets touched most often first? datcase <- datlogs[!duplicated(datlogs$case), ] counts_case <- table(datcase$artwork) names(counts_case) <- datart$title tmp <- barplot(counts_case, las = 2, border = "white") text(tmp, counts_case + 100, c(datart$artwork, "504", "505")) counts <- rbind(counts_artwork, counts_case) barplot(counts, las = 2, border = "white", col = c("gray", "darkorange")) ### Which teasers seem to work well? barplot(table(datlogs$topic), las = 2) barplot(table(datlogs$topicFile), las = 2) ### Dwell times/duration datagg <- aggregate(duration ~ event + artwork, datlogs, mean) datagg$ds <- datagg$duration / 1000 bwplot(ds ~ as.factor(event), datagg) bwplot(ds ~ as.factor(event) | artwork, datagg) xyplot(ds ~ as.factor(event), datagg, groups = artwork) # without aggregation bwplot(duration ~ as.factor(event), datlogs) # in min set.seed(1027) pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10) bwplot(I(duration/1000/60) ~ as.factor(event), datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min") dev.off() datlogs$daydiff <- c(NA, diff(datlogs$date)) plot(daydiff ~ date, datlogs, type = "b") ### Move events datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, x.start, y.start, x.stop, y.stop) ~ artwork, datlogs, mean) hist(log(datlogs$scaleSize)) # --> better interpretable on logscale plot(y.start ~ x.start, datmove, pch = 16, col = "gray") points(y.start ~ x.start, datmove, col = "#0072B2", cex = log(datmove$scaleSize)) points(y.start ~ x.start, datmove, col = "#009E73", cex = datmove$rotationDegree) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) with(datmove, text(x.start, y.start, artwork, col = "gray", cex = 1.5)) with(datmove, arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) ) abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) datscale <- aggregate(scaleSize ~ artwork, datlogs, max) plot(y.start ~ x.start, datmove, pch = 16, col = "gray") points(y.start ~ x.start, datmove, col = "#0072B2", cex = log(datscale$scaleSize)) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) #with(datmove, text(x.stop, y.stop, artwork)) with(datmove, text(x.start, y.start, artwork)) ### Are there certain areas of the table that are touched most often? # heatmap cuts <- 100 datlogs$x.start.cat <- cut(datlogs$x.start, cuts) datlogs$y.start.cat <- cut(datlogs$y.start, cuts) tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) colnames(tab) <- paste0("c", 1:cuts) rownames(tab) <- paste0("c", 1:cuts) heatmap(tab, Rowv = NA, Colv = NA) library(ggplot2) ggplot(as.data.frame(tab)) + geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) + scale_fill_gradient(low = "gray40", high = "orange") dattrim <- datlogs[datlogs$x.start < 3840 & datlogs$x.start > 0 & datlogs$y.start < 2160 & datlogs$y.start > 0 & datlogs$x.stop < 3840 & datlogs$x.stop > 0 & datlogs$y.stop < 2160 & datlogs$y.stop > 0, ] cuts <- 100 # 200, 100, 70, ... # start dattrim$x.start.cat <- cut(dattrim$x.start, cuts) dattrim$y.start.cat <- cut(dattrim$y.start, cuts) tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) colnames(tab.start) <- NULL rownames(tab.start) <- NULL pdf("../figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.start, Rowv = NA, Colv = NA) dev.off() my_colors <- colorRampPalette(c("#009E73", "#E69F00")) heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000)) ggplot(as.data.frame(tab.start)) + geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) + scale_fill_gradient(low = "#009E73", high = "#E69F00") # stop dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) colnames(tab.stop) <- NULL rownames(tab.stop) <- NULL pdf("../figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.stop, Rowv = NA, Colv = NA) dev.off() heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10)) ### How many visitors per day # Interactions per day datint <- aggregate(case ~ date, datlogs, length) plot(datint, type = "h") # Cases per day datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) plot(datcase, type = "h") # Traces per day dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x))) plot(dattrace, type = "h") plot(trace ~ date, dattrace, type = "h", col = "#0072B2") points(case ~ date, datcase, type = "h") pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) plot(case ~ date, datcase, type = "h", col = "#0072B2") abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2) text(datcase$date[1037]-250, 150, paste0("Corona gap from ", datcase$date[1027], " to ", datcase$date[1039]), col = "#D55E00") dev.off() ### Other stuff ## function dependencies of mtt devtools::load_all("../../../../software/mtt") #library(mtt) library(mvbutils) foodweb(where = "package:mtt") pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) foodweb(where = "package:mtt", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", "extract_topics", "add_topic"), expand.ybox = 1.8, #cex = .6, border = TRUE, #boxcolor = "gray", lwd = 2, mai = c(0, 0, 0, 0)) dev.off() ## weird behavior of timeMs pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "", scales = list(x = list(rot = 90), y = list(rot = 90))) dev.off() ## x,y-coordinates out of range set.seed(1522) pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mfrow = c(1, 2)) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) #plot(y.stop ~ x.stop, datlogs) #abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) legend("bottomleft", "Random sample of 10,000", bg = "white") legend("topleft", "4K-Display: 3840 x 2160", bg = "white") dev.off() ## moves dat001 <- datlogs[which(datlogs$artwork == "001"), ] index <- as.numeric(as.factor(dat001$trace)) cc <- sample(colors(), 100) plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc[index])) plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, col = "blue") cc <- sample(colors(), 70) dat1 <- datlogs[!duplicated(datlogs$artwork), ] dat1 <- dat1[order(dat1$artwork), ] plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") with(dat1, points(x.start, y.start, col = cc, pch = 16)) with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))