mtt_haum/code/03_plots_8o8m.R

126 lines
4.1 KiB
R

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
library(lattice)
devtools::load_all("../../../../software/mtt")
library(ggplot2)
# Read data
datlogs <- read.table("../data/8o8m/event_logfiles_2023-09-22_18-54-49.csv",
sep = ";", header = TRUE)
datlogs$date <- as.Date(datlogs$date.start)
datlogs$date.start <- as.POSIXct(datlogs$date.start)
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
datlogs$artwork <- sprintf("%02d", datlogs$artwork)
### Which artwork gets touched most often?
counts_artwork <- table(datlogs$artwork)
artworks <- unique(datlogs$artwork)
datart <- extract_artworks(artworks,
paste0(artworks, "_de.xml"),
"../data/8o8m/Content8o8m/")
datart <- datart[order(datart$artwork), ]
names(counts_artwork) <- datart$title
#pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 10, pointsize = 10)
#par(mai = c(3, .6, .1, .1))
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 90000), border = "white")
text(tmp, counts_artwork + 1500, c(datart$artwork))
#dev.off()
# more interesting per museum in this case...
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1),
border = "transparent", col = "#0072B2")
dev.off()
### Dwell times/duration
set.seed(1033)
pdf("../figures/duration_8o8m.pdf", width = 20, height = 6, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event) | folder,
datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min")
dev.off()
set.seed(1033)
pdf("../figures/duration_8o8m_artworks.pdf", width = 20, height = 10, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event) | artwork + folder,
datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min")
dev.off()
### Are there certain areas of the table that are touched most often?
# heatmap
dattrim <- datlogs[datlogs$x.start < 3840 &
datlogs$x.start > 0 &
datlogs$y.start < 2160 &
datlogs$y.start > 0 &
datlogs$x.stop < 3840 &
datlogs$x.stop > 0 &
datlogs$y.stop < 2160 &
datlogs$y.stop > 0, ]
cuts <- 100 # 200, 100, 70, ...
# start
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- NULL
rownames(tab.start) <- NULL
pdf("../figures/heatmap_start_8o8m.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.start, Rowv = NA, Colv = NA)
dev.off()
# stop
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- NULL
rownames(tab.stop) <- NULL
pdf("../figures/heatmap_stop_8o8m.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.stop, Rowv = NA, Colv = NA)
dev.off()
### How many visitors per day
# Cases per day
datcase <- aggregate(case ~ date + folder, datlogs, function(x) length(unique(x)))
pdf("../figures/cases_per_day_8o8m.pdf", width = 20, height = 6, pointsize = 10)
barchart(case ~ date | folder, datcase, horizontal = F,
scales = list(x = list(rot = 90, at = seq(1, 122, 10)), y = list(rot = 90)),
border = "transparent", col = "#0072B2")
dev.off()
### Other stuff
## weird behavior of timeMs
pdf("../figures/timeMs_8o8m.pdf", width = 9, height = 6, pointsize = 10)
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
scales = list(x = list(rot = 90), y = list(rot = 90)))
dev.off()
## x,y-coordinates out of range
set.seed(1522)
pdf("../figures/xycoord_8o8m.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
legend("bottomleft", "Random sample of 10,000", bg = "white")
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
dev.off()