mtt_haum/code/visualization.R

201 lines
5.7 KiB
R

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
library(lattice)
# Read data
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
sep = ";", header = TRUE)
datlogs$date <- as.Date(datlogs$date.start)
datlogs$date.start <- as.POSIXct(datlogs$date.start)
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
### Which artwork gets touched most often/first?
counts_artwork <- table(datlogs$artwork)
barchart(counts_artwork)
artworks <- unique(datlogs$artwork)
artworks <- artworks[!artworks %in% c("504", "505")]
datart <- extract_artworks(artworks,
paste0(artworks, ".xml"),
"../data/haum/ContentEyevisit/eyevisit_cards_light/")
datart <- datart[order(datart$artwork), ]
names(counts_artwork) <- datart$title
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))
mtp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
dev.off()
### Which artwork gets touched most often first?
datcase <- datlogs[!duplicated(datlogs$case), ]
counts_case <- table(datcase$artwork)
names(counts_case) <- datart$title
tmp <- barplot(counts_case, las = 2, border = "white")
text(tmp, counts_case + 100, c(datart$artwork, "504", "505"))
counts <- rbind(counts_artwork, counts_case)
barplot(counts, las = 2, border = "white", col = c("gray", "darkorange"))
### Which teasers seem to work well?
barplot(table(datlogs$topic), las = 2)
barplot(table(datlogs$topicFile), las = 2)
### Dwell times/duration
datagg <- aggregate(duration ~ event + artwork, datlogs, mean)
datagg$ds <- datagg$duration / 1000
bwplot(ds ~ as.factor(event), datagg)
xyplot(ds ~ as.factor(event), datagg, groups = artwork)
# without aggregation
bwplot(duration ~ as.factor(event), datlogs)
# in min
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs)
datlogs$daydiff <- c(NA, diff(datlogs$date))
plot(daydiff ~ date, datlogs, type = "b")
### Are there certain areas of the table that are touched most often?
# heatmap
cuts <- 100
datlogs$x.start.cat <- cut(datlogs$x.start, cuts)
datlogs$y.start.cat <- cut(datlogs$y.start, cuts)
tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs)
colnames(tab) <- paste0("c", 1:cuts)
rownames(tab) <- paste0("c", 1:cuts)
heatmap(tab, Rowv = NA, Colv = NA)
library(ggplot2)
ggplot(as.data.frame(tab)) +
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
scale_fill_gradient(low = "gray40", high = "orange")
dattrim <- datlogs[datlogs$x.start < 3840 &
datlogs$x.start > 0 &
datlogs$y.start < 2160 &
datlogs$y.start > 0 &
datlogs$x.stop < 3840 &
datlogs$x.stop > 0 &
datlogs$y.stop < 2160 &
datlogs$y.stop > 0, ]
cuts <- 200 # 100, 70, ...
# start
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- paste0("c", 1:cuts)
rownames(tab.start) <- paste0("c", 1:cuts)
heatmap(tab.start, Rowv = NA, Colv = NA)
my_colors <- colorRampPalette(c("gray40", "orange"))
heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000))
ggplot(as.data.frame(tab.start)) +
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
scale_fill_gradient(low = "gray40", high = "orange")
# stop
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- paste0("c", 1:cuts)
rownames(tab.stop) <- paste0("c", 1:cuts)
heatmap(tab.stop, Rowv = NA, Colv = NA)
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(1000))
### How many visitors per day
# Interactions per day
datint <- aggregate(case ~ date, datlogs, length)
plot(datint, type = "h")
# Cases per day
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
plot(datcase, type = "h")
# Traces per day
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x)))
plot(dattrace, type = "h")
# function dependencies of mtt
devtools::load_all("../../../../software/mtt")
#library(mtt)
library(mvbutils)
foodweb(where = "package:mtt")
foodweb(where = "package:mtt",
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
"extract_topics", "add_topic"),
expand.ybox = 1.8, #cex = .6,
boxcolor = "gray", lwd = 2)
### Other stuff
dat001 <- datlogs[which(datlogs$artwork == "001"), ]
index <- as.numeric(as.factor(dat001$trace))
cc <- sample(colors(), 100)
plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160))
with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop,
length = .07, col = cc[index]))
plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray")
points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize,
col = "blue")
cc <- sample(colors(), 70)
dat1 <- datlogs[!duplicated(datlogs$artwork), ]
dat1 <- dat1[order(dat1$artwork), ]
plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500))
abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")
with(dat1, points(x.start, y.start, col = cc, pch = 16))
with(dat1, points(x.stop, y.stop, col = cc, pch = 16))
with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))