Finally added old descriptives stuff; needed the plots

This commit is contained in:
Nora Wickelmaier 2024-03-20 17:08:28 +01:00
parent b50f52dc6c
commit 6feea5a251

View File

@ -10,6 +10,7 @@
# (3.5) Topics
#
# input: results/haum/event_logfiles_2024-02-21_16-07-33.csv
# results/haum/raw_logfiles_2024-02-21_16-07-33.csv
# output:
#
# last mod: 2024-03-13
@ -18,6 +19,9 @@
library(lattice)
library(bupaverse)
#library(mtt)
devtools::load_all("../../../../../software/mtt")
# Overall Research Question: How do museum visitors interact with the
# artworks presented on the MTT?
@ -74,43 +78,281 @@ lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE)
mat <- t(as.matrix(xtabs( ~ item + topic, datlogs)))
mat[mat == 0] <- NA
image(mat, axes = F, col = rainbow(100))
heatmap(t(mat))
datlogs$start <- datlogs$date.start
datlogs$complete <- datlogs$date.stop
#--------------- (2) Descriptives ---------------
### Which item gets touched most often?
counts_item <- table(datlogs$item)
lattice::barchart(counts_item)
items <- unique(datlogs$item)
#items <- items[!items %in% c("504", "505")]
datart <- extract_artworks(items,
paste0(items, ".xml"),
"../data/haum/ContentEyevisit/eyevisit_cards_light/")
datart <- datart[order(datart$artwork), ]
names(counts_item) <- datart$title
pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))
tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000),
border = NA, col = "#434F4F")
text(tmp, counts_item + 1000, datart$artwork)
dev.off()
### Which item gets touched most often first?
datcase <- datlogs[!duplicated(datlogs$case), ]
counts_case <- table(datcase$item)
names(counts_case) <- datart$title
tmp <- barplot(counts_case, las = 2, border = "white")
text(tmp, counts_case + 100, datart$item)
counts <- rbind(counts_item, counts_case)
pdf("results/figures/counts_item_firsttouch.pdf",
width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))
tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000))
text(tmp, counts_item + counts_case + 1000, datart$artwork)
legend("topleft", c("Total interactions", "First interactions"),
col = c("#434F4F", "#FF6900"), pch = 15, bty = "n")
dev.off()
### Which teasers seem to work well?
barplot(table(datlogs$topic), las = 2)
### Dwell times/duration
datagg <- aggregate(duration ~ event + item, datlogs, mean)
datagg$ds <- datagg$duration / 1000 # in secs
lattice::bwplot(ds ~ event, datagg)
# without aggregation
lattice::bwplot(duration / 1000 / 60 ~ event, datlogs)
# in min
set.seed(1027)
pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10)
lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
ylab = "Duration in min")
dev.off()
### Move events
datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start,
y.start, x.stop, y.stop) ~ item, datlogs,
mean)
hist(log(datlogs$scaleSize))
# --> better interpretable on logscale
plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize)
plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160))
with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5))
with(datmove,
arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2)
)
abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
datscale <- aggregate(scaleSize ~ item, datlogs, max)
plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize)
plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160))
#with(datmove, text(x.stop, y.stop, item))
with(datmove, text(x.start, y.start, item))
### Are there certain areas of the table that are touched most often?
# heatmap
cuts <- 100
datlogs$x.start.cat <- cut(datlogs$x.start, cuts)
datlogs$y.start.cat <- cut(datlogs$y.start, cuts)
tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs)
colnames(tab) <- paste0("c", 1:cuts)
rownames(tab) <- paste0("c", 1:cuts)
heatmap(tab, Rowv = NA, Colv = NA)
dattrim <- datlogs[datlogs$x.start < 3840 &
datlogs$x.start > 0 &
datlogs$y.start < 2160 &
datlogs$y.start > 0 &
datlogs$x.stop < 3840 &
datlogs$x.stop > 0 &
datlogs$y.stop < 2160 &
datlogs$y.stop > 0, ]
cuts <- 100 # 200, 100, 70, ...
# start
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- NULL
rownames(tab.start) <- NULL
pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.start, Rowv = NA, Colv = NA)
dev.off()
# stop
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- NULL
rownames(tab.stop) <- NULL
pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.stop, Rowv = NA, Colv = NA)
dev.off()
### How many visitors per day
datlogs$date <- as.Date(datlogs$date.start)
# Interactions per day
datint <- aggregate(case ~ date, datlogs, length)
plot(datint, type = "h")
# Cases per day
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
plot(datcase, type = "h")
# Paths per day
datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x)))
plot(datpath, type = "h")
plot(path ~ date, datpath, type = "h", col = "#3CB4DC")
points(case ~ date, datcase, type = "h")
pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(case ~ date, datcase, type = "h", col = "#434F4F")
abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")],
col = "#FF6900", lty = 2)
text(datcase$date[datcase$date == "2020-03-13"]+470, 80,
"Corona gap from 2020-03-13 to 2022-10-25",
col = "#FF6900")
dev.off()
### Other stuff
library(mvbutils)
foodweb(where = "package:mtt")
pdf("results/figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
foodweb(where = "package:mtt",
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
"extract_topics"),
#expand.ybox = 1.8, #cex = .6,
#border = TRUE,
#boxcolor = "gray",
color.lines = FALSE,
lwd = 2, mai = c(0, 0, 0, 0))
dev.off()
## weird behavior of timeMs
pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
scales = list(x = list(rot = 90), y = list(rot = 90)))
dev.off()
## x,y-coordinates out of range
set.seed(1522)
pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#par(mfrow = c(1, 2))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
#plot(y.stop ~ x.stop, datlogs)
#abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
legend("bottomleft", "Random sample of 10,000", bg = "white")
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
dev.off()
## moves
dat001 <- datlogs[which(datlogs$item == "001"), ]
index <- as.numeric(as.factor(dat001$path))
cc <- sample(colors(), 100)
plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160))
with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop,
length = .07, col = cc[index]))
plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray")
points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize,
col = "blue")
cc <- sample(colors(), 70)
dat1 <- datlogs[!duplicated(datlogs$item), ]
dat1 <- dat1[order(dat1$item), ]
plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500))
abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")
with(dat1, points(x.start, y.start, col = cc, pch = 16))
with(dat1, points(x.stop, y.stop, col = cc, pch = 16))
with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))
# How many events per topic, per path, ...
# How many popups per artwork?
# Number of events per artwork
tab <- xtabs( ~ artwork + event, datlogs)
tab <- xtabs( ~ item + event, datlogs)
addmargins(tab)
proportions(tab, margin = "artwork")
proportions(tab, margin = "item")
proportions(tab, margin = "event")
cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)]
pdf("../figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))
barplot(t(proportions(tab, margin = "artwork")), las = 2, col = cc,
barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"),
legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white"))
dev.off()
#barchart(proportions(tab, margin = "artwork"), las = 2)
#barchart(proportions(tab, margin = "item"), las = 2)
# Proportion of events
proportions(xtabs( ~ event, datlogs))
# Mean proportion of event per path
colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path"))
# Mean proportion of event per artwork
colMeans(proportions(tab, margin = "artwork"))
# Mean proportion of event per item
colMeans(proportions(tab, margin = "item"))
# Proportion of unclosed events
@ -126,15 +368,17 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs)
#--------------- (3.1) Check data quality ---------------
datlogs$start <- datlogs$date.start
datlogs$complete <- datlogs$date.stop
alog <- activitylog(datlogs,
case_id = "path",
activity_id = "event",
#resource_id = "case",
resource_id = "artwork",
resource_id = "item",
timestamps = c("start", "complete"))
# process_map(alog, frequency("relative"))
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
process_map(alog, frequency("relative"))
alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)
@ -188,7 +432,7 @@ alog080 <- activitylog(datlogs[datlogs$artwork == "080",],
resource_id = "artwork",
timestamps = c("start", "complete"))
map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf")
process_map(alog80, frequency("relative"))
alog087 <- activitylog(datlogs[datlogs$artwork == "087",],
case_id = "path",
@ -196,7 +440,7 @@ alog087 <- activitylog(datlogs[datlogs$artwork == "087",],
resource_id = "artwork",
timestamps = c("start", "complete"))
map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf")
process_map(alog087, frequency("relative"))
alog504 <- activitylog(datlogs[datlogs$artwork == "504",],
case_id = "path",
@ -204,7 +448,7 @@ alog504 <- activitylog(datlogs[datlogs$artwork == "504",],
resource_id = "artwork",
timestamps = c("start", "complete"))
map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
process_map(alog504, frequency("relative"))
#--------------- (3.3) Patterns of cases ---------------
@ -221,7 +465,7 @@ alog <- activitylog(datlogs,
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event.pdf")
process_map(alog, frequency("relative"))
alog_no_move <- alog[alog$event != "move", ]
@ -233,7 +477,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
abbreviate = T)
dev.off()
map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf")
process_map(alog080, frequency("relative"))
alog087 <- activitylog(datlogs[datlogs$artwork == "087",],
case_id = "case",
@ -241,7 +485,7 @@ alog087 <- activitylog(datlogs[datlogs$artwork == "087",],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf")
process_map(alog087, frequency("relative"))
### Mornings and afternoons
@ -253,7 +497,7 @@ alog <- activitylog(datlogs[datlogs$tod == "morning",],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
process_map(alog, frequency("relative"))
alog <- activitylog(datlogs[datlogs$tod == "afternoon",],
case_id = "case",
@ -261,7 +505,7 @@ alog <- activitylog(datlogs[datlogs$tod == "afternoon",],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf")
process_map(alog, frequency("relative"))
# Are the same artworks looked at?
pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
@ -283,7 +527,7 @@ alog <- activitylog(datlogs[datlogs$wd == "weekend",],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
process_map(alog, frequency("relative"))
alog <- activitylog(datlogs[datlogs$wd == "weekday",],
case_id = "case",
@ -291,7 +535,7 @@ alog <- activitylog(datlogs[datlogs$wd == "weekday",],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf")
process_map(alog, frequency("relative"))
# Are the same artworks looked at?
pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
@ -314,7 +558,7 @@ alog <- activitylog(datlogs[which(datlogs$wds == "school"),],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
process_map(alog, frequency("relative"))
alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),],
case_id = "case",
@ -322,7 +566,7 @@ alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf")
process_map(alog, frequency("relative"))
# Are the same artworks looked at?
pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
@ -345,7 +589,7 @@ alog <- activitylog(datlogs[which(datlogs$corona == "pre"),],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
process_map(alog, frequency("relative"))
alog <- activitylog(datlogs[which(datlogs$corona == "post"),],
case_id = "case",
@ -353,7 +597,7 @@ alog <- activitylog(datlogs[which(datlogs$corona == "post"),],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf")
process_map(alog, frequency("relative"))
# Are the same artworks looked at?
pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
@ -398,7 +642,7 @@ alog <- activitylog(datlogs[datlogs$artwork %in% often080, ],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
process_map(alog, frequency("relative"))
pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
@ -420,7 +664,7 @@ alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),],
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
process_map(alog, frequency("relative"))
# Order of topics for Vermeer
# alog080 <- activitylog(datlogs[datlogs$artwork == "080",],
@ -429,9 +673,6 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
# resource_id = "path",
# timestamps = c("start", "complete"))
#
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
#
#
# alog080 <- activitylog(datlogs[datlogs$artwork == "080",],
# case_id = "case",
# activity_id = "topicFile",
@ -446,8 +687,6 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
# activity_id = "topic",
# resource_id = "path",
# timestamps = c("start", "complete"))
#
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
# artworks that have the same topics than Vermeer
which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in%
@ -464,7 +703,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) {
resource_id = "path",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
process_map(alog, frequency("relative"))
}
@ -518,13 +757,3 @@ plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
lattice::barchart(path ~ time, tmp, horizontal=F)
###########################################################################
# HELPER
map_as_pdf <- function(alog, file, type = frequency("relative")) {
map <- process_map(alog, type = type)
g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
rsvg::rsvg_pdf(g, file)
}