diff --git a/code/02_descriptives.R b/code/02_descriptives.R index 40d74a1..0b74a44 100644 --- a/code/02_descriptives.R +++ b/code/02_descriptives.R @@ -10,6 +10,7 @@ # (3.5) Topics # # input: results/haum/event_logfiles_2024-02-21_16-07-33.csv +# results/haum/raw_logfiles_2024-02-21_16-07-33.csv # output: # # last mod: 2024-03-13 @@ -18,6 +19,9 @@ library(lattice) library(bupaverse) +#library(mtt) +devtools::load_all("../../../../../software/mtt") + # Overall Research Question: How do museum visitors interact with the # artworks presented on the MTT? @@ -74,43 +78,281 @@ lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) mat[mat == 0] <- NA image(mat, axes = F, col = rainbow(100)) -heatmap(t(mat)) - - -datlogs$start <- datlogs$date.start -datlogs$complete <- datlogs$date.stop - - #--------------- (2) Descriptives --------------- + +### Which item gets touched most often? + +counts_item <- table(datlogs$item) +lattice::barchart(counts_item) + +items <- unique(datlogs$item) +#items <- items[!items %in% c("504", "505")] +datart <- extract_artworks(items, + paste0(items, ".xml"), + "../data/haum/ContentEyevisit/eyevisit_cards_light/") +datart <- datart[order(datart$artwork), ] +names(counts_item) <- datart$title + +pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) +par(mai = c(5, .6, .1, .1)) +tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), + border = NA, col = "#434F4F") +text(tmp, counts_item + 1000, datart$artwork) +dev.off() + +### Which item gets touched most often first? + +datcase <- datlogs[!duplicated(datlogs$case), ] +counts_case <- table(datcase$item) +names(counts_case) <- datart$title +tmp <- barplot(counts_case, las = 2, border = "white") +text(tmp, counts_case + 100, datart$item) + +counts <- rbind(counts_item, counts_case) + +pdf("results/figures/counts_item_firsttouch.pdf", + width = 20, height = 10, pointsize = 10) +par(mai = c(5, .6, .1, .1)) + +tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000)) +text(tmp, counts_item + counts_case + 1000, datart$artwork) +legend("topleft", c("Total interactions", "First interactions"), + col = c("#434F4F", "#FF6900"), pch = 15, bty = "n") +dev.off() + +### Which teasers seem to work well? +barplot(table(datlogs$topic), las = 2) + +### Dwell times/duration +datagg <- aggregate(duration ~ event + item, datlogs, mean) +datagg$ds <- datagg$duration / 1000 # in secs + +lattice::bwplot(ds ~ event, datagg) + +# without aggregation +lattice::bwplot(duration / 1000 / 60 ~ event, datlogs) +# in min + +set.seed(1027) + +pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) +lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], + ylab = "Duration in min") +dev.off() + +### Move events + +datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, + y.start, x.stop, y.stop) ~ item, datlogs, + mean) + +hist(log(datlogs$scaleSize)) +# --> better interpretable on logscale + +plot(y.start ~ x.start, datmove, pch = 16, col = "gray") +points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) + + +plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", + xlim = c(0, 3840), ylim = c(0, 2160)) +with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) +with(datmove, + arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) + ) +abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) + +datscale <- aggregate(scaleSize ~ item, datlogs, max) +plot(y.start ~ x.start, datmove, pch = 16, col = "gray") +points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) + +plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", + xlim = c(0, 3840), ylim = c(0, 2160)) +#with(datmove, text(x.stop, y.stop, item)) +with(datmove, text(x.start, y.start, item)) + + +### Are there certain areas of the table that are touched most often? + +# heatmap +cuts <- 100 + +datlogs$x.start.cat <- cut(datlogs$x.start, cuts) +datlogs$y.start.cat <- cut(datlogs$y.start, cuts) + +tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) + +colnames(tab) <- paste0("c", 1:cuts) +rownames(tab) <- paste0("c", 1:cuts) + +heatmap(tab, Rowv = NA, Colv = NA) + + +dattrim <- datlogs[datlogs$x.start < 3840 & + datlogs$x.start > 0 & + datlogs$y.start < 2160 & + datlogs$y.start > 0 & + datlogs$x.stop < 3840 & + datlogs$x.stop > 0 & + datlogs$y.stop < 2160 & + datlogs$y.stop > 0, ] + +cuts <- 100 # 200, 100, 70, ... + +# start +dattrim$x.start.cat <- cut(dattrim$x.start, cuts) +dattrim$y.start.cat <- cut(dattrim$y.start, cuts) + +tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) +colnames(tab.start) <- NULL +rownames(tab.start) <- NULL + +pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) +heatmap(tab.start, Rowv = NA, Colv = NA) +dev.off() + +# stop +dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) +dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) +tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) +colnames(tab.stop) <- NULL +rownames(tab.stop) <- NULL + +pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) +heatmap(tab.stop, Rowv = NA, Colv = NA) +dev.off() + +### How many visitors per day + +datlogs$date <- as.Date(datlogs$date.start) + +# Interactions per day +datint <- aggregate(case ~ date, datlogs, length) +plot(datint, type = "h") + +# Cases per day +datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) +plot(datcase, type = "h") + +# Paths per day +datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) +plot(datpath, type = "h") + +plot(path ~ date, datpath, type = "h", col = "#3CB4DC") +points(case ~ date, datcase, type = "h") + +pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) +par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +plot(case ~ date, datcase, type = "h", col = "#434F4F") +abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], + col = "#FF6900", lty = 2) +text(datcase$date[datcase$date == "2020-03-13"]+470, 80, + "Corona gap from 2020-03-13 to 2022-10-25", + col = "#FF6900") +dev.off() + + +### Other stuff + +library(mvbutils) +foodweb(where = "package:mtt") + +pdf("results/figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) +foodweb(where = "package:mtt", + prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", + "extract_topics"), + #expand.ybox = 1.8, #cex = .6, + #border = TRUE, + #boxcolor = "gray", + color.lines = FALSE, + lwd = 2, mai = c(0, 0, 0, 0)) +dev.off() + + +## weird behavior of timeMs + +pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) +#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") +lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", + scales = list(x = list(rot = 90), y = list(rot = 90))) +dev.off() + +## x,y-coordinates out of range + +set.seed(1522) + +pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) +par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +#par(mfrow = c(1, 2)) +plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) +abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) +#plot(y.stop ~ x.stop, datlogs) +#abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) +legend("bottomleft", "Random sample of 10,000", bg = "white") +legend("topleft", "4K-Display: 3840 x 2160", bg = "white") +dev.off() + +## moves + +dat001 <- datlogs[which(datlogs$item == "001"), ] + +index <- as.numeric(as.factor(dat001$path)) +cc <- sample(colors(), 100) + +plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", + xlim = c(0, 3840), ylim = c(0, 2160)) +with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, + length = .07, col = cc[index])) + +plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", + xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") +points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", + xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, + col = "blue") + + + +cc <- sample(colors(), 70) + +dat1 <- datlogs[!duplicated(datlogs$item), ] +dat1 <- dat1[order(dat1$item), ] + +plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) +abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") +with(dat1, points(x.start, y.start, col = cc, pch = 16)) +with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) +with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc)) + + # How many events per topic, per path, ... # How many popups per artwork? # Number of events per artwork -tab <- xtabs( ~ artwork + event, datlogs) +tab <- xtabs( ~ item + event, datlogs) addmargins(tab) -proportions(tab, margin = "artwork") +proportions(tab, margin = "item") proportions(tab, margin = "event") cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] -pdf("../figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) -barplot(t(proportions(tab, margin = "artwork")), las = 2, col = cc, +barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) dev.off() -#barchart(proportions(tab, margin = "artwork"), las = 2) +#barchart(proportions(tab, margin = "item"), las = 2) # Proportion of events proportions(xtabs( ~ event, datlogs)) # Mean proportion of event per path colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) -# Mean proportion of event per artwork -colMeans(proportions(tab, margin = "artwork")) +# Mean proportion of event per item +colMeans(proportions(tab, margin = "item")) # Proportion of unclosed events @@ -126,15 +368,17 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs) #--------------- (3.1) Check data quality --------------- +datlogs$start <- datlogs$date.start +datlogs$complete <- datlogs$date.stop + alog <- activitylog(datlogs, case_id = "path", activity_id = "event", #resource_id = "case", - resource_id = "artwork", + resource_id = "item", timestamps = c("start", "complete")) -# process_map(alog, frequency("relative")) -map_as_pdf(alog, file = "../figures/pm_trace-event.pdf") +process_map(alog, frequency("relative")) alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) @@ -188,7 +432,7 @@ alog080 <- activitylog(datlogs[datlogs$artwork == "080",], resource_id = "artwork", timestamps = c("start", "complete")) -map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf") +process_map(alog80, frequency("relative")) alog087 <- activitylog(datlogs[datlogs$artwork == "087",], case_id = "path", @@ -196,7 +440,7 @@ alog087 <- activitylog(datlogs[datlogs$artwork == "087",], resource_id = "artwork", timestamps = c("start", "complete")) -map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf") +process_map(alog087, frequency("relative")) alog504 <- activitylog(datlogs[datlogs$artwork == "504",], case_id = "path", @@ -204,7 +448,7 @@ alog504 <- activitylog(datlogs[datlogs$artwork == "504",], resource_id = "artwork", timestamps = c("start", "complete")) -map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") +process_map(alog504, frequency("relative")) #--------------- (3.3) Patterns of cases --------------- @@ -221,7 +465,7 @@ alog <- activitylog(datlogs, resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event.pdf") +process_map(alog, frequency("relative")) alog_no_move <- alog[alog$event != "move", ] @@ -233,7 +477,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% abbreviate = T) dev.off() -map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf") +process_map(alog080, frequency("relative")) alog087 <- activitylog(datlogs[datlogs$artwork == "087",], case_id = "case", @@ -241,7 +485,7 @@ alog087 <- activitylog(datlogs[datlogs$artwork == "087",], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf") +process_map(alog087, frequency("relative")) ### Mornings and afternoons @@ -253,7 +497,7 @@ alog <- activitylog(datlogs[datlogs$tod == "morning",], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf") +process_map(alog, frequency("relative")) alog <- activitylog(datlogs[datlogs$tod == "afternoon",], case_id = "case", @@ -261,7 +505,7 @@ alog <- activitylog(datlogs[datlogs$tod == "afternoon",], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf") +process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) @@ -283,7 +527,7 @@ alog <- activitylog(datlogs[datlogs$wd == "weekend",], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf") +process_map(alog, frequency("relative")) alog <- activitylog(datlogs[datlogs$wd == "weekday",], case_id = "case", @@ -291,7 +535,7 @@ alog <- activitylog(datlogs[datlogs$wd == "weekday",], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf") +process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) @@ -314,7 +558,7 @@ alog <- activitylog(datlogs[which(datlogs$wds == "school"),], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf") +process_map(alog, frequency("relative")) alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], case_id = "case", @@ -322,7 +566,7 @@ alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf") +process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) @@ -345,7 +589,7 @@ alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf") +process_map(alog, frequency("relative")) alog <- activitylog(datlogs[which(datlogs$corona == "post"),], case_id = "case", @@ -353,7 +597,7 @@ alog <- activitylog(datlogs[which(datlogs$corona == "post"),], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf") +process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) @@ -398,7 +642,7 @@ alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf") +process_map(alog, frequency("relative")) pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) @@ -420,7 +664,7 @@ alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], resource_id = "path", timestamps = c("start", "complete")) -map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") +process_map(alog, frequency("relative")) # Order of topics for Vermeer # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], @@ -429,9 +673,6 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") # resource_id = "path", # timestamps = c("start", "complete")) # -# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf") -# -# # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], # case_id = "case", # activity_id = "topicFile", @@ -446,8 +687,6 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") # activity_id = "topic", # resource_id = "path", # timestamps = c("start", "complete")) -# -# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf") # artworks that have the same topics than Vermeer which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% @@ -464,7 +703,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) { resource_id = "path", timestamps = c("start", "complete")) - map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf")) + process_map(alog, frequency("relative")) } @@ -518,13 +757,3 @@ plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) lattice::barchart(path ~ time, tmp, horizontal=F) - -########################################################################### -# HELPER - -map_as_pdf <- function(alog, file, type = frequency("relative")) { - map <- process_map(alog, type = type) - g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw() - rsvg::rsvg_pdf(g, file) -} -