2024-02-13 09:03:35 +01:00
|
|
|
# 02_descriptives.R
|
2023-11-01 18:46:39 +01:00
|
|
|
#
|
2023-11-17 18:46:22 +01:00
|
|
|
# content: (1) Read data
|
2023-11-01 18:46:39 +01:00
|
|
|
# (2) Descriptives
|
|
|
|
# (3) Process Mining
|
2023-11-17 18:46:22 +01:00
|
|
|
# (3.1) Check data quality
|
|
|
|
# (3.2) Interactions for different artworks
|
|
|
|
# (3.3) Patterns of cases
|
|
|
|
# (3.4) Artwork sequences
|
|
|
|
# (3.5) Topics
|
2023-11-01 18:46:39 +01:00
|
|
|
#
|
2024-03-22 12:07:45 +01:00
|
|
|
# input: results/event_logfiles_2024-02-21_16-07-33.csv
|
|
|
|
# results/raw_logfiles_2024-02-21_16-07-33.csv
|
2024-03-25 16:37:37 +01:00
|
|
|
# output: results/figures/counts_item_firsttouch.pdf
|
2024-03-22 11:00:36 +01:00
|
|
|
# results/figures/duration.pdf
|
|
|
|
# results/figures/heatmap_start.pdf
|
|
|
|
# results/figures/heatmap_stop.pdf
|
|
|
|
# results/figures/timeMs.pdf
|
|
|
|
# results/figures/xycoord.pdf
|
|
|
|
# results/figures/event-dist.pdf
|
|
|
|
# results/figures/traceexplore_trace-event.pdf
|
|
|
|
# results/figures/ra_trace-event.pdf
|
|
|
|
# results/figures/traceexplore_case-event.pdf
|
|
|
|
# results/figures/bp_tod.pdf
|
|
|
|
# results/figures/bp_wd.pdf
|
|
|
|
# results/figures/bp_wds.pdf
|
|
|
|
# results/figures/bp_corona.pdf
|
|
|
|
# results/figures/traceexplore_case-artwork_often080.pdf
|
2023-11-01 18:46:39 +01:00
|
|
|
#
|
2024-03-28 18:28:18 +01:00
|
|
|
# last mod: 2024-03-28
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-13 18:14:57 +01:00
|
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
#--------------- (1) Read data ---------------
|
|
|
|
|
2024-03-22 12:07:45 +01:00
|
|
|
datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv",
|
2024-01-31 12:09:27 +01:00
|
|
|
colClasses = c("character", "character", "POSIXct",
|
|
|
|
"POSIXct", "character", "integer",
|
|
|
|
"numeric", "character", "character",
|
|
|
|
rep("numeric", 3), "character",
|
|
|
|
"character", rep("numeric", 11),
|
|
|
|
"character", "character"),
|
|
|
|
sep = ";", header = TRUE)
|
|
|
|
|
|
|
|
datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard",
|
|
|
|
"openTopic",
|
|
|
|
"openPopup"))
|
|
|
|
|
2024-03-22 12:07:45 +01:00
|
|
|
datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv",
|
2024-01-31 12:09:27 +01:00
|
|
|
sep = ";", header = TRUE)
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
# Add weekdays to data frame
|
2024-01-31 12:09:27 +01:00
|
|
|
datlogs$weekdays <- factor(weekdays(datlogs$date.start),
|
2023-11-01 18:46:39 +01:00
|
|
|
levels = c("Montag", "Dienstag", "Mittwoch",
|
|
|
|
"Donnerstag", "Freitag", "Samstag",
|
|
|
|
"Sonntag"),
|
|
|
|
labels = c("Monday", "Tuesday", "Wednesday",
|
|
|
|
"Thursday", "Friday", "Saturday",
|
|
|
|
"Sunday"))
|
2024-03-13 18:14:57 +01:00
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
### Number of log files
|
|
|
|
length(unique(datraw$fileId))
|
|
|
|
# 39767
|
|
|
|
length(unique(c(datlogs$fileId.start, datlogs$fileId.stop)))
|
|
|
|
# 22789
|
|
|
|
|
|
|
|
### Number of activities
|
|
|
|
nrow(datlogs)
|
|
|
|
table(datlogs$event)
|
|
|
|
proportions(table(datlogs$event))
|
|
|
|
proportions(table(datlogs$event[datlogs$event != "move"]))
|
|
|
|
|
|
|
|
### Time range
|
|
|
|
range(as.Date(datlogs$date.start))
|
|
|
|
|
|
|
|
### Topics per item
|
|
|
|
print(xtabs( ~ item + topic, datlogs), zero = "-")
|
|
|
|
lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE)
|
|
|
|
|
|
|
|
mat <- t(as.matrix(xtabs( ~ item + topic, datlogs)))
|
|
|
|
mat[mat == 0] <- NA
|
|
|
|
image(mat, axes = F, col = rainbow(100))
|
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
#--------------- (2) Descriptives ---------------
|
2024-01-31 12:09:27 +01:00
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
### Which item gets touched most often?
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
counts_item <- table(datlogs$item)
|
|
|
|
lattice::barchart(counts_item)
|
|
|
|
|
|
|
|
items <- unique(datlogs$item)
|
|
|
|
#items <- items[!items %in% c("504", "505")]
|
2024-03-22 11:00:36 +01:00
|
|
|
datart <- mtt::extract_artworks(items,
|
2024-03-20 17:08:28 +01:00
|
|
|
paste0(items, ".xml"),
|
|
|
|
"../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
|
|
|
datart <- datart[order(datart$artwork), ]
|
|
|
|
names(counts_item) <- datart$title
|
|
|
|
|
|
|
|
tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000),
|
|
|
|
border = NA, col = "#434F4F")
|
|
|
|
text(tmp, counts_item + 1000, datart$artwork)
|
|
|
|
|
|
|
|
### Which item gets touched most often first?
|
|
|
|
|
|
|
|
datcase <- datlogs[!duplicated(datlogs$case), ]
|
|
|
|
counts_case <- table(datcase$item)
|
|
|
|
names(counts_case) <- datart$title
|
|
|
|
tmp <- barplot(counts_case, las = 2, border = "white")
|
|
|
|
text(tmp, counts_case + 100, datart$item)
|
|
|
|
|
|
|
|
counts <- rbind(counts_item, counts_case)
|
|
|
|
|
|
|
|
pdf("results/figures/counts_item_firsttouch.pdf",
|
|
|
|
width = 20, height = 10, pointsize = 10)
|
|
|
|
par(mai = c(5, .6, .1, .1))
|
|
|
|
|
|
|
|
tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000))
|
|
|
|
text(tmp, counts_item + counts_case + 1000, datart$artwork)
|
|
|
|
legend("topleft", c("Total interactions", "First interactions"),
|
|
|
|
col = c("#434F4F", "#FF6900"), pch = 15, bty = "n")
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
### Which teasers seem to work well?
|
|
|
|
barplot(table(datlogs$topic), las = 2)
|
|
|
|
|
|
|
|
### Dwell times/duration
|
|
|
|
datagg <- aggregate(duration ~ event + item, datlogs, mean)
|
|
|
|
datagg$ds <- datagg$duration / 1000 # in secs
|
|
|
|
|
|
|
|
lattice::bwplot(ds ~ event, datagg)
|
|
|
|
|
|
|
|
# without aggregation
|
|
|
|
lattice::bwplot(duration / 1000 / 60 ~ event, datlogs)
|
|
|
|
# in min
|
|
|
|
|
|
|
|
set.seed(1027)
|
|
|
|
|
|
|
|
pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
|
|
|
lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
|
|
|
|
ylab = "Duration in min")
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
### Move events
|
|
|
|
|
|
|
|
datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start,
|
|
|
|
y.start, x.stop, y.stop) ~ item, datlogs,
|
|
|
|
mean)
|
|
|
|
|
|
|
|
hist(log(datlogs$scaleSize))
|
|
|
|
# --> better interpretable on logscale
|
|
|
|
|
|
|
|
plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
|
|
|
|
points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize)
|
|
|
|
|
|
|
|
|
|
|
|
plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
|
|
|
|
xlim = c(0, 3840), ylim = c(0, 2160))
|
|
|
|
with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5))
|
|
|
|
with(datmove,
|
|
|
|
arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2)
|
|
|
|
)
|
|
|
|
abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
|
|
|
|
|
|
|
|
datscale <- aggregate(scaleSize ~ item, datlogs, max)
|
|
|
|
plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
|
|
|
|
points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize)
|
|
|
|
|
|
|
|
plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
|
|
|
|
xlim = c(0, 3840), ylim = c(0, 2160))
|
|
|
|
#with(datmove, text(x.stop, y.stop, item))
|
|
|
|
with(datmove, text(x.start, y.start, item))
|
|
|
|
|
|
|
|
|
|
|
|
### Are there certain areas of the table that are touched most often?
|
|
|
|
|
|
|
|
# heatmap
|
|
|
|
cuts <- 100
|
|
|
|
|
|
|
|
datlogs$x.start.cat <- cut(datlogs$x.start, cuts)
|
|
|
|
datlogs$y.start.cat <- cut(datlogs$y.start, cuts)
|
|
|
|
|
|
|
|
tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs)
|
|
|
|
|
|
|
|
colnames(tab) <- paste0("c", 1:cuts)
|
|
|
|
rownames(tab) <- paste0("c", 1:cuts)
|
|
|
|
|
|
|
|
heatmap(tab, Rowv = NA, Colv = NA)
|
|
|
|
|
|
|
|
|
|
|
|
dattrim <- datlogs[datlogs$x.start < 3840 &
|
|
|
|
datlogs$x.start > 0 &
|
|
|
|
datlogs$y.start < 2160 &
|
|
|
|
datlogs$y.start > 0 &
|
|
|
|
datlogs$x.stop < 3840 &
|
|
|
|
datlogs$x.stop > 0 &
|
|
|
|
datlogs$y.stop < 2160 &
|
|
|
|
datlogs$y.stop > 0, ]
|
|
|
|
|
|
|
|
cuts <- 100 # 200, 100, 70, ...
|
|
|
|
|
|
|
|
# start
|
|
|
|
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
|
|
|
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
|
|
|
|
|
|
|
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
|
|
|
colnames(tab.start) <- NULL
|
|
|
|
rownames(tab.start) <- NULL
|
|
|
|
|
|
|
|
pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
|
|
|
|
heatmap(tab.start, Rowv = NA, Colv = NA)
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
# stop
|
|
|
|
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
|
|
|
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
|
|
|
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
|
|
|
colnames(tab.stop) <- NULL
|
|
|
|
rownames(tab.stop) <- NULL
|
|
|
|
|
|
|
|
pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
|
|
|
|
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
### How many visitors per day
|
|
|
|
|
|
|
|
datlogs$date <- as.Date(datlogs$date.start)
|
|
|
|
|
|
|
|
# Interactions per day
|
|
|
|
datint <- aggregate(case ~ date, datlogs, length)
|
|
|
|
plot(datint, type = "h")
|
|
|
|
|
|
|
|
# Cases per day
|
|
|
|
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
|
|
|
|
plot(datcase, type = "h")
|
|
|
|
|
|
|
|
# Paths per day
|
|
|
|
datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x)))
|
|
|
|
plot(datpath, type = "h")
|
|
|
|
|
|
|
|
plot(path ~ date, datpath, type = "h", col = "#3CB4DC")
|
|
|
|
points(case ~ date, datcase, type = "h")
|
|
|
|
|
|
|
|
plot(case ~ date, datcase, type = "h", col = "#434F4F")
|
|
|
|
|
|
|
|
## weird behavior of timeMs
|
|
|
|
|
|
|
|
pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
|
|
|
|
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
|
|
|
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
|
|
|
|
lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
|
|
|
|
scales = list(x = list(rot = 90), y = list(rot = 90)))
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
## x,y-coordinates out of range
|
|
|
|
|
|
|
|
set.seed(1522)
|
|
|
|
|
|
|
|
pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
|
|
|
|
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
|
|
|
#par(mfrow = c(1, 2))
|
|
|
|
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
|
|
|
|
abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
|
|
|
|
#plot(y.stop ~ x.stop, datlogs)
|
|
|
|
#abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
|
|
|
|
legend("bottomleft", "Random sample of 10,000", bg = "white")
|
|
|
|
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
## moves
|
|
|
|
|
|
|
|
dat001 <- datlogs[which(datlogs$item == "001"), ]
|
|
|
|
|
|
|
|
index <- as.numeric(as.factor(dat001$path))
|
|
|
|
cc <- sample(colors(), 100)
|
|
|
|
|
|
|
|
plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
|
|
|
|
xlim = c(0, 3840), ylim = c(0, 2160))
|
|
|
|
with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop,
|
|
|
|
length = .07, col = cc[index]))
|
|
|
|
|
|
|
|
plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
|
|
|
|
xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray")
|
|
|
|
points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
|
|
|
|
xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize,
|
|
|
|
col = "blue")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cc <- sample(colors(), 70)
|
|
|
|
|
|
|
|
dat1 <- datlogs[!duplicated(datlogs$item), ]
|
|
|
|
dat1 <- dat1[order(dat1$item), ]
|
|
|
|
|
|
|
|
plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500))
|
|
|
|
abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")
|
|
|
|
with(dat1, points(x.start, y.start, col = cc, pch = 16))
|
|
|
|
with(dat1, points(x.stop, y.stop, col = cc, pch = 16))
|
|
|
|
with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-15 15:54:23 +01:00
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
# How many events per topic, per path, ...
|
2023-11-01 18:46:39 +01:00
|
|
|
# How many popups per artwork?
|
|
|
|
|
|
|
|
# Number of events per artwork
|
2024-03-20 17:08:28 +01:00
|
|
|
tab <- xtabs( ~ item + event, datlogs)
|
2023-11-01 18:46:39 +01:00
|
|
|
addmargins(tab)
|
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
proportions(tab, margin = "item")
|
2023-11-01 18:46:39 +01:00
|
|
|
proportions(tab, margin = "event")
|
|
|
|
|
2023-11-02 14:24:06 +01:00
|
|
|
cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)]
|
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
|
2023-11-02 14:24:06 +01:00
|
|
|
par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))
|
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"),
|
2024-01-31 12:09:27 +01:00
|
|
|
legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white"))
|
2023-11-02 14:24:06 +01:00
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
#lattice::barchart(proportions(tab, margin = "item"), las = 2)
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
# Proportion of events
|
2024-01-31 12:09:27 +01:00
|
|
|
proportions(xtabs( ~ event, datlogs))
|
|
|
|
# Mean proportion of event per path
|
|
|
|
colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path"))
|
2024-03-20 17:08:28 +01:00
|
|
|
# Mean proportion of event per item
|
|
|
|
colMeans(proportions(tab, margin = "item"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
# Proportion of unclosed events
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
nrow(datlogs[is.na(datlogs$complete), ])
|
|
|
|
nrow(datlogs[is.na(datlogs$complete), ]) / nrow(datlogs)
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-02 14:24:06 +01:00
|
|
|
# Proportion of events spanning more than one log file
|
2024-01-31 12:09:27 +01:00
|
|
|
sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE)
|
|
|
|
sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs)
|
2023-11-02 14:24:06 +01:00
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
#--------------- (3) Process Mining ---------------
|
|
|
|
|
|
|
|
#--------------- (3.1) Check data quality ---------------
|
|
|
|
|
2024-03-20 17:08:28 +01:00
|
|
|
datlogs$start <- datlogs$date.start
|
|
|
|
datlogs$complete <- datlogs$date.stop
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs,
|
2024-01-31 12:09:27 +01:00
|
|
|
case_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
activity_id = "event",
|
|
|
|
#resource_id = "case",
|
2024-03-20 17:08:28 +01:00
|
|
|
resource_id = "item",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-10 18:30:15 +01:00
|
|
|
alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)
|
|
|
|
|
|
|
|
processmapR::process_map(alogf, # alog,
|
|
|
|
type_nodes = processmapR::frequency("absolute"),
|
|
|
|
sec_nodes = processmapR::frequency("relative"),
|
|
|
|
type_edges = processmapR::frequency("absolute"),
|
|
|
|
sec_edges = processmapR::frequency("relative"),
|
|
|
|
rankdir = "TB")
|
|
|
|
|
|
|
|
alog_no_move <- alog[alog$event != "move", ]
|
2023-11-15 15:54:23 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
2023-11-10 18:30:15 +01:00
|
|
|
set.seed(1447)
|
2024-01-31 12:09:27 +01:00
|
|
|
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
|
|
|
|
sample(unique(alog_no_move$path), 400),],
|
2023-11-10 18:30:15 +01:00
|
|
|
coverage = 1, type = "frequent",
|
|
|
|
abbreviate = T)
|
2023-11-15 15:54:23 +01:00
|
|
|
dev.off()
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
2023-11-10 18:30:15 +01:00
|
|
|
|
|
|
|
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
|
|
|
|
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
|
|
|
|
plot(ra_no_move)
|
|
|
|
|
2023-11-15 15:54:23 +01:00
|
|
|
dev.off()
|
|
|
|
|
2023-11-10 18:30:15 +01:00
|
|
|
ra <- edeaR::resource_frequency(alog, "resource-activity")
|
|
|
|
plot(ra)
|
|
|
|
|
|
|
|
heatmap(xtabs(relative_activity ~ artwork + event, ra))
|
|
|
|
heatmap(xtabs(relative_resource ~ artwork + event, ra_no_move))
|
|
|
|
heatmap(xtabs(relative_activity ~ artwork + event, ra_no_move))
|
|
|
|
|
|
|
|
aggregate(relative_activity ~ event, ra, sum)
|
|
|
|
aggregate(relative_resource ~ artwork, ra, sum)
|
|
|
|
|
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
#--------------- (3.2) Interactions for different artworks ---------------
|
|
|
|
# Do interaction patterns for events per trace look different for different
|
|
|
|
# artworks?
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
which.max(table(datlogs$artwork))
|
|
|
|
which.min(table(datlogs$artwork))
|
|
|
|
which.min(table(datlogs$artwork)[-c(71,72)])
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
|
2024-01-31 12:09:27 +01:00
|
|
|
case_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
activity_id = "event",
|
|
|
|
resource_id = "artwork",
|
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog80, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",],
|
2024-01-31 12:09:27 +01:00
|
|
|
case_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
activity_id = "event",
|
|
|
|
resource_id = "artwork",
|
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog087, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",],
|
2024-01-31 12:09:27 +01:00
|
|
|
case_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
activity_id = "event",
|
|
|
|
resource_id = "artwork",
|
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog504, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
#--------------- (3.3) Patterns of cases ---------------
|
|
|
|
|
|
|
|
# What kind of patterns do we have? Are their typical sequences for cases?
|
|
|
|
# Do case patterns look different for ...
|
|
|
|
# ... mornings and afternoons?
|
|
|
|
# ... weekdays and weekends?
|
|
|
|
# ... weekdays for "normal" and school vacation days?
|
|
|
|
# ... pre and post corona?
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs,
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-15 15:54:23 +01:00
|
|
|
alog_no_move <- alog[alog$event != "move", ]
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
|
2023-11-15 15:54:23 +01:00
|
|
|
set.seed(1050)
|
2024-01-31 12:09:27 +01:00
|
|
|
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
|
|
|
|
sample(unique(alog_no_move$path), 300),],
|
2023-11-15 15:54:23 +01:00
|
|
|
coverage = 1, type = "frequent",
|
|
|
|
abbreviate = T)
|
|
|
|
dev.off()
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog080, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog087, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
### Mornings and afternoons
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning")
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-02 14:24:06 +01:00
|
|
|
# Are the same artworks looked at?
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
|
2023-11-02 14:24:06 +01:00
|
|
|
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2],
|
2023-11-02 14:24:06 +01:00
|
|
|
las = 2, beside = TRUE, legend = c("afternoon", "morning"),
|
|
|
|
args.legend = list(x = "topleft"))
|
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
### Weekdays and weekends
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday")
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-02 14:24:06 +01:00
|
|
|
# Are the same artworks looked at?
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
|
2023-11-02 14:24:06 +01:00
|
|
|
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"),
|
2023-11-02 14:24:06 +01:00
|
|
|
las = 2, beside = TRUE, legend = c("weekday", "weekend"),
|
|
|
|
args.legend = list(x = "topleft"))
|
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
### Weekdays vs. school vacation weekdays
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school")
|
|
|
|
datlogs$wds[datlogs$wd == "weekend"] <- NA
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-02 14:24:06 +01:00
|
|
|
# Are the same artworks looked at?
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
|
2023-11-02 14:24:06 +01:00
|
|
|
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
#barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE,
|
|
|
|
barplot(proportions(xtabs( ~ wds + artwork, datlogs), margin = "wds"),
|
2023-11-02 14:24:06 +01:00
|
|
|
las = 2, beside = TRUE,
|
|
|
|
legend = c("school", "vacation"), args.legend = list(x = "topleft"))
|
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
### Pre and post Corona
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post")
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "event",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-02 14:24:06 +01:00
|
|
|
# Are the same artworks looked at?
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
|
2023-11-02 14:24:06 +01:00
|
|
|
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
|
|
|
|
2024-01-31 12:09:27 +01:00
|
|
|
barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"),
|
2023-11-02 14:24:06 +01:00
|
|
|
las = 2, beside = TRUE,
|
|
|
|
legend = c("post", "pre"), args.legend = list(x = "topleft"))
|
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
#--------------- (3.4) Artwork sequences ---------------
|
|
|
|
# Order in which artworks are looked at
|
|
|
|
|
|
|
|
nart <- 5 # select 5 artworks randomly
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "artwork",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
#map <- processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
## select cases with Vermeer
|
2024-01-31 12:09:27 +01:00
|
|
|
length(unique(datlogs[datlogs$artwork == "080", "case"]))
|
2023-11-01 18:46:39 +01:00
|
|
|
# 12615
|
2024-01-31 12:09:27 +01:00
|
|
|
case080 <- unique(datlogs[datlogs$artwork == "080", "case"])
|
|
|
|
tmp <- datlogs[datlogs$case %in% case080, ]
|
2023-11-01 18:46:39 +01:00
|
|
|
table(tmp$artwork)
|
|
|
|
# --> all :)
|
|
|
|
|
|
|
|
# select the ones most often (I am aiming for 10...)
|
|
|
|
barplot(table(tmp$artwork))
|
2023-11-06 09:15:03 +01:00
|
|
|
abline(h = 14000, col = "red")
|
|
|
|
which(table(tmp$artwork) > 14000)
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2023-11-06 09:15:03 +01:00
|
|
|
often080 <- names(which(table(tmp$artwork) > 14000))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "artwork",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
|
2023-11-15 15:54:23 +01:00
|
|
|
|
|
|
|
processmapR::trace_explorer(alog,
|
|
|
|
n_traces = 30, type = "frequent",
|
|
|
|
abbreviate = TRUE)
|
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
#--------------- (3.5) Topics ---------------
|
|
|
|
|
|
|
|
# Are there certain topics that people are interested in more than others?
|
|
|
|
# Do these topic distributions differ for comparable artworks?
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),],
|
2023-11-02 14:24:06 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "topic",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-02 14:24:06 +01:00
|
|
|
timestamps = c("start", "complete"))
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-02 14:24:06 +01:00
|
|
|
|
2023-11-01 18:46:39 +01:00
|
|
|
# Order of topics for Vermeer
|
2024-03-22 11:00:36 +01:00
|
|
|
# alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
|
2023-11-01 18:46:39 +01:00
|
|
|
# case_id = "case",
|
|
|
|
# activity_id = "topic",
|
2024-01-31 12:09:27 +01:00
|
|
|
# resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
# timestamps = c("start", "complete"))
|
2023-11-10 18:30:15 +01:00
|
|
|
#
|
2024-03-22 11:00:36 +01:00
|
|
|
# alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
|
2023-11-01 18:46:39 +01:00
|
|
|
# case_id = "case",
|
|
|
|
# activity_id = "topicFile",
|
2024-01-31 12:09:27 +01:00
|
|
|
# resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
# timestamps = c("start", "complete"))
|
2023-11-10 18:30:15 +01:00
|
|
|
#
|
2024-03-22 11:00:36 +01:00
|
|
|
# #processmapR::process_map(alog080, processmapR::frequency("relative"))
|
2023-11-10 18:30:15 +01:00
|
|
|
#
|
2023-11-01 18:46:39 +01:00
|
|
|
# # Comparable artwork
|
2024-03-22 11:00:36 +01:00
|
|
|
# alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",],
|
2023-11-01 18:46:39 +01:00
|
|
|
# case_id = "case",
|
|
|
|
# activity_id = "topic",
|
2024-01-31 12:09:27 +01:00
|
|
|
# resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
# timestamps = c("start", "complete"))
|
|
|
|
|
|
|
|
# artworks that have the same topics than Vermeer
|
2024-01-31 12:09:27 +01:00
|
|
|
which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in%
|
2023-11-01 18:46:39 +01:00
|
|
|
c("artist", "details", "extra info", "komposition",
|
|
|
|
"licht und farbe", "thema"), ]) != 0) == 6)
|
|
|
|
|
2023-11-10 18:30:15 +01:00
|
|
|
#037 046 062 080 083 109
|
2023-11-01 18:46:39 +01:00
|
|
|
|
|
|
|
for (art in c("037", "046", "062", "080", "083", "109")) {
|
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,],
|
2023-11-01 18:46:39 +01:00
|
|
|
case_id = "case",
|
|
|
|
activity_id = "topic",
|
2024-01-31 12:09:27 +01:00
|
|
|
resource_id = "path",
|
2023-11-01 18:46:39 +01:00
|
|
|
timestamps = c("start", "complete"))
|
2023-11-10 18:30:15 +01:00
|
|
|
|
2024-03-22 11:00:36 +01:00
|
|
|
processmapR::process_map(alog, processmapR::frequency("relative"))
|
2023-11-01 18:46:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|