371 lines
12 KiB
R
371 lines
12 KiB
R
# 00_current_analysis.R
|
|
#
|
|
# content: (1) Read event log data
|
|
# (2) Descriptives
|
|
# (3) Process Mining
|
|
#
|
|
# input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv
|
|
# output:
|
|
#
|
|
# last mod: 2023-11-02, NW
|
|
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
|
|
|
#library(mtt)
|
|
devtools::load_all("../../../../software/mtt")
|
|
library(lattice)
|
|
library(bupaverse)
|
|
|
|
# Overall Research Question: How do museum visitors interact with the
|
|
# artworks presented on the MTT?
|
|
|
|
|
|
|
|
# What about popups?
|
|
|
|
|
|
# Distribution of bursts
|
|
# Can this be visualized in a nice way?
|
|
|
|
#--------------- (1) Read data ---------------
|
|
|
|
dat <- read.table("../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv",
|
|
sep = ";", header = TRUE)
|
|
dat$date <- as.POSIXct(dat$date)
|
|
dat$date.start <- as.POSIXct(dat$date.start)
|
|
dat$date.stop <- as.POSIXct(dat$date.stop)
|
|
dat$artwork <- sprintf("%03d", dat$artwork)
|
|
dat$event <- factor(dat$event, levels = c("move", "flipCard", "openTopic", "openPopup"))
|
|
|
|
# Add weekdays to data frame
|
|
|
|
dat$weekdays <- factor(weekdays(dat$date.start),
|
|
levels = c("Montag", "Dienstag", "Mittwoch",
|
|
"Donnerstag", "Freitag", "Samstag",
|
|
"Sonntag"),
|
|
labels = c("Monday", "Tuesday", "Wednesday",
|
|
"Thursday", "Friday", "Saturday",
|
|
"Sunday"))
|
|
|
|
names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start", "complete")
|
|
|
|
#--------------- (2) Descriptives ---------------
|
|
# How many events per topic, per trace, ...
|
|
# How many popups per artwork?
|
|
|
|
# Number of events per artwork
|
|
tab <- xtabs( ~ artwork + event, dat)
|
|
addmargins(tab)
|
|
|
|
proportions(tab, margin = "artwork")
|
|
proportions(tab, margin = "event")
|
|
|
|
barplot(t(proportions(tab, margin = "artwork")), las = 2)
|
|
|
|
# Proportion of events
|
|
proportions(xtabs( ~ event, dat))
|
|
# Mean proportion of event per trace
|
|
colMeans(proportions(xtabs( ~ trace + event, dat), margin = "trace"))
|
|
# Mean proportion of event per artwork
|
|
colMeans(proportions(tab, margin = "artwork"))
|
|
|
|
# Proportion of unclosed events
|
|
|
|
nrow(dat[is.na(dat$complete), ])
|
|
nrow(dat[is.na(dat$complete), ]) / nrow(dat)
|
|
|
|
|
|
#--------------- (3) Process Mining ---------------
|
|
|
|
#--------------- (3.1) Check data quality ---------------
|
|
|
|
alog <- activitylog(dat,
|
|
case_id = "trace",
|
|
activity_id = "event",
|
|
#resource_id = "case",
|
|
resource_id = "artwork",
|
|
timestamps = c("start", "complete"))
|
|
|
|
# process_map(alog, frequency("relative"))
|
|
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
|
|
|
|
#--------------- (3.2) Interactions for different artworks ---------------
|
|
# Do interaction patterns for events per trace look different for different
|
|
# artworks?
|
|
|
|
which.max(table(dat$artwork))
|
|
which.min(table(dat$artwork))
|
|
which.min(table(dat$artwork)[-c(71,72)])
|
|
|
|
alog080 <- activitylog(dat[dat$artwork == "080",],
|
|
case_id = "trace",
|
|
activity_id = "event",
|
|
resource_id = "artwork",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf")
|
|
|
|
alog087 <- activitylog(dat[dat$artwork == "087",],
|
|
case_id = "trace",
|
|
activity_id = "event",
|
|
resource_id = "artwork",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf")
|
|
|
|
alog504 <- activitylog(dat[dat$artwork == "504",],
|
|
case_id = "trace",
|
|
activity_id = "event",
|
|
resource_id = "artwork",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
|
|
|
|
#--------------- (3.3) Patterns of cases ---------------
|
|
|
|
# What kind of patterns do we have? Are their typical sequences for cases?
|
|
# Do case patterns look different for ...
|
|
# ... mornings and afternoons?
|
|
# ... weekdays and weekends?
|
|
# ... weekdays for "normal" and school vacation days?
|
|
# ... pre and post corona?
|
|
|
|
alog <- activitylog(dat,
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event.pdf")
|
|
|
|
alog080 <- activitylog(dat[dat$artwork == "080",],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf")
|
|
|
|
alog087 <- activitylog(dat[dat$artwork == "087",],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf")
|
|
|
|
### Mornings and afternoons
|
|
|
|
dat$tod <- ifelse(lubridate::hour(dat$start) > 13, "afternoon", "morning")
|
|
|
|
alog <- activitylog(dat[dat$tod == "morning",],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
|
|
|
|
alog <- activitylog(dat[dat$tod == "afternoon",],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf")
|
|
|
|
### Weekdays and weekends
|
|
|
|
dat$wd <- ifelse(dat$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday")
|
|
|
|
alog <- activitylog(dat[dat$wd == "weekend",],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
|
|
|
|
alog <- activitylog(dat[dat$wd == "weekday",],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf")
|
|
|
|
### Weekdays vs. school vacation weekdays
|
|
|
|
dat$wds <- ifelse(!is.na(dat$vacation), "vacation", "school")
|
|
dat$wds[dat$wd == "weekend"] <- NA
|
|
|
|
alog <- activitylog(dat[which(dat$wds == "school"),],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
|
|
|
|
alog <- activitylog(dat[which(dat$wds == "vacation"),],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf")
|
|
|
|
### Pre and post Corona
|
|
|
|
dat$corona <- ifelse(dat$date < "2020-03-14", "pre", "post")
|
|
|
|
alog <- activitylog(dat[which(dat$corona == "pre"),],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
|
|
|
|
alog <- activitylog(dat[which(dat$corona == "post"),],
|
|
case_id = "case",
|
|
activity_id = "event",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf")
|
|
|
|
#--------------- (3.4) Artwork sequences ---------------
|
|
# Order in which artworks are looked at
|
|
|
|
nart <- 5 # select 5 artworks randomly
|
|
alog <- activitylog(dat,#[dat$artwork %in% sample(unique(dat$artwork), nart), ],
|
|
case_id = "case",
|
|
activity_id = "artwork",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
#map <- process_map(alog, frequency("relative"))
|
|
|
|
## select cases with Vermeer
|
|
length(unique(dat[dat$artwork == "080", "case"]))
|
|
# 12615
|
|
case080 <- unique(dat[dat$artwork == "080", "case"])
|
|
tmp <- dat[dat$case %in% case080, ]
|
|
table(tmp$artwork)
|
|
# --> all :)
|
|
|
|
# select the ones most often (I am aiming for 10...)
|
|
barplot(table(tmp$artwork))
|
|
abline(h = 12000, col = "red")
|
|
which(table(tmp$artwork) > 12000)
|
|
|
|
often080 <- names(which(table(tmp$artwork) > 12000))
|
|
|
|
alog <- activitylog(dat[dat$artwork %in% often080, ],
|
|
case_id = "case",
|
|
activity_id = "artwork",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
|
|
|
|
|
|
#--------------- (3.5) Topics ---------------
|
|
|
|
# Are there certain topics that people are interested in more than others?
|
|
# Do these topic distributions differ for comparable artworks?
|
|
|
|
# Order of topics for Vermeer
|
|
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
|
# case_id = "case",
|
|
# activity_id = "topic",
|
|
# resource_id = "trace",
|
|
# timestamps = c("start", "complete"))
|
|
#
|
|
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
|
|
#
|
|
#
|
|
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
|
# case_id = "case",
|
|
# activity_id = "topicFile",
|
|
# resource_id = "trace",
|
|
# timestamps = c("start", "complete"))
|
|
#
|
|
# #process_map(alog080, frequency("relative"))
|
|
#
|
|
# # Comparable artwork
|
|
# alog083 <- activitylog(dat[dat$artwork == "083",],
|
|
# case_id = "case",
|
|
# activity_id = "topic",
|
|
# resource_id = "trace",
|
|
# timestamps = c("start", "complete"))
|
|
#
|
|
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
|
|
|
|
# artworks that have the same topics than Vermeer
|
|
which(rowSums(xtabs( ~ artwork + topic, dat[dat$topic %in%
|
|
c("artist", "details", "extra info", "komposition",
|
|
"licht und farbe", "thema"), ]) != 0) == 6)
|
|
|
|
#037 046 062 080 083 109
|
|
|
|
for (art in c("037", "046", "062", "080", "083", "109")) {
|
|
|
|
alog <- activitylog(dat[dat$artwork == art,],
|
|
case_id = "case",
|
|
activity_id = "topic",
|
|
resource_id = "trace",
|
|
timestamps = c("start", "complete"))
|
|
|
|
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
|
|
}
|
|
|
|
|
|
|
|
# Angewandte Kunst, Graphik, Gemälde, Kultur
|
|
|
|
|
|
c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde",
|
|
"Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "",
|
|
"Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst",
|
|
"Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "",
|
|
"Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde",
|
|
"Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde",
|
|
"Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde",
|
|
"Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur",
|
|
"Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde",
|
|
"", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde",
|
|
"Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur",
|
|
"Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur")
|
|
|
|
|
|
|
|
# BURSTS
|
|
which.max(table(dat$date))
|
|
tmp <- dat[dat$date == "2017-02-12", ]
|
|
|
|
# number of traces per case on 2017-02-12
|
|
rowSums(xtabs( ~ case + trace, tmp) != 0)
|
|
|
|
range(tmp$start)
|
|
startcut <- cut(tmp$start, 7)
|
|
xtabs( ~ case + startcut, tmp)
|
|
|
|
# distribution of cases over the day
|
|
colSums(xtabs( ~ case + startcut, tmp) != 0)
|
|
barplot(colSums(xtabs( ~ case + startcut, tmp) != 0))
|
|
|
|
aggregate(trace ~ case + startcut, tmp, length)
|
|
|
|
|
|
###########################################################################
|
|
# HELPER
|
|
|
|
map_as_pdf <- function(alog, file, type = frequency("relative")) {
|
|
map <- process_map(alog, type = type)
|
|
g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
|
|
rsvg::rsvg_pdf(g, file)
|
|
}
|
|
|