Prepared slides for meeting with BB
This commit is contained in:
parent
b81a3e984c
commit
d4e9676dd3
370
code/00_current-anaylsis.R
Normal file
370
code/00_current-anaylsis.R
Normal file
@ -0,0 +1,370 @@
|
||||
# 00_current_analysis.R
|
||||
#
|
||||
# content: (1) Read event log data
|
||||
# (2) Descriptives
|
||||
# (3) Process Mining
|
||||
#
|
||||
# input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv
|
||||
# output:
|
||||
#
|
||||
# last mod: 2023-11-02, NW
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
||||
|
||||
#library(mtt)
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
library(lattice)
|
||||
library(bupaverse)
|
||||
|
||||
# Overall Research Question: How do museum visitors interact with the
|
||||
# artworks presented on the MTT?
|
||||
|
||||
|
||||
|
||||
# What about popups?
|
||||
|
||||
|
||||
# Distribution of bursts
|
||||
# Can this be visualized in a nice way?
|
||||
|
||||
#--------------- (1) Read data ---------------
|
||||
|
||||
dat <- read.table("../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat$date <- as.POSIXct(dat$date)
|
||||
dat$date.start <- as.POSIXct(dat$date.start)
|
||||
dat$date.stop <- as.POSIXct(dat$date.stop)
|
||||
dat$artwork <- sprintf("%03d", dat$artwork)
|
||||
dat$event <- factor(dat$event, levels = c("move", "flipCard", "openTopic", "openPopup"))
|
||||
|
||||
# Add weekdays to data frame
|
||||
|
||||
dat$weekdays <- factor(weekdays(dat$date.start),
|
||||
levels = c("Montag", "Dienstag", "Mittwoch",
|
||||
"Donnerstag", "Freitag", "Samstag",
|
||||
"Sonntag"),
|
||||
labels = c("Monday", "Tuesday", "Wednesday",
|
||||
"Thursday", "Friday", "Saturday",
|
||||
"Sunday"))
|
||||
|
||||
names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start", "complete")
|
||||
|
||||
#--------------- (2) Descriptives ---------------
|
||||
# How many events per topic, per trace, ...
|
||||
# How many popups per artwork?
|
||||
|
||||
# Number of events per artwork
|
||||
tab <- xtabs( ~ artwork + event, dat)
|
||||
addmargins(tab)
|
||||
|
||||
proportions(tab, margin = "artwork")
|
||||
proportions(tab, margin = "event")
|
||||
|
||||
barplot(t(proportions(tab, margin = "artwork")), las = 2)
|
||||
|
||||
# Proportion of events
|
||||
proportions(xtabs( ~ event, dat))
|
||||
# Mean proportion of event per trace
|
||||
colMeans(proportions(xtabs( ~ trace + event, dat), margin = "trace"))
|
||||
# Mean proportion of event per artwork
|
||||
colMeans(proportions(tab, margin = "artwork"))
|
||||
|
||||
# Proportion of unclosed events
|
||||
|
||||
nrow(dat[is.na(dat$complete), ])
|
||||
nrow(dat[is.na(dat$complete), ]) / nrow(dat)
|
||||
|
||||
|
||||
#--------------- (3) Process Mining ---------------
|
||||
|
||||
#--------------- (3.1) Check data quality ---------------
|
||||
|
||||
alog <- activitylog(dat,
|
||||
case_id = "trace",
|
||||
activity_id = "event",
|
||||
#resource_id = "case",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
# process_map(alog, frequency("relative"))
|
||||
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
|
||||
|
||||
#--------------- (3.2) Interactions for different artworks ---------------
|
||||
# Do interaction patterns for events per trace look different for different
|
||||
# artworks?
|
||||
|
||||
which.max(table(dat$artwork))
|
||||
which.min(table(dat$artwork))
|
||||
which.min(table(dat$artwork)[-c(71,72)])
|
||||
|
||||
alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
case_id = "trace",
|
||||
activity_id = "event",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf")
|
||||
|
||||
alog087 <- activitylog(dat[dat$artwork == "087",],
|
||||
case_id = "trace",
|
||||
activity_id = "event",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf")
|
||||
|
||||
alog504 <- activitylog(dat[dat$artwork == "504",],
|
||||
case_id = "trace",
|
||||
activity_id = "event",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
|
||||
|
||||
#--------------- (3.3) Patterns of cases ---------------
|
||||
|
||||
# What kind of patterns do we have? Are their typical sequences for cases?
|
||||
# Do case patterns look different for ...
|
||||
# ... mornings and afternoons?
|
||||
# ... weekdays and weekends?
|
||||
# ... weekdays for "normal" and school vacation days?
|
||||
# ... pre and post corona?
|
||||
|
||||
alog <- activitylog(dat,
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event.pdf")
|
||||
|
||||
alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf")
|
||||
|
||||
alog087 <- activitylog(dat[dat$artwork == "087",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf")
|
||||
|
||||
### Mornings and afternoons
|
||||
|
||||
dat$tod <- ifelse(lubridate::hour(dat$start) > 13, "afternoon", "morning")
|
||||
|
||||
alog <- activitylog(dat[dat$tod == "morning",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
|
||||
|
||||
alog <- activitylog(dat[dat$tod == "afternoon",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf")
|
||||
|
||||
### Weekdays and weekends
|
||||
|
||||
dat$wd <- ifelse(dat$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday")
|
||||
|
||||
alog <- activitylog(dat[dat$wd == "weekend",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
|
||||
|
||||
alog <- activitylog(dat[dat$wd == "weekday",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf")
|
||||
|
||||
### Weekdays vs. school vacation weekdays
|
||||
|
||||
dat$wds <- ifelse(!is.na(dat$vacation), "vacation", "school")
|
||||
dat$wds[dat$wd == "weekend"] <- NA
|
||||
|
||||
alog <- activitylog(dat[which(dat$wds == "school"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
|
||||
|
||||
alog <- activitylog(dat[which(dat$wds == "vacation"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf")
|
||||
|
||||
### Pre and post Corona
|
||||
|
||||
dat$corona <- ifelse(dat$date < "2020-03-14", "pre", "post")
|
||||
|
||||
alog <- activitylog(dat[which(dat$corona == "pre"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
|
||||
|
||||
alog <- activitylog(dat[which(dat$corona == "post"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf")
|
||||
|
||||
#--------------- (3.4) Artwork sequences ---------------
|
||||
# Order in which artworks are looked at
|
||||
|
||||
nart <- 5 # select 5 artworks randomly
|
||||
alog <- activitylog(dat,#[dat$artwork %in% sample(unique(dat$artwork), nart), ],
|
||||
case_id = "case",
|
||||
activity_id = "artwork",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
#map <- process_map(alog, frequency("relative"))
|
||||
|
||||
## select cases with Vermeer
|
||||
length(unique(dat[dat$artwork == "080", "case"]))
|
||||
# 12615
|
||||
case080 <- unique(dat[dat$artwork == "080", "case"])
|
||||
tmp <- dat[dat$case %in% case080, ]
|
||||
table(tmp$artwork)
|
||||
# --> all :)
|
||||
|
||||
# select the ones most often (I am aiming for 10...)
|
||||
barplot(table(tmp$artwork))
|
||||
abline(h = 12000, col = "red")
|
||||
which(table(tmp$artwork) > 12000)
|
||||
|
||||
often080 <- names(which(table(tmp$artwork) > 12000))
|
||||
|
||||
alog <- activitylog(dat[dat$artwork %in% often080, ],
|
||||
case_id = "case",
|
||||
activity_id = "artwork",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
|
||||
|
||||
|
||||
#--------------- (3.5) Topics ---------------
|
||||
|
||||
# Are there certain topics that people are interested in more than others?
|
||||
# Do these topic distributions differ for comparable artworks?
|
||||
|
||||
# Order of topics for Vermeer
|
||||
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topic",
|
||||
# resource_id = "trace",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
|
||||
#
|
||||
#
|
||||
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topicFile",
|
||||
# resource_id = "trace",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
# #process_map(alog080, frequency("relative"))
|
||||
#
|
||||
# # Comparable artwork
|
||||
# alog083 <- activitylog(dat[dat$artwork == "083",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topic",
|
||||
# resource_id = "trace",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
|
||||
|
||||
# artworks that have the same topics than Vermeer
|
||||
which(rowSums(xtabs( ~ artwork + topic, dat[dat$topic %in%
|
||||
c("artist", "details", "extra info", "komposition",
|
||||
"licht und farbe", "thema"), ]) != 0) == 6)
|
||||
|
||||
#037 046 062 080 083 109
|
||||
|
||||
for (art in c("037", "046", "062", "080", "083", "109")) {
|
||||
|
||||
alog <- activitylog(dat[dat$artwork == art,],
|
||||
case_id = "case",
|
||||
activity_id = "topic",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Angewandte Kunst, Graphik, Gemälde, Kultur
|
||||
|
||||
|
||||
c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde",
|
||||
"Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "",
|
||||
"Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst",
|
||||
"Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "",
|
||||
"Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde",
|
||||
"Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde",
|
||||
"Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde",
|
||||
"Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur",
|
||||
"Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde",
|
||||
"", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde",
|
||||
"Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur",
|
||||
"Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur")
|
||||
|
||||
|
||||
|
||||
# BURSTS
|
||||
which.max(table(dat$date))
|
||||
tmp <- dat[dat$date == "2017-02-12", ]
|
||||
|
||||
# number of traces per case on 2017-02-12
|
||||
rowSums(xtabs( ~ case + trace, tmp) != 0)
|
||||
|
||||
range(tmp$start)
|
||||
startcut <- cut(tmp$start, 7)
|
||||
xtabs( ~ case + startcut, tmp)
|
||||
|
||||
# distribution of cases over the day
|
||||
colSums(xtabs( ~ case + startcut, tmp) != 0)
|
||||
barplot(colSums(xtabs( ~ case + startcut, tmp) != 0))
|
||||
|
||||
aggregate(trace ~ case + startcut, tmp, length)
|
||||
|
||||
|
||||
###########################################################################
|
||||
# HELPER
|
||||
|
||||
map_as_pdf <- function(alog, file, type = frequency("relative")) {
|
||||
map <- process_map(alog, type = type)
|
||||
g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
|
||||
rsvg::rsvg_pdf(g, file)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user