Finished talk for meeting with AK

This commit is contained in:
Nora Wickelmaier 2023-11-15 15:54:23 +01:00
parent 3bdb138b69
commit ab064bf22b

View File

@ -19,39 +19,6 @@ library(bupaverse)
# Overall Research Question: How do museum visitors interact with the
# artworks presented on the MTT?
#---------------------------------------------------------------------------
# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html
# TODO: checkout different specifications for process maps:
# edeaR::filter_trace_frequency(percentage = 0.9)
# processmapR::process_map(type_nodes = processmapR::frequency("absolute"),
# sec_nodes = processmapR::frequency("relative"),
# type_edges = processmapR::frequency("absolute"),
# sec_edges = processmapR::frequency("relative"),
# rankdir = "TB")
# processmapR::trace_explorer(coverage = 1,
# type = "frequent",
# .abbreviate = T)
# edeaR::resource_frequency("resource-activity") %>%
# plot()
# edeaR::resource_frequency(level = "case") %>%
# plot()
# edeaR::resource_specialisation(level = "activity") %>%
# plot()
# Functions for conformance checking:
# https://bupaverse.github.io/processcheckR/
#---------------------------------------------------------------------------
# Distribution of bursts
# Can this be visualized in a nice way?
@ -77,6 +44,11 @@ dat$weekdays <- factor(weekdays(dat$date.start),
names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start", "complete")
dat$trail <- dat$trace
dat$trace <- NULL
# --> needs to be changed since "trace" is an unbuilt variable name in
# bupar
#--------------- (2) Descriptives ---------------
# How many events per topic, per trace, ...
# How many popups per artwork?
@ -103,7 +75,7 @@ dev.off()
# Proportion of events
proportions(xtabs( ~ event, dat))
# Mean proportion of event per trace
colMeans(proportions(xtabs( ~ trace + event, dat), margin = "trace"))
colMeans(proportions(xtabs( ~ trail + event, dat), margin = "trail"))
# Mean proportion of event per artwork
colMeans(proportions(tab, margin = "artwork"))
@ -121,11 +93,8 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat)
#--------------- (3.1) Check data quality ---------------
dat$trace2 <- dat$trace
dat$trace <- NULL
alog <- activitylog(dat,
case_id = "trace2",
case_id = "trail",
activity_id = "event",
#resource_id = "case",
resource_id = "artwork",
@ -144,16 +113,23 @@ processmapR::process_map(alogf, # alog,
rankdir = "TB")
alog_no_move <- alog[alog$event != "move", ]
pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1447)
processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in%
sample(unique(alog_no_move$trace2), 400),],
processmapR::trace_explorer(alog_no_move[alog_no_move$trail %in%
sample(unique(alog_no_move$trail), 400),],
coverage = 1, type = "frequent",
abbreviate = T)
dev.off()
pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
plot(ra_no_move)
dev.off()
ra <- edeaR::resource_frequency(alog, "resource-activity")
plot(ra)
@ -174,7 +150,7 @@ which.min(table(dat$artwork))
which.min(table(dat$artwork)[-c(71,72)])
alog080 <- activitylog(dat[dat$artwork == "080",],
case_id = "trace",
case_id = "trail",
activity_id = "event",
resource_id = "artwork",
timestamps = c("start", "complete"))
@ -182,7 +158,7 @@ alog080 <- activitylog(dat[dat$artwork == "080",],
map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf")
alog087 <- activitylog(dat[dat$artwork == "087",],
case_id = "trace",
case_id = "trail",
activity_id = "event",
resource_id = "artwork",
timestamps = c("start", "complete"))
@ -190,7 +166,7 @@ alog087 <- activitylog(dat[dat$artwork == "087",],
map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf")
alog504 <- activitylog(dat[dat$artwork == "504",],
case_id = "trace",
case_id = "trail",
activity_id = "event",
resource_id = "artwork",
timestamps = c("start", "complete"))
@ -209,23 +185,27 @@ map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
alog <- activitylog(dat,
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event.pdf")
alog080 <- activitylog(dat[dat$artwork == "080",],
case_id = "case",
activity_id = "event",
resource_id = "trace",
timestamps = c("start", "complete"))
alog_no_move <- alog[alog$event != "move", ]
pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1050)
processmapR::trace_explorer(alog_no_move[alog_no_move$trail %in%
sample(unique(alog_no_move$trail), 300),],
coverage = 1, type = "frequent",
abbreviate = T)
dev.off()
map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf")
alog087 <- activitylog(dat[dat$artwork == "087",],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf")
@ -237,7 +217,7 @@ dat$tod <- ifelse(lubridate::hour(dat$start) > 13, "afternoon", "morning")
alog <- activitylog(dat[dat$tod == "morning",],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
@ -245,7 +225,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
alog <- activitylog(dat[dat$tod == "afternoon",],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf")
@ -267,7 +247,7 @@ dat$wd <- ifelse(dat$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday"
alog <- activitylog(dat[dat$wd == "weekend",],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
@ -275,7 +255,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
alog <- activitylog(dat[dat$wd == "weekday",],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf")
@ -298,7 +278,7 @@ dat$wds[dat$wd == "weekend"] <- NA
alog <- activitylog(dat[which(dat$wds == "school"),],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
@ -306,7 +286,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
alog <- activitylog(dat[which(dat$wds == "vacation"),],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf")
@ -329,7 +309,7 @@ dat$corona <- ifelse(dat$date < "2020-03-14", "pre", "post")
alog <- activitylog(dat[which(dat$corona == "pre"),],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
@ -337,7 +317,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
alog <- activitylog(dat[which(dat$corona == "post"),],
case_id = "case",
activity_id = "event",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf")
@ -359,7 +339,7 @@ nart <- 5 # select 5 artworks randomly
alog <- activitylog(dat,#[dat$artwork %in% sample(unique(dat$artwork), nart), ],
case_id = "case",
activity_id = "artwork",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
#map <- process_map(alog, frequency("relative"))
@ -382,12 +362,20 @@ often080 <- names(which(table(tmp$artwork) > 14000))
alog <- activitylog(dat[dat$artwork %in% often080, ],
case_id = "case",
activity_id = "artwork",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
processmapR::trace_explorer(alog,
n_traces = 30, type = "frequent",
abbreviate = TRUE)
dev.off()
#--------------- (3.5) Topics ---------------
# Are there certain topics that people are interested in more than others?
@ -396,7 +384,7 @@ map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
alog <- activitylog(dat[which(dat$event == "openTopic"),],
case_id = "case",
activity_id = "topic",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
@ -405,7 +393,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
# alog080 <- activitylog(dat[dat$artwork == "080",],
# case_id = "case",
# activity_id = "topic",
# resource_id = "trace",
# resource_id = "trail",
# timestamps = c("start", "complete"))
#
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
@ -414,7 +402,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
# alog080 <- activitylog(dat[dat$artwork == "080",],
# case_id = "case",
# activity_id = "topicFile",
# resource_id = "trace",
# resource_id = "trail",
# timestamps = c("start", "complete"))
#
# #process_map(alog080, frequency("relative"))
@ -423,7 +411,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
# alog083 <- activitylog(dat[dat$artwork == "083",],
# case_id = "case",
# activity_id = "topic",
# resource_id = "trace",
# resource_id = "trail",
# timestamps = c("start", "complete"))
#
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
@ -440,7 +428,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) {
alog <- activitylog(dat[dat$event == "openTopic" & dat$artwork == art,],
case_id = "case",
activity_id = "topic",
resource_id = "trace",
resource_id = "trail",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
@ -471,7 +459,7 @@ which.max(table(dat$date))
tmp <- dat[dat$date == "2017-02-12", ]
# number of traces per case on 2017-02-12
rowSums(xtabs( ~ case + trace, tmp) != 0)
rowSums(xtabs( ~ case + trail, tmp) != 0)
range(tmp$start)
hours <- lubridate::hour(tmp$start)
@ -481,20 +469,20 @@ xtabs( ~ case + hours, tmp)
colSums(xtabs( ~ case + hours, tmp) != 0)
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))
aggregate(trace ~ case + hours, tmp, length)
aggregate(trail ~ case + hours, tmp, length)
tmp <- aggregate(trace2 ~ case, dat, length)
tmp <- aggregate(trail ~ case, dat, length)
tmp$date <- as.Date(dat[!duplicated(dat$case), "start"])
tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"])
tmp[tmp$trace2 > 200, ]
tmp[tmp$trail > 200, ]
plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
plot(trail ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
lattice::barchart(trace2 ~ time, tmp, horizontal=F)
lattice::barchart(trail ~ time, tmp, horizontal=F)