Finished talk for meeting with AK
This commit is contained in:
parent
3bdb138b69
commit
ab064bf22b
@ -19,39 +19,6 @@ library(bupaverse)
|
||||
# Overall Research Question: How do museum visitors interact with the
|
||||
# artworks presented on the MTT?
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html
|
||||
# TODO: checkout different specifications for process maps:
|
||||
|
||||
# edeaR::filter_trace_frequency(percentage = 0.9)
|
||||
|
||||
# processmapR::process_map(type_nodes = processmapR::frequency("absolute"),
|
||||
# sec_nodes = processmapR::frequency("relative"),
|
||||
# type_edges = processmapR::frequency("absolute"),
|
||||
# sec_edges = processmapR::frequency("relative"),
|
||||
# rankdir = "TB")
|
||||
|
||||
# processmapR::trace_explorer(coverage = 1,
|
||||
# type = "frequent",
|
||||
# .abbreviate = T)
|
||||
|
||||
# edeaR::resource_frequency("resource-activity") %>%
|
||||
# plot()
|
||||
|
||||
# edeaR::resource_frequency(level = "case") %>%
|
||||
# plot()
|
||||
|
||||
# edeaR::resource_specialisation(level = "activity") %>%
|
||||
# plot()
|
||||
|
||||
# Functions for conformance checking:
|
||||
# https://bupaverse.github.io/processcheckR/
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
# Distribution of bursts
|
||||
# Can this be visualized in a nice way?
|
||||
|
||||
@ -77,6 +44,11 @@ dat$weekdays <- factor(weekdays(dat$date.start),
|
||||
|
||||
names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start", "complete")
|
||||
|
||||
dat$trail <- dat$trace
|
||||
dat$trace <- NULL
|
||||
# --> needs to be changed since "trace" is an unbuilt variable name in
|
||||
# bupar
|
||||
|
||||
#--------------- (2) Descriptives ---------------
|
||||
# How many events per topic, per trace, ...
|
||||
# How many popups per artwork?
|
||||
@ -103,7 +75,7 @@ dev.off()
|
||||
# Proportion of events
|
||||
proportions(xtabs( ~ event, dat))
|
||||
# Mean proportion of event per trace
|
||||
colMeans(proportions(xtabs( ~ trace + event, dat), margin = "trace"))
|
||||
colMeans(proportions(xtabs( ~ trail + event, dat), margin = "trail"))
|
||||
# Mean proportion of event per artwork
|
||||
colMeans(proportions(tab, margin = "artwork"))
|
||||
|
||||
@ -121,11 +93,8 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat)
|
||||
|
||||
#--------------- (3.1) Check data quality ---------------
|
||||
|
||||
dat$trace2 <- dat$trace
|
||||
dat$trace <- NULL
|
||||
|
||||
alog <- activitylog(dat,
|
||||
case_id = "trace2",
|
||||
case_id = "trail",
|
||||
activity_id = "event",
|
||||
#resource_id = "case",
|
||||
resource_id = "artwork",
|
||||
@ -144,16 +113,23 @@ processmapR::process_map(alogf, # alog,
|
||||
rankdir = "TB")
|
||||
|
||||
alog_no_move <- alog[alog$event != "move", ]
|
||||
|
||||
pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
set.seed(1447)
|
||||
processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in%
|
||||
sample(unique(alog_no_move$trace2), 400),],
|
||||
processmapR::trace_explorer(alog_no_move[alog_no_move$trail %in%
|
||||
sample(unique(alog_no_move$trail), 400),],
|
||||
coverage = 1, type = "frequent",
|
||||
abbreviate = T)
|
||||
dev.off()
|
||||
|
||||
pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
|
||||
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
|
||||
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
|
||||
plot(ra_no_move)
|
||||
|
||||
dev.off()
|
||||
|
||||
ra <- edeaR::resource_frequency(alog, "resource-activity")
|
||||
plot(ra)
|
||||
|
||||
@ -174,7 +150,7 @@ which.min(table(dat$artwork))
|
||||
which.min(table(dat$artwork)[-c(71,72)])
|
||||
|
||||
alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
case_id = "trace",
|
||||
case_id = "trail",
|
||||
activity_id = "event",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
@ -182,7 +158,7 @@ alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf")
|
||||
|
||||
alog087 <- activitylog(dat[dat$artwork == "087",],
|
||||
case_id = "trace",
|
||||
case_id = "trail",
|
||||
activity_id = "event",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
@ -190,7 +166,7 @@ alog087 <- activitylog(dat[dat$artwork == "087",],
|
||||
map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf")
|
||||
|
||||
alog504 <- activitylog(dat[dat$artwork == "504",],
|
||||
case_id = "trace",
|
||||
case_id = "trail",
|
||||
activity_id = "event",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
@ -209,23 +185,27 @@ map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
|
||||
alog <- activitylog(dat,
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event.pdf")
|
||||
|
||||
alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
alog_no_move <- alog[alog$event != "move", ]
|
||||
|
||||
pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
set.seed(1050)
|
||||
processmapR::trace_explorer(alog_no_move[alog_no_move$trail %in%
|
||||
sample(unique(alog_no_move$trail), 300),],
|
||||
coverage = 1, type = "frequent",
|
||||
abbreviate = T)
|
||||
dev.off()
|
||||
|
||||
map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf")
|
||||
|
||||
alog087 <- activitylog(dat[dat$artwork == "087",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf")
|
||||
@ -237,7 +217,7 @@ dat$tod <- ifelse(lubridate::hour(dat$start) > 13, "afternoon", "morning")
|
||||
alog <- activitylog(dat[dat$tod == "morning",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
|
||||
@ -245,7 +225,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
|
||||
alog <- activitylog(dat[dat$tod == "afternoon",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf")
|
||||
@ -267,7 +247,7 @@ dat$wd <- ifelse(dat$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday"
|
||||
alog <- activitylog(dat[dat$wd == "weekend",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
|
||||
@ -275,7 +255,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
|
||||
alog <- activitylog(dat[dat$wd == "weekday",],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf")
|
||||
@ -298,7 +278,7 @@ dat$wds[dat$wd == "weekend"] <- NA
|
||||
alog <- activitylog(dat[which(dat$wds == "school"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
|
||||
@ -306,7 +286,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
|
||||
alog <- activitylog(dat[which(dat$wds == "vacation"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf")
|
||||
@ -329,7 +309,7 @@ dat$corona <- ifelse(dat$date < "2020-03-14", "pre", "post")
|
||||
alog <- activitylog(dat[which(dat$corona == "pre"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
|
||||
@ -337,7 +317,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
|
||||
alog <- activitylog(dat[which(dat$corona == "post"),],
|
||||
case_id = "case",
|
||||
activity_id = "event",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf")
|
||||
@ -359,7 +339,7 @@ nart <- 5 # select 5 artworks randomly
|
||||
alog <- activitylog(dat,#[dat$artwork %in% sample(unique(dat$artwork), nart), ],
|
||||
case_id = "case",
|
||||
activity_id = "artwork",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
#map <- process_map(alog, frequency("relative"))
|
||||
@ -382,12 +362,20 @@ often080 <- names(which(table(tmp$artwork) > 14000))
|
||||
alog <- activitylog(dat[dat$artwork %in% often080, ],
|
||||
case_id = "case",
|
||||
activity_id = "artwork",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
|
||||
|
||||
|
||||
pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
|
||||
|
||||
processmapR::trace_explorer(alog,
|
||||
n_traces = 30, type = "frequent",
|
||||
abbreviate = TRUE)
|
||||
|
||||
dev.off()
|
||||
|
||||
#--------------- (3.5) Topics ---------------
|
||||
|
||||
# Are there certain topics that people are interested in more than others?
|
||||
@ -396,7 +384,7 @@ map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
|
||||
alog <- activitylog(dat[which(dat$event == "openTopic"),],
|
||||
case_id = "case",
|
||||
activity_id = "topic",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
|
||||
@ -405,7 +393,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
|
||||
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topic",
|
||||
# resource_id = "trace",
|
||||
# resource_id = "trail",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
|
||||
@ -414,7 +402,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
|
||||
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topicFile",
|
||||
# resource_id = "trace",
|
||||
# resource_id = "trail",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
# #process_map(alog080, frequency("relative"))
|
||||
@ -423,7 +411,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
|
||||
# alog083 <- activitylog(dat[dat$artwork == "083",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topic",
|
||||
# resource_id = "trace",
|
||||
# resource_id = "trail",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
|
||||
@ -440,7 +428,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) {
|
||||
alog <- activitylog(dat[dat$event == "openTopic" & dat$artwork == art,],
|
||||
case_id = "case",
|
||||
activity_id = "topic",
|
||||
resource_id = "trace",
|
||||
resource_id = "trail",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
|
||||
@ -471,7 +459,7 @@ which.max(table(dat$date))
|
||||
tmp <- dat[dat$date == "2017-02-12", ]
|
||||
|
||||
# number of traces per case on 2017-02-12
|
||||
rowSums(xtabs( ~ case + trace, tmp) != 0)
|
||||
rowSums(xtabs( ~ case + trail, tmp) != 0)
|
||||
|
||||
range(tmp$start)
|
||||
hours <- lubridate::hour(tmp$start)
|
||||
@ -481,20 +469,20 @@ xtabs( ~ case + hours, tmp)
|
||||
colSums(xtabs( ~ case + hours, tmp) != 0)
|
||||
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))
|
||||
|
||||
aggregate(trace ~ case + hours, tmp, length)
|
||||
aggregate(trail ~ case + hours, tmp, length)
|
||||
|
||||
|
||||
|
||||
|
||||
tmp <- aggregate(trace2 ~ case, dat, length)
|
||||
tmp <- aggregate(trail ~ case, dat, length)
|
||||
tmp$date <- as.Date(dat[!duplicated(dat$case), "start"])
|
||||
tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"])
|
||||
|
||||
tmp[tmp$trace2 > 200, ]
|
||||
tmp[tmp$trail > 200, ]
|
||||
|
||||
plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
|
||||
plot(trail ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
|
||||
|
||||
lattice::barchart(trace2 ~ time, tmp, horizontal=F)
|
||||
lattice::barchart(trail ~ time, tmp, horizontal=F)
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user