diff --git a/code/00_current-anaylsis.R b/code/00_current-anaylsis.R index ec0fae9..59c9462 100644 --- a/code/00_current-anaylsis.R +++ b/code/00_current-anaylsis.R @@ -5,7 +5,7 @@ # (3) Process Mining # # input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv -# output: +# output: # # last mod: 2023-11-02, NW @@ -19,9 +19,37 @@ library(bupaverse) # Overall Research Question: How do museum visitors interact with the # artworks presented on the MTT? +#--------------------------------------------------------------------------- +# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html +# TODO: checkout different specifications for process maps: + +# edeaR::filter_trace_frequency(percentage = 0.9) + +# processmapR::process_map(type_nodes = processmapR::frequency("absolute"), +# sec_nodes = processmapR::frequency("relative"), +# type_edges = processmapR::frequency("absolute"), +# sec_edges = processmapR::frequency("relative"), +# rankdir = "TB") + +# processmapR::trace_explorer(coverage = 1, +# type = "frequent", +# .abbreviate = T) + +# edeaR::resource_frequency("resource-activity") %>% +# plot() + +# edeaR::resource_frequency(level = "case") %>% +# plot() + +# edeaR::resource_specialisation(level = "activity") %>% +# plot() + +# Functions for conformance checking: +# https://bupaverse.github.io/processcheckR/ + +#--------------------------------------------------------------------------- -# What about popups? # Distribution of bursts @@ -93,8 +121,11 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat) #--------------- (3.1) Check data quality --------------- +dat$trace2 <- dat$trace +dat$trace <- NULL + alog <- activitylog(dat, - case_id = "trace", + case_id = "trace2", activity_id = "event", #resource_id = "case", resource_id = "artwork", @@ -103,6 +134,37 @@ alog <- activitylog(dat, # process_map(alog, frequency("relative")) map_as_pdf(alog, file = "../figures/pm_trace-event.pdf") +alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) + +processmapR::process_map(alogf, # alog, + type_nodes = processmapR::frequency("absolute"), + sec_nodes = processmapR::frequency("relative"), + type_edges = processmapR::frequency("absolute"), + sec_edges = processmapR::frequency("relative"), + rankdir = "TB") + +alog_no_move <- alog[alog$event != "move", ] +set.seed(1447) +processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in% + sample(unique(alog_no_move$trace2), 400),], + coverage = 1, type = "frequent", + abbreviate = T) + +ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") +levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") +plot(ra_no_move) + +ra <- edeaR::resource_frequency(alog, "resource-activity") +plot(ra) + +heatmap(xtabs(relative_activity ~ artwork + event, ra)) +heatmap(xtabs(relative_resource ~ artwork + event, ra_no_move)) +heatmap(xtabs(relative_activity ~ artwork + event, ra_no_move)) + +aggregate(relative_activity ~ event, ra, sum) +aggregate(relative_resource ~ artwork, ra, sum) + + #--------------- (3.2) Interactions for different artworks --------------- # Do interaction patterns for events per trace look different for different # artworks? @@ -345,25 +407,25 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") # activity_id = "topic", # resource_id = "trace", # timestamps = c("start", "complete")) -# +# # map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf") -# -# +# +# # alog080 <- activitylog(dat[dat$artwork == "080",], # case_id = "case", # activity_id = "topicFile", # resource_id = "trace", # timestamps = c("start", "complete")) -# +# # #process_map(alog080, frequency("relative")) -# +# # # Comparable artwork # alog083 <- activitylog(dat[dat$artwork == "083",], # case_id = "case", # activity_id = "topic", # resource_id = "trace", # timestamps = c("start", "complete")) -# +# # map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf") # artworks that have the same topics than Vermeer @@ -371,7 +433,7 @@ which(rowSums(xtabs( ~ artwork + topic, dat[dat$topic %in% c("artist", "details", "extra info", "komposition", "licht und farbe", "thema"), ]) != 0) == 6) -#037 046 062 080 083 109 +#037 046 062 080 083 109 for (art in c("037", "046", "062", "080", "083", "109")) { @@ -380,7 +442,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) { activity_id = "topic", resource_id = "trace", timestamps = c("start", "complete")) - + map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf")) } @@ -412,14 +474,28 @@ tmp <- dat[dat$date == "2017-02-12", ] rowSums(xtabs( ~ case + trace, tmp) != 0) range(tmp$start) -startcut <- cut(tmp$start, 7) -xtabs( ~ case + startcut, tmp) +hours <- lubridate::hour(tmp$start) +xtabs( ~ case + hours, tmp) # distribution of cases over the day -colSums(xtabs( ~ case + startcut, tmp) != 0) -barplot(colSums(xtabs( ~ case + startcut, tmp) != 0)) +colSums(xtabs( ~ case + hours, tmp) != 0) +barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) + +aggregate(trace ~ case + hours, tmp, length) + + + + +tmp <- aggregate(trace2 ~ case, dat, length) +tmp$date <- as.Date(dat[!duplicated(dat$case), "start"]) +tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"]) + +tmp[tmp$trace2 > 200, ] + +plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) + +lattice::barchart(trace2 ~ time, tmp, horizontal=F) -aggregate(trace ~ case + startcut, tmp, length) ########################################################################### diff --git a/code/03_plots_haum.R b/code/03_plots_haum.R index d12b674..34bbaac 100644 --- a/code/03_plots_haum.R +++ b/code/03_plots_haum.R @@ -210,8 +210,8 @@ pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) foodweb(where = "package:mtt", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", "extract_topics", "add_topic"), - expand.ybox = 1.8, #cex = .6, - border = TRUE, + #expand.ybox = 1.8, #cex = .6, + #border = TRUE, #boxcolor = "gray", lwd = 2, mai = c(0, 0, 0, 0)) dev.off()