From ab064bf22be06869d85a5fbf11d9058231b7310f Mon Sep 17 00:00:00 2001 From: nwickel Date: Wed, 15 Nov 2023 15:54:23 +0100 Subject: [PATCH] Finished talk for meeting with AK --- code/00_current-anaylsis.R | 130 +++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 71 deletions(-) diff --git a/code/00_current-anaylsis.R b/code/00_current-anaylsis.R index 59c9462..e62e88f 100644 --- a/code/00_current-anaylsis.R +++ b/code/00_current-anaylsis.R @@ -19,39 +19,6 @@ library(bupaverse) # Overall Research Question: How do museum visitors interact with the # artworks presented on the MTT? -#--------------------------------------------------------------------------- -# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html -# TODO: checkout different specifications for process maps: - -# edeaR::filter_trace_frequency(percentage = 0.9) - -# processmapR::process_map(type_nodes = processmapR::frequency("absolute"), -# sec_nodes = processmapR::frequency("relative"), -# type_edges = processmapR::frequency("absolute"), -# sec_edges = processmapR::frequency("relative"), -# rankdir = "TB") - -# processmapR::trace_explorer(coverage = 1, -# type = "frequent", -# .abbreviate = T) - -# edeaR::resource_frequency("resource-activity") %>% -# plot() - -# edeaR::resource_frequency(level = "case") %>% -# plot() - -# edeaR::resource_specialisation(level = "activity") %>% -# plot() - -# Functions for conformance checking: -# https://bupaverse.github.io/processcheckR/ - -#--------------------------------------------------------------------------- - - - - # Distribution of bursts # Can this be visualized in a nice way? @@ -77,6 +44,11 @@ dat$weekdays <- factor(weekdays(dat$date.start), names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start", "complete") +dat$trail <- dat$trace +dat$trace <- NULL +# --> needs to be changed since "trace" is an unbuilt variable name in +# bupar + #--------------- (2) Descriptives --------------- # How many events per topic, per trace, ... # How many popups per artwork? @@ -103,7 +75,7 @@ dev.off() # Proportion of events proportions(xtabs( ~ event, dat)) # Mean proportion of event per trace -colMeans(proportions(xtabs( ~ trace + event, dat), margin = "trace")) +colMeans(proportions(xtabs( ~ trail + event, dat), margin = "trail")) # Mean proportion of event per artwork colMeans(proportions(tab, margin = "artwork")) @@ -121,11 +93,8 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat) #--------------- (3.1) Check data quality --------------- -dat$trace2 <- dat$trace -dat$trace <- NULL - alog <- activitylog(dat, - case_id = "trace2", + case_id = "trail", activity_id = "event", #resource_id = "case", resource_id = "artwork", @@ -144,16 +113,23 @@ processmapR::process_map(alogf, # alog, rankdir = "TB") alog_no_move <- alog[alog$event != "move", ] + +pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1447) -processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in% - sample(unique(alog_no_move$trace2), 400),], +processmapR::trace_explorer(alog_no_move[alog_no_move$trail %in% + sample(unique(alog_no_move$trail), 400),], coverage = 1, type = "frequent", abbreviate = T) +dev.off() + +pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") plot(ra_no_move) +dev.off() + ra <- edeaR::resource_frequency(alog, "resource-activity") plot(ra) @@ -174,7 +150,7 @@ which.min(table(dat$artwork)) which.min(table(dat$artwork)[-c(71,72)]) alog080 <- activitylog(dat[dat$artwork == "080",], - case_id = "trace", + case_id = "trail", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) @@ -182,7 +158,7 @@ alog080 <- activitylog(dat[dat$artwork == "080",], map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf") alog087 <- activitylog(dat[dat$artwork == "087",], - case_id = "trace", + case_id = "trail", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) @@ -190,7 +166,7 @@ alog087 <- activitylog(dat[dat$artwork == "087",], map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf") alog504 <- activitylog(dat[dat$artwork == "504",], - case_id = "trace", + case_id = "trail", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) @@ -209,23 +185,27 @@ map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") alog <- activitylog(dat, case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event.pdf") -alog080 <- activitylog(dat[dat$artwork == "080",], - case_id = "case", - activity_id = "event", - resource_id = "trace", - timestamps = c("start", "complete")) +alog_no_move <- alog[alog$event != "move", ] + +pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) +set.seed(1050) +processmapR::trace_explorer(alog_no_move[alog_no_move$trail %in% + sample(unique(alog_no_move$trail), 300),], + coverage = 1, type = "frequent", + abbreviate = T) +dev.off() map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf") alog087 <- activitylog(dat[dat$artwork == "087",], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf") @@ -237,7 +217,7 @@ dat$tod <- ifelse(lubridate::hour(dat$start) > 13, "afternoon", "morning") alog <- activitylog(dat[dat$tod == "morning",], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf") @@ -245,7 +225,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf") alog <- activitylog(dat[dat$tod == "afternoon",], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf") @@ -267,7 +247,7 @@ dat$wd <- ifelse(dat$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday" alog <- activitylog(dat[dat$wd == "weekend",], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf") @@ -275,7 +255,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf") alog <- activitylog(dat[dat$wd == "weekday",], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf") @@ -298,7 +278,7 @@ dat$wds[dat$wd == "weekend"] <- NA alog <- activitylog(dat[which(dat$wds == "school"),], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf") @@ -306,7 +286,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf") alog <- activitylog(dat[which(dat$wds == "vacation"),], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf") @@ -329,7 +309,7 @@ dat$corona <- ifelse(dat$date < "2020-03-14", "pre", "post") alog <- activitylog(dat[which(dat$corona == "pre"),], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf") @@ -337,7 +317,7 @@ map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf") alog <- activitylog(dat[which(dat$corona == "post"),], case_id = "case", activity_id = "event", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf") @@ -359,7 +339,7 @@ nart <- 5 # select 5 artworks randomly alog <- activitylog(dat,#[dat$artwork %in% sample(unique(dat$artwork), nart), ], case_id = "case", activity_id = "artwork", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) #map <- process_map(alog, frequency("relative")) @@ -382,12 +362,20 @@ often080 <- names(which(table(tmp$artwork) > 14000)) alog <- activitylog(dat[dat$artwork %in% often080, ], case_id = "case", activity_id = "artwork", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf") +pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) + +processmapR::trace_explorer(alog, + n_traces = 30, type = "frequent", + abbreviate = TRUE) + +dev.off() + #--------------- (3.5) Topics --------------- # Are there certain topics that people are interested in more than others? @@ -396,7 +384,7 @@ map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf") alog <- activitylog(dat[which(dat$event == "openTopic"),], case_id = "case", activity_id = "topic", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") @@ -405,7 +393,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") # alog080 <- activitylog(dat[dat$artwork == "080",], # case_id = "case", # activity_id = "topic", -# resource_id = "trace", +# resource_id = "trail", # timestamps = c("start", "complete")) # # map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf") @@ -414,7 +402,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") # alog080 <- activitylog(dat[dat$artwork == "080",], # case_id = "case", # activity_id = "topicFile", -# resource_id = "trace", +# resource_id = "trail", # timestamps = c("start", "complete")) # # #process_map(alog080, frequency("relative")) @@ -423,7 +411,7 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") # alog083 <- activitylog(dat[dat$artwork == "083",], # case_id = "case", # activity_id = "topic", -# resource_id = "trace", +# resource_id = "trail", # timestamps = c("start", "complete")) # # map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf") @@ -440,7 +428,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) { alog <- activitylog(dat[dat$event == "openTopic" & dat$artwork == art,], case_id = "case", activity_id = "topic", - resource_id = "trace", + resource_id = "trail", timestamps = c("start", "complete")) map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf")) @@ -471,7 +459,7 @@ which.max(table(dat$date)) tmp <- dat[dat$date == "2017-02-12", ] # number of traces per case on 2017-02-12 -rowSums(xtabs( ~ case + trace, tmp) != 0) +rowSums(xtabs( ~ case + trail, tmp) != 0) range(tmp$start) hours <- lubridate::hour(tmp$start) @@ -481,20 +469,20 @@ xtabs( ~ case + hours, tmp) colSums(xtabs( ~ case + hours, tmp) != 0) barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) -aggregate(trace ~ case + hours, tmp, length) +aggregate(trail ~ case + hours, tmp, length) -tmp <- aggregate(trace2 ~ case, dat, length) +tmp <- aggregate(trail ~ case, dat, length) tmp$date <- as.Date(dat[!duplicated(dat$case), "start"]) tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"]) -tmp[tmp$trace2 > 200, ] +tmp[tmp$trail > 200, ] -plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) +plot(trail ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) -lattice::barchart(trace2 ~ time, tmp, horizontal=F) +lattice::barchart(trail ~ time, tmp, horizontal=F)