# 11_investigate-variants.R
#
# content: (1) Read data
#          (2) Investigate variants
#
# input:  results/haum/eventlogs_pre-corona_cleaned.RData
# output: results/figures/freq-traces.pdf
#
# last mod: 2024-03-12

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

library(bupaverse)

#--------------- (1) Read data ---------------

# Expected to restore the event log data frame `dat` used below
# (NOTE(review): object name is not visible in the load() call -- confirm).
load("results/haum/eventlogs_pre-corona_cleaned.RData")

#--------------- (2) Investigate variants ---------------

# Copy the timestamp columns to the names passed to activitylog() below
dat$start    <- dat$date.start
dat$complete <- dat$date.stop

alog <- activitylog(dat,
                    case_id     = "case",
                    activity_id = "item",
                    resource_id = "path",
                    timestamps  = c("start", "complete"))

# Explicit print(): the plot object is only drawn by auto-printing at top
# level, so without it the PDF stays empty when the script is source()d.
pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
print(trace_explorer(alog, n_traces = 25))
# --> sequences of artworks are just too rare
dev.off()

trace_explorer(alog, n_traces = 10, type = "infrequent")

tr <- traces(alog)

# Split every trace string into its comma-separated activities once and
# reuse the result for both summaries below
trace_items <- strsplit(tr$trace, ",", fixed = TRUE)

# Number of activities per trace
trace_length <- lengths(trace_items)
tr[trace_length > 10, ]

# Number of distinct activities per trace
trace_varied <- vapply(trace_items,
                       function(x) length(unique(x)),
                       integer(1))
tr[trace_varied > 1, ]
table(tr[trace_varied > 2, "absolute_frequency"])
table(tr[trace_varied > 3, "absolute_frequency"])

summary(tr$absolute_frequency)
vioplot::vioplot(tr$absolute_frequency)

# Power law for frequencies of traces
tab <- table(tr$absolute_frequency)
x <- as.numeric(tab)          # how many traces share a given frequency
y <- as.numeric(names(tab))   # the frequency value itself

plot(x, y, log = "xy")

# Linear fit on the log-log scale, back-transformed for plotting
p1  <- lm(log(y) ~ log(x))
pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
lines(x, pre)

# Look at individual traces as examples
tr[trace_varied == 5 & trace_length > 50, ]
# --> every variant exists only once, of course