# 11_investigate-variants.R
#
# content: (1) Read data
#          (2) Extract characteristics for cases
#          (3) Select features for navigation behavior
#          (4) Export data frames
#
# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
# output: results/haum/eventlogs_pre-corona_case-clusters.csv
#
# last mod: 2024-03-08
#
# NOTE(review): the content list and the input/output paths above do not match
# what this script visibly does (it loads an .RData file and exports PDF
# process maps) -- presumably copied from another script; confirm and update.

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

#--------------- (1) Read data ---------------

# NOTE(review): `res` is used right after this load() and `datcase` further
# below; neither is created in this script, so both are presumably provided by
# the loaded file or an earlier session -- confirm.
# NOTE(review): activitylog(), trace_explorer(), traces(), process_map(),
# export_map() and frequency() are called unqualified and no library() call is
# visible here -- presumably the process-mining packages are attached
# beforehand; confirm.
load("results/haum/eventlogs_pre-corona_cleaned.RData")

#--------------- (4) Investigate variants ---------------

# Expose the start/stop dates under the column names passed as timestamps.
res$start <- res$date.start
res$complete <- res$date.stop

# Activity log over all events: cases = `case`, activities = `item`,
# resources = `path`.
alog <- activitylog(res,
                    case_id = "case",
                    activity_id = "item",
                    resource_id = "path",
                    timestamps = c("start", "complete"))

trace_explorer(alog, n_traces = 25)
# --> sequences of artworks are just too rare

tr <- traces(alog)

# Split every trace string once and reuse the pieces for both summaries.
trace_items <- strsplit(tr$trace, ",")

# Number of comma-separated entries per trace.
trace_length <- lengths(trace_items)
tr[trace_length > 10, ]

# Number of distinct entries per trace.
trace_varied <- pbapply::pbsapply(trace_items,
                                  function(x) length(unique(x)))
tr[trace_varied > 1, ]
table(tr[trace_varied > 2, "absolute_frequency"])
table(tr[trace_varied > 3, "absolute_frequency"])

summary(tr$absolute_frequency)
vioplot::vioplot(tr$absolute_frequency)

# Power law for frequencies of traces
tab <- table(tr$absolute_frequency)
x <- as.numeric(tab)         # how many traces share a given frequency
y <- as.numeric(names(tab))  # the frequency value itself

plot(x, y, log = "xy")
# Linear fit on the log-log scale, back-transformed for plotting.
p1 <- lm(log(y) ~ log(x))
pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
lines(x, pre)

# Look at individual traces as examples
tr[trace_varied == 5 & trace_length > 50, ]
# --> every variant exists only once, of course
datcase[datcase$nitems == 5 & datcase$length > 50, ]

# Median of every column of `datcase` except columns 1 and 9.
pbapply::pbsapply(datcase[, -c(1, 9)], median)

#ex <- datcase[datcase$nitems == 4 & datcase$length == 15,]
ex <- datcase[datcase$nitems == 5, ]

# Draw up to 20 example cases. seq_len() and the capped sample size keep this
# working when fewer than 20 (or zero) rows match; with 20 or more rows the
# draw is the same as sample(1:nrow(ex), 20).
n_examples <- min(20L, nrow(ex))
ex <- ex[sample(seq_len(nrow(ex)), n_examples), ]
# --> pretty randomly chosen...
# TODO: (no description given)

# Flag the example cases in `ex` that contain item "080" or item "503".
# One vectorized membership test replaces growing `case_ids` with c() inside a
# loop; the result is the same logical vector, aligned with ex$case.
target_items <- c("080", "503")
hits_target <- res$item %in% target_items & !is.na(res$case)
cases_with_target <- unique(res$case[hits_target])
case_ids <- ex$case %in% cases_with_target

cases <- ex$case[case_ids]

# Export one directly-follows process map (PDF) per selected case.
for (case in cases) {

  # Activity log restricted to the events of this single case.
  alog <- activitylog(res[res$case == case, ],
                      case_id = "case",
                      activity_id = "item",
                      resource_id = "path",
                      timestamps = c("start", "complete"))

  # render = FALSE: the map object is built but not displayed; it is only
  # handed to export_map() below.
  dfg <- process_map(alog,
                     type_nodes = frequency("absolute",
                                            color_scale = "Greys"),
                     type_edges = frequency("absolute",
                                            color_edges = "#FF6900"),
                     rankdir = "LR",
                     render = FALSE)

  export_map(dfg,
             file_name = paste0("results/processmaps/dfg_example_cases_",
                                case, "_R.pdf"),
             file_type = "pdf",
             title = paste("Case", case))
}