# 13_dfgs-case-clusters.R
#
# content: (1) Explore traces and the DFG of a single case cluster
#          (2) Export DFGs for clusters
#          (3) Share of activity-log rows kept by the flow filter, per cluster
#
# input:  results/haum/tmp_user-navigation.RData
# output: results/processmaps/dfg_cases_cluster<i>_R.pdf
#
# last mod: 2024-03-19

# NOTE(review): machine-specific absolute path; every relative path below is
# resolved against it. Kept unchanged so the existing workflow still runs.
setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

# `res` is expected to be provided by this RData file.
load("results/haum/tmp_user-navigation.RData")

#dat <- read.table("results/haum/eventlogs_2019_case-clusters.csv", header = TRUE, sep = ";")
dat <- res

dat$start    <- as.POSIXct(dat$date.start)
dat$complete <- as.POSIXct(dat$date.stop)

# Build the bupaR activity log for the rows of `log` that belong to `cluster`.
#
# log:     data frame with the columns cluster, case, item, path, start and
#          complete
# cluster: value of log$cluster to select
#
# Returns the object created by bupaR::activitylog().
make_cluster_log <- function(log, cluster) {
  # which() drops NA comparisons; plain logical indexing would turn every NA
  # in log$cluster into a row consisting only of NAs.
  rows <- which(log$cluster == cluster)
  bupaR::activitylog(log[rows, ],
                     case_id     = "case",
                     activity_id = "item",
                     resource_id = "path",
                     timestamps  = c("start", "complete"))
}

#--------------- (1) Explore a single cluster ---------------

# The original script used `cluster` here without defining it (presumably it
# was set by hand in an interactive session). Default to cluster 1 only if
# nothing has defined it yet -- TODO confirm the intended cluster.
if (!exists("cluster")) {
  cluster <- 1
}

alog <- make_cluster_log(dat, cluster)

processmapR::trace_explorer(alog, n_traces = 25)

tr  <- bupaR::traces(alog)
tab <- table(tr$absolute_frequency)

# Share of traces that fall into the first (lowest) absolute-frequency class
tab[1] / nrow(tr)

alog |>
  edeaR::filter_infrequent_flows(min_n = 20) |>
  processmapR::process_map()

#--------------- (2) Export DFGs for clusters ---------------

## Export DFGs for clusters

mycols   <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")

# min_n passed to edeaR::filter_infrequent_flows(), one entry per cluster
ns <- c(30, 20, 10, 5, 30)

# The three vectors are indexed in parallel by cluster number.
stopifnot(length(mycols) == length(ns), length(cl_names) == length(ns))

out_dir <- "results/processmaps"
# Make sure the export target exists before writing any PDF into it.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (i in seq_along(ns)) {

  alog <- make_cluster_log(dat, i)

  dfg <- processmapR::process_map(
    edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
    type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
    sec_nodes  = processmapR::frequency("absolute"),
    type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
    sec_edges  = processmapR::frequency("absolute"),
    rankdir    = "LR",
    render     = FALSE)

  processmapR::export_map(
    dfg,
    file_name = file.path(out_dir, paste0("dfg_cases_cluster", i, "_R.pdf")),
    file_type = "pdf",
    title     = cl_names[i])
}

# Notes per cluster (presumably candidate min_n values; they differ from `ns`
# above -- TODO confirm which ones are final):
# cluster 1: 50
# cluster 2: 30 or 20
# cluster 3: 20 - 30
# cluster 4: 5
# cluster 5: 20

#--------------- (3) Share of the log kept by the filter ---------------

# Fraction of activity-log rows of one cluster that remain after
# edeaR::filter_infrequent_flows() has been applied.
#
# log:     data frame with the columns cluster, case, item, path, start and
#          complete
# cluster: value of log$cluster to select
# min_n:   threshold handed to edeaR::filter_infrequent_flows()
#
# Returns nrow(filtered log) / nrow(unfiltered log) as a single number.
get_percent_variants <- function(log, cluster, min_n) {
  alog <- make_cluster_log(log, cluster)
  nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) / nrow(alog)
}

perc <- vapply(
  seq_along(ns),
  function(i) get_percent_variants(log = dat, cluster = i, min_n = ns[i]),
  numeric(1))