From 42cb777d36299e93988bc2bcbba01483d7da1cb4 Mon Sep 17 00:00:00 2001 From: nwickel Date: Wed, 27 Mar 2024 10:07:36 +0100 Subject: [PATCH] Updated DFGs for case clusters; exported data for tables --- code/12_dfgs-case-clusters.R | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/code/12_dfgs-case-clusters.R b/code/12_dfgs-case-clusters.R index 232b2c6..a5b2083 100644 --- a/code/12_dfgs-case-clusters.R +++ b/code/12_dfgs-case-clusters.R @@ -23,7 +23,7 @@ dat <- res dat$start <- as.POSIXct(dat$date.start) dat$complete <- as.POSIXct(dat$date.stop) -alog <- bupaR::activitylog(dat[dat$cluster == cluster, ], +alog <- bupaR::activitylog(dat[dat$cluster == 4, ], case_id = "case", activity_id = "item", resource_id = "path", @@ -36,7 +36,7 @@ tab <- table(tr$absolute_frequency) tab[1] / nrow(tr) -alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map() +alog |> edeaR::filter_infrequent_flows(min_n = 5) |> processmapR::process_map() #--------------- (2) Export DFGs for clusters --------------- @@ -44,6 +44,7 @@ mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F") cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info") ns <- c(30, 20, 10, 5, 30) +#ns <- c(20, 20, 20, 5, 20) for (i in 1:5) { @@ -52,7 +53,7 @@ for (i in 1:5) { activity_id = "item", resource_id = "path", timestamps = c("start", "complete")) - + dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), type_nodes = processmapR::frequency("relative", color_scale = "Greys"), sec_nodes = processmapR::frequency("absolute"), @@ -67,27 +68,30 @@ for (i in 1:5) { title = cl_names[i]) } -# cluster 1: 50 -# cluster 2: 30 o. 20 -# cluster 3: 20 - 30 -# cluster 4: 5 -# cluster 5: 20 +# What data is used and how many traces are unique -get_percent_variants <- function(log, cluster, min_n) { +perc_filter <- numeric(5) +perc_unqiue <- numeric(5) - alog <- bupaR::activitylog(log[log$cluster == cluster, ], +for (i in 1:5) { + + alog <- bupaR::activitylog(dat[dat$cluster == i, ], case_id = "case", activity_id = "item", resource_id = "path", timestamps = c("start", "complete")) - nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) / + perc_filter[i] <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i])) / nrow(alog) + + cs <- bupaR::cases(alog) + infreq_tr <- names(which(table(cs$trace) == 1)) + infreq_cs <- cs$case[cs$trace %in% infreq_tr] + + perc_unqiue[i] <- nrow(alog[alog$case %in% infreq_cs, ]) / nrow(alog) + } -perc <- numeric(5) - -for (i in 1:5) { - perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i]) -} +save(ns, perc_filter, perc_unqiue, + file = "../../thesis/figures/data/dfgs_case-cluster.RData")