Updated DFGs for case clusters; exported data for tables

2024-03-27 10:07:36 +01:00 · 2024-03-27 10:07:36 +01:00 · 42cb777d36
commit 42cb777d36
parent 48a4bcbcdb
1 changed files with 20 additions and 16 deletions
--- a/code/12_dfgs-case-clusters.R
+++ b/code/12_dfgs-case-clusters.R
@@ -23,7 +23,7 @@ dat <- res
 dat$start <- as.POSIXct(dat$date.start)
 dat$complete <- as.POSIXct(dat$date.stop)
-alog <- bupaR::activitylog(dat[dat$cluster == cluster, ],
+alog <- bupaR::activitylog(dat[dat$cluster == 4, ],
                            case_id     = "case",
                            activity_id = "item",
                            resource_id = "path",
@@ -36,7 +36,7 @@ tab <- table(tr$absolute_frequency)
 tab[1] / nrow(tr)
-alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map()
+alog |> edeaR::filter_infrequent_flows(min_n = 5) |> processmapR::process_map()
 #--------------- (2) Export DFGs for clusters ---------------
@@ -44,6 +44,7 @@ mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
 cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")
 ns <- c(30, 20, 10, 5, 30)
 #ns <- c(20, 20, 20, 5, 20)
 for (i in 1:5) {
@@ -52,7 +53,7 @@ for (i in 1:5) {
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))
-  
+
  dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
    type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
    sec_nodes  = processmapR::frequency("absolute"),
@@ -67,27 +68,30 @@ for (i in 1:5) {
    title     = cl_names[i])
 }
-# cluster 1: 50
+# What data is used and how many traces are unique
 # cluster 2: 30 o. 20
 # cluster 3: 20 - 30
 # cluster 4: 5
 # cluster 5: 20
-get_percent_variants <- function(log, cluster, min_n) {
+perc_filter <- numeric(5)
 perc_unqiue <- numeric(5)
-  alog <- bupaR::activitylog(log[log$cluster == cluster, ],
+for (i in 1:5) {
  alog <- bupaR::activitylog(dat[dat$cluster == i, ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))
-  nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) /
+  perc_filter[i] <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i])) /
    nrow(alog)
  cs <- bupaR::cases(alog)
  infreq_tr <- names(which(table(cs$trace) == 1))
  infreq_cs <- cs$case[cs$trace %in% infreq_tr]
  perc_unqiue[i] <- nrow(alog[alog$case %in% infreq_cs, ]) / nrow(alog)
 }
-perc <- numeric(5)
+save(ns, perc_filter, perc_unqiue,
-
+     file = "../../thesis/figures/data/dfgs_case-cluster.RData")
 for (i in 1:5) {
  perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i])
 }