# Patch for code/12_dfgs-case-clusters.R (free text before the "diff --git" header is skipped by patch tools).
# Hunks 1-2: correct the script name in the file header (13_ -> 12_) and bump the "last mod" date.
# Hunk 3: in the DFG export loop, clusters 4 and 5 are rendered without an explicit rankdir, all others with rankdir = "TB" (previously every cluster used "LR"); exported PDFs are named after cl_names[i] and the title argument is dropped.
# Hunk 4: the summary loop now records n_cases (cases remaining after edeaR::filter_infrequent_flows), perc_filter (n_cases relative to all cases of the cluster log) and tr_unique (share of traces with absolute_frequency == 1), and saves ns, n_cases, tr_unique, perc_filter.
# Review fix: the perc_filter denominator referenced `cs`, whose definition (`cs <- bupaR::cases(alog)`) is removed by this same hunk; it is now computed inline as nrow(bupaR::cases(alog)). Hunk line counts are unchanged.
# NOTE(review): the header comment of the target file still lists outputs named dfg_cases_cluster4_R.pdf / dfg_cases_cluster5_R.pdf, while the export now writes dfg_cases_cluster_<cl_names[i]>_R.pdf - confirm and update the header in the script.
diff --git a/code/12_dfgs-case-clusters.R b/code/12_dfgs-case-clusters.R index a5b2083..574f62c 100644 --- a/code/12_dfgs-case-clusters.R +++ b/code/12_dfgs-case-clusters.R @@ -1,4 +1,4 @@ -# 13_dfgs-case-clusters.R +# 12_dfgs-case-clusters.R # # content: (1) Read data # (2) Export DFGs for clusters @@ -10,7 +10,7 @@ # ../../thesis/figures/dfg_cases_cluster4_R.pdf # ../../thesis/figures/dfg_cases_cluster5_R.pdf # -# last mod: 2024-03-22 +# last mod: 2024-03-27 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") @@ -53,25 +53,37 @@ for (i in 1:5) { activity_id = "item", resource_id = "path", timestamps = c("start", "complete")) + if (i %in% c(4, 5)) { - dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), - type_nodes = processmapR::frequency("relative", color_scale = "Greys"), - sec_nodes = processmapR::frequency("absolute"), - type_edges = processmapR::frequency("relative", color_edges = mycols[i]), - sec_edges = processmapR::frequency("absolute"), - rankdir = "LR", - render = FALSE) + dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), + type_nodes = processmapR::frequency("relative", color_scale = "Greys"), + sec_nodes = processmapR::frequency("absolute"), + type_edges = processmapR::frequency("relative", color_edges = mycols[i]), + sec_edges = processmapR::frequency("absolute"), + render = FALSE) + + } else { + + dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), + type_nodes = processmapR::frequency("relative", color_scale = "Greys"), + sec_nodes = processmapR::frequency("absolute"), + type_edges = processmapR::frequency("relative", color_edges = mycols[i]), + sec_edges = processmapR::frequency("absolute"), + rankdir = "TB", + render = FALSE) + + } processmapR::export_map(dfg, - file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"), - file_type = "pdf", - title = cl_names[i]) + file_name = 
paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"), + file_type = "pdf") } # What data is used and how many traces are unique +tr_unique <- numeric(5) perc_filter <- numeric(5) -perc_unqiue <- numeric(5) +n_cases <- numeric(5) for (i in 1:5) { @@ -81,17 +93,25 @@ for (i in 1:5) { resource_id = "path", timestamps = c("start", "complete")) - perc_filter[i] <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i])) / - nrow(alog) - cs <- bupaR::cases(alog) - infreq_tr <- names(which(table(cs$trace) == 1)) - infreq_cs <- cs$case[cs$trace %in% infreq_tr] - perc_unqiue[i] <- nrow(alog[alog$case %in% infreq_cs, ]) / nrow(alog) + cs_filtered <- edeaR::filter_infrequent_flows(alog, min_n = ns[i]) |> + bupaR::cases() + + n_cases[i] <- nrow(cs_filtered) + perc_filter[i] <- n_cases[i] / nrow(bupaR::cases(alog)) + + tr <- bupaR::traces(alog) + + sum_tr <- sum(tr$absolute_frequency == 1) + tr_unique[i] <- sum_tr / nrow(tr) + + #infreq_tr <- names(which(table(cs$trace) == 1)) + #infreq_cs <- cs$case[cs$trace %in% infreq_tr] + #cs_filtered[i] <- length(infreq_cs) / length(cs$case) } -save(ns, perc_filter, perc_unqiue, +save(ns, n_cases, tr_unique, perc_filter, file = "../../thesis/figures/data/dfgs_case-cluster.RData")