More updates and some bug fixes for the DFGs of the case clusters

This commit is contained in:
Nora Wickelmaier 2024-03-27 15:48:58 +01:00
parent 42cb777d36
commit 07a64d8c59
1 changed file with 40 additions and 20 deletions

View File

@ -1,4 +1,4 @@
# 13_dfgs-case-clusters.R
# 12_dfgs-case-clusters.R
#
# content: (1) Read data
# (2) Export DFGs for clusters
@ -10,7 +10,7 @@
# ../../thesis/figures/dfg_cases_cluster4_R.pdf
# ../../thesis/figures/dfg_cases_cluster5_R.pdf
#
# last mod: 2024-03-22
# last mod: 2024-03-27
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
@ -53,25 +53,37 @@ for (i in 1:5) {
activity_id = "item",
resource_id = "path",
timestamps = c("start", "complete"))
if (i %in% c(4, 5)) {
dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
sec_nodes = processmapR::frequency("absolute"),
type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
sec_edges = processmapR::frequency("absolute"),
rankdir = "LR",
render = FALSE)
dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
sec_nodes = processmapR::frequency("absolute"),
type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
sec_edges = processmapR::frequency("absolute"),
render = FALSE)
} else {
dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
sec_nodes = processmapR::frequency("absolute"),
type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
sec_edges = processmapR::frequency("absolute"),
rankdir = "TB",
render = FALSE)
}
processmapR::export_map(dfg,
file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"),
file_type = "pdf",
title = cl_names[i])
file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"),
file_type = "pdf")
}
# What data is used and how many traces are unique
tr_unique <- numeric(5)
perc_filter <- numeric(5)
perc_unqiue <- numeric(5)
n_cases <- numeric(5)
for (i in 1:5) {
@ -81,17 +93,25 @@ for (i in 1:5) {
resource_id = "path",
timestamps = c("start", "complete"))
perc_filter[i] <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i])) /
nrow(alog)
cs <- bupaR::cases(alog)
infreq_tr <- names(which(table(cs$trace) == 1))
infreq_cs <- cs$case[cs$trace %in% infreq_tr]
perc_unqiue[i] <- nrow(alog[alog$case %in% infreq_cs, ]) / nrow(alog)
cs_filtered <- edeaR::filter_infrequent_flows(alog, min_n = ns[i]) |>
bupaR::cases()
n_cases[i] <- nrow(cs_filtered)
perc_filter[i] <- n_cases[i] / nrow(cs)
tr <- bupaR::traces(alog)
sum_tr <- sum(tr$absolute_frequency == 1)
tr_unique[i] <- sum_tr / nrow(tr)
#infreq_tr <- names(which(table(cs$trace) == 1))
#infreq_cs <- cs$case[cs$trace %in% infreq_tr]
#cs_filtered[i] <- length(infreq_cs) / length(cs$case)
}
save(ns, perc_filter, perc_unqiue,
save(ns, n_cases, tr_unique, perc_filter,
file = "../../thesis/figures/data/dfgs_case-cluster.RData")