153 lines
4.9 KiB
R
153 lines
4.9 KiB
R
# 12_dfgs-case-clusters.R
#
# content: (1) Read data
#          (2) Export DFGs for clusters
#
# input:  results/user-navigation.RData
# output: ../thesis/figures/dfg_cases_cluster_<name>_R.pdf
#         ../thesis/figures/dfg_cases_cluster_<name>_R_bw.pdf
#           (<name>: Scanning, Exploring, Flitting, Searching, Info)
#         ../thesis/results/dfgs_case-cluster.RData
#
# last mod: 2024-04-17
|
|
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
|
|
|
#--------------- (1) Read data ---------------

# Restore the objects stored in the RData file into the workspace; the code
# below relies on it providing an object called `res`.
load("results/user-navigation.RData")

# Continue with a copy named `dat` and add POSIXct versions of the start and
# stop columns; these two columns are used as the timestamps of the activity
# logs built further down.
dat <- res

dat[["start"]]    <- as.POSIXct(dat[["date.start"]])
dat[["complete"]] <- as.POSIXct(dat[["date.stop"]])
|
|
|
|
# Exploratory look at a single cluster (cluster 4): build its activity log,
# inspect the traces, and draw a pruned process map.
alog <- bupaR::activitylog(dat[dat$cluster == 4, ],
                           case_id     = "case",
                           activity_id = "item",
                           resource_id = "path",
                           timestamps  = c("start", "complete"))

# Trace explorer restricted to 25 traces
processmapR::trace_explorer(alog, n_traces = 25)

# One row per distinct trace; `tab` tabulates how often each absolute
# frequency occurs among the traces
tr  <- bupaR::traces(alog)
tab <- table(tr$absolute_frequency)

# Proportion of traces that occur exactly once. Counting the singletons
# explicitly (instead of taking the first cell of `tab`) stays correct even
# when no trace has frequency 1, and matches the per-cluster computation of
# `tr_unique` later in this script.
sum(tr$absolute_frequency == 1) / nrow(tr)

# Process map after dropping flows that occur fewer than 5 times
alog |>
  edeaR::filter_infrequent_flows(min_n = 5) |>
  processmapR::process_map()
|
|
|
|
#--------------- (2) Export DFGs for clusters ---------------

# Per-cluster settings; position k in each vector belongs to cluster k
mycols   <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")

# Minimum flow frequency passed to filter_infrequent_flows() for each cluster
ns <- c(30, 20, 10, 5, 30)
#ns <- c(20, 20, 20, 5, 20)

# The three vectors are indexed in parallel, so they must agree in length
stopifnot(length(mycols) == length(cl_names),
          length(ns)     == length(cl_names))

# Build the directly-follows graph for one cluster and export it as PDF.
#
# i           : cluster number; selects the rows of `dat` and the entries of
#               `ns` and `cl_names`
# edge_color  : colour handed to frequency(color_edges = ) for the edges
# file_suffix : appended to "../thesis/figures/dfg_cases_cluster_<name>"
#
# Reads `dat`, `ns` and `cl_names` from the workspace. Returns the process map
# object invisibly; the PDF is written as a side effect.
export_cluster_dfg <- function(i, edge_color, file_suffix) {

  alog <- bupaR::activitylog(dat[dat$cluster == i, ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))

  pruned <- edeaR::filter_infrequent_flows(alog, min_n = ns[i])

  # Shared map specification; extra arguments are forwarded to process_map()
  draw_map <- function(...) {
    processmapR::process_map(
      pruned,
      type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
      sec_nodes  = processmapR::frequency("absolute"),
      type_edges = processmapR::frequency("relative",
                                          color_edges = edge_color),
      sec_edges  = processmapR::frequency("absolute"),
      ...,
      render     = FALSE
    )
  }

  # Clusters 4 and 5 keep process_map()'s default rank direction; all other
  # clusters are laid out top-to-bottom
  dfg <- if (i %in% c(4, 5)) draw_map() else draw_map(rankdir = "TB")

  processmapR::export_map(
    dfg,
    file_name = paste0("../thesis/figures/dfg_cases_cluster_",
                       cl_names[i], file_suffix),
    file_type = "pdf"
  )

  invisible(dfg)
}

## Colour: edges drawn in the cluster's own colour
for (i in seq_along(cl_names)) {
  export_cluster_dfg(i, edge_color = mycols[i], file_suffix = "_R.pdf")
}

## Black and white
for (i in seq_along(cl_names)) {
  export_cluster_dfg(i, edge_color = "black", file_suffix = "_R_bw.pdf")
}
|
|
|
|
# What data is used and how many traces are unique
#
# For every cluster this collects
#   n_cases     : number of cases left after filter_infrequent_flows()
#   perc_filter : n_cases divided by the number of cases before filtering
#                 (a proportion, not a percentage)
#   tr_unique   : proportion of distinct traces that occur exactly once
#
# The number of clusters is taken from `ns` (one threshold per cluster), so
# the result vectors and the loop cannot get out of step with it.

tr_unique   <- numeric(length(ns))
perc_filter <- numeric(length(ns))
n_cases     <- numeric(length(ns))

for (i in seq_along(ns)) {

  alog <- bupaR::activitylog(dat[dat$cluster == i, ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))

  # Cases before and after removing infrequent flows
  cs <- bupaR::cases(alog)

  cs_filtered <- edeaR::filter_infrequent_flows(alog, min_n = ns[i]) |>
    bupaR::cases()

  n_cases[i]     <- nrow(cs_filtered)
  perc_filter[i] <- n_cases[i] / nrow(cs)

  # Share of traces with an absolute frequency of one (unfiltered log)
  tr <- bupaR::traces(alog)

  tr_unique[i] <- sum(tr$absolute_frequency == 1) / nrow(tr)

  # Earlier alternative (share of cases whose trace occurs only once),
  # kept for reference:
  #infreq_tr <- names(which(table(cs$trace) == 1))
  #infreq_cs <- cs$case[cs$trace %in% infreq_tr]
  #cs_filtered[i] <- length(infreq_cs) / length(cs$case)

}
|
|
|
|
# Write the filter thresholds and the per-cluster summaries to one RData file
save(list = c("ns", "n_cases", "tr_unique", "perc_filter"),
     file = "../thesis/results/dfgs_case-cluster.RData")
|
|
|