mtt_haum/code/12_dfgs-case-clusters.R

98 lines
3.0 KiB
R

# 12_dfgs-case-clusters.R
#
# content: (1) Read data
#          (2) Export DFGs for clusters
#          (3) Compute share of retained data and of unique traces per cluster
#
# input:  results/user-navigation.RData
# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf
#         ../../thesis/figures/dfg_cases_cluster2_R.pdf
#         ../../thesis/figures/dfg_cases_cluster3_R.pdf
#         ../../thesis/figures/dfg_cases_cluster4_R.pdf
#         ../../thesis/figures/dfg_cases_cluster5_R.pdf
#         ../../thesis/figures/data/dfgs_case-cluster.RData
#
# last mod: 2024-03-22
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
#--------------- (1) Read data ---------------

# The .RData file is expected to provide the object `res`, which is used as
# the working data frame from here on.
load("results/user-navigation.RData")
dat <- res

# Start and stop stamps converted to POSIXct; they serve as the two
# timestamp columns of the activity logs built below.
dat$start <- as.POSIXct(dat$date.start)
dat$complete <- as.POSIXct(dat$date.stop)

# Exploratory look at a single cluster (cluster 4).
# which() drops rows whose cluster is NA; a bare logical index would turn
# every NA into an all-NA row of the subset.
alog <- bupaR::activitylog(dat[which(dat$cluster == 4), ],
                           case_id     = "case",
                           activity_id = "item",
                           resource_id = "path",
                           timestamps  = c("start", "complete"))

processmapR::trace_explorer(alog, n_traces = 25)

tr <- bupaR::traces(alog)
tab <- table(tr$absolute_frequency)

# Share of distinct traces that occur exactly once. Testing for frequency 1
# explicitly does not rely on "1" being the first cell of `tab`.
mean(tr$absolute_frequency == 1)

# Directly-follows graph after removing infrequent flows
alog |>
  edeaR::filter_infrequent_flows(min_n = 5) |>
  processmapR::process_map()
#--------------- (2) Export DFGs for clusters ---------------

# Per-cluster settings; element k of each vector belongs to cluster k.
mycols   <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")  # edge colour
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")  # map title
ns       <- c(30, 20, 10, 5, 30)  # min_n passed to filter_infrequent_flows()
#ns <- c(20, 20, 20, 5, 20)

# The three vectors are indexed in parallel, so they must have equal length.
stopifnot(length(mycols) == length(ns),
          length(cl_names) == length(ns))

# One PDF per cluster: build the activity log, drop infrequent flows, create
# the process map without rendering it, and export it to the figures folder.
for (i in seq_along(ns)) {

  # which() drops rows whose cluster is NA instead of adding all-NA rows
  alog <- bupaR::activitylog(dat[which(dat$cluster == i), ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))

  alog_freq <- edeaR::filter_infrequent_flows(alog, min_n = ns[i])

  # Relative frequencies as primary labels, absolute frequencies as
  # secondary labels, for both nodes and edges; left-to-right layout.
  dfg <- processmapR::process_map(alog_freq,
    type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
    sec_nodes  = processmapR::frequency("absolute"),
    type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
    sec_edges  = processmapR::frequency("absolute"),
    rankdir    = "LR",
    render     = FALSE)

  processmapR::export_map(dfg,
    file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"),
    file_type = "pdf",
    title     = cl_names[i])
}
# What data is used and how many traces are unique
#
# perc_filter[k]: share of activity-log rows of cluster k that remain after
#                 filter_infrequent_flows() with min_n = ns[k]
# perc_unqiue[k]: share of activity-log rows of cluster k that belong to
#                 cases whose trace occurs in exactly one case
#
# NOTE: the spelling `perc_unqiue` is kept on purpose. It is the object name
# written to the .RData file below, so renaming it would break any script
# that loads that file and expects this name.
perc_filter <- numeric(length(ns))
perc_unqiue <- numeric(length(ns))

for (i in seq_along(ns)) {

  # which() drops rows whose cluster is NA instead of adding all-NA rows
  alog <- bupaR::activitylog(dat[which(dat$cluster == i), ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))

  perc_filter[i] <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i])) /
    nrow(alog)

  # Traces followed by exactly one case, and the cases that follow them
  cs <- bupaR::cases(alog)
  trace_counts <- table(cs$trace)
  infreq_tr <- names(trace_counts)[trace_counts == 1]
  infreq_cs <- cs$case[cs$trace %in% infreq_tr]

  # Count matching rows directly instead of subsetting the whole log
  perc_unqiue[i] <- sum(alog$case %in% infreq_cs) / nrow(alog)
}

save(ns, perc_filter, perc_unqiue,
     file = "../../thesis/figures/data/dfgs_case-cluster.RData")