# 12_dfgs-case-clusters.R
#
# content: (1) Read data
#          (2) Export DFGs for clusters
#
# input:  results/user-navigation.RData
# output: ../../thesis/figures/dfg_cases_cluster_<name>_R.pdf
#         ../../thesis/figures/dfg_cases_cluster_<name>_R_bw.pdf
#           (<name> = Scanning, Exploring, Flitting, Searching, Info)
#         ../../thesis/figures/data/dfgs_case-cluster.RData
#
# last mod: 2024-03-30

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

#--------------- (1) Read data ---------------

# The RData file is expected to provide the data frame `res`.
load("results/user-navigation.RData")

dat <- res
dat$start    <- as.POSIXct(dat$date.start)
dat$complete <- as.POSIXct(dat$date.stop)

# Build the bupaR activity log for the cases of one cluster.
#
# `%in%` is used instead of `==` so that rows with a missing cluster label are
# dropped rather than turned into all-NA rows by the logical subsetting.
cluster_log <- function(dat, cluster) {
  bupaR::activitylog(dat[dat$cluster %in% cluster, ],
                     case_id     = "case",
                     activity_id = "item",
                     resource_id = "path",
                     timestamps  = c("start", "complete"))
}

# Exploratory look at a single cluster
alog <- cluster_log(dat, 4)

processmapR::trace_explorer(alog, n_traces = 25)

# Proportion of traces that occur exactly once
tr <- bupaR::traces(alog)
sum(tr$absolute_frequency == 1) / nrow(tr)

alog |>
  edeaR::filter_infrequent_flows(min_n = 5) |>
  processmapR::process_map()

#--------------- (2) Export DFGs for clusters ---------------

mycols   <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")

# Minimum flow frequency kept in the DFG, one threshold per cluster
ns <- c(30, 20, 10, 5, 30)
#ns <- c(20, 20, 20, 5, 20)

# The three vectors above are indexed in parallel by cluster number.
stopifnot(length(mycols) == length(cl_names),
          length(ns)     == length(cl_names))

# Clusters drawn left-to-right; all other clusters are drawn top-to-bottom.
lr_clusters <- c(4, 5)

# Create (without rendering) the DFG of an already filtered activity log.
# Nodes are shaded in grey by relative frequency, edges use `edge_color`;
# absolute frequencies are shown as secondary labels.
make_dfg <- function(alog_filtered, edge_color, rankdir) {
  processmapR::process_map(
    alog_filtered,
    type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
    sec_nodes  = processmapR::frequency("absolute"),
    type_edges = processmapR::frequency("relative", color_edges = edge_color),
    sec_edges  = processmapR::frequency("absolute"),
    rankdir    = rankdir,
    render     = FALSE
  )
}

# Write a DFG as PDF into the thesis figure folder.
export_dfg <- function(dfg, cl_name, suffix = "") {
  processmapR::export_map(
    dfg,
    file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_name,
                       "_R", suffix, ".pdf"),
    file_type = "pdf"
  )
}

# What data is used and how many traces are unique
tr_unique   <- numeric(length(cl_names))
perc_filter <- numeric(length(cl_names))
n_cases     <- numeric(length(cl_names))

for (i in seq_along(cl_names)) {

  # Log and filtered log are built once per cluster and reused below.
  alog          <- cluster_log(dat, i)
  alog_filtered <- edeaR::filter_infrequent_flows(alog, min_n = ns[i])

  # "LR" is the default of process_map(); it is passed explicitly so that
  # both layouts go through the same call.
  rankdir <- if (i %in% lr_clusters) "LR" else "TB"

  ## Colored
  dfg <- make_dfg(alog_filtered, edge_color = mycols[i], rankdir = rankdir)
  export_dfg(dfg, cl_names[i])

  ## Black and white
  dfg <- make_dfg(alog_filtered, edge_color = "black", rankdir = rankdir)
  export_dfg(dfg, cl_names[i], suffix = "_bw")

  ## Number and share of cases that remain after filtering
  cs             <- bupaR::cases(alog)
  cs_filtered    <- bupaR::cases(alog_filtered)
  n_cases[i]     <- nrow(cs_filtered)
  perc_filter[i] <- n_cases[i] / nrow(cs)

  ## Proportion of traces that occur exactly once
  tr           <- bupaR::traces(alog)
  tr_unique[i] <- sum(tr$absolute_frequency == 1) / nrow(tr)

  #infreq_tr <- names(which(table(cs$trace) == 1))
  #infreq_cs <- cs$case[cs$trace %in% infreq_tr]
  #cs_filtered[i] <- length(infreq_cs) / length(cs$case)
}

save(ns, n_cases, tr_unique, perc_filter,
     file = "../../thesis/figures/data/dfgs_case-cluster.RData")