98 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			98 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| # 13_dfgs-case-clusters.R
 | |
| #
 | |
| # content: (1) Read data
 | |
| #          (2) Export DFGs for clusters
 | |
| #
 | |
| # input:  results/user-navigation.RData
 | |
| # output: ../../thesis/figures/dfg_cases_cluster1_R.pdf
 | |
| #         ../../thesis/figures/dfg_cases_cluster2_R.pdf
 | |
| #         ../../thesis/figures/dfg_cases_cluster3_R.pdf
 | |
| #         ../../thesis/figures/dfg_cases_cluster4_R.pdf
 | |
| #         ../../thesis/figures/dfg_cases_cluster5_R.pdf
 | |
| #
 | |
| # last mod: 2024-03-22
 | |
| 
 | |
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 | |
| 
 | |
#--------------- (1) Read data ---------------

# Load the saved workspace and verify that it actually provides `res`.
# load() returns the names of the restored objects; without this check a
# stale `res` already present in the session would be used silently.
loaded <- load("results/user-navigation.RData")
if (!"res" %in% loaded) {
  stop("results/user-navigation.RData does not contain an object `res`",
       call. = FALSE)
}

dat <- res

# Convert the start/stop dates to POSIXct; these two columns are used as
# the activity-log timestamps below.
dat$start    <- as.POSIXct(dat$date.start)
dat$complete <- as.POSIXct(dat$date.stop)

# Exploratory look at a single cluster (cluster 4)
alog <- bupaR::activitylog(dat[dat$cluster == 4, ],
                           case_id     = "case",
                           activity_id = "item",
                           resource_id = "path",
                           timestamps  = c("start", "complete"))

processmapR::trace_explorer(alog, n_traces = 25)

# Tabulate how often each distinct trace occurs
tr  <- bupaR::traces(alog)
tab <- table(tr$absolute_frequency)

# Share of distinct traces that fall into the lowest observed frequency
# class (presumably frequency 1, i.e. traces seen only once -- confirm)
tab[1] / nrow(tr)

# Process map of the cluster after filtering with min_n = 5
alog |>
  edeaR::filter_infrequent_flows(min_n = 5) |>
  processmapR::process_map()
 | |
| 
 | |
#--------------- (2) Export DFGs for clusters ---------------

# One edge colour and one title per cluster (indexed by cluster number)
mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")

# Per-cluster value passed as `min_n` to edeaR::filter_infrequent_flows()
ns <- c(30, 20, 10, 5, 30)
#ns <- c(20, 20, 20, 5, 20)

# The three vectors are indexed in parallel; a length mismatch would
# otherwise yield NA colours, titles or thresholds without any error.
stopifnot(length(mycols) == length(ns), length(cl_names) == length(ns))

for (i in seq_along(ns)) {

  # Activity log restricted to the cases of cluster i
  alog <- bupaR::activitylog(dat[dat$cluster == i, ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))

  alog_filtered <- edeaR::filter_infrequent_flows(alog, min_n = ns[i])

  # Build the map without rendering it (render = FALSE) so that it can be
  # handed to export_map(); nodes and edges show relative frequencies with
  # absolute frequencies as secondary labels.
  dfg <- processmapR::process_map(
    alog_filtered,
    type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
    sec_nodes  = processmapR::frequency("absolute"),
    type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
    sec_edges  = processmapR::frequency("absolute"),
    rankdir    = "LR",
    render     = FALSE
  )

  processmapR::export_map(
    dfg,
    file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"),
    file_type = "pdf",
    title     = cl_names[i]
  )
}
 | |
| 
 | |
# What data is used and how many traces are unique

# Size the result vectors from `ns` (one threshold per cluster) instead of
# repeating a hard-coded cluster count.
n_clusters  <- length(ns)
perc_filter <- numeric(n_clusters)
# NOTE: the misspelt name `perc_unqiue` is kept on purpose -- it is the
# object name stored in the .RData file written below.
perc_unqiue <- numeric(n_clusters)

for (i in seq_len(n_clusters)) {

  # Activity log restricted to the cases of cluster i
  alog <- bupaR::activitylog(dat[dat$cluster == i, ],
                             case_id     = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps  = c("start", "complete"))

  n_rows <- nrow(alog)

  # Fraction of log rows that remain after the infrequent-flow filter
  n_kept <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i]))
  perc_filter[i] <- n_kept / n_rows

  # Cases whose trace occurs exactly once among the cases of this cluster
  cs <- bupaR::cases(alog)
  infreq_tr <- names(which(table(cs$trace) == 1))
  infreq_cs <- cs$case[cs$trace %in% infreq_tr]

  # Fraction of log rows that belong to those cases
  perc_unqiue[i] <- sum(alog$case %in% infreq_cs) / n_rows

}

# Create the target directory up front so that save() cannot fail on a
# missing folder after all statistics have been computed.
out_file <- "../../thesis/figures/data/dfgs_case-cluster.RData"
dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)

save(ns, perc_filter, perc_unqiue,
     file = out_file)
 | |
| 
 |