diff --git a/code/09_user-navigation.R b/code/09_user-navigation.R
index d603201..b78c1f5 100644
--- a/code/09_user-navigation.R
+++ b/code/09_user-navigation.R
@@ -4,18 +4,18 @@
 #          (2) Clustering
 #          (3) Fit tree
 #
-# input:  results/haum/dataframes_case_2019.RData
-# output: results/haum/eventlogs_2019_case-clusters.csv
-#         results/haum/tmp_user-navigation.RData
+# input:  results/dataframes_case_2019.RData
+# output: results/eventlogs_2019_case-clusters.csv
+#         results/user-navigation.RData
 #         ../../thesis/figures/data/clustering_cases.RData
 #
-# last mod: 2024-03-15
+# last mod: 2024-03-22
 #
 
 setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 
 #--------------- (1) Load data ---------------
 
-load("results/haum/dataframes_case_2019.RData")
+load("results/dataframes_case_2019.RData")
 
 #--------------- (2) Clustering ---------------
@@ -119,13 +119,13 @@ aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, m
 aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median)
 
 write.table(res,
-            file = "results/haum/eventlogs_2019_case-clusters.csv",
+            file = "results/eventlogs_2019_case-clusters.csv",
             sep = ";",
             quote = FALSE,
             row.names = FALSE)
 
 save(res, dist_mat, hcs, acs, coor_2d, coor_3d,
-     file = "results/haum/tmp_user-navigation.RData")
+     file = "results/user-navigation.RData")
 
 save(coor_2d, coor_3d, cluster, dattree,
      file = "../../thesis/figures/data/clustering_cases.RData")
diff --git a/code/10_validation.R b/code/10_validation.R
index 1623f1f..d4f6fbe 100644
--- a/code/10_validation.R
+++ b/code/10_validation.R
@@ -6,10 +6,10 @@
 #          (4) Clustering
 #          (5) Fit tree
 #
-# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
-# output: results/haum/eventlogs_pre-corona_case-clusters.csv
+# input:  results/event_logfiles_2024-02-21_16-07-33.csv
+# output: --
 #
-# last mod: 2024-03-15
+# last mod: 2024-03-22
 #
 
 setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
@@ -17,7 +17,7 @@ source("R_helpers.R")
 
 #--------------- (1) Read data ---------------
 
-load("results/haum/eventlogs_pre-corona_cleaned.RData")
+load("results/eventlogs_pre-corona_cleaned.RData")
 
 # Select one year to handle number of cases
 dat <- dat[as.Date(dat$date.start) > "2017-12-31" &
@@ -77,8 +77,8 @@ dattree18 <- data.frame(case = datcase18$case,
 )
 
 # centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat)
-# save(centrality, file = "results/haum/tmp_centrality_2018.RData")
-load("results/haum/tmp_centrality_2018.RData")
+# save(centrality, file = "results/centrality_2018.RData")
+load("results/centrality_2018.RData")
 
 dattree18$BetweenCentrality <- centrality
diff --git a/code/11_investigate-variants.R b/code/11_investigate-variants.R
index 6f0d650..5fa9422 100644
--- a/code/11_investigate-variants.R
+++ b/code/11_investigate-variants.R
@@ -3,10 +3,11 @@
 # content: (1) Read data
 #          (2) Investigate variants
 #
-# input:  results/haum/eventlogs_pre-corona_case-clusters.RData
-# output:
+# input:  results/eventlogs_pre-corona_cleaned.RData
+# output: ../../thesis/figures/freq-traces.pdf
+#         ../../thesis/figures/freq-traces_powerlaw.pdf
 #
-# last mod: 2024-03-12
+# last mod: 2024-03-22
 #
 
 setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
@@ -14,7 +15,7 @@ library(bupaverse)
 
 #--------------- (1) Read data ---------------
 
-load("results/haum/eventlogs_pre-corona_cleaned.RData")
+load("results/eventlogs_pre-corona_cleaned.RData")
 
 
 #--------------- (2) Investigate variants ---------------
@@ -27,7 +28,7 @@ alog <- activitylog(dat,
                     resource_id = "path",
                     timestamps = c("start", "complete"))
 
-pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
+pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
 trace_explorer(alog, n_traces = 25)
 # --> sequences of artworks are just too rare
 dev.off()
@@ -54,7 +55,7 @@ y <- as.numeric(tab)
 p1 <- lm(log(y) ~ log(x))
 pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
 
-pdf("results/figures/freq-traces_powerlaw.pdf", height = 3.375,
+pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
     width = 3.375, pointsize = 10)
 
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
diff --git a/code/12_dfgs-case-clusters.R b/code/12_dfgs-case-clusters.R
index 038edb5..232b2c6 100644
--- a/code/12_dfgs-case-clusters.R
+++ b/code/12_dfgs-case-clusters.R
@@ -1,25 +1,28 @@
 # 13_dfgs-case-clusters.R
 #
-# content:
+# content: (1) Read data
+#          (2) Export DFGs for clusters
 #
-# input:  results/haum/tmp_user-navigation.RData
-# output:
+# input:  results/user-navigation.RData
+# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf
+#         ../../thesis/figures/dfg_cases_cluster2_R.pdf
+#         ../../thesis/figures/dfg_cases_cluster3_R.pdf
+#         ../../thesis/figures/dfg_cases_cluster4_R.pdf
+#         ../../thesis/figures/dfg_cases_cluster5_R.pdf
 #
-# last mod: 2024-03-19
+# last mod: 2024-03-22
 #
 
 setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 
-load("results/haum/tmp_user-navigation.RData")
+#--------------- (1) Read data ---------------
 
-#dat <- read.table("results/haum/eventlogs_2019_case-clusters.csv", header = TRUE, sep = ";")
+load("results/user-navigation.RData")
 
 dat <- res
 
 dat$start <- as.POSIXct(dat$date.start)
 dat$complete <- as.POSIXct(dat$date.stop)
-
-
 alog <- bupaR::activitylog(dat[dat$cluster == cluster, ],
                            case_id = "case",
                            activity_id = "item",
@@ -33,13 +36,9 @@ tab <- table(tr$absolute_frequency)
 tab[1] / nrow(tr)
-
 alog |>
   edeaR::filter_infrequent_flows(min_n = 20) |>
   processmapR::process_map()
-
-
-
-## Export DFGs for clusters
+#--------------- (2) Export DFGs for clusters ---------------
 
 mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
 cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")
@@ -63,12 +62,11 @@ for (i in 1:5) {
                                   render = FALSE)
 
   processmapR::export_map(dfg,
-                          file_name = paste0("results/processmaps/dfg_cases_cluster", i, "_R.pdf"),
+                          file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"),
                           file_type = "pdf",
                           title = cl_names[i])
 }
 
-
 # cluster 1: 50
 # cluster 2: 30 o. 20
 # cluster 3: 20 - 30
@@ -87,7 +85,6 @@ get_percent_variants <- function(log, cluster, min_n) {
   nrow(alog)
 }
 
-
 perc <- numeric(5)
 
 for (i in 1:5) {