diff --git a/code/11_investigate-variants.R b/code/11_investigate-variants.R
index 5fae799..8a12322 100644
--- a/code/11_investigate-variants.R
+++ b/code/11_investigate-variants.R
@@ -1,34 +1,38 @@
 # 11_investigate-variants.R
 #
 # content: (1) Read data
-#          (2) Extract characteristics for cases
-#          (3) Select features for navigation behavior
-#          (4) Export data frames
+#          (2) Investigate variants
 #
-# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
-# output: results/haum/eventlogs_pre-corona_case-clusters.csv
+# input:  results/haum/eventlogs_pre-corona_case-clusters.RData
+# output: 
 #
-# last mod: 2024-03-08
+# last mod: 2024-03-12
 
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 
+library(bupaverse)
+
 #--------------- (1) Read data ---------------
 
 load("results/haum/eventlogs_pre-corona_cleaned.RData")
 
-#--------------- (4) Investigate variants ---------------
+#--------------- (2) Investigate variants ---------------
 
-res$start <- res$date.start
-res$complete <- res$date.stop
+dat$start <- dat$date.start
+dat$complete <- dat$date.stop
 
-alog <- activitylog(res,
+alog <- activitylog(dat,
                     case_id = "case",
                     activity_id = "item",
                     resource_id = "path",
                     timestamps = c("start", "complete"))
 
+pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
 trace_explorer(alog, n_traces = 25)
 # --> sequences of artworks are just too rare
+dev.off()
+
+trace_explorer(alog, n_traces = 10, type = "infrequent")
 
 tr <- traces(alog)
 trace_length <- pbapply::pbsapply(strsplit(tr$trace, ","), length)
@@ -52,50 +56,7 @@ p1 <- lm(log(y) ~ log(x))
 pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
 lines(x, pre)
 
-
 # Look at individual traces as examples
 tr[trace_varied == 5 & trace_length > 50, ]
 # --> every variant exists only once, of course
 
-datcase[datcase$nitems == 5 & datcase$length > 50,]
-
-pbapply::pbsapply(datcase[, -c(1, 9)], median)
-
-#ex <- datcase[datcase$nitems == 4 & datcase$length == 15,]
-ex <- datcase[datcase$nitems == 5,]
-ex <- ex[sample(1:nrow(ex), 20), ]
-# --> pretty randomly chosen... TODO:
-
-case_ids <- NULL
-
-for (case in ex$case) {
-  if ("080" %in% res$item[res$case == case] | "503" %in% res$item[res$case == case]) {
-    case_ids <- c(case_ids, TRUE)
-  } else {
-    case_ids <- c(case_ids, FALSE)
-  }
-}
-
-cases <- ex$case[case_ids]
-
-
-for (case in cases) {
-
-  alog <- activitylog(res[res$case == case, ],
-                      case_id = "case",
-                      activity_id = "item",
-                      resource_id = "path",
-                      timestamps = c("start", "complete"))
-
-  dfg <- process_map(alog,
-                     type_nodes = frequency("absolute", color_scale = "Greys"),
-                     type_edges = frequency("absolute", color_edges = "#FF6900"),
-                     rankdir = "LR",
-                     render = FALSE)
-  export_map(dfg,
-             file_name = paste0("results/processmaps/dfg_example_cases_", case, "_R.pdf"),
-             file_type = "pdf",
-             title = paste("Case", case))
-
-
-}
diff --git a/code/12_pm-case-clusters.py b/code/12_pm-case-clusters.py
index 1634a9a..80f809a 100644
--- a/code/12_pm-case-clusters.py
+++ b/code/12_pm-case-clusters.py
@@ -7,7 +7,7 @@
 # input:  results/haum/eventlogs_pre-corona_item-clusters.csv
 # output: results/haum/pn_infos_clusters.csv
 #
-# last mod: 2024-03-06
+# last mod: 2024-03-10
 
 import pm4py
 import pandas as pd
@@ -16,9 +16,9 @@ from python_helpers import eval_pm, pn_infos
 
 #--------------- (1) Load data and create event logs ---------------
 
-dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters.csv", sep = ";")
+dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";")
 
-event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "item",
+event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "event_new",
                                    timestamp_key = "date.start")
 
 #--------------- (2) Infos for clusters ---------------
@@ -37,9 +37,10 @@ eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";")
 
 for cluster in event_log.cluster.unique().tolist():
 
     subdata = event_log[event_log.cluster == cluster]
-    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=.3)
+    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = .7)
    pm4py.save_vis_petri_net(subnet, subim, subfm,
            "results/processmaps/petrinet_cluster" + str(cluster) + "_cases.png")
     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm)
     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases.png")
+
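Note (not part of the patch): a minimal sketch for sanity-checking the noise_threshold change in 12_pm-case-clusters.py (.3 to .7). It reuses only the column names visible in the diff (case, event_new, date.start, cluster) and documented pm4py convenience functions; whether the repo's own eval_pm helper already reports these numbers is an assumption, so treat this as an illustrative check rather than the project's evaluation routine.

# compare_noise_thresholds.py (hypothetical helper, not in the repo)
# Discover one Petri net per candidate noise threshold for a single cluster
# and report token-replay fitness and precision, so the switch from .3 to .7
# can be justified with numbers rather than by eyeballing the process maps.

import pandas as pd
import pm4py

dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";")

event_log = pm4py.format_dataframe(dat, case_id = "case",
                                   activity_key = "event_new",
                                   timestamp_key = "date.start")

# pick one cluster as an example; loop over event_log.cluster.unique() for all
subdata = event_log[event_log.cluster == event_log.cluster.unique()[0]]

for nt in [.3, .7]:
    net, im, fm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = nt)
    fitness = pm4py.fitness_token_based_replay(subdata, net, im, fm)
    precision = pm4py.precision_token_based_replay(subdata, net, im, fm)
    print("noise_threshold =", nt, "| fitness:", fitness, "| precision:", precision)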