From 7f0b0d44adf573adc96322e8b4a3971bce2efa79 Mon Sep 17 00:00:00 2001 From: nwickel Date: Wed, 17 Apr 2024 14:25:04 +0200 Subject: [PATCH] Updated file structure: results and figures are now on top level of project; adjusted paths, but some might still be broken --- code/01_preprocessing.R | 20 +++++----- code/02_descriptives.R | 66 ++++++++++++++++----------------- code/03_create-petrinet.py | 12 +++--- code/04_conformance-checking.py | 18 ++++----- code/05_check-traces.R | 6 +-- code/06_infos-items.py | 10 ++--- code/07_item-clustering.R | 8 ++-- code/08_case-characteristics.R | 6 +-- code/10_validation.R | 17 +++++---- code/11_investigate-variants.R | 24 +++++++----- code/12_dfgs-case-clusters.R | 21 ++++++----- code/13_pm-case-clusters.py | 10 ++--- 12 files changed, 112 insertions(+), 106 deletions(-) diff --git a/code/01_preprocessing.R b/code/01_preprocessing.R index e7085cc..f8bd126 100644 --- a/code/01_preprocessing.R +++ b/code/01_preprocessing.R @@ -4,16 +4,16 @@ # (2) Create event logs # (3) Add meta data # -# input: raw log files from ../data/haum/*.log -# ../data/metadata/feiertage.csv -# ../data/metadata/schulferien_2016-2018_NI.csv -# ../data/metadata/schulferien_2019-2025_NI.csv +# input: raw log files from data/haum/*.log +# data/metadata/feiertage.csv +# data/metadata/schulferien_2016-2018_NI.csv +# data/metadata/schulferien_2019-2025_NI.csv # output: results/raw_logfiles_.csv # results/event_logfiles_.csv # -# last mod: 2024-02-23, NW +# last mod: 2024-04-17 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") #library(mtt) devtools::load_all("../../../../../software/mtt") @@ -22,7 +22,7 @@ now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S") #--------------- (1) Parse raw log files --------------- -path <- "../data/haum/LogFiles/" +path <- "data/haum/LogFiles/" folders <- dir(path) #folders <- "2016" @@ -73,7 
+73,7 @@ datlogs$topic <- factor(datlogs$topic, levels = 0:7, ## Read data for holiday -hd0 <- read.table("../data/metadata/feiertage.csv", sep = ";", header = TRUE) +hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE) hd0$X.br. <- NULL hd <- hd0[hd0$Abkuerzung == "NI", ] @@ -100,9 +100,9 @@ hd$stateCode <- NULL # write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";", # row.names = FALSE, quote = FALSE) -sf1 <- read.table("../data/metadata/schulferien_2016-2018_NI.csv", sep = ";", +sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";", header = TRUE) -sf2 <- read.table("../data/metadata/schulferien_2019-2025_NI.csv", sep = ";", +sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";", header = TRUE) sf <- rbind(sf1, sf2) sf$start <- as.Date(sf$start) diff --git a/code/02_descriptives.R b/code/02_descriptives.R index b279975..7e7a08e 100644 --- a/code/02_descriptives.R +++ b/code/02_descriptives.R @@ -11,25 +11,25 @@ # # input: results/event_logfiles_2024-02-21_16-07-33.csv # results/raw_logfiles_2024-02-21_16-07-33.csv -# output: results/figures/counts_item_firsttouch.pdf -# results/figures/duration.pdf -# results/figures/heatmap_start.pdf -# results/figures/heatmap_stop.pdf -# results/figures/timeMs.pdf -# results/figures/xycoord.pdf -# results/figures/event-dist.pdf -# results/figures/traceexplore_trace-event.pdf -# results/figures/ra_trace-event.pdf -# results/figures/traceexplore_case-event.pdf -# results/figures/bp_tod.pdf -# results/figures/bp_wd.pdf -# results/figures/bp_wds.pdf -# results/figures/bp_corona.pdf -# results/figures/traceexplore_case-artwork_often080.pdf +# output: figures/counts_item_firsttouch.pdf +# figures/duration.pdf +# figures/heatmap_start.pdf +# figures/heatmap_stop.pdf +# figures/timeMs.pdf +# figures/xycoord.pdf +# figures/event-dist.pdf +# figures/traceexplore_trace-event.pdf +# figures/ra_trace-event.pdf +# figures/traceexplore_case-event.pdf +# 
figures/bp_tod.pdf +# figures/bp_wd.pdf +# figures/bp_wds.pdf +# figures/bp_corona.pdf +# figures/traceexplore_case-artwork_often080.pdf # -# last mod: 2024-03-28 +# last mod: 2024-04-17 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") #--------------- (1) Read data --------------- @@ -92,7 +92,7 @@ items <- unique(datlogs$item) #items <- items[!items %in% c("504", "505")] datart <- mtt::extract_artworks(items, paste0(items, ".xml"), - "../data/haum/ContentEyevisit/eyevisit_cards_light/") + "data/haum/ContentEyevisit/eyevisit_cards_light/") datart <- datart[order(datart$artwork), ] names(counts_item) <- datart$title @@ -110,7 +110,7 @@ text(tmp, counts_case + 100, datart$item) counts <- rbind(counts_item, counts_case) -pdf("results/figures/counts_item_firsttouch.pdf", +pdf("figures/counts_item_firsttouch.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) @@ -135,7 +135,7 @@ lattice::bwplot(duration / 1000 / 60 ~ event, datlogs) set.seed(1027) -pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) +pdf("figures/duration.pdf", width = 5, height = 5, pointsize = 10) lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min") dev.off() @@ -206,7 +206,7 @@ tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) colnames(tab.start) <- NULL rownames(tab.start) <- NULL -pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) +pdf("figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.start, Rowv = NA, Colv = NA) dev.off() @@ -217,7 +217,7 @@ tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) colnames(tab.stop) <- NULL rownames(tab.stop) <- NULL -pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) +pdf("figures/heatmap_stop.pdf", width = 5, height = 5, 
pointsize = 10) heatmap(tab.stop, Rowv = NA, Colv = NA) dev.off() @@ -244,7 +244,7 @@ plot(case ~ date, datcase, type = "h", col = "#434F4F") ## weird behavior of timeMs -pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) +pdf("figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", @@ -255,7 +255,7 @@ dev.off() set.seed(1522) -pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) +pdf("figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mfrow = c(1, 2)) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) @@ -310,7 +310,7 @@ proportions(tab, margin = "event") cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] -pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), @@ -364,7 +364,7 @@ processmapR::process_map(alogf, # alog, alog_no_move <- alog[alog$event != "move", ] -pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) +pdf("figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1447) processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% sample(unique(alog_no_move$path), 400),], @@ -372,7 +372,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% abbreviate = T) dev.off() -pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) +pdf("figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) ra_no_move <- edeaR::resource_frequency(alog_no_move, 
"resource-activity") levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") @@ -442,7 +442,7 @@ processmapR::process_map(alog, processmapR::frequency("relative")) alog_no_move <- alog[alog$event != "move", ] -pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) +pdf("figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1050) processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% sample(unique(alog_no_move$path), 300),], @@ -481,7 +481,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",], processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], @@ -511,7 +511,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",], processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), @@ -542,7 +542,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),], processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? 
-pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, @@ -573,7 +573,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),], processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), @@ -618,7 +618,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ], processmapR::process_map(alog, processmapR::frequency("relative")) -pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) +pdf("figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) processmapR::trace_explorer(alog, n_traces = 30, type = "frequent", diff --git a/code/03_create-petrinet.py b/code/03_create-petrinet.py index 57bf689..cb5fe46 100644 --- a/code/03_create-petrinet.py +++ b/code/03_create-petrinet.py @@ -4,9 +4,9 @@ # (2) Normative net # # input: -- -# output: results/normative_petrinet.pnml -# results/processmaps/normative_petrinet.png -# results/processmaps/normative_bpmn.png +# output: ../results/normative_petrinet.pnml +# ../results/processmaps/normative_petrinet.png +# ../results/processmaps/normative_bpmn.png # # last mod: 2024-03-22 @@ -151,12 +151,12 @@ final_marking[sink] = 1 pm4py.view_petri_net(net, initial_marking, final_marking) pm4py.write_pnml(net, initial_marking, final_marking, - "results/normative_petrinet.pnml") + "../results/normative_petrinet.pnml") pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking, - 
"results/processmaps/normative_petrinet.png") + "../results/processmaps/normative_petrinet.png") bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking) pm4py.view_bpmn(bpmn) -pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png") +pm4py.vis.save_vis_bpmn(bpmn, "../results/processmaps/normative_bpmn.png") diff --git a/code/04_conformance-checking.py b/code/04_conformance-checking.py index 25df866..b0b7d3b 100644 --- a/code/04_conformance-checking.py +++ b/code/04_conformance-checking.py @@ -3,10 +3,10 @@ # content: (1) Load data and create event log # (2) Check against normative Petri Net # -# input: results/event_logfiles_2024-02-21_16-07-33.csv -# results/normative_petrinet.pnml -# output: results/eval_all-miners_complete.csv -# results/eval_all-miners_clean.csv +# input: ../results/event_logfiles_2024-02-21_16-07-33.csv +# ../results/normative_petrinet.pnml +# output: ../results/eval_all-miners_complete.csv +# ../results/eval_all-miners_clean.csv # ../../thesis/figures/petrinet_normative.png # ../../thesis/figures/petrinet_heuristics_clean.png # ../../thesis/figures/petrinet_alpha_clean.png @@ -18,7 +18,7 @@ # ../../thesis/figures/bpmn_alpha_clean.png # ../../thesis/figures/bpmn_heuristics_clean.png # -# last mod: 2024-03-22 +# last mod: 2024-04-17 import pm4py import pandas as pd @@ -28,7 +28,7 @@ from python_helpers import eval_pm, pn_infos_miner #--------------- (1) Load data and create event logs --------------- -dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") +dat = pd.read_csv("../results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") event_log = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", @@ -58,7 +58,7 @@ sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: #--------------- (2) Check against normative Petri Net --------------- -basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml") +basenet, 
initial_marking, final_marking = pm4py.read_pnml("../results/normative_petrinet.pnml") # TBR replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) @@ -101,7 +101,7 @@ eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) -eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") +eval.to_csv("../results/eval_all-miners_complete.csv", sep = ";") ## Without broken trace event_log_clean = event_log[event_log["@@case_index"] != index_broken[0]] @@ -113,7 +113,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability", for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) -eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") +eval_clean.to_csv("../results/eval_all-miners_clean.csv", sep = ";") ## Directly-follows graph dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) diff --git a/code/05_check-traces.R b/code/05_check-traces.R index 1b715d1..3fcd179 100644 --- a/code/05_check-traces.R +++ b/code/05_check-traces.R @@ -9,11 +9,11 @@ # results/raw_logfiles_2024-02-21_16-07-33.csv # output: results/eventlogs_pre-corona_cleaned.RData # results/eventlogs_pre-corona_cleaned.csv -# ../../thesis/figures/dfg_complete_WFnet_R.pdf +# ../thesis/figures/dfg_complete_WFnet_R.pdf # # last mod: 2024-03-23 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") #--------------- (1) Look at broken trace --------------- @@ -85,7 +85,7 @@ dfg <- processmapR::process_map(alog, render = FALSE) processmapR::export_map(dfg, - file_name = 
paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"), + file_name = paste0("../thesis/figures/dfg_complete_WFnet_R.pdf"), file_type = "pdf") rm(tmp) diff --git a/code/06_infos-items.py b/code/06_infos-items.py index a292353..4767d32 100644 --- a/code/06_infos-items.py +++ b/code/06_infos-items.py @@ -3,10 +3,10 @@ # content: (1) Load data and create event log # (2) Infos for items # -# input: results/eventlogs_pre-corona_cleaned.csv -# output: results/pn_infos_items.csv +# input: ../results/eventlogs_pre-corona_cleaned.csv +# output: ../results/pn_infos_items.csv # -# last mod: 2024-03-22 +# last mod: 2024-04-17 import pm4py import pandas as pd @@ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos #--------------- (1) Load data and create event logs --------------- -dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";") +dat = pd.read_csv("../results/eventlogs_pre-corona_cleaned.csv", sep = ";") log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", timestamp_key = "date.start") @@ -33,5 +33,5 @@ for item in log_path.item.unique().tolist(): eval = eval.sort_index() # Export -eval.to_csv("results/pn_infos_items.csv", sep = ";") +eval.to_csv("../results/pn_infos_items.csv", sep = ";") diff --git a/code/07_item-clustering.R b/code/07_item-clustering.R index f793145..2c6388f 100644 --- a/code/07_item-clustering.R +++ b/code/07_item-clustering.R @@ -10,13 +10,13 @@ # input: results/eventlogs_pre-corona_cleaned.RData # results/pn_infos_items.csv # output: results/eventlogs_pre-corona_item-clusters.csv -# ../../thesis/figures/data/clustering_items.RData" +# ../thesis/results/clustering_items.RData # # last mod: 2024-03-22 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") -source("R_helpers.R") +source("code/R_helpers.R") #--------------- (1) Read data --------------- @@ -164,5
+164,5 @@ write.table(res, # Save data for plots and tables save(hc, k, res, dist_mat, datitem, df, - file = "../../thesis/figures/data/clustering_items.RData") + file = "../thesis/results/clustering_items.RData") diff --git a/code/08_case-characteristics.R b/code/08_case-characteristics.R index 87b63e3..f7d1ac7 100644 --- a/code/08_case-characteristics.R +++ b/code/08_case-characteristics.R @@ -9,11 +9,11 @@ # output: results/dataframes_case_2019.RData # results/centrality_cases.RData # -# last mod: 2024-03-22 +# last mod: 2024-04-17 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") -source("R_helpers.R") +source("code/R_helpers.R") #--------------- (1) Read data --------------- diff --git a/code/10_validation.R b/code/10_validation.R index d4f6fbe..d60ca68 100644 --- a/code/10_validation.R +++ b/code/10_validation.R @@ -7,13 +7,14 @@ # (5) Fit tree # # input: results/event_logfiles_2024-02-21_16-07-33.csv -# output: -- +# output: ../thesis/results/clustering_cases_2018.RData +# ../thesis/results/clustering_cases.RData # -# last mod: 2024-03-22 +# last mod: 2024-04-17 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") -source("R_helpers.R") +source("code/R_helpers.R") #--------------- (1) Read data --------------- @@ -113,8 +114,8 @@ dattree18$AvDurItemNorm <- normalize(dattree18$AvDurItem) #--------------- (4) Clustering --------------- -df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves", "AvDurItemNorm", - "Pattern", "InfocardOnly")] +df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves", + "AvDurItemNorm", "Pattern", "InfocardOnly")] dist_mat18 <- cluster::daisy(df, metric = "gower") @@ -145,7 +146,7 @@ aggregate(. ~ cluster18, df, mean) aggregate(. 
~ cluster18, dattree18[, -1], mean) save(coor_2d_18, coor_3d_18, cluster18, dattree18, dist_mat18, hc18, - file = "../../thesis/figures/data/clustering_cases_2018.RData") + file = "../thesis/results/clustering_cases_2018.RData") #--------------- (5) Fit tree --------------- @@ -161,7 +162,7 @@ plot(partykit::as.party(c1), tp_args = list(fill = mycols, col = mycols)) ## Load data -load("../../thesis/figures/data/clustering_cases.RData") +load("../thesis/results/clustering_cases.RData") c19 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves", "PropItems", diff --git a/code/11_investigate-variants.R b/code/11_investigate-variants.R index 6b3a195..fe39b29 100644 --- a/code/11_investigate-variants.R +++ b/code/11_investigate-variants.R @@ -5,12 +5,16 @@ # (3) Investigate variants (2019) # # input: results/eventlogs_pre-corona_cleaned.RData -# output: ../../thesis/figures/freq-traces.pdf -# ../../thesis/figures/freq-traces_powerlaw.pdf +# output: ../thesis/figures/freq-traces.pdf +# ../thesis/figures/freq-traces_powerlaw.pdf +# ../thesis/figures/freq-traces_powerlaw_bw.pdf +# ../thesis/figures/freq-traces_2019.pdf +# ../thesis/figures/freq-traces_powerlaw_2019.pdf +# ../thesis/figures/freq-traces_powerlaw_2019_bw.pdf # -# last mod: 2024-03-30 +# last mod: 2024-04-17 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") library(bupaverse) @@ -29,7 +33,7 @@ alog <- activitylog(dat, resource_id = "path", timestamps = c("start", "complete")) -pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) +pdf("../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) trace_explorer(alog, n_traces = 25) # --> sequences of artworks are just too rare dev.off() @@ -56,7 +60,7 @@ y <- as.numeric(tab) p1 <- lm(log(y) ~ log(x)) pre <- exp(coef(p1)[1]) * x^coef(p1)[2] 
-pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375, +pdf("../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375, width = 3.375, pointsize = 10) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) @@ -69,7 +73,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ", dev.off() # Black and white -pdf("../../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375, +pdf("../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375, width = 3.375, pointsize = 10) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) @@ -99,7 +103,7 @@ alog <- activitylog(dat, resource_id = "path", timestamps = c("start", "complete")) -pdf("../../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10) +pdf("../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10) trace_explorer(alog, n_traces = 25) dev.off() @@ -125,7 +129,7 @@ y <- as.numeric(tab) p1 <- lm(log(y) ~ log(x)) pre <- exp(coef(p1)[1]) * x^coef(p1)[2] -pdf("../../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375, +pdf("../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375, width = 3.375, pointsize = 10) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) @@ -138,7 +142,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ", dev.off() # Black and white -pdf("../../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375, +pdf("../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375, width = 3.375, pointsize = 10) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) diff --git a/code/12_dfgs-case-clusters.R b/code/12_dfgs-case-clusters.R index 988070c..8234a4f 100644 --- a/code/12_dfgs-case-clusters.R +++ b/code/12_dfgs-case-clusters.R @@ -4,15 +4,16 @@ # (2) Export DFGs for clusters # # input: results/user-navigation.RData -# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf -# ../../thesis/figures/dfg_cases_cluster2_R.pdf -# ../../thesis/figures/dfg_cases_cluster3_R.pdf -# 
../../thesis/figures/dfg_cases_cluster4_R.pdf -# ../../thesis/figures/dfg_cases_cluster5_R.pdf +# output: ../thesis/figures/dfg_cases_cluster1_R.pdf +# ../thesis/figures/dfg_cases_cluster2_R.pdf +# ../thesis/figures/dfg_cases_cluster3_R.pdf +# ../thesis/figures/dfg_cases_cluster4_R.pdf +# ../thesis/figures/dfg_cases_cluster5_R.pdf +# ../thesis/results/dfgs_case-cluster.RData # -# last mod: 2024-03-30 +# last mod: 2024-04-17 -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/") #--------------- (1) Read data --------------- @@ -75,7 +76,7 @@ for (i in 1:5) { } processmapR::export_map(dfg, - file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"), + file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"), file_type = "pdf") } @@ -109,7 +110,7 @@ for (i in 1:5) { } processmapR::export_map(dfg, - file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"), + file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"), file_type = "pdf") } @@ -147,5 +148,5 @@ for (i in 1:5) { } save(ns, n_cases, tr_unique, perc_filter, - file = "../../thesis/figures/data/dfgs_case-cluster.RData") + file = "../thesis/results/dfgs_case-cluster.RData") diff --git a/code/13_pm-case-clusters.py b/code/13_pm-case-clusters.py index c19f2b7..cfaf70d 100644 --- a/code/13_pm-case-clusters.py +++ b/code/13_pm-case-clusters.py @@ -3,10 +3,10 @@ # content: (1) Load data and create event log # (2) Evaluation for clusters # -# input: results/eventlogs_2019_case-clusters.csv -# output: results/eval_case_clusters.csv +# input: ../results/eventlogs_2019_case-clusters.csv +# output: ../results/eval_case_clusters_.csv # -# last mod: 2024-04-04 +# last mod: 2024-04-17 import pm4py import pandas as pd @@ -15,7 +15,7 @@ from python_helpers import eval_pm, 
eval_append #--------------- (1) Load data and create event logs --------------- -dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";") +dat = pd.read_csv("../results/eventlogs_2019_case-clusters.csv", sep = ";") event_log = pm4py.format_dataframe(dat, case_id = "case", @@ -49,5 +49,5 @@ for nt in thresholds: # pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png") eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"] - eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";") + eval.to_csv("../results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")