Updated file structure: results and figures are now at the top level of the project; adjusted paths, but some might still be broken

This commit is contained in:
Nora Wickelmaier 2024-04-17 14:25:04 +02:00
parent d4f5aa427d
commit 7f0b0d44ad
12 changed files with 112 additions and 106 deletions

View File

@ -4,16 +4,16 @@
# (2) Create event logs # (2) Create event logs
# (3) Add meta data # (3) Add meta data
# #
# input: raw log files from ../data/haum/*.log # input: raw log files from data/haum/*.log
# ../data/metadata/feiertage.csv # data/metadata/feiertage.csv
# ../data/metadata/schulferien_2016-2018_NI.csv # data/metadata/schulferien_2016-2018_NI.csv
# ../data/metadata/schulferien_2019-2025_NI.csv # data/metadata/schulferien_2019-2025_NI.csv
# output: results/raw_logfiles_<timestamp>.csv # output: results/raw_logfiles_<timestamp>.csv
# results/event_logfiles_<timestamp>.csv # results/event_logfiles_<timestamp>.csv
# #
# last mod: 2024-02-23, NW # last mod: 2024-04-17
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
#library(mtt) #library(mtt)
devtools::load_all("../../../../../software/mtt") devtools::load_all("../../../../../software/mtt")
@ -22,7 +22,7 @@ now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
#--------------- (1) Parse raw log files --------------- #--------------- (1) Parse raw log files ---------------
path <- "../data/haum/LogFiles/" path <- "data/haum/LogFiles/"
folders <- dir(path) folders <- dir(path)
#folders <- "2016" #folders <- "2016"
@ -73,7 +73,7 @@ datlogs$topic <- factor(datlogs$topic, levels = 0:7,
## Read data for holiday ## Read data for holiday
hd0 <- read.table("../data/metadata/feiertage.csv", sep = ";", header = TRUE) hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE)
hd0$X.br. <- NULL hd0$X.br. <- NULL
hd <- hd0[hd0$Abkuerzung == "NI", ] hd <- hd0[hd0$Abkuerzung == "NI", ]
@ -100,9 +100,9 @@ hd$stateCode <- NULL
# write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";", # write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
# row.names = FALSE, quote = FALSE) # row.names = FALSE, quote = FALSE)
sf1 <- read.table("../data/metadata/schulferien_2016-2018_NI.csv", sep = ";", sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
header = TRUE) header = TRUE)
sf2 <- read.table("../data/metadata/schulferien_2019-2025_NI.csv", sep = ";", sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
header = TRUE) header = TRUE)
sf <- rbind(sf1, sf2) sf <- rbind(sf1, sf2)
sf$start <- as.Date(sf$start) sf$start <- as.Date(sf$start)

View File

@ -11,25 +11,25 @@
# #
# input: results/event_logfiles_2024-02-21_16-07-33.csv # input: results/event_logfiles_2024-02-21_16-07-33.csv
# results/raw_logfiles_2024-02-21_16-07-33.csv # results/raw_logfiles_2024-02-21_16-07-33.csv
# output: results/figures/counts_item_firsttouch.pdf # output: figures/counts_item_firsttouch.pdf
# results/figures/duration.pdf # figures/duration.pdf
# results/figures/heatmap_start.pdf # figures/heatmap_start.pdf
# results/figures/heatmap_stop.pdf # figures/heatmap_stop.pdf
# results/figures/timeMs.pdf # figures/timeMs.pdf
# results/figures/xycoord.pdf # figures/xycoord.pdf
# results/figures/event-dist.pdf # figures/event-dist.pdf
# results/figures/traceexplore_trace-event.pdf # figures/traceexplore_trace-event.pdf
# results/figures/ra_trace-event.pdf # figures/ra_trace-event.pdf
# results/figures/traceexplore_case-event.pdf # figures/traceexplore_case-event.pdf
# results/figures/bp_tod.pdf # figures/bp_tod.pdf
# results/figures/bp_wd.pdf # figures/bp_wd.pdf
# results/figures/bp_wds.pdf # figures/bp_wds.pdf
# results/figures/bp_corona.pdf # figures/bp_corona.pdf
# results/figures/traceexplore_case-artwork_often080.pdf # figures/traceexplore_case-artwork_often080.pdf
# #
# last mod: 2024-03-28 # last mod: 2024-04-17
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
#--------------- (1) Read data --------------- #--------------- (1) Read data ---------------
@ -92,7 +92,7 @@ items <- unique(datlogs$item)
#items <- items[!items %in% c("504", "505")] #items <- items[!items %in% c("504", "505")]
datart <- mtt::extract_artworks(items, datart <- mtt::extract_artworks(items,
paste0(items, ".xml"), paste0(items, ".xml"),
"../data/haum/ContentEyevisit/eyevisit_cards_light/") "data/haum/ContentEyevisit/eyevisit_cards_light/")
datart <- datart[order(datart$artwork), ] datart <- datart[order(datart$artwork), ]
names(counts_item) <- datart$title names(counts_item) <- datart$title
@ -110,7 +110,7 @@ text(tmp, counts_case + 100, datart$item)
counts <- rbind(counts_item, counts_case) counts <- rbind(counts_item, counts_case)
pdf("results/figures/counts_item_firsttouch.pdf", pdf("figures/counts_item_firsttouch.pdf",
width = 20, height = 10, pointsize = 10) width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1)) par(mai = c(5, .6, .1, .1))
@ -135,7 +135,7 @@ lattice::bwplot(duration / 1000 / 60 ~ event, datlogs)
set.seed(1027) set.seed(1027)
pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) pdf("figures/duration.pdf", width = 5, height = 5, pointsize = 10)
lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
ylab = "Duration in min") ylab = "Duration in min")
dev.off() dev.off()
@ -206,7 +206,7 @@ tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- NULL colnames(tab.start) <- NULL
rownames(tab.start) <- NULL rownames(tab.start) <- NULL
pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) pdf("figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.start, Rowv = NA, Colv = NA) heatmap(tab.start, Rowv = NA, Colv = NA)
dev.off() dev.off()
@ -217,7 +217,7 @@ tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- NULL colnames(tab.stop) <- NULL
rownames(tab.stop) <- NULL rownames(tab.stop) <- NULL
pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) pdf("figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.stop, Rowv = NA, Colv = NA) heatmap(tab.stop, Rowv = NA, Colv = NA)
dev.off() dev.off()
@ -244,7 +244,7 @@ plot(case ~ date, datcase, type = "h", col = "#434F4F")
## weird behavior of timeMs ## weird behavior of timeMs
pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) pdf("figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
@ -255,7 +255,7 @@ dev.off()
set.seed(1522) set.seed(1522)
pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) pdf("figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#par(mfrow = c(1, 2)) #par(mfrow = c(1, 2))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
@ -310,7 +310,7 @@ proportions(tab, margin = "event")
cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)]
pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) pdf("figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))
barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"),
@ -364,7 +364,7 @@ processmapR::process_map(alogf, # alog,
alog_no_move <- alog[alog$event != "move", ] alog_no_move <- alog[alog$event != "move", ]
pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) pdf("figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1447) set.seed(1447)
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
sample(unique(alog_no_move$path), 400),], sample(unique(alog_no_move$path), 400),],
@ -372,7 +372,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
abbreviate = T) abbreviate = T)
dev.off() dev.off()
pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) pdf("figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
@ -442,7 +442,7 @@ processmapR::process_map(alog, processmapR::frequency("relative"))
alog_no_move <- alog[alog$event != "move", ] alog_no_move <- alog[alog$event != "move", ]
pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) pdf("figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1050) set.seed(1050)
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
sample(unique(alog_no_move$path), 300),], sample(unique(alog_no_move$path), 300),],
@ -481,7 +481,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",],
processmapR::process_map(alog, processmapR::frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) pdf("figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2],
@ -511,7 +511,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",],
processmapR::process_map(alog, processmapR::frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) pdf("figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"),
@ -542,7 +542,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),],
processmapR::process_map(alog, processmapR::frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) pdf("figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
#barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE,
@ -573,7 +573,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),],
processmapR::process_map(alog, processmapR::frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) pdf("figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"),
@ -618,7 +618,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ],
processmapR::process_map(alog, processmapR::frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) pdf("figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
processmapR::trace_explorer(alog, processmapR::trace_explorer(alog,
n_traces = 30, type = "frequent", n_traces = 30, type = "frequent",

View File

@ -4,9 +4,9 @@
# (2) Normative net # (2) Normative net
# #
# input: -- # input: --
# output: results/normative_petrinet.pnml # output: ../results/normative_petrinet.pnml
# results/processmaps/normative_petrinet.png # ../results/processmaps/normative_petrinet.png
# results/processmaps/normative_bpmn.png # ../results/processmaps/normative_bpmn.png
# #
# last mod: 2024-03-22 # last mod: 2024-03-22
@ -151,12 +151,12 @@ final_marking[sink] = 1
pm4py.view_petri_net(net, initial_marking, final_marking) pm4py.view_petri_net(net, initial_marking, final_marking)
pm4py.write_pnml(net, initial_marking, final_marking, pm4py.write_pnml(net, initial_marking, final_marking,
"results/normative_petrinet.pnml") "../results/normative_petrinet.pnml")
pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking, pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking,
"results/processmaps/normative_petrinet.png") "../results/processmaps/normative_petrinet.png")
bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking) bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking)
pm4py.view_bpmn(bpmn) pm4py.view_bpmn(bpmn)
pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png") pm4py.vis.save_vis_bpmn(bpmn, "../results/processmaps/normative_bpmn.png")

View File

@ -3,10 +3,10 @@
# content: (1) Load data and create event log # content: (1) Load data and create event log
# (2) Check against normative Petri Net # (2) Check against normative Petri Net
# #
# input: results/event_logfiles_2024-02-21_16-07-33.csv # input: ../results/event_logfiles_2024-02-21_16-07-33.csv
# results/normative_petrinet.pnml # ../results/normative_petrinet.pnml
# output: results/eval_all-miners_complete.csv # output: ../results/eval_all-miners_complete.csv
# results/eval_all-miners_clean.csv # ../results/eval_all-miners_clean.csv
# ../../thesis/figures/petrinet_normative.png # ../../thesis/figures/petrinet_normative.png
# ../../thesis/figures/petrinet_heuristics_clean.png # ../../thesis/figures/petrinet_heuristics_clean.png
# ../../thesis/figures/petrinet_alpha_clean.png # ../../thesis/figures/petrinet_alpha_clean.png
@ -18,7 +18,7 @@
# ../../thesis/figures/bpmn_alpha_clean.png # ../../thesis/figures/bpmn_alpha_clean.png
# ../../thesis/figures/bpmn_heuristics_clean.png # ../../thesis/figures/bpmn_heuristics_clean.png
# #
# last mod: 2024-03-22 # last mod: 2024-04-17
import pm4py import pm4py
import pandas as pd import pandas as pd
@ -28,7 +28,7 @@ from python_helpers import eval_pm, pn_infos_miner
#--------------- (1) Load data and create event logs --------------- #--------------- (1) Load data and create event logs ---------------
dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") dat = pd.read_csv("../results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
event_log = pm4py.format_dataframe(dat, case_id = "path", event_log = pm4py.format_dataframe(dat, case_id = "path",
activity_key = "event", activity_key = "event",
@ -58,7 +58,7 @@ sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item:
#--------------- (2) Check against normative Petri Net --------------- #--------------- (2) Check against normative Petri Net ---------------
basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml") basenet, initial_marking, final_marking = pm4py.read_pnml("../results/normative_petrinet.pnml")
# TBR # TBR
replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking)
@ -101,7 +101,7 @@ eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) eval = pd.concat([eval, pn_infos_miner(event_log, miner)])
eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") eval.to_csv("../results/eval_all-miners_complete.csv", sep = ";")
## Without broken trace ## Without broken trace
event_log_clean = event_log[event_log["@@case_index"] != index_broken[0]] event_log_clean = event_log[event_log["@@case_index"] != index_broken[0]]
@ -113,7 +113,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)])
eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") eval_clean.to_csv("../results/eval_all-miners_clean.csv", sep = ";")
## Directly-follows graph ## Directly-follows graph
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean)

View File

@ -9,11 +9,11 @@
# results/raw_logfiles_2024-02-21_16-07-33.csv # results/raw_logfiles_2024-02-21_16-07-33.csv
# output: results/eventlogs_pre-corona_cleaned.RData # output: results/eventlogs_pre-corona_cleaned.RData
# results/eventlogs_pre-corona_cleaned.csv # results/eventlogs_pre-corona_cleaned.csv
# ../../thesis/figures/dfg_complete_WFnet_R.pdf # ../thesis/figures/dfg_complete_WFnet_R.pdf
# #
# last mod: 2024-03-23 # last mod: 2024-03-23
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
#--------------- (1) Look at broken trace --------------- #--------------- (1) Look at broken trace ---------------
@ -85,7 +85,7 @@ dfg <- processmapR::process_map(alog,
render = FALSE) render = FALSE)
processmapR::export_map(dfg, processmapR::export_map(dfg,
file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"), file_name = paste0("../thesis/figures/dfg_complete_WFnet_R.pdf"),
file_type = "pdf") file_type = "pdf")
rm(tmp) rm(tmp)

View File

@ -3,10 +3,10 @@
# content: (1) Load data and create event log # content: (1) Load data and create event log
# (2) Infos for items # (2) Infos for items
# #
# input: results/eventlogs_pre-corona_cleaned.csv # input: ../results/eventlogs_pre-corona_cleaned.csv
# output: results/pn_infos_items.csv # output: ../results/pn_infos_items.csv
# #
# last mod: 2024-03-22 # last mod: 2024-04-17
import pm4py import pm4py
import pandas as pd import pandas as pd
@ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos
#--------------- (1) Load data and create event logs --------------- #--------------- (1) Load data and create event logs ---------------
dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";") dat = pd.read_csv("../results/eventlogs_pre-corona_cleaned", sep = ";")
log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
timestamp_key = "date.start") timestamp_key = "date.start")
@ -33,5 +33,5 @@ for item in log_path.item.unique().tolist():
eval = eval.sort_index() eval = eval.sort_index()
# Export # Export
eval.to_csv("results/pn_infos_items.csv", sep = ";") eval.to_csv("../results/pn_infos_items.csv", sep = ";")

View File

@ -10,13 +10,13 @@
# input: results/eventlogs_pre-corona_cleaned.RData # input: results/eventlogs_pre-corona_cleaned.RData
# results/pn_infos_items.csv # results/pn_infos_items.csv
# output: results/eventlogs_pre-corona_item-clusters.csv # output: results/eventlogs_pre-corona_item-clusters.csv
# ../../thesis/figures/data/clustering_items.RData" # ../thesis/results/clustering_items.RData"
# #
# last mod: 2024-03-22 # last mod: 2024-03-22
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
source("R_helpers.R") source("code/R_helpers.R")
#--------------- (1) Read data --------------- #--------------- (1) Read data ---------------
@ -164,5 +164,5 @@ write.table(res,
# Save data for plots and tables # Save data for plots and tables
save(hc, k, res, dist_mat, datitem, df, save(hc, k, res, dist_mat, datitem, df,
file = "../../thesis/figures/data/clustering_items.RData") file = "../thesis/results/clustering_items.RData")

View File

@ -9,11 +9,11 @@
# output: results/dataframes_case_2019.RData # output: results/dataframes_case_2019.RData
# results/centrality_cases.RData # results/centrality_cases.RData
# #
# last mod: 2024-03-22 # last mod: 2024-04-17
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
source("R_helpers.R") source("code/R_helpers.R")
#--------------- (1) Read data --------------- #--------------- (1) Read data ---------------

View File

@ -7,13 +7,14 @@
# (5) Fit tree # (5) Fit tree
# #
# input: results/event_logfiles_2024-02-21_16-07-33.csv # input: results/event_logfiles_2024-02-21_16-07-33.csv
# output: -- # output: ../thesis/results/clustering_cases_2018.RData
# ../thesis/results/clustering_cases.RData
# #
# last mod: 2024-03-22 # last mod: 2024-04-17
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
source("R_helpers.R") source("code/R_helpers.R")
#--------------- (1) Read data --------------- #--------------- (1) Read data ---------------
@ -113,8 +114,8 @@ dattree18$AvDurItemNorm <- normalize(dattree18$AvDurItem)
#--------------- (4) Clustering --------------- #--------------- (4) Clustering ---------------
df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves", "AvDurItemNorm", df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves",
"Pattern", "InfocardOnly")] "AvDurItemNorm", "Pattern", "InfocardOnly")]
dist_mat18 <- cluster::daisy(df, metric = "gower") dist_mat18 <- cluster::daisy(df, metric = "gower")
@ -145,7 +146,7 @@ aggregate(. ~ cluster18, df, mean)
aggregate(. ~ cluster18, dattree18[, -1], mean) aggregate(. ~ cluster18, dattree18[, -1], mean)
save(coor_2d_18, coor_3d_18, cluster18, dattree18, dist_mat18, hc18, save(coor_2d_18, coor_3d_18, cluster18, dattree18, dist_mat18, hc18,
file = "../../thesis/figures/data/clustering_cases_2018.RData") file = "../thesis/results/clustering_cases_2018.RData")
#--------------- (5) Fit tree --------------- #--------------- (5) Fit tree ---------------
@ -161,7 +162,7 @@ plot(partykit::as.party(c1), tp_args = list(fill = mycols, col = mycols))
## Load data ## Load data
load("../../thesis/figures/data/clustering_cases.RData") load("../thesis/results/clustering_cases.RData")
c19 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves", c19 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
"PropItems", "PropItems",

View File

@ -5,12 +5,16 @@
# (3) Investigate variants (2019) # (3) Investigate variants (2019)
# #
# input: results/eventlogs_pre-corona_cleaned.RData # input: results/eventlogs_pre-corona_cleaned.RData
# output: ../../thesis/figures/freq-traces.pdf # output: ../thesis/figures/freq-traces.pdf
# ../../thesis/figures/freq-traces_powerlaw.pdf # ../thesis/figures/freq-traces_powerlaw.pdf
# ../thesis/figures/freq-traces_powerlaw_bw.pdf
# ../thesis/figures/freq-traces_2019.pdf
# ../thesis/figures/freq-traces_powerlaw_2019.pdf
# ../thesis/figures/freq-traces_powerlaw_2019_bw.pdf
# #
# last mod: 2024-03-30 # last mod: 2024-04-17
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
library(bupaverse) library(bupaverse)
@ -29,7 +33,7 @@ alog <- activitylog(dat,
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) pdf("../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
trace_explorer(alog, n_traces = 25) trace_explorer(alog, n_traces = 25)
# --> sequences of artworks are just too rare # --> sequences of artworks are just too rare
dev.off() dev.off()
@ -56,7 +60,7 @@ y <- as.numeric(tab)
p1 <- lm(log(y) ~ log(x)) p1 <- lm(log(y) ~ log(x))
pre <- exp(coef(p1)[1]) * x^coef(p1)[2] pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375, pdf("../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
width = 3.375, pointsize = 10) width = 3.375, pointsize = 10)
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
@ -69,7 +73,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ",
dev.off() dev.off()
# Black and white # Black and white
pdf("../../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375, pdf("../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375,
width = 3.375, pointsize = 10) width = 3.375, pointsize = 10)
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
@ -99,7 +103,7 @@ alog <- activitylog(dat,
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
pdf("../../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10) pdf("../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10)
trace_explorer(alog, n_traces = 25) trace_explorer(alog, n_traces = 25)
dev.off() dev.off()
@ -125,7 +129,7 @@ y <- as.numeric(tab)
p1 <- lm(log(y) ~ log(x)) p1 <- lm(log(y) ~ log(x))
pre <- exp(coef(p1)[1]) * x^coef(p1)[2] pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
pdf("../../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375, pdf("../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
width = 3.375, pointsize = 10) width = 3.375, pointsize = 10)
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
@ -138,7 +142,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ",
dev.off() dev.off()
# Black and white # Black and white
pdf("../../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375, pdf("../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375,
width = 3.375, pointsize = 10) width = 3.375, pointsize = 10)
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

View File

@ -4,15 +4,16 @@
# (2) Export DFGs for clusters # (2) Export DFGs for clusters
# #
# input: results/user-navigation.RData # input: results/user-navigation.RData
# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf # output: ../thesis/figures/dfg_cases_cluster1_R.pdf
# ../../thesis/figures/dfg_cases_cluster2_R.pdf # ../thesis/figures/dfg_cases_cluster2_R.pdf
# ../../thesis/figures/dfg_cases_cluster3_R.pdf # ../thesis/figures/dfg_cases_cluster3_R.pdf
# ../../thesis/figures/dfg_cases_cluster4_R.pdf # ../thesis/figures/dfg_cases_cluster4_R.pdf
# ../../thesis/figures/dfg_cases_cluster5_R.pdf # ../thesis/figures/dfg_cases_cluster5_R.pdf
# ../thesis/results/dfgs_case-cluster.RData
# #
# last mod: 2024-03-30 # last mod: 2024-04-17
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
#--------------- (1) Read data --------------- #--------------- (1) Read data ---------------
@ -75,7 +76,7 @@ for (i in 1:5) {
} }
processmapR::export_map(dfg, processmapR::export_map(dfg,
file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"), file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"),
file_type = "pdf") file_type = "pdf")
} }
@ -109,7 +110,7 @@ for (i in 1:5) {
} }
processmapR::export_map(dfg, processmapR::export_map(dfg,
file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"), file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"),
file_type = "pdf") file_type = "pdf")
} }
@ -147,5 +148,5 @@ for (i in 1:5) {
} }
save(ns, n_cases, tr_unique, perc_filter, save(ns, n_cases, tr_unique, perc_filter,
file = "../../thesis/figures/data/dfgs_case-cluster.RData") file = "../thesis/results/dfgs_case-cluster.RData")

View File

@ -3,10 +3,10 @@
# content: (1) Load data and create event log # content: (1) Load data and create event log
# (2) Evaluation for clusters # (2) Evaluation for clusters
# #
# input: results/eventlogs_2019_case-clusters.csv # input: ../results/eventlogs_2019_case-clusters.csv
# output: results/eval_case_clusters.csv # output: ../results/eval_case_clusters_<threshold>.csv
# #
# last mod: 2024-04-04 # last mod: 2024-04-17
import pm4py import pm4py
import pandas as pd import pandas as pd
@ -15,7 +15,7 @@ from python_helpers import eval_pm, eval_append
#--------------- (1) Load data and create event logs --------------- #--------------- (1) Load data and create event logs ---------------
dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";") dat = pd.read_csv("../results/eventlogs_2019_case-clusters.csv", sep = ";")
event_log = pm4py.format_dataframe(dat, event_log = pm4py.format_dataframe(dat,
case_id = "case", case_id = "case",
@ -49,5 +49,5 @@ for nt in thresholds:
# pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png") # pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png")
eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"] eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";") eval.to_csv("../results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")