Updated file structure: results and figures are now at the top level of the project; paths were adjusted, but some might still be broken
This commit is contained in:
parent
d4f5aa427d
commit
7f0b0d44ad
@ -4,16 +4,16 @@
|
||||
# (2) Create event logs
|
||||
# (3) Add meta data
|
||||
#
|
||||
# input: raw log files from ../data/haum/*.log
|
||||
# ../data/metadata/feiertage.csv
|
||||
# ../data/metadata/schulferien_2016-2018_NI.csv
|
||||
# ../data/metadata/schulferien_2019-2025_NI.csv
|
||||
# input: raw log files from data/haum/*.log
|
||||
# data/metadata/feiertage.csv
|
||||
# data/metadata/schulferien_2016-2018_NI.csv
|
||||
# data/metadata/schulferien_2019-2025_NI.csv
|
||||
# output: results/raw_logfiles_<timestamp>.csv
|
||||
# results/event_logfiles_<timestamp>.csv
|
||||
#
|
||||
# last mod: 2024-02-23, NW
|
||||
# last mod: 2024-04-17
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
#library(mtt)
|
||||
devtools::load_all("../../../../../software/mtt")
|
||||
@ -22,7 +22,7 @@ now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
#--------------- (1) Parse raw log files ---------------
|
||||
|
||||
path <- "../data/haum/LogFiles/"
|
||||
path <- "data/haum/LogFiles/"
|
||||
folders <- dir(path)
|
||||
#folders <- "2016"
|
||||
|
||||
@ -73,7 +73,7 @@ datlogs$topic <- factor(datlogs$topic, levels = 0:7,
|
||||
|
||||
## Read data for holiday
|
||||
|
||||
hd0 <- read.table("../data/metadata/feiertage.csv", sep = ";", header = TRUE)
|
||||
hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE)
|
||||
hd0$X.br. <- NULL
|
||||
|
||||
hd <- hd0[hd0$Abkuerzung == "NI", ]
|
||||
@ -100,9 +100,9 @@ hd$stateCode <- NULL
|
||||
# write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
|
||||
# row.names = FALSE, quote = FALSE)
|
||||
|
||||
sf1 <- read.table("../data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
|
||||
sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
|
||||
header = TRUE)
|
||||
sf2 <- read.table("../data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
|
||||
sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
|
||||
header = TRUE)
|
||||
sf <- rbind(sf1, sf2)
|
||||
sf$start <- as.Date(sf$start)
|
||||
|
@ -11,25 +11,25 @@
|
||||
#
|
||||
# input: results/event_logfiles_2024-02-21_16-07-33.csv
|
||||
# results/raw_logfiles_2024-02-21_16-07-33.csv
|
||||
# output: results/figures/counts_item_firsttouch.pdf
|
||||
# results/figures/duration.pdf
|
||||
# results/figures/heatmap_start.pdf
|
||||
# results/figures/heatmap_stop.pdf
|
||||
# results/figures/timeMs.pdf
|
||||
# results/figures/xycoord.pdf
|
||||
# results/figures/event-dist.pdf
|
||||
# results/figures/traceexplore_trace-event.pdf
|
||||
# results/figures/ra_trace-event.pdf
|
||||
# results/figures/traceexplore_case-event.pdf
|
||||
# results/figures/bp_tod.pdf
|
||||
# results/figures/bp_wd.pdf
|
||||
# results/figures/bp_wds.pdf
|
||||
# results/figures/bp_corona.pdf
|
||||
# results/figures/traceexplore_case-artwork_often080.pdf
|
||||
# output: figures/counts_item_firsttouch.pdf
|
||||
# figures/duration.pdf
|
||||
# figures/heatmap_start.pdf
|
||||
# figures/heatmap_stop.pdf
|
||||
# figures/timeMs.pdf
|
||||
# figures/xycoord.pdf
|
||||
# figures/event-dist.pdf
|
||||
# figures/traceexplore_trace-event.pdf
|
||||
# figures/ra_trace-event.pdf
|
||||
# figures/traceexplore_case-event.pdf
|
||||
# figures/bp_tod.pdf
|
||||
# figures/bp_wd.pdf
|
||||
# figures/bp_wds.pdf
|
||||
# figures/bp_corona.pdf
|
||||
# figures/traceexplore_case-artwork_often080.pdf
|
||||
#
|
||||
# last mod: 2024-03-28
|
||||
# last mod: 2024-04-17
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
#--------------- (1) Read data ---------------
|
||||
|
||||
@ -92,7 +92,7 @@ items <- unique(datlogs$item)
|
||||
#items <- items[!items %in% c("504", "505")]
|
||||
datart <- mtt::extract_artworks(items,
|
||||
paste0(items, ".xml"),
|
||||
"../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
"data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
datart <- datart[order(datart$artwork), ]
|
||||
names(counts_item) <- datart$title
|
||||
|
||||
@ -110,7 +110,7 @@ text(tmp, counts_case + 100, datart$item)
|
||||
|
||||
counts <- rbind(counts_item, counts_case)
|
||||
|
||||
pdf("results/figures/counts_item_firsttouch.pdf",
|
||||
pdf("figures/counts_item_firsttouch.pdf",
|
||||
width = 20, height = 10, pointsize = 10)
|
||||
par(mai = c(5, .6, .1, .1))
|
||||
|
||||
@ -135,7 +135,7 @@ lattice::bwplot(duration / 1000 / 60 ~ event, datlogs)
|
||||
|
||||
set.seed(1027)
|
||||
|
||||
pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
||||
pdf("figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
||||
lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
|
||||
ylab = "Duration in min")
|
||||
dev.off()
|
||||
@ -206,7 +206,7 @@ tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
||||
colnames(tab.start) <- NULL
|
||||
rownames(tab.start) <- NULL
|
||||
|
||||
pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
|
||||
pdf("figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
|
||||
heatmap(tab.start, Rowv = NA, Colv = NA)
|
||||
dev.off()
|
||||
|
||||
@ -217,7 +217,7 @@ tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
||||
colnames(tab.stop) <- NULL
|
||||
rownames(tab.stop) <- NULL
|
||||
|
||||
pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
|
||||
pdf("figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
||||
dev.off()
|
||||
|
||||
@ -244,7 +244,7 @@ plot(case ~ date, datcase, type = "h", col = "#434F4F")
|
||||
|
||||
## weird behavior of timeMs
|
||||
|
||||
pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
|
||||
pdf("figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
|
||||
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
|
||||
lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
|
||||
@ -255,7 +255,7 @@ dev.off()
|
||||
|
||||
set.seed(1522)
|
||||
|
||||
pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
|
||||
pdf("figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
|
||||
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
#par(mfrow = c(1, 2))
|
||||
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
|
||||
@ -310,7 +310,7 @@ proportions(tab, margin = "event")
|
||||
|
||||
cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)]
|
||||
|
||||
pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
pdf("figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"),
|
||||
@ -364,7 +364,7 @@ processmapR::process_map(alogf, # alog,
|
||||
|
||||
alog_no_move <- alog[alog$event != "move", ]
|
||||
|
||||
pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
pdf("figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
set.seed(1447)
|
||||
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
|
||||
sample(unique(alog_no_move$path), 400),],
|
||||
@ -372,7 +372,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
|
||||
abbreviate = T)
|
||||
dev.off()
|
||||
|
||||
pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
pdf("figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
|
||||
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
|
||||
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
|
||||
@ -442,7 +442,7 @@ processmapR::process_map(alog, processmapR::frequency("relative"))
|
||||
|
||||
alog_no_move <- alog[alog$event != "move", ]
|
||||
|
||||
pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
pdf("figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
|
||||
set.seed(1050)
|
||||
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
|
||||
sample(unique(alog_no_move$path), 300),],
|
||||
@ -481,7 +481,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",],
|
||||
processmapR::process_map(alog, processmapR::frequency("relative"))
|
||||
|
||||
# Are the same artworks looked at?
|
||||
pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
pdf("figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2],
|
||||
@ -511,7 +511,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",],
|
||||
processmapR::process_map(alog, processmapR::frequency("relative"))
|
||||
|
||||
# Are the same artworks looked at?
|
||||
pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
pdf("figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"),
|
||||
@ -542,7 +542,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),],
|
||||
processmapR::process_map(alog, processmapR::frequency("relative"))
|
||||
|
||||
# Are the same artworks looked at?
|
||||
pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
pdf("figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
#barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE,
|
||||
@ -573,7 +573,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),],
|
||||
processmapR::process_map(alog, processmapR::frequency("relative"))
|
||||
|
||||
# Are the same artworks looked at?
|
||||
pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
pdf("figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
|
||||
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"),
|
||||
@ -618,7 +618,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ],
|
||||
processmapR::process_map(alog, processmapR::frequency("relative"))
|
||||
|
||||
|
||||
pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
|
||||
pdf("figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
|
||||
|
||||
processmapR::trace_explorer(alog,
|
||||
n_traces = 30, type = "frequent",
|
||||
|
@ -4,9 +4,9 @@
|
||||
# (2) Normative net
|
||||
#
|
||||
# input: --
|
||||
# output: results/normative_petrinet.pnml
|
||||
# results/processmaps/normative_petrinet.png
|
||||
# results/processmaps/normative_bpmn.png
|
||||
# output: ../results/normative_petrinet.pnml
|
||||
# ../results/processmaps/normative_petrinet.png
|
||||
# ../results/processmaps/normative_bpmn.png
|
||||
#
|
||||
# last mod: 2024-03-22
|
||||
|
||||
@ -151,12 +151,12 @@ final_marking[sink] = 1
|
||||
|
||||
pm4py.view_petri_net(net, initial_marking, final_marking)
|
||||
pm4py.write_pnml(net, initial_marking, final_marking,
|
||||
"results/normative_petrinet.pnml")
|
||||
"../results/normative_petrinet.pnml")
|
||||
|
||||
pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking,
|
||||
"results/processmaps/normative_petrinet.png")
|
||||
"../results/processmaps/normative_petrinet.png")
|
||||
|
||||
bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking)
|
||||
pm4py.view_bpmn(bpmn)
|
||||
|
||||
pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png")
|
||||
pm4py.vis.save_vis_bpmn(bpmn, "../results/processmaps/normative_bpmn.png")
|
||||
|
@ -3,10 +3,10 @@
|
||||
# content: (1) Load data and create event log
|
||||
# (2) Check against normative Petri Net
|
||||
#
|
||||
# input: results/event_logfiles_2024-02-21_16-07-33.csv
|
||||
# results/normative_petrinet.pnml
|
||||
# output: results/eval_all-miners_complete.csv
|
||||
# results/eval_all-miners_clean.csv
|
||||
# input: ../results/event_logfiles_2024-02-21_16-07-33.csv
|
||||
# ../results/normative_petrinet.pnml
|
||||
# output: ../results/eval_all-miners_complete.csv
|
||||
# ../results/eval_all-miners_clean.csv
|
||||
# ../../thesis/figures/petrinet_normative.png
|
||||
# ../../thesis/figures/petrinet_heuristics_clean.png
|
||||
# ../../thesis/figures/petrinet_alpha_clean.png
|
||||
@ -18,7 +18,7 @@
|
||||
# ../../thesis/figures/bpmn_alpha_clean.png
|
||||
# ../../thesis/figures/bpmn_heuristics_clean.png
|
||||
#
|
||||
# last mod: 2024-03-22
|
||||
# last mod: 2024-04-17
|
||||
|
||||
import pm4py
|
||||
import pandas as pd
|
||||
@ -28,7 +28,7 @@ from python_helpers import eval_pm, pn_infos_miner
|
||||
|
||||
#--------------- (1) Load data and create event logs ---------------
|
||||
|
||||
dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
|
||||
dat = pd.read_csv("../results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
|
||||
|
||||
event_log = pm4py.format_dataframe(dat, case_id = "path",
|
||||
activity_key = "event",
|
||||
@ -58,7 +58,7 @@ sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item:
|
||||
|
||||
#--------------- (2) Check against normative Petri Net ---------------
|
||||
|
||||
basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml")
|
||||
basenet, initial_marking, final_marking = pm4py.read_pnml("../results/normative_petrinet.pnml")
|
||||
|
||||
# TBR
|
||||
replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking)
|
||||
@ -101,7 +101,7 @@ eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
|
||||
for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
|
||||
eval = pd.concat([eval, pn_infos_miner(event_log, miner)])
|
||||
|
||||
eval.to_csv("results/eval_all-miners_complete.csv", sep = ";")
|
||||
eval.to_csv("../results/eval_all-miners_complete.csv", sep = ";")
|
||||
|
||||
## Without broken trace
|
||||
event_log_clean = event_log[event_log["@@case_index"] != index_broken[0]]
|
||||
@ -113,7 +113,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
|
||||
for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
|
||||
eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)])
|
||||
|
||||
eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";")
|
||||
eval_clean.to_csv("../results/eval_all-miners_clean.csv", sep = ";")
|
||||
|
||||
## Directly-follows graph
|
||||
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean)
|
||||
|
@ -9,11 +9,11 @@
|
||||
# results/raw_logfiles_2024-02-21_16-07-33.csv
|
||||
# output: results/eventlogs_pre-corona_cleaned.RData
|
||||
# results/eventlogs_pre-corona_cleaned.csv
|
||||
# ../../thesis/figures/dfg_complete_WFnet_R.pdf
|
||||
# ../thesis/figures/dfg_complete_WFnet_R.pdf
|
||||
#
|
||||
# last mod: 2024-03-23
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
#--------------- (1) Look at broken trace ---------------
|
||||
|
||||
@ -85,7 +85,7 @@ dfg <- processmapR::process_map(alog,
|
||||
render = FALSE)
|
||||
|
||||
processmapR::export_map(dfg,
|
||||
file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"),
|
||||
file_name = paste0("../thesis/figures/dfg_complete_WFnet_R.pdf"),
|
||||
file_type = "pdf")
|
||||
|
||||
rm(tmp)
|
||||
|
@ -3,10 +3,10 @@
|
||||
# content: (1) Load data and create event log
|
||||
# (2) Infos for items
|
||||
#
|
||||
# input: results/eventlogs_pre-corona_cleaned.csv
|
||||
# output: results/pn_infos_items.csv
|
||||
# input: ../results/eventlogs_pre-corona_cleaned.csv
|
||||
# output: ../results/pn_infos_items.csv
|
||||
#
|
||||
# last mod: 2024-03-22
|
||||
# last mod: 2024-04-17
|
||||
|
||||
import pm4py
|
||||
import pandas as pd
|
||||
@ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos
|
||||
|
||||
#--------------- (1) Load data and create event logs ---------------
|
||||
|
||||
dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";")
|
||||
dat = pd.read_csv("../results/eventlogs_pre-corona_cleaned", sep = ";")
|
||||
|
||||
log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
|
||||
timestamp_key = "date.start")
|
||||
@ -33,5 +33,5 @@ for item in log_path.item.unique().tolist():
|
||||
eval = eval.sort_index()
|
||||
|
||||
# Export
|
||||
eval.to_csv("results/pn_infos_items.csv", sep = ";")
|
||||
eval.to_csv("../results/pn_infos_items.csv", sep = ";")
|
||||
|
||||
|
@ -10,13 +10,13 @@
|
||||
# input: results/eventlogs_pre-corona_cleaned.RData
|
||||
# results/pn_infos_items.csv
|
||||
# output: results/eventlogs_pre-corona_item-clusters.csv
|
||||
# ../../thesis/figures/data/clustering_items.RData"
|
||||
# ../thesis/results/clustering_items.RData
|
||||
#
|
||||
# last mod: 2024-03-22
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
source("R_helpers.R")
|
||||
source("code/R_helpers.R")
|
||||
|
||||
#--------------- (1) Read data ---------------
|
||||
|
||||
@ -164,5 +164,5 @@ write.table(res,
|
||||
# Save data for plots and tables
|
||||
|
||||
save(hc, k, res, dist_mat, datitem, df,
|
||||
file = "../../thesis/figures/data/clustering_items.RData")
|
||||
file = "../thesis/results/clustering_items.RData")
|
||||
|
||||
|
@ -9,11 +9,11 @@
|
||||
# output: results/dataframes_case_2019.RData
|
||||
# results/centrality_cases.RData
|
||||
#
|
||||
# last mod: 2024-03-22
|
||||
# last mod: 2024-04-17
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
source("R_helpers.R")
|
||||
source("code/R_helpers.R")
|
||||
|
||||
#--------------- (1) Read data ---------------
|
||||
|
||||
|
@ -7,13 +7,14 @@
|
||||
# (5) Fit tree
|
||||
#
|
||||
# input: results/event_logfiles_2024-02-21_16-07-33.csv
|
||||
# output: --
|
||||
# output: ../thesis/results/clustering_cases_2018.RData
|
||||
# ../thesis/results/clustering_cases.RData
|
||||
#
|
||||
# last mod: 2024-03-22
|
||||
# last mod: 2024-04-17
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
source("R_helpers.R")
|
||||
source("code/R_helpers.R")
|
||||
|
||||
#--------------- (1) Read data ---------------
|
||||
|
||||
@ -113,8 +114,8 @@ dattree18$AvDurItemNorm <- normalize(dattree18$AvDurItem)
|
||||
|
||||
#--------------- (4) Clustering ---------------
|
||||
|
||||
df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves", "AvDurItemNorm",
|
||||
"Pattern", "InfocardOnly")]
|
||||
df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves",
|
||||
"AvDurItemNorm", "Pattern", "InfocardOnly")]
|
||||
|
||||
dist_mat18 <- cluster::daisy(df, metric = "gower")
|
||||
|
||||
@ -145,7 +146,7 @@ aggregate(. ~ cluster18, df, mean)
|
||||
aggregate(. ~ cluster18, dattree18[, -1], mean)
|
||||
|
||||
save(coor_2d_18, coor_3d_18, cluster18, dattree18, dist_mat18, hc18,
|
||||
file = "../../thesis/figures/data/clustering_cases_2018.RData")
|
||||
file = "../thesis/results/clustering_cases_2018.RData")
|
||||
|
||||
#--------------- (5) Fit tree ---------------
|
||||
|
||||
@ -161,7 +162,7 @@ plot(partykit::as.party(c1), tp_args = list(fill = mycols, col = mycols))
|
||||
|
||||
|
||||
## Load data
|
||||
load("../../thesis/figures/data/clustering_cases.RData")
|
||||
load("../thesis/results/clustering_cases.RData")
|
||||
|
||||
c19 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
||||
"PropItems",
|
||||
|
@ -5,12 +5,16 @@
|
||||
# (3) Investigate variants (2019)
|
||||
#
|
||||
# input: results/eventlogs_pre-corona_cleaned.RData
|
||||
# output: ../../thesis/figures/freq-traces.pdf
|
||||
# ../../thesis/figures/freq-traces_powerlaw.pdf
|
||||
# output: ../thesis/figures/freq-traces.pdf
|
||||
# ../thesis/figures/freq-traces_powerlaw.pdf
|
||||
# ../thesis/figures/freq-traces_powerlaw_bw.pdf
|
||||
# ../thesis/figures/freq-traces_2019.pdf
|
||||
# ../thesis/figures/freq-traces_powerlaw_2019.pdf
|
||||
# ../thesis/figures/freq-traces_powerlaw_2019_bw.pdf
|
||||
#
|
||||
# last mod: 2024-03-30
|
||||
# last mod: 2024-04-17
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
library(bupaverse)
|
||||
|
||||
@ -29,7 +33,7 @@ alog <- activitylog(dat,
|
||||
resource_id = "path",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
|
||||
pdf("../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
|
||||
trace_explorer(alog, n_traces = 25)
|
||||
# --> sequences of artworks are just too rare
|
||||
dev.off()
|
||||
@ -56,7 +60,7 @@ y <- as.numeric(tab)
|
||||
p1 <- lm(log(y) ~ log(x))
|
||||
pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
|
||||
|
||||
pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
|
||||
pdf("../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
|
||||
width = 3.375, pointsize = 10)
|
||||
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
@ -69,7 +73,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ",
|
||||
dev.off()
|
||||
|
||||
# Black and white
|
||||
pdf("../../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375,
|
||||
pdf("../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375,
|
||||
width = 3.375, pointsize = 10)
|
||||
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
@ -99,7 +103,7 @@ alog <- activitylog(dat,
|
||||
resource_id = "path",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
pdf("../../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10)
|
||||
pdf("../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10)
|
||||
trace_explorer(alog, n_traces = 25)
|
||||
dev.off()
|
||||
|
||||
@ -125,7 +129,7 @@ y <- as.numeric(tab)
|
||||
p1 <- lm(log(y) ~ log(x))
|
||||
pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
|
||||
|
||||
pdf("../../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
|
||||
pdf("../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
|
||||
width = 3.375, pointsize = 10)
|
||||
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
@ -138,7 +142,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ",
|
||||
dev.off()
|
||||
|
||||
# Black and white
|
||||
pdf("../../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375,
|
||||
pdf("../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375,
|
||||
width = 3.375, pointsize = 10)
|
||||
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||
|
||||
|
@ -4,15 +4,16 @@
|
||||
# (2) Export DFGs for clusters
|
||||
#
|
||||
# input: results/user-navigation.RData
|
||||
# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf
|
||||
# ../../thesis/figures/dfg_cases_cluster2_R.pdf
|
||||
# ../../thesis/figures/dfg_cases_cluster3_R.pdf
|
||||
# ../../thesis/figures/dfg_cases_cluster4_R.pdf
|
||||
# ../../thesis/figures/dfg_cases_cluster5_R.pdf
|
||||
# output: ../thesis/figures/dfg_cases_cluster1_R.pdf
|
||||
# ../thesis/figures/dfg_cases_cluster2_R.pdf
|
||||
# ../thesis/figures/dfg_cases_cluster3_R.pdf
|
||||
# ../thesis/figures/dfg_cases_cluster4_R.pdf
|
||||
# ../thesis/figures/dfg_cases_cluster5_R.pdf
|
||||
# ../thesis/results/dfgs_case-cluster.RData
|
||||
#
|
||||
# last mod: 2024-03-30
|
||||
# last mod: 2024-04-17
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")
|
||||
|
||||
#--------------- (1) Read data ---------------
|
||||
|
||||
@ -75,7 +76,7 @@ for (i in 1:5) {
|
||||
}
|
||||
|
||||
processmapR::export_map(dfg,
|
||||
file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"),
|
||||
file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"),
|
||||
file_type = "pdf")
|
||||
}
|
||||
|
||||
@ -109,7 +110,7 @@ for (i in 1:5) {
|
||||
}
|
||||
|
||||
processmapR::export_map(dfg,
|
||||
file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"),
|
||||
file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"),
|
||||
file_type = "pdf")
|
||||
}
|
||||
|
||||
@ -147,5 +148,5 @@ for (i in 1:5) {
|
||||
}
|
||||
|
||||
save(ns, n_cases, tr_unique, perc_filter,
|
||||
file = "../../thesis/figures/data/dfgs_case-cluster.RData")
|
||||
file = "../thesis/results/dfgs_case-cluster.RData")
|
||||
|
||||
|
@ -3,10 +3,10 @@
|
||||
# content: (1) Load data and create event log
|
||||
# (2) Evaluation for clusters
|
||||
#
|
||||
# input: results/eventlogs_2019_case-clusters.csv
|
||||
# output: results/eval_case_clusters.csv
|
||||
# input: ../results/eventlogs_2019_case-clusters.csv
|
||||
# output: ../results/eval_case_clusters_<threshold>.csv
|
||||
#
|
||||
# last mod: 2024-04-04
|
||||
# last mod: 2024-04-17
|
||||
|
||||
import pm4py
|
||||
import pandas as pd
|
||||
@ -15,7 +15,7 @@ from python_helpers import eval_pm, eval_append
|
||||
|
||||
#--------------- (1) Load data and create event logs ---------------
|
||||
|
||||
dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";")
|
||||
dat = pd.read_csv("../results/eventlogs_2019_case-clusters.csv", sep = ";")
|
||||
|
||||
event_log = pm4py.format_dataframe(dat,
|
||||
case_id = "case",
|
||||
@ -49,5 +49,5 @@ for nt in thresholds:
|
||||
# pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png")
|
||||
|
||||
eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
|
||||
eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")
|
||||
eval.to_csv("../results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user