Updated file structure: results and figures are now at the top level of the project; adjusted paths, but some might still be broken
parent d4f5aa427d
commit 7f0b0d44ad
@@ -4,16 +4,16 @@
 # (2) Create event logs
 # (3) Add meta data
 #
-# input: raw log files from ../data/haum/*.log
-# ../data/metadata/feiertage.csv
-# ../data/metadata/schulferien_2016-2018_NI.csv
-# ../data/metadata/schulferien_2019-2025_NI.csv
+# input: raw log files from data/haum/*.log
+# data/metadata/feiertage.csv
+# data/metadata/schulferien_2016-2018_NI.csv
+# data/metadata/schulferien_2019-2025_NI.csv
 # output: results/raw_logfiles_<timestamp>.csv
 # results/event_logfiles_<timestamp>.csv
 #
-# last mod: 2024-02-23, NW
+# last mod: 2024-04-17

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

 #library(mtt)
 devtools::load_all("../../../../../software/mtt")
@@ -22,7 +22,7 @@ now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")

 #--------------- (1) Parse raw log files ---------------

-path <- "../data/haum/LogFiles/"
+path <- "data/haum/LogFiles/"
 folders <- dir(path)
 #folders <- "2016"

@@ -73,7 +73,7 @@ datlogs$topic <- factor(datlogs$topic, levels = 0:7,

 ## Read data for holiday

-hd0 <- read.table("../data/metadata/feiertage.csv", sep = ";", header = TRUE)
+hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE)
 hd0$X.br. <- NULL

 hd <- hd0[hd0$Abkuerzung == "NI", ]
@@ -100,9 +100,9 @@ hd$stateCode <- NULL
 # write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
 # row.names = FALSE, quote = FALSE)

-sf1 <- read.table("../data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
+sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
 header = TRUE)
-sf2 <- read.table("../data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
+sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
 header = TRUE)
 sf <- rbind(sf1, sf2)
 sf$start <- as.Date(sf$start)

@@ -11,25 +11,25 @@
 #
 # input: results/event_logfiles_2024-02-21_16-07-33.csv
 # results/raw_logfiles_2024-02-21_16-07-33.csv
-# output: results/figures/counts_item_firsttouch.pdf
-# results/figures/duration.pdf
-# results/figures/heatmap_start.pdf
-# results/figures/heatmap_stop.pdf
-# results/figures/timeMs.pdf
-# results/figures/xycoord.pdf
-# results/figures/event-dist.pdf
-# results/figures/traceexplore_trace-event.pdf
-# results/figures/ra_trace-event.pdf
-# results/figures/traceexplore_case-event.pdf
-# results/figures/bp_tod.pdf
-# results/figures/bp_wd.pdf
-# results/figures/bp_wds.pdf
-# results/figures/bp_corona.pdf
-# results/figures/traceexplore_case-artwork_often080.pdf
+# output: figures/counts_item_firsttouch.pdf
+# figures/duration.pdf
+# figures/heatmap_start.pdf
+# figures/heatmap_stop.pdf
+# figures/timeMs.pdf
+# figures/xycoord.pdf
+# figures/event-dist.pdf
+# figures/traceexplore_trace-event.pdf
+# figures/ra_trace-event.pdf
+# figures/traceexplore_case-event.pdf
+# figures/bp_tod.pdf
+# figures/bp_wd.pdf
+# figures/bp_wds.pdf
+# figures/bp_corona.pdf
+# figures/traceexplore_case-artwork_often080.pdf
 #
-# last mod: 2024-03-28
+# last mod: 2024-04-17

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

 #--------------- (1) Read data ---------------

@@ -92,7 +92,7 @@ items <- unique(datlogs$item)
 #items <- items[!items %in% c("504", "505")]
 datart <- mtt::extract_artworks(items,
 paste0(items, ".xml"),
-"../data/haum/ContentEyevisit/eyevisit_cards_light/")
+"data/haum/ContentEyevisit/eyevisit_cards_light/")
 datart <- datart[order(datart$artwork), ]
 names(counts_item) <- datart$title

@@ -110,7 +110,7 @@ text(tmp, counts_case + 100, datart$item)

 counts <- rbind(counts_item, counts_case)

-pdf("results/figures/counts_item_firsttouch.pdf",
+pdf("figures/counts_item_firsttouch.pdf",
 width = 20, height = 10, pointsize = 10)
 par(mai = c(5, .6, .1, .1))

@@ -135,7 +135,7 @@ lattice::bwplot(duration / 1000 / 60 ~ event, datlogs)

 set.seed(1027)

-pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10)
+pdf("figures/duration.pdf", width = 5, height = 5, pointsize = 10)
 lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
 ylab = "Duration in min")
 dev.off()
@@ -206,7 +206,7 @@ tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
 colnames(tab.start) <- NULL
 rownames(tab.start) <- NULL

-pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
+pdf("figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
 heatmap(tab.start, Rowv = NA, Colv = NA)
 dev.off()

@@ -217,7 +217,7 @@ tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
 colnames(tab.stop) <- NULL
 rownames(tab.stop) <- NULL

-pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
+pdf("figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
 heatmap(tab.stop, Rowv = NA, Colv = NA)
 dev.off()

@@ -244,7 +244,7 @@ plot(case ~ date, datcase, type = "h", col = "#434F4F")

 ## weird behavior of timeMs

-pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
+pdf("figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
 #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
 #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
 lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
@@ -255,7 +255,7 @@ dev.off()

 set.seed(1522)

-pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
+pdf("figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
 par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
 #par(mfrow = c(1, 2))
 plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
@@ -310,7 +310,7 @@ proportions(tab, margin = "event")

 cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)]

-pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
+pdf("figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
 par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))

 barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"),
@@ -364,7 +364,7 @@ processmapR::process_map(alogf, # alog,

 alog_no_move <- alog[alog$event != "move", ]

-pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
+pdf("figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
 set.seed(1447)
 processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
 sample(unique(alog_no_move$path), 400),],
@@ -372,7 +372,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
 abbreviate = T)
 dev.off()

-pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
+pdf("figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)

 ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
 levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
@@ -442,7 +442,7 @@ processmapR::process_map(alog, processmapR::frequency("relative"))

 alog_no_move <- alog[alog$event != "move", ]

-pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
+pdf("figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
 set.seed(1050)
 processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
 sample(unique(alog_no_move$path), 300),],
@@ -481,7 +481,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",],
 processmapR::process_map(alog, processmapR::frequency("relative"))

 # Are the same artworks looked at?
-pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
+pdf("figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
 par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

 barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2],
@@ -511,7 +511,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",],
 processmapR::process_map(alog, processmapR::frequency("relative"))

 # Are the same artworks looked at?
-pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
+pdf("figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
 par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

 barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"),
@@ -542,7 +542,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),],
 processmapR::process_map(alog, processmapR::frequency("relative"))

 # Are the same artworks looked at?
-pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
+pdf("figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
 par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

 #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE,
@@ -573,7 +573,7 @@ alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),],
 processmapR::process_map(alog, processmapR::frequency("relative"))

 # Are the same artworks looked at?
-pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
+pdf("figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
 par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

 barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"),
@@ -618,7 +618,7 @@ alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ],
 processmapR::process_map(alog, processmapR::frequency("relative"))


-pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
+pdf("figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)

 processmapR::trace_explorer(alog,
 n_traces = 30, type = "frequent",

@@ -4,9 +4,9 @@
 # (2) Normative net
 #
 # input: --
-# output: results/normative_petrinet.pnml
-# results/processmaps/normative_petrinet.png
-# results/processmaps/normative_bpmn.png
+# output: ../results/normative_petrinet.pnml
+# ../results/processmaps/normative_petrinet.png
+# ../results/processmaps/normative_bpmn.png
 #
 # last mod: 2024-03-22

@@ -151,12 +151,12 @@ final_marking[sink] = 1

 pm4py.view_petri_net(net, initial_marking, final_marking)
 pm4py.write_pnml(net, initial_marking, final_marking,
-"results/normative_petrinet.pnml")
+"../results/normative_petrinet.pnml")

 pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking,
-"results/processmaps/normative_petrinet.png")
+"../results/processmaps/normative_petrinet.png")

 bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking)
 pm4py.view_bpmn(bpmn)

-pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png")
+pm4py.vis.save_vis_bpmn(bpmn, "../results/processmaps/normative_bpmn.png")

@@ -3,10 +3,10 @@
 # content: (1) Load data and create event log
 # (2) Check against normative Petri Net
 #
-# input: results/event_logfiles_2024-02-21_16-07-33.csv
-# results/normative_petrinet.pnml
-# output: results/eval_all-miners_complete.csv
-# results/eval_all-miners_clean.csv
+# input: ../results/event_logfiles_2024-02-21_16-07-33.csv
+# ../results/normative_petrinet.pnml
+# output: ../results/eval_all-miners_complete.csv
+# ../results/eval_all-miners_clean.csv
 # ../../thesis/figures/petrinet_normative.png
 # ../../thesis/figures/petrinet_heuristics_clean.png
 # ../../thesis/figures/petrinet_alpha_clean.png
@@ -18,7 +18,7 @@
 # ../../thesis/figures/bpmn_alpha_clean.png
 # ../../thesis/figures/bpmn_heuristics_clean.png
 #
-# last mod: 2024-03-22
+# last mod: 2024-04-17

 import pm4py
 import pandas as pd
@@ -28,7 +28,7 @@ from python_helpers import eval_pm, pn_infos_miner

 #--------------- (1) Load data and create event logs ---------------

-dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
+dat = pd.read_csv("../results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")

 event_log = pm4py.format_dataframe(dat, case_id = "path",
 activity_key = "event",
@@ -58,7 +58,7 @@ sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item:

 #--------------- (2) Check against normative Petri Net ---------------

-basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml")
+basenet, initial_marking, final_marking = pm4py.read_pnml("../results/normative_petrinet.pnml")

 # TBR
 replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking)
@@ -101,7 +101,7 @@ eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
 for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
 eval = pd.concat([eval, pn_infos_miner(event_log, miner)])

-eval.to_csv("results/eval_all-miners_complete.csv", sep = ";")
+eval.to_csv("../results/eval_all-miners_complete.csv", sep = ";")

 ## Without broken trace
 event_log_clean = event_log[event_log["@@case_index"] != index_broken[0]]
@@ -113,7 +113,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
 for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
 eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)])

-eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";")
+eval_clean.to_csv("../results/eval_all-miners_clean.csv", sep = ";")

 ## Directly-follows graph
 dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean)

@@ -9,11 +9,11 @@
 # results/raw_logfiles_2024-02-21_16-07-33.csv
 # output: results/eventlogs_pre-corona_cleaned.RData
 # results/eventlogs_pre-corona_cleaned.csv
-# ../../thesis/figures/dfg_complete_WFnet_R.pdf
+# ../thesis/figures/dfg_complete_WFnet_R.pdf
 #
 # last mod: 2024-03-23

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

 #--------------- (1) Look at broken trace ---------------

@@ -85,7 +85,7 @@ dfg <- processmapR::process_map(alog,
 render = FALSE)

 processmapR::export_map(dfg,
-file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"),
+file_name = paste0("../thesis/figures/dfg_complete_WFnet_R.pdf"),
 file_type = "pdf")

 rm(tmp)

@@ -3,10 +3,10 @@
 # content: (1) Load data and create event log
 # (2) Infos for items
 #
-# input: results/eventlogs_pre-corona_cleaned.csv
-# output: results/pn_infos_items.csv
+# input: ../results/eventlogs_pre-corona_cleaned.csv
+# output: ../results/pn_infos_items.csv
 #
-# last mod: 2024-03-22
+# last mod: 2024-04-17

 import pm4py
 import pandas as pd
@@ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos

 #--------------- (1) Load data and create event logs ---------------

-dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";")
+dat = pd.read_csv("../results/eventlogs_pre-corona_cleaned", sep = ";")

 log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
 timestamp_key = "date.start")
@@ -33,5 +33,5 @@ for item in log_path.item.unique().tolist():
 eval = eval.sort_index()

 # Export
-eval.to_csv("results/pn_infos_items.csv", sep = ";")
+eval.to_csv("../results/pn_infos_items.csv", sep = ";")

@@ -10,13 +10,13 @@
 # input: results/eventlogs_pre-corona_cleaned.RData
 # results/pn_infos_items.csv
 # output: results/eventlogs_pre-corona_item-clusters.csv
-# ../../thesis/figures/data/clustering_items.RData"
+# ../thesis/results/clustering_items.RData"
 #
 # last mod: 2024-03-22

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

-source("R_helpers.R")
+source("code/R_helpers.R")

 #--------------- (1) Read data ---------------

@@ -164,5 +164,5 @@ write.table(res,
 # Save data for plots and tables

 save(hc, k, res, dist_mat, datitem, df,
-file = "../../thesis/figures/data/clustering_items.RData")
+file = "../thesis/results/clustering_items.RData")

@@ -9,11 +9,11 @@
 # output: results/dataframes_case_2019.RData
 # results/centrality_cases.RData
 #
-# last mod: 2024-03-22
+# last mod: 2024-04-17

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

-source("R_helpers.R")
+source("code/R_helpers.R")

 #--------------- (1) Read data ---------------

@@ -7,13 +7,14 @@
 # (5) Fit tree
 #
 # input: results/event_logfiles_2024-02-21_16-07-33.csv
-# output: --
+# output: ../thesis/results/clustering_cases_2018.RData
+# ../thesis/results/clustering_cases.RData
 #
-# last mod: 2024-03-22
+# last mod: 2024-04-17

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

-source("R_helpers.R")
+source("code/R_helpers.R")

 #--------------- (1) Read data ---------------

@@ -113,8 +114,8 @@ dattree18$AvDurItemNorm <- normalize(dattree18$AvDurItem)

 #--------------- (4) Clustering ---------------

-df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves", "AvDurItemNorm",
-"Pattern", "InfocardOnly")]
+df <- dattree18[, c("PropItems", "SearchInfo", "PropMoves",
+"AvDurItemNorm", "Pattern", "InfocardOnly")]

 dist_mat18 <- cluster::daisy(df, metric = "gower")

@@ -145,7 +146,7 @@ aggregate(. ~ cluster18, df, mean)
 aggregate(. ~ cluster18, dattree18[, -1], mean)

 save(coor_2d_18, coor_3d_18, cluster18, dattree18, dist_mat18, hc18,
-file = "../../thesis/figures/data/clustering_cases_2018.RData")
+file = "../thesis/results/clustering_cases_2018.RData")

 #--------------- (5) Fit tree ---------------

@@ -161,7 +162,7 @@ plot(partykit::as.party(c1), tp_args = list(fill = mycols, col = mycols))


 ## Load data
-load("../../thesis/figures/data/clustering_cases.RData")
+load("../thesis/results/clustering_cases.RData")

 c19 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
 "PropItems",

@@ -5,12 +5,16 @@
 # (3) Investigate variants (2019)
 #
 # input: results/eventlogs_pre-corona_cleaned.RData
-# output: ../../thesis/figures/freq-traces.pdf
-# ../../thesis/figures/freq-traces_powerlaw.pdf
+# output: ../thesis/figures/freq-traces.pdf
+# ../thesis/figures/freq-traces_powerlaw.pdf
+# ../thesis/figures/freq-traces_powerlaw_bw.pdf
+# ../thesis/figures/freq-traces_2019.pdf
+# ../thesis/figures/freq-traces_powerlaw_2019.pdf
+# ../thesis/figures/freq-traces_powerlaw_2019_bw.pdf
 #
-# last mod: 2024-03-30
+# last mod: 2024-04-17

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

 library(bupaverse)

@@ -29,7 +33,7 @@ alog <- activitylog(dat,
 resource_id = "path",
 timestamps = c("start", "complete"))

-pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
+pdf("../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
 trace_explorer(alog, n_traces = 25)
 # --> sequences of artworks are just too rare
 dev.off()
@@ -56,7 +60,7 @@ y <- as.numeric(tab)
 p1 <- lm(log(y) ~ log(x))
 pre <- exp(coef(p1)[1]) * x^coef(p1)[2]

-pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
+pdf("../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
 width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

@@ -69,7 +73,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ",
 dev.off()

 # Black and white
-pdf("../../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375,
+pdf("../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375,
 width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

@@ -99,7 +103,7 @@ alog <- activitylog(dat,
 resource_id = "path",
 timestamps = c("start", "complete"))

-pdf("../../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10)
+pdf("../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10)
 trace_explorer(alog, n_traces = 25)
 dev.off()

@@ -125,7 +129,7 @@ y <- as.numeric(tab)
 p1 <- lm(log(y) ~ log(x))
 pre <- exp(coef(p1)[1]) * x^coef(p1)[2]

-pdf("../../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
+pdf("../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
 width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

@@ -138,7 +142,7 @@ legend("topright", paste0("Proportion of traces only occurring once: ",
 dev.off()

 # Black and white
-pdf("../../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375,
+pdf("../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375,
 width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

@@ -4,15 +4,16 @@
 # (2) Export DFGs for clusters
 #
 # input: results/user-navigation.RData
-# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf
-# ../../thesis/figures/dfg_cases_cluster2_R.pdf
-# ../../thesis/figures/dfg_cases_cluster3_R.pdf
-# ../../thesis/figures/dfg_cases_cluster4_R.pdf
-# ../../thesis/figures/dfg_cases_cluster5_R.pdf
+# output: ../thesis/figures/dfg_cases_cluster1_R.pdf
+# ../thesis/figures/dfg_cases_cluster2_R.pdf
+# ../thesis/figures/dfg_cases_cluster3_R.pdf
+# ../thesis/figures/dfg_cases_cluster4_R.pdf
+# ../thesis/figures/dfg_cases_cluster5_R.pdf
+# ../thesis/results/dfgs_case-cluster.RData
 #
-# last mod: 2024-03-30
+# last mod: 2024-04-17

-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/")

 #--------------- (1) Read data ---------------

@@ -75,7 +76,7 @@ for (i in 1:5) {
 }

 processmapR::export_map(dfg,
-file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"),
+file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R.pdf"),
 file_type = "pdf")
 }

@@ -109,7 +110,7 @@ for (i in 1:5) {
 }

 processmapR::export_map(dfg,
-file_name = paste0("../../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"),
+file_name = paste0("../thesis/figures/dfg_cases_cluster_", cl_names[i], "_R_bw.pdf"),
 file_type = "pdf")
 }

@@ -147,5 +148,5 @@ for (i in 1:5) {
 }

 save(ns, n_cases, tr_unique, perc_filter,
-file = "../../thesis/figures/data/dfgs_case-cluster.RData")
+file = "../thesis/results/dfgs_case-cluster.RData")

@@ -3,10 +3,10 @@
 # content: (1) Load data and create event log
 # (2) Evaluation for clusters
 #
-# input: results/eventlogs_2019_case-clusters.csv
-# output: results/eval_case_clusters.csv
+# input: ../results/eventlogs_2019_case-clusters.csv
+# output: ../results/eval_case_clusters_<threshold>.csv
 #
-# last mod: 2024-04-04
+# last mod: 2024-04-17

 import pm4py
 import pandas as pd
@@ -15,7 +15,7 @@ from python_helpers import eval_pm, eval_append

 #--------------- (1) Load data and create event logs ---------------

-dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";")
+dat = pd.read_csv("../results/eventlogs_2019_case-clusters.csv", sep = ";")

 event_log = pm4py.format_dataframe(dat,
 case_id = "case",
@@ -49,5 +49,5 @@ for nt in thresholds:
 # pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png")

 eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
-eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")
+eval.to_csv("../results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")