Added trace analysis for 2019 data
This commit is contained in:
parent
198a22f4d8
commit
48a4bcbcdb
@ -1,13 +1,14 @@
|
|||||||
# 11_investigate-variants.R
|
# 11_investigate-variants.R
|
||||||
#
|
#
|
||||||
# content: (1) Read data
|
# content: (1) Read data
|
||||||
# (2) Investigate variants
|
# (2) Investigate variants (pre-corona data set)
|
||||||
|
# (3) Investigate variants (2019)
|
||||||
#
|
#
|
||||||
# input: results/eventlogs_pre-corona_cleaned.RData
|
# input: results/eventlogs_pre-corona_cleaned.RData, results/dataframes_case_2019.RData
|
||||||
# output: ../../thesis/figures/freq-traces.pdf
|
# output: ../../thesis/figures/freq-traces.pdf, ../../thesis/figures/freq-traces_2019.pdf
|
||||||
# ../../thesis/figures/freq-traces_powerlaw.pdf
|
# ../../thesis/figures/freq-traces_powerlaw.pdf, ../../thesis/figures/freq-traces_powerlaw_2019.pdf
|
||||||
#
|
#
|
||||||
# last mod: 2024-03-22
|
# last mod: 2024-03-26
|
||||||
|
|
||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||||
|
|
||||||
@ -17,7 +18,7 @@ library(bupaverse)
|
|||||||
|
|
||||||
load("results/eventlogs_pre-corona_cleaned.RData")
|
load("results/eventlogs_pre-corona_cleaned.RData")
|
||||||
|
|
||||||
#--------------- (2) Investigate variants ---------------
|
#--------------- (2) Investigate variants (pre-corona data set) ---------------
|
||||||
|
|
||||||
dat$start <- dat$date.start
|
dat$start <- dat$date.start
|
||||||
dat$complete <- dat$date.stop
|
dat$complete <- dat$date.stop
|
||||||
@ -71,3 +72,58 @@ dev.off()
|
|||||||
tr[trace_varied == 5 & trace_length > 50, ]
|
tr[trace_varied == 5 & trace_length > 50, ]
|
||||||
# --> every variant exists only once, of course
|
# --> every variant exists only once, of course
|
||||||
|
|
||||||
|
#--------------- (3) Investigate variants (2019) ---------------
|
||||||
|
|
||||||
|
load("results/dataframes_case_2019.RData")
|
||||||
|
|
||||||
|
dat$start <- dat$date.start
|
||||||
|
dat$complete <- dat$date.stop
|
||||||
|
|
||||||
|
alog <- activitylog(dat,
|
||||||
|
case_id = "case",
|
||||||
|
activity_id = "item",
|
||||||
|
resource_id = "path",
|
||||||
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
|
pdf("../../thesis/figures/freq-traces_2019.pdf", height = 7, width = 6, pointsize = 10)
|
||||||
|
trace_explorer(alog, n_traces = 25)
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
trace_explorer(alog, n_traces = 10, type = "infrequent")
|
||||||
|
|
||||||
|
tr <- traces(alog)
|
||||||
|
trace_length <- pbapply::pbsapply(strsplit(tr$trace, ","), length)
|
||||||
|
tr[trace_length > 10, ]
|
||||||
|
|
||||||
|
trace_varied <- pbapply::pbsapply(strsplit(tr$trace, ","), function(x) length(unique(x)))
|
||||||
|
tr[trace_varied > 1, ]
|
||||||
|
table(tr[trace_varied > 2, "absolute_frequency"])
|
||||||
|
table(tr[trace_varied > 3, "absolute_frequency"])
|
||||||
|
|
||||||
|
summary(tr$absolute_frequency)
|
||||||
|
vioplot::vioplot(tr$absolute_frequency)
|
||||||
|
|
||||||
|
# Power law for frequencies of traces
|
||||||
|
tab <- table(tr$absolute_frequency)
|
||||||
|
x <- as.numeric(names(tab))
|
||||||
|
y <- as.numeric(tab)
|
||||||
|
|
||||||
|
p1 <- lm(log(y) ~ log(x))
|
||||||
|
pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
|
||||||
|
|
||||||
|
pdf("../../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
|
||||||
|
width = 3.375, pointsize = 10)
|
||||||
|
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
|
||||||
|
|
||||||
|
plot(x, y, log = "xy", xlab = "Absolute Frequency of Traces",
|
||||||
|
ylab = "Frequency", pch = 16, col = rgb(0.262, 0.309, 0.309, 0.5))
|
||||||
|
lines(x, pre, col = "#434F4F")
|
||||||
|
legend("topright", paste0("Proportion of traces only occurring once: ",
|
||||||
|
round(tab[1] / nrow(tr), 2)), cex = .7, bty = "n")
|
||||||
|
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Look at individual traces as examples
|
||||||
|
tr[trace_varied == 5 & trace_length > 50, ]
|
||||||
|
# --> every variant exists only once, of course
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user