Some script cleaning

Nora Wickelmaier 2024-03-12 17:52:53 +01:00
parent 7eff903837
commit 7ee9195563
2 changed files with 19 additions and 57 deletions

11_investigate-variants.R

@@ -1,34 +1,38 @@
 # 11_investigate-variants.R
 #
 # content: (1) Read data
-#          (2) Extract characteristics for cases
-#          (3) Select features for navigation behavior
-#          (4) Export data frames
+#          (2) Investigate variants
 #
-# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
-# output: results/haum/eventlogs_pre-corona_case-clusters.csv
+# input:  results/haum/eventlogs_pre-corona_case-clusters.RData
+# output:
 #
-# last mod: 2024-03-08
+# last mod: 2024-03-12
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
+library(bupaverse)
 #--------------- (1) Read data ---------------
 load("results/haum/eventlogs_pre-corona_cleaned.RData")
-#--------------- (4) Investigate variants ---------------
+#--------------- (2) Investigate variants ---------------
-res$start <- res$date.start
-res$complete <- res$date.stop
+dat$start <- dat$date.start
+dat$complete <- dat$date.stop
-alog <- activitylog(res,
+alog <- activitylog(dat,
                     case_id = "case",
                     activity_id = "item",
                     resource_id = "path",
                     timestamps = c("start", "complete"))
+pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
 trace_explorer(alog, n_traces = 25)
 # --> sequences of artworks are just too rare
+dev.off()
+trace_explorer(alog, n_traces = 10, type = "infrequent")
 tr <- traces(alog)
 trace_length <- pbapply::pbsapply(strsplit(tr$trace, ","), length)
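
Note: traces(alog) returns one row per distinct variant, with the activity sequence stored as a comma-separated string, which is why splitting on "," yields the trace length. A minimal sketch with toy data (the trace and absolute_frequency columns mirror bupaR's traces() output; the values are made up, not the HAUM log):

    # Toy stand-in for traces(alog): one row per distinct variant
    tr <- data.frame(
      trace = c("080", "080,503", "503,080,087"),
      absolute_frequency = c(40L, 12L, 1L)
    )

    # Variant length = number of comma-separated activity instances
    trace_length <- sapply(strsplit(tr$trace, ","), length)

    # Frequency vs. length on log-log axes; a roughly straight line is the
    # power-law decay that lm(log(y) ~ log(x)) in the next hunk fits
    plot(trace_length, tr$absolute_frequency, log = "xy",
         xlab = "trace length", ylab = "variant frequency")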
@@ -52,50 +56,7 @@ p1 <- lm(log(y) ~ log(x))
 pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
 lines(x, pre)
 # Look at individual traces as examples
 tr[trace_varied == 5 & trace_length > 50, ]
 # --> every variant exists only once, of course
-datcase[datcase$nitems == 5 & datcase$length > 50,]
-pbapply::pbsapply(datcase[, -c(1, 9)], median)
-#ex <- datcase[datcase$nitems == 4 & datcase$length == 15,]
-ex <- datcase[datcase$nitems == 5,]
-ex <- ex[sample(1:nrow(ex), 20), ]
-# --> pretty randomly chosen... TODO:
-case_ids <- NULL
-for (case in ex$case) {
-  if ("080" %in% res$item[res$case == case] | "503" %in% res$item[res$case == case]) {
-    case_ids <- c(case_ids, TRUE)
-  } else {
-    case_ids <- c(case_ids, FALSE)
-  }
-}
-cases <- ex$case[case_ids]
-for (case in cases) {
-  alog <- activitylog(res[res$case == case, ],
-                      case_id = "case",
-                      activity_id = "item",
-                      resource_id = "path",
-                      timestamps = c("start", "complete"))
-  dfg <- process_map(alog,
-                     type_nodes = frequency("absolute", color_scale = "Greys"),
-                     type_edges = frequency("absolute", color_edges = "#FF6900"),
-                     rankdir = "LR",
-                     render = FALSE)
-  export_map(dfg,
-             file_name = paste0("results/processmaps/dfg_example_cases_", case, "_R.pdf"),
-             file_type = "pdf",
-             title = paste("Case", case))
-}
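
Aside: the deleted selection loop builds case_ids element by element; the same filter can be vectorized. A sketch under the assumptions of the removed code (ex and res as defined there, items "080" and "503" as targets):

    # Vectorized replacement for the deleted case_ids loop: keep cases in
    # `ex` whose trace contains item "080" or "503" (assumes `ex` and `res`
    # as in the removed code)
    keep  <- sapply(ex$case, function(case)
      any(c("080", "503") %in% res$item[res$case == case]))
    cases <- ex$case[keep]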


@@ -7,7 +7,7 @@
 # input:  results/haum/eventlogs_pre-corona_item-clusters.csv
 # output: results/haum/pn_infos_clusters.csv
 #
-# last mod: 2024-03-06
+# last mod: 2024-03-10
 import pm4py
 import pandas as pd
@@ -16,9 +16,9 @@ from python_helpers import eval_pm, pn_infos
 #--------------- (1) Load data and create event logs ---------------
-dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters.csv", sep = ";")
+dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";")
-event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "item",
+event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "event_new",
                                    timestamp_key = "date.start")
 #--------------- (2) Infos for clusters ---------------
@@ -37,9 +37,10 @@ eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";")
 for cluster in event_log.cluster.unique().tolist():
     subdata = event_log[event_log.cluster == cluster]
-    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=.3)
+    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = .7)
     pm4py.save_vis_petri_net(subnet, subim, subfm,
                              "results/processmaps/petrinet_cluster" + str(cluster) + "_cases.png")
     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm)
     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" +
                             str(cluster) + "_cases.png")
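
Note on the noise_threshold change (.3 to .7): the inductive miner's noise filter drops more infrequent directly-follows pairs at higher values, which typically yields a simpler, more precise net at the cost of replay fitness. A sketch for checking that trade-off with pm4py's token-based replay (standard pm4py calls on the event_log built above; not part of this commit):

    import pm4py

    # Compare model quality at the old and new noise thresholds
    # (assumes `event_log` as formatted above; not part of this commit)
    for nt in (0.3, 0.7):
        net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt)
        fitness = pm4py.fitness_token_based_replay(event_log, net, im, fm)
        precision = pm4py.precision_token_based_replay(event_log, net, im, fm)
        print(f"noise_threshold={nt}: "
              f"log fitness = {fitness['log_fitness']:.3f}, precision = {precision:.3f}")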