From dc489b4aa9c0f41116e1fe438b9c4726cb3bb457 Mon Sep 17 00:00:00 2001
From: nwickel
Date: Tue, 2 Apr 2024 21:29:37 +0200
Subject: [PATCH] Added python script for PM for case clusters (again)

---
 code/13_pm-case-clusters.py | 52 +++++++++++++++++++++++++++++++++++++
 code/python_helpers.py      |  9 +++++--
 2 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 code/13_pm-case-clusters.py

diff --git a/code/13_pm-case-clusters.py b/code/13_pm-case-clusters.py
new file mode 100644
index 0000000..fbc49ab
--- /dev/null
+++ b/code/13_pm-case-clusters.py
@@ -0,0 +1,52 @@
+# 13_pm-case-clusters.py
+#
+# content: (1) Load data and create event log
+#          (2) Evaluation for clusters
+#
+# input:  results/eventlogs_2019_case-clusters.csv
+# output: results/eval_case_clusters.csv
+#
+# last mod: 2024-04-02
+
+import pm4py
+import pandas as pd
+
+from python_helpers import eval_pm
+
+#--------------- (1) Load data and create event logs ---------------
+
+dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";")
+
+event_log = pm4py.format_dataframe(dat,
+                                   case_id = "case",
+                                   activity_key = "item",
+                                   timestamp_key = "date.start")
+
+#--------------- (2) Evaluation for clusters ---------------
+
+thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
+
+for nt in thresholds:
+
+    net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt)
+
+    eval = pd.DataFrame(eval_pm(event_log, net, im, fm)).T
+    eval.columns = ["fitness", "generalization", "simplicity"]
+    #eval.columns = ["fitness", "precision", "generalization", "simplicity"]
+
+    # Merge clusters into data frame
+    for cluster in [1, 2, 3, 4, 5]:
+        log_clst = pm4py.filter_event_attribute_values(event_log, "cluster", [cluster])
+        net_clst, im_clst, fm_clst = pm4py.discover_petri_net_inductive(log_clst, noise_threshold = nt)
+        eval_clst = pd.DataFrame(eval_pm(log_clst, net_clst, im_clst, fm_clst)).T
+        eval_clst.columns = ["fitness", "generalization", "simplicity"]
+        #eval_clst.columns = ["fitness", "precision", "generalization", "simplicity"]
+        eval = pd.concat([eval, eval_clst])
+        # Export process maps
+        bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst)
+        pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) +
+                "_cases" + str(int(nt*10)).zfill(2) + ".png")
+
+    eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
+    eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")
+
diff --git a/code/python_helpers.py b/code/python_helpers.py
index 693acaa..783c30b 100644
--- a/code/python_helpers.py
+++ b/code/python_helpers.py
@@ -5,12 +5,17 @@ import pandas as pd
 
 def eval_pm(data, net, initial_marking, final_marking):
     """Caculate fitness, precision, generalizability, and simplicity for petri net"""
+    print("Fitness is calculated")
     fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
-    precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
+    #print("Precision is calculated")
+    #precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
+    print("Generalizability is calculated")
     generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net, initial_marking,
                                                                             final_marking)
+    print("Simplicity is calculated")
     simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
-    return [fitness['average_trace_fitness'], precisison, generalizability, simplicity]
+    #return [fitness['average_trace_fitness'], precisison, generalization, simplicity]
+    return [fitness['average_trace_fitness'], generalizability, simplicity]
 
 
 def pn_infos(log, colname, filter):