# 13_pm-case-clusters.py
#
# content: (1) Load data and create event log
#          (2) Evaluation for clusters
#
# input:  results/eventlogs_2019_case-clusters.csv
# output: results/eval_case_clusters_<NN>.csv
#         results/processmaps/bpmn_cluster<K>_cases<NN>.png
#         (<NN> = zero-padded int(noise threshold * 10), <K> = cluster id)
#
# last mod: 2024-04-02

import os

import pm4py
import pandas as pd

from python_helpers import eval_pm

# Column names assigned to the transposed result of eval_pm().
# NOTE: an earlier four-column variant that also contained "precision"
# (between "fitness" and "generalization") was disabled in this script;
# restore it here if eval_pm is changed to return precision as well.
METRIC_COLUMNS = ["fitness", "generalization", "simplicity"]

# Cluster ids to evaluate separately; the row labels of the exported table
# are derived from this list so the two cannot get out of sync.
CLUSTERS = [1, 2, 3, 4, 5]

PROCESSMAP_DIR = "results/processmaps"


def _discover_and_evaluate(log, noise_threshold):
    """Discover a Petri net with the inductive miner and evaluate it.

    Args:
        log: Event log as returned by ``pm4py.format_dataframe`` (or a
            filtered subset of it).
        noise_threshold: Value forwarded as ``noise_threshold`` to
            ``pm4py.discover_petri_net_inductive``.

    Returns:
        Tuple ``(scores, net, im, fm)`` where ``scores`` is a data frame
        built from ``eval_pm(log, net, im, fm)``, transposed and labelled
        with ``METRIC_COLUMNS``, and ``net``, ``im``, ``fm`` are the
        discovered Petri net with its initial and final marking.
    """
    net, im, fm = pm4py.discover_petri_net_inductive(
        log, noise_threshold=noise_threshold
    )
    scores = pd.DataFrame(eval_pm(log, net, im, fm)).T
    scores.columns = METRIC_COLUMNS
    return scores, net, im, fm


#--------------- (1) Load data and create event logs ---------------

dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep=";")

event_log = pm4py.format_dataframe(
    dat,
    case_id="case",
    activity_key="item",
    timestamp_key="date.start",
)

#--------------- (2) Evaluation for clusters ---------------

thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]

# Make sure the export directory for the process maps exists before the
# first image is written (the export call is not assumed to create it).
os.makedirs(PROCESSMAP_DIR, exist_ok=True)

for nt in thresholds:
    # Two-digit tag used in all file names of this threshold, e.g. 0.1 -> "01".
    nt_tag = str(int(nt * 10)).zfill(2)

    # First row: model discovered on the complete event log.
    scores_complete, _, _, _ = _discover_and_evaluate(event_log, nt)
    score_frames = [scores_complete]

    # One further row per cluster, each with its own discovered model.
    for cluster in CLUSTERS:
        log_clst = pm4py.filter_event_attribute_values(
            event_log, "cluster", [cluster]
        )
        eval_clst, net_clst, im_clst, fm_clst = _discover_and_evaluate(
            log_clst, nt
        )
        score_frames.append(eval_clst)

        # Export process maps
        bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst)
        pm4py.vis.save_vis_bpmn(
            bpmn,
            PROCESSMAP_DIR + "/bpmn_cluster" + str(cluster)
            + "_cases" + nt_tag + ".png",
        )

    # Merge complete-log and cluster results into one data frame.
    eval_res = pd.concat(score_frames)
    eval_res.index = ["Complete"] + ["Cluster " + str(c) for c in CLUSTERS]

    eval_res.to_csv(
        "results/eval_case_clusters_" + nt_tag + ".csv",
        sep=";",
    )