diff --git a/code/13_pm-case-clusters.py b/code/13_pm-case-clusters.py index fbc49ab..e1d3b81 100644 --- a/code/13_pm-case-clusters.py +++ b/code/13_pm-case-clusters.py @@ -6,12 +6,12 @@ # input: results/eventlogs_2019_case-clusters.csv # output: results/eval_case_clusters.csv # -# last mod: 2024-04-02 +# last mod: 2024-04-04 import pm4py import pandas as pd -from python_helpers import eval_pm +from python_helpers import eval_pm, eval_append #--------------- (1) Load data and create event logs --------------- @@ -24,29 +24,30 @@ event_log = pm4py.format_dataframe(dat, #--------------- (2) Evaluation for clusters --------------- -thresholds = [0.1, 0.2, 0.3, 0.4, 0.5] +thresholds = [0.0, 0.1, 0.2, 0.3, 0.4] for nt in thresholds: - net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt) - - eval = pd.DataFrame(eval_pm(event_log, net, im, fm)).T - eval.columns = ["fitness", "generalization", "simplicity"] - #eval.columns = ["fitness", "precision", "generalization", "simplicity"] - - # Merge clusters into data frame - for cluster in [1, 2, 3, 4, 5]: - log_clst = pm4py.filter_event_attribute_values(event_log, "cluster", [cluster]) - net_clst, im_clst, fm_clst = pm4py.discover_petri_net_inductive(log_clst, noise_threshold = nt) - eval_clst = pd.DataFrame(eval_pm(log_clst, net_clst, im_clst, fm_clst)).T - eval_clst.columns = ["fitness", "generalization", "simplicity"] - #eval_clst.columns = ["fitness", "precision", "generalization", "simplicity"] - eval = pd.concat([eval, eval_clst]) - # Export process maps - bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst) - pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + - "_cases" + str(int(nt*10)).zfill(2) + ".png") - - eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"] - eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";") + net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt) + + eval = eval_append(event_log, net, im, fm) + + # Export process maps + pm4py.save_vis_petri_net(net, im, fm, "results/processmaps/petrinet_complete_cases" + str(int(nt*10)).zfill(2) + ".png") + bpmn = pm4py.convert.convert_to_bpmn(net, im, fm) + pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_complete_cases" + str(int(nt*10)).zfill(2) + ".png") + + # Merge clusters into data frame + for cluster in [1, 2, 3, 4, 5]: + log_clst = pm4py.filter_event_attribute_values(event_log, "cluster", [cluster]) + net_clst, im_clst, fm_clst = pm4py.discover_petri_net_inductive(log_clst, noise_threshold = nt) + eval_clst = eval_append(log_clst, net_clst, im_clst, fm_clst) + eval = pd.concat([eval, eval_clst]) + # Export process maps + pm4py.save_vis_petri_net(net_clst, im_clst, fm_clst, "results/processmaps/petrinet_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png") + bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst) + pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png") + + eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"] + eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";") diff --git a/code/python_helpers.py b/code/python_helpers.py index 783c30b..d2962ca 100644 --- a/code/python_helpers.py +++ b/code/python_helpers.py @@ -4,18 +4,18 @@ import pandas as pd ###### Extract metadata for petri nets on filtered logs ###### def eval_pm(data, net, initial_marking, final_marking): - """Caculate fitness, precision, generalizability, and simplicity for petri net""" + """Caculate fitness, precision, generalization, and simplicity for petri net""" print("Fitness is calculated") fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking) #print("Precision is calculated") #precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking) print("Generalizability is calculated") - generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net, + generalization = pm4py.algo.evaluation.generalization.algorithm.apply(data, net, initial_marking, final_marking) print("Simplicity is calculated") simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net) - #return [fitness['average_trace_fitness'], precisison, generalizability, simplicity] - return [fitness['average_trace_fitness'], generalizability, simplicity] + #return [fitness['average_trace_fitness'], precisison, generalization, simplicity] + return [fitness['average_trace_fitness'], generalization, simplicity] def pn_infos(log, colname, filter): @@ -51,8 +51,8 @@ def pn_infos_miner(log, miner): def eval_append(log, net, im, fm): eval = eval_pm(log, net, im, fm) - is_sound = pm4py.check_soundness(net, im, fm) - eval.append(is_sound[0]) + #is_sound = pm4py.check_soundness(net, im, fm) + #eval.append(is_sound[0]) eval.append(len(net.arcs)) eval.append(len(net.transitions)) eval.append(len(net.places)) @@ -63,7 +63,7 @@ def eval_append(log, net, im, fm): eval.append({k: sorted_variants[k] for k in list(sorted_variants)[:1]}) eval = pd.DataFrame(eval).T - eval.columns = ["fitness", "precision", "generalizability", "simplicity", - "sound", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"] + eval.columns = ["fitness", "generalization", "simplicity", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"] + #eval.columns = ["fitness", "precision", "generalization", "simplicity", "sound", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"] return eval