Updated Python script(s) for process mining (pm) on case clusters
This commit is contained in:
parent
76aa35da3f
commit
b1d2c5ec99
@ -6,12 +6,12 @@
|
|||||||
# input: results/eventlogs_2019_case-clusters.csv
|
# input: results/eventlogs_2019_case-clusters.csv
|
||||||
# output: results/eval_case_clusters.csv
|
# output: results/eval_case_clusters.csv
|
||||||
#
|
#
|
||||||
# last mod: 2024-04-02
|
# last mod: 2024-04-04
|
||||||
|
|
||||||
import pm4py
|
import pm4py
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from python_helpers import eval_pm
|
from python_helpers import eval_pm, eval_append
|
||||||
|
|
||||||
#--------------- (1) Load data and create event logs ---------------
|
#--------------- (1) Load data and create event logs ---------------
|
||||||
|
|
||||||
@ -24,29 +24,30 @@ event_log = pm4py.format_dataframe(dat,
|
|||||||
|
|
||||||
#--------------- (2) Evaluation for clusters ---------------
|
#--------------- (2) Evaluation for clusters ---------------
|
||||||
|
|
||||||
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
|
thresholds = [0.0, 0.1, 0.2, 0.3, 0.4]
|
||||||
|
|
||||||
for nt in thresholds:
|
for nt in thresholds:
|
||||||
|
|
||||||
net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt)
|
net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt)
|
||||||
|
|
||||||
eval = pd.DataFrame(eval_pm(event_log, net, im, fm)).T
|
eval = eval_append(event_log, net, im, fm)
|
||||||
eval.columns = ["fitness", "generalization", "simplicity"]
|
|
||||||
#eval.columns = ["fitness", "precision", "generalization", "simplicity"]
|
|
||||||
|
|
||||||
# Merge clusters into data frame
|
# Export process maps
|
||||||
for cluster in [1, 2, 3, 4, 5]:
|
pm4py.save_vis_petri_net(net, im, fm, "results/processmaps/petrinet_complete_cases" + str(int(nt*10)).zfill(2) + ".png")
|
||||||
log_clst = pm4py.filter_event_attribute_values(event_log, "cluster", [cluster])
|
bpmn = pm4py.convert.convert_to_bpmn(net, im, fm)
|
||||||
net_clst, im_clst, fm_clst = pm4py.discover_petri_net_inductive(log_clst, noise_threshold = nt)
|
pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_complete_cases" + str(int(nt*10)).zfill(2) + ".png")
|
||||||
eval_clst = pd.DataFrame(eval_pm(log_clst, net_clst, im_clst, fm_clst)).T
|
|
||||||
eval_clst.columns = ["fitness", "generalization", "simplicity"]
|
|
||||||
#eval_clst.columns = ["fitness", "precision", "generalization", "simplicity"]
|
|
||||||
eval = pd.concat([eval, eval_clst])
|
|
||||||
# Export process maps
|
|
||||||
bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst)
|
|
||||||
pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) +
|
|
||||||
"_cases" + str(int(nt*10)).zfill(2) + ".png")
|
|
||||||
|
|
||||||
eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
|
# Merge clusters into data frame
|
||||||
eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")
|
for cluster in [1, 2, 3, 4, 5]:
|
||||||
|
log_clst = pm4py.filter_event_attribute_values(event_log, "cluster", [cluster])
|
||||||
|
net_clst, im_clst, fm_clst = pm4py.discover_petri_net_inductive(log_clst, noise_threshold = nt)
|
||||||
|
eval_clst = eval_append(log_clst, net_clst, im_clst, fm_clst)
|
||||||
|
eval = pd.concat([eval, eval_clst])
|
||||||
|
# Export process maps
|
||||||
|
pm4py.save_vis_petri_net(net_clst, im_clst, fm_clst, "results/processmaps/petrinet_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png")
|
||||||
|
bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst)
|
||||||
|
pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) + "_cases" + str(int(nt*10)).zfill(2) + ".png")
|
||||||
|
|
||||||
|
eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
|
||||||
|
eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")
|
||||||
|
|
||||||
|
@ -4,18 +4,18 @@ import pandas as pd
|
|||||||
###### Extract metadata for petri nets on filtered logs ######
|
###### Extract metadata for petri nets on filtered logs ######
|
||||||
|
|
||||||
def eval_pm(data, net, initial_marking, final_marking):
|
def eval_pm(data, net, initial_marking, final_marking):
|
||||||
"""Calculate fitness, precision, generalizability, and simplicity for petri net"""
|
"""Calculate fitness, precision, generalization, and simplicity for petri net"""
|
||||||
print("Fitness is calculated")
|
print("Fitness is calculated")
|
||||||
fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
|
fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
|
||||||
#print("Precision is calculated")
|
#print("Precision is calculated")
|
||||||
#precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
|
#precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
|
||||||
print("Generalizability is calculated")
|
print("Generalizability is calculated")
|
||||||
generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net,
|
generalization = pm4py.algo.evaluation.generalization.algorithm.apply(data, net,
|
||||||
initial_marking, final_marking)
|
initial_marking, final_marking)
|
||||||
print("Simplicity is calculated")
|
print("Simplicity is calculated")
|
||||||
simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
|
simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
|
||||||
#return [fitness['average_trace_fitness'], precisison, generalizability, simplicity]
|
#return [fitness['average_trace_fitness'], precisison, generalization, simplicity]
|
||||||
return [fitness['average_trace_fitness'], generalizability, simplicity]
|
return [fitness['average_trace_fitness'], generalization, simplicity]
|
||||||
|
|
||||||
|
|
||||||
def pn_infos(log, colname, filter):
|
def pn_infos(log, colname, filter):
|
||||||
@ -51,8 +51,8 @@ def pn_infos_miner(log, miner):
|
|||||||
def eval_append(log, net, im, fm):
|
def eval_append(log, net, im, fm):
|
||||||
|
|
||||||
eval = eval_pm(log, net, im, fm)
|
eval = eval_pm(log, net, im, fm)
|
||||||
is_sound = pm4py.check_soundness(net, im, fm)
|
#is_sound = pm4py.check_soundness(net, im, fm)
|
||||||
eval.append(is_sound[0])
|
#eval.append(is_sound[0])
|
||||||
eval.append(len(net.arcs))
|
eval.append(len(net.arcs))
|
||||||
eval.append(len(net.transitions))
|
eval.append(len(net.transitions))
|
||||||
eval.append(len(net.places))
|
eval.append(len(net.places))
|
||||||
@ -63,7 +63,7 @@ def eval_append(log, net, im, fm):
|
|||||||
eval.append({k: sorted_variants[k] for k in list(sorted_variants)[:1]})
|
eval.append({k: sorted_variants[k] for k in list(sorted_variants)[:1]})
|
||||||
|
|
||||||
eval = pd.DataFrame(eval).T
|
eval = pd.DataFrame(eval).T
|
||||||
eval.columns = ["fitness", "precision", "generalizability", "simplicity",
|
eval.columns = ["fitness", "generalization", "simplicity", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"]
|
||||||
"sound", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"]
|
#eval.columns = ["fitness", "precision", "generalization", "simplicity", "sound", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"]
|
||||||
return eval
|
return eval
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user