2024-01-12 16:44:33 +01:00
|
|
|
#%% # needed for shortcuts to run properly in VSCode *eyeroll*

# IPython magic: wipe the interactive namespace before each run.
# NOTE(review): this file is an IPython/VSCode cell script, not plain
# Python — `%reset` only works when executed through IPython.
%reset
|
|
|
|
|
|
|
|
import pm4py
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
###### Load data and create event logs ######

# Raw event log exported from the museum touch-table logging pipeline.
log_path = "results/haum/event_logfiles_2024-01-02_19-44-50.csv"
dat = pd.read_csv(log_path, sep=";")

# Restrict to pre-corona data (before the artworks were updated).
dat = dat[dat["date.start"] < "2020-03-13"]

# Build a pm4py-formatted log: one case per "path", activity labels from
# "event", timestamps from "date.start".
event_log = pm4py.format_dataframe(dat,
                                   case_id='path',
                                   activity_key='event',
                                   timestamp_key='date.start')

# Promote "item" to a case-level attribute for pm4py.
event_log = event_log.rename(columns={'item': 'case:item'})
|
2024-01-12 16:44:33 +01:00
|
|
|
|
|
|
|
###### Descriptives of log data ######

# Distribution of events: absolute counts and relative frequencies.
event_log["event"].value_counts()
event_log["event"].value_counts(normalize=True)

# Number of distinct paths (= cases).
len(event_log["path"].unique())
|
|
|
|
|
|
|
|
# Number of variants (distinct activity sequences over all cases).
variants = pm4py.get_variants(event_log)
len(variants)

# Top 20 most frequent variants.
sorted_variants = dict(
    sorted(variants.items(), key=lambda kv: kv[1], reverse=True)
)
{k: sorted_variants[k] for k in list(sorted_variants)[:20]}

# Same analysis with the high-frequency "move" events stripped out.
filtered_log = event_log[event_log["event"] != "move"]
variants_no_move = pm4py.get_variants(filtered_log)
len(variants_no_move)

sorted_variants_no_move = dict(
    sorted(variants_no_move.items(), key=lambda kv: kv[1], reverse=True)
)
{k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]}
|
2024-01-12 16:44:33 +01:00
|
|
|
|
|
|
|
# Path length: number of events per case (computed once, reused below).
path_lengths = event_log["path"].value_counts()
path_lengths
path_lengths.mean()
path_lengths.median()
path_lengths.min()
path_lengths.max()

# Histogram of path lengths.
plt.hist(path_lengths, bins=200)
plt.show()

# TODO: Do it again in R -- much smoother and more info, better plots
|
|
|
|
|
|
|
|
###### Read "conformative" Petri Net ######

# Load the hand-crafted ("conformative") Petri net encoding the intended
# interaction flow; it serves as the baseline model for all evaluations.
basenet, initial_marking, final_marking = pm4py.read_pnml("results/conformative_petrinet_con.pnml")
|
|
|
|
|
|
|
|
def eval_pm(data, net, initial_marking, final_marking):
    """Calculate quality measures for a Petri net against an event log.

    Parameters
    ----------
    data : pandas.DataFrame
        pm4py-formatted event log.
    net : pm4py PetriNet
        The Petri net to evaluate.
    initial_marking, final_marking : pm4py Marking
        Initial and final markings of `net`.

    Returns
    -------
    list
        [fitness, precision, generalizability, simplicity], where fitness
        is the average trace fitness from token-based replay.
    """
    fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
    # Fixed misspelled local name ("precisison" -> "precision").
    precision = pm4py.precision_token_based_replay(data, net,
                                                   initial_marking, final_marking)
    generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net,
                                                                            initial_marking, final_marking)
    simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
    return [fitness['average_trace_fitness'], precision, generalizability, simplicity]
|
|
|
|
|
|
|
|
# Evaluate the baseline (conformative) net on the complete log.
baseline_eval = eval_pm(event_log, basenet, initial_marking, final_marking)

# TBR: per-trace token-based-replay diagnostics against the baseline net.
replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking)
|
|
|
|
|
|
|
|
# Collect per-trace diagnostics from the replay result. Comprehensions
# replace the previous index-based append loops; the names l1..l4 and
# index_broken are kept because they are inspected below.
l1 = [t["remaining_tokens"] for t in replayed_traces]           # tokens left in the net
l2 = [t["missing_tokens"] for t in replayed_traces]             # tokens that had to be added
l3 = [t["reached_marking"] for t in replayed_traces]            # marking reached per trace
l4 = [t["transitions_with_problems"] for t in replayed_traces]  # problematic transitions

np.mean(l1)
set(l1)
# Index of the first trace leaving exactly one remaining token ("broken" trace).
index_broken = l1.index(1)

np.mean(l2)
set(l2)
l2.index(1)

set(l3)
l4.count([])  # number of traces without transition problems

# Diagnostics of the broken trace.
l3[index_broken]
l4[index_broken]

replayed_traces[index_broken]
|
|
|
|
|
2024-01-16 09:59:23 +01:00
|
|
|
# Drill into the raw rows of the broken case to locate the cause.
event_log[event_log['@@case_index'] == index_broken].event
event_log[event_log['@@case_index'] == index_broken].path
# NOTE(review): "item" was renamed to "case:item" when building the log
# above — this attribute access presumably fails on the renamed log;
# verify which column actually exists here.
event_log[event_log['@@case_index'] == index_broken].item
event_log[event_log['@@case_index'] == index_broken]["fileId.start"]
# --> logging error in file!
|
2024-01-12 16:44:33 +01:00
|
|
|
|
|
|
|
# Algorithm-level token-based replay: unlike the convenience wrapper, this
# also yields place/transition-level fitness and unwanted activities.
from pm4py.algo.conformance.tokenreplay import algorithm as token_based_replay

# Replay every trace individually (no variant collapsing) and record
# place/transition-level fitness information.
parameters_tbr = {token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.DISABLE_VARIANTS: True, token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.ENABLE_PLTR_FITNESS: True}

replayed_traces, place_fitness, trans_fitness, unwanted_activities = token_based_replay.apply(event_log, basenet,
                                                                                              initial_marking,
                                                                                              final_marking,
                                                                                              parameters=parameters_tbr)

# Duration diagnostics: compare case durations for cases that pass vs.
# fail each problematic transition.
from pm4py.algo.conformance.tokenreplay.diagnostics import duration_diagnostics

trans_diagnostics = duration_diagnostics.diagnose_from_trans_fitness(event_log, trans_fitness)

for trans in trans_diagnostics:
    print(trans, trans_diagnostics[trans])
|
|
|
|
|
|
|
|
# Footprints
from pm4py.algo.discovery.footprints import algorithm as footprints_discovery

# Footprint matrix of the whole log at once ...
fp_log = footprints_discovery.apply(event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG)

# ... and one footprint matrix per trace.
fp_trace_by_trace = footprints_discovery.apply(event_log, variant=footprints_discovery.Variants.TRACE_BY_TRACE)

# Footprints of the baseline Petri net, for comparison with the log.
fp_net = footprints_discovery.apply(basenet, initial_marking, final_marking)

from pm4py.visualization.footprints import visualizer as fp_visualizer

# Render the net's footprints alone ...
gviz = fp_visualizer.apply(fp_net, parameters={fp_visualizer.Variants.SINGLE.value.Parameters.FORMAT: "svg"})

fp_visualizer.view(gviz)

# ... and log vs. net side by side.
gviz = fp_visualizer.apply(fp_log, fp_net, parameters={fp_visualizer.Variants.COMPARISON.value.Parameters.FORMAT: "svg"})

fp_visualizer.view(gviz)

# Trace-level footprint conformance against the net.
conf_fp = pm4py.conformance_diagnostics_footprints(fp_trace_by_trace, fp_net)

from pm4py.algo.conformance.footprints import algorithm as fp_conformance

conf_result = fp_conformance.apply(fp_log, fp_net, variant=fp_conformance.Variants.LOG_EXTENSIVE)

# Footprint-based fitness and precision measures.
from pm4py.algo.conformance.footprints.util import evaluation

fitness = evaluation.fp_fitness(fp_log, fp_net, conf_result)

precision = evaluation.fp_precision(fp_log, fp_net)
|
|
|
|
|
|
|
|
# Skeleton
# Log-skeleton discovery (declarative constraints) with zero noise
# tolerance ...
from pm4py.algo.discovery.log_skeleton import algorithm as lsk_discovery

skeleton = lsk_discovery.apply(event_log, parameters={lsk_discovery.Variants.CLASSIC.value.Parameters.NOISE_THRESHOLD: 0.0})

# ... and conformance checking of the log against its own skeleton.
from pm4py.algo.conformance.log_skeleton import algorithm as lsk_conformance

conf_result = lsk_conformance.apply(event_log, skeleton)
|
|
|
|
|
|
|
|
# Show the baseline net, check its soundness, and report its size.
pm4py.vis.view_petri_net(basenet, initial_marking, final_marking)

# check_soundness returns a tuple; the first element is the verdict.
is_sound = pm4py.check_soundness(basenet, initial_marking, final_marking)

is_sound[0]

# Net size: arcs / transitions / places.
len(basenet.arcs)

len(basenet.transitions)

len(basenet.places)

# Eventually-follows graph of the complete log.
efg_graph = pm4py.discover_eventually_follows_graph(event_log)
|
|
|
|
|
|
|
|
## Directly-follows graph
# Discover, render, and export the DFG of the complete log.
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)

pm4py.view_dfg(dfg, start_activities, end_activities)

pm4py.save_vis_dfg(dfg, start_activities, end_activities, '../figures/processmaps/dfg_complete.png')
|
|
|
|
|
|
|
|
## Heuristics Miner

# Discover a Petri net with the heuristics miner, show and export it.
h_net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
pm4py.vis.view_petri_net(h_net, im, fm)
pm4py.vis.save_vis_petri_net(h_net, im, fm,
                             "../figures/processmaps/petrinet_heuristics_complete.png")

# Quality measures of the heuristics net on the complete log.
h_eval = eval_pm(event_log, h_net, im, fm)

# Soundness check (first tuple element is the verdict).
is_sound = pm4py.check_soundness(h_net, im, fm)
is_sound[0]

# Net size: arcs / transitions / places.
len(h_net.arcs)
len(h_net.transitions)
len(h_net.places)
|
|
|
|
|
|
|
|
|
|
|
|
# decorated petri net
# Frequency-decorated rendering of the heuristics net, with counts taken
# from the event log.
from pm4py.visualization.petri_net import visualizer as pn_visualizer

parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}

gviz = pn_visualizer.apply(h_net, im, fm, parameters=parameters, variant=pn_visualizer.Variants.FREQUENCY, log=event_log)

pn_visualizer.save(gviz, "../figures/processmaps/petrinet_heuristics_complete_decorated.png")
|
2024-01-12 16:44:33 +01:00
|
|
|
|
|
|
|
# convert to BPMN
# BPMN view of the heuristics-miner net.
bpmn = pm4py.convert.convert_to_bpmn(h_net, im, fm)

pm4py.vis.view_bpmn(bpmn)
|
|
|
|
|
|
|
|
## Alpha Miner

# Discover a Petri net with the alpha miner, show and export it.
a_net, im, fm = pm4py.discover_petri_net_alpha(event_log)
pm4py.vis.view_petri_net(a_net, im, fm)
pm4py.vis.save_vis_petri_net(a_net, im, fm,
                             "../figures/processmaps/petrinet_alpha_complete.png")

# Quality measures of the alpha net on the complete log.
a_eval = eval_pm(event_log, a_net, im, fm)

# Soundness check (first tuple element is the verdict).
is_sound = pm4py.check_soundness(a_net, im, fm)
is_sound[0]

# Net size: arcs / transitions / places.
len(a_net.arcs)
len(a_net.transitions)
len(a_net.places)
|
|
|
|
|
|
|
|
## Inductive Miner

# Discover a Petri net with the inductive miner, show and export it.
i_net, im, fm = pm4py.discover_petri_net_inductive(event_log)
pm4py.vis.view_petri_net(i_net, im, fm)
pm4py.vis.save_vis_petri_net(i_net, im, fm,
                             "../figures/processmaps/petrinet_induction_complete.png")

# Quality measures of the inductive net on the complete log.
i_eval = eval_pm(event_log, i_net, im, fm)

# as process tree (does not work for heuristics miner!)
pt = pm4py.discover_process_tree_inductive(event_log)
pm4py.vis.view_process_tree(pt)

# Soundness check (first tuple element is the verdict).
is_sound = pm4py.check_soundness(i_net, im, fm)
is_sound[0]

# TODO: Can I show that this simpler net does not include all traces?
# (Probably not, since fitness is 1, but WHY?)

# Net size: arcs / transitions / places.
len(i_net.arcs)
len(i_net.transitions)
len(i_net.places)

# BPMN view of the inductive-miner net.
bpmn = pm4py.convert.convert_to_bpmn(i_net, im, fm)
pm4py.view_bpmn(bpmn)
|
|
|
|
|
|
|
|
# Algorithm-level token-based replay against the inductive-miner net
# (same setup as for the baseline net above).
# NOTE(review): token_based_replay was already imported earlier; this
# re-import is harmless but redundant.
from pm4py.algo.conformance.tokenreplay import algorithm as token_based_replay

# Replay every trace individually and record place/transition fitness.
parameters_tbr = {token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.DISABLE_VARIANTS: True, token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.ENABLE_PLTR_FITNESS: True}

replayed_traces, place_fitness, trans_fitness, unwanted_activities = token_based_replay.apply(event_log, i_net,
                                                                                              im,
                                                                                              fm,
                                                                                              parameters=parameters_tbr)
|
2024-01-12 16:44:33 +01:00
|
|
|
|
|
|
|
# Per-trace diagnostics for the inductive-miner net. Comprehensions
# replace the previous index-based append loops; names are kept because
# index_broken is reused further below.
l1 = [t["remaining_tokens"] for t in replayed_traces]           # tokens left in the net
l2 = [t["missing_tokens"] for t in replayed_traces]             # tokens that had to be added
l3 = [t["reached_marking"] for t in replayed_traces]            # marking reached per trace
l4 = [t["transitions_with_problems"] for t in replayed_traces]  # problematic transitions

np.mean(l1)
set(l1)
# Index of the first trace leaving exactly one remaining token.
index_broken = l1.index(1)

np.mean(l2)
set(l2)
l2.index(1)

set(l3)
l4.count([])  # number of traces without transition problems

# Diagnostics of the broken trace.
l3[index_broken]
l4[index_broken]

replayed_traces[index_broken]

# Raw rows of the broken case.
event_log[event_log['@@case_index'] == index_broken].event
event_log[event_log['@@case_index'] == index_broken].path
event_log[event_log['@@case_index'] == index_broken].item
event_log[event_log['@@case_index'] == index_broken]["fileId.start"]
|
|
|
|
|
2024-01-12 16:44:33 +01:00
|
|
|
## ILP Miner

# Discover a Petri net with the ILP miner, show and export it.
ilp_net, im, fm = pm4py.discover_petri_net_ilp(event_log)
pm4py.vis.view_petri_net(ilp_net, im, fm)
pm4py.vis.save_vis_petri_net(ilp_net, im, fm,
                             "../figures/processmaps/petrinet_ilp_complete.png")

# Quality measures of the ILP net on the complete log.
ilp_eval = eval_pm(event_log, ilp_net, im, fm)

# Soundness check (first tuple element is the verdict).
is_sound = pm4py.check_soundness(ilp_net, im, fm)
is_sound[0]

# Net size: arcs / transitions / places.
len(ilp_net.arcs)
len(ilp_net.transitions)
len(ilp_net.places)
|
|
|
|
|
|
|
|
## Export for all miners
# Collect the quality measures of every miner into one table and export.
# np.vstack replaces the deprecated np.row_stack (removed in NumPy 2.0);
# eval_df avoids shadowing the builtin eval().
eval_df = pd.DataFrame(np.vstack([baseline_eval, h_eval, a_eval, i_eval, ilp_eval]))
eval_df.columns = ["fitness", "precision", "generalizability", "simplicity"]
eval_df.index = ["conformative", "heuristics", "alpha", "inductive", "ilp"]
eval_df

eval_df.to_csv("results/eval_all-miners_complete.csv", sep=";")
|
|
|
|
|
2024-01-16 09:59:23 +01:00
|
|
|
## Without broken trace
# Re-mine every net on the log with the broken trace removed.
event_log_clean = event_log[event_log['@@case_index'] != index_broken]

# BUG FIX: the initial-marking variables were cross-assigned between the
# heuristics and alpha miners (h_net got a_im, a_net got h_im), so the
# evaluations below paired each net with the wrong initial marking.
h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean)
a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean)
i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean)
ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean)

# Evaluate the baseline and every re-mined net on the cleaned log.
baseline_eval = eval_pm(event_log_clean, basenet, initial_marking, final_marking)
h_eval = eval_pm(event_log_clean, h_net, h_im, h_fm)
a_eval = eval_pm(event_log_clean, a_net, a_im, a_fm)
i_eval = eval_pm(event_log_clean, i_net, i_im, i_fm)
ilp_eval = eval_pm(event_log_clean, ilp_net, ilp_im, ilp_fm)
|
|
|
|
|
|
|
|
# Collect the clean-log quality measures into one table and export.
# np.vstack replaces the deprecated np.row_stack (removed in NumPy 2.0);
# eval_df avoids shadowing the builtin eval().
eval_df = pd.DataFrame(np.vstack([baseline_eval, h_eval, a_eval, i_eval, ilp_eval]))
eval_df.columns = ["fitness", "precision", "generalizability", "simplicity"]
eval_df.index = ["conformative", "heuristics", "alpha", "inductive", "ilp"]
eval_df

eval_df.to_csv("results/eval_all-miners_clean.csv", sep=";")
|
|
|
|
|
|
|
|
|
2024-01-12 16:44:33 +01:00
|
|
|
###### Process Mining - individual artworks ######

def pm_artworks(miner):
    """Mine and evaluate Petri nets per artwork.

    For the requested discovery algorithm, mines one net on the complete
    event log and one net per artwork sub-log, then evaluates both on
    each artwork's sub-log with eval_pm().

    Parameters
    ----------
    miner : str
        One of "heuristics", "inductive", "alpha", "ilp".

    Returns
    -------
    pandas.DataFrame
        Quality measures per artwork; "nettype" distinguishes the net
        mined on all data ("alldata") from the per-artwork net
        ("subdata").

    Raises
    ------
    KeyError
        If `miner` is not a supported algorithm name (previously an
        unknown name left `net` unbound and failed later with NameError).
    """
    # NOTE(review): relies on a "case:artwork" column; the log built
    # above only renames "item" -> "case:item" — confirm which case
    # attribute actually exists.
    # Dispatch table instead of two duplicated if/elif chains.
    discover = {
        "heuristics": pm4py.discover_petri_net_heuristics,
        "inductive": pm4py.discover_petri_net_inductive,
        "alpha": pm4py.discover_petri_net_alpha,
        "ilp": pm4py.discover_petri_net_ilp,
    }[miner]

    artworks = event_log["case:artwork"].unique()  # hoisted: computed once
    retval1 = np.empty((len(artworks), 4))
    retval2 = np.empty((len(artworks), 4))

    # Net mined on the complete log; evaluated per artwork below.
    net, im, fm = discover(event_log)

    for i, artwork in enumerate(artworks):
        # Sub-log containing only this artwork's cases.
        subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork",
                                                      [artwork],
                                                      level="case", retain=True)
        # Net mined on this artwork's sub-log only.
        subnet, subim, subfm = discover(subdata)
        #pm4py.save_vis_petri_net(subnet, subim, subfm,
        #        "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png")
        retval1[i] = eval_pm(subdata, net, im, fm)           # all-data net on sub-log
        retval2[i] = eval_pm(subdata, subnet, subim, subfm)  # per-artwork net

    retval1 = pd.DataFrame(retval1)
    retval1.columns = ["fitness", "precision", "generalizability", "simplicity"]
    retval1.index = artworks
    retval1.insert(0, "nettype", "alldata")

    retval2 = pd.DataFrame(retval2)
    retval2.columns = ["fitness", "precision", "generalizability", "simplicity"]
    retval2.index = artworks
    retval2.insert(0, "nettype", "subdata")

    return pd.concat([retval1, retval2])
|
|
|
|
|
|
|
|
|
|
|
|
# Run the per-artwork evaluation for every miner and export each table.
for miner in ("heuristics", "inductive", "alpha", "ilp"):
    eval_art = pm_artworks(miner=miner)
    eval_art.to_csv("results/eval_artworks_" + miner + ".csv", sep=";")

# Keep the inductive-miner result around for interactive inspection.
eval_art = pm_artworks(miner="inductive")
|
|
|
|
|
|
|
|
|