2023-12-13 15:47:47 +01:00
|
|
|
#%% # needed for shortcuts to run properly in VSCode *eyeroll*
|
|
|
|
%reset
|
|
|
|
|
|
|
|
import pm4py
|
2023-12-21 16:44:26 +01:00
|
|
|
#from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
|
|
|
|
#from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
|
2023-12-13 15:47:47 +01:00
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
from sklearn.cluster import KMeans
|
|
|
|
|
|
|
|
###### Load data and create event logs ######

# Event log files exported by the preprocessing pipeline (semicolon-separated).
dat = pd.read_csv("results/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv", sep=";")

# Keep only pre-corona data (before artworks were updated).
dat = dat.loc[dat["date"] < "2020-03-13"]

# Build a pm4py-formatted event log: one case per trace, activities taken
# from the "event" column, ordered by their start timestamp.
event_log = pm4py.format_dataframe(dat,
                                   case_id='trace',
                                   activity_key='event',
                                   timestamp_key='date.start')
# Alternative with explicit start/stop timestamps:
# event_log = pm4py.format_dataframe(dat, case_id='trace', activity_key='event',
#                                    timestamp_key='date.stop', start_timestamp_key='date.start')

# Expose the artwork as a case-level attribute for later per-artwork filtering.
event_log = event_log.rename(columns={'artwork': 'case:artwork'})
#event_log = pm4py.convert_to_event_log(dat_log)  # deprecated
|
|
|
|
|
|
|
|
###### Process Mining - complete data set #####
|
|
|
|
|
|
|
|
def eval_pm(data, net, initial_marking, final_marking):
    """Calculate fitness, precision, generalizability, and simplicity for a Petri net.

    Parameters
    ----------
    data : pandas.DataFrame
        pm4py-formatted event log replayed against the net.
    net, initial_marking, final_marking
        Petri net and its markings as returned by the pm4py discovery functions.

    Returns
    -------
    list
        [average trace fitness, precision, generalizability, simplicity].
    """
    # Token-based replay is used throughout; the alignment-based variants
    # (commented out) are more accurate but considerably slower.
    fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
    #fitness = pm4py.fitness_alignments(data, net, initial_marking, final_marking)
    # Fixed misspelled local "precisison".
    precision = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
    #precision = pm4py.precision_alignments(data, net, initial_marking, final_marking)
    generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(
        data, net, initial_marking, final_marking)
    simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
    return [fitness['average_trace_fitness'], precision, generalizability, simplicity]
|
|
|
|
|
|
|
|
|
|
|
|
## Directly-follows graph

dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
pm4py.view_dfg(dfg, start_activities, end_activities)
pm4py.save_vis_dfg(dfg, start_activities, end_activities, '../figures/processmaps/dfg_complete.png')


## Heuristics Miner

net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
h_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_heuristics_complete.png")

# Soundness diagnostics (element 0 is the boolean verdict).
is_sound = pm4py.check_soundness(net, im, fm)
is_sound[0]

# Size of the "s_c_net" from the diagnostics dict — presumably the
# short-circuited net used for the soundness check; confirm against pm4py docs.
len(is_sound[1]["s_c_net"].arcs)         # 46
len(is_sound[1]["s_c_net"].transitions)  # 23
len(is_sound[1]["s_c_net"].places)       # 10
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-12-13 15:47:47 +01:00
|
|
|
# Petri net decorated with frequency information
from pm4py.visualization.petri_net import visualizer as pn_visualizer

parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
gviz = pn_visualizer.apply(net, im, fm,
                           parameters=parameters,
                           variant=pn_visualizer.Variants.FREQUENCY,
                           log=event_log)
pn_visualizer.save(gviz, "../figures/processmaps/pn_heuristics_complete_decorated.png")

# Convert the heuristics net to a BPMN diagram and display it.
bpmn = pm4py.convert.convert_to_bpmn(net, im, fm)
pm4py.vis.view_bpmn(bpmn)
|
|
|
|
|
2023-12-13 15:47:47 +01:00
|
|
|
## Alpha Miner

net, im, fm = pm4py.discover_petri_net_alpha(event_log)
a_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_alpha_complete.png")

# Soundness diagnostics for the alpha-miner net.
is_sound = pm4py.check_soundness(net, im, fm)
is_sound[0]

len(is_sound[1]["s_c_net"].arcs)
len(is_sound[1]["s_c_net"].transitions)
len(is_sound[1]["s_c_net"].places)
|
|
|
|
|
2023-12-13 15:47:47 +01:00
|
|
|
## Inductive Miner

net, im, fm = pm4py.discover_petri_net_inductive(event_log)
i_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_induction_complete.png")

# The inductive miner can also yield a process tree directly
# (this does not work for the heuristics miner!).
pt = pm4py.discover_process_tree_inductive(event_log)
pm4py.vis.view_process_tree(pt)

# Soundness diagnostics for the inductive-miner net.
is_sound = pm4py.check_soundness(net, im, fm)
is_sound[0]
|
|
|
|
|
2023-12-13 15:47:47 +01:00
|
|
|
## ILP Miner

net, im, fm = pm4py.discover_petri_net_ilp(event_log)
ilp_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_ilp_complete.png")

# Soundness diagnostics for the ILP-miner net.
is_sound = pm4py.check_soundness(net, im, fm)
is_sound[0]
|
2023-12-13 15:47:47 +01:00
|
|
|
|
|
|
|
# Collect the evaluation metrics of all four miners in one data frame.
# Renamed from "eval" (shadows the builtin); np.row_stack is a deprecated
# alias of np.vstack and was removed in NumPy 2.0.
eval_df = pd.DataFrame(np.vstack([h_eval, a_eval, i_eval, ilp_eval]),
                       columns=["fitness", "precision", "generalizability", "simplicity"],
                       index=["heuristics", "alpha", "inductive", "ilp"])
eval_df

eval_df.to_csv("results/eval_all-miners_complete.csv", sep=";")
|
|
|
|
|
|
|
|
|
|
|
|
###### Process Mining - individual artworks ######
|
|
|
|
|
2023-12-21 16:44:26 +01:00
|
|
|
def pm_artworks(miner):
    """Discover and evaluate process models per artwork.

    For the given mining algorithm, a Petri net is discovered once on the
    complete event log and once per artwork; both are evaluated on each
    artwork's sub-log with ``eval_pm``.

    Parameters
    ----------
    miner : str
        One of "heuristics", "inductive", "alpha", "ilp".

    Returns
    -------
    pandas.DataFrame
        Metrics per artwork, with a "nettype" column distinguishing the
        complete-log net ("alldata") from the artwork-specific net ("subdata").

    Raises
    ------
    ValueError
        If ``miner`` is not one of the supported algorithm names.
    """
    # Map the miner name to its pm4py discovery function; this replaces the
    # two duplicated if/elif chains of the original and gives a clear error
    # for unknown miners (previously a NameError on undefined `net`).
    discover = {
        "heuristics": pm4py.discover_petri_net_heuristics,
        "inductive": pm4py.discover_petri_net_inductive,
        "alpha": pm4py.discover_petri_net_alpha,
        "ilp": pm4py.discover_petri_net_ilp,
    }
    try:
        discover_net = discover[miner]
    except KeyError:
        raise ValueError("unknown miner: " + repr(miner)) from None

    # Hoisted: the unique artworks were recomputed on every loop iteration.
    artworks = event_log["case:artwork"].unique()
    retval1 = np.empty((len(artworks), 4))
    retval2 = np.empty((len(artworks), 4))

    # Reference net discovered on the complete event log.
    net, im, fm = discover_net(event_log)

    for i, artwork in enumerate(artworks):
        subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork",
                                                      [artwork],
                                                      level="case", retain=True)
        subnet, subim, subfm = discover_net(subdata)
        #pm4py.save_vis_petri_net(subnet, subim, subfm,
        #        "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png")
        retval1[i] = eval_pm(subdata, net, im, fm)            # complete-log net on sub-log
        retval2[i] = eval_pm(subdata, subnet, subim, subfm)   # artwork-specific net

    retval1 = pd.DataFrame(retval1,
                           columns=["fitness", "precision", "generalizability", "simplicity"],
                           index=artworks)
    retval1.insert(0, "nettype", "alldata")
    retval2 = pd.DataFrame(retval2,
                           columns=["fitness", "precision", "generalizability", "simplicity"],
                           index=artworks)
    retval2.insert(0, "nettype", "subdata")
    return pd.concat([retval1, retval2])
|
2023-12-21 16:44:26 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Run the per-artwork evaluation for every miner and export the results.
for miner in ["heuristics", "inductive", "alpha", "ilp"]:
    eval_art = pm_artworks(miner=miner)
    eval_art.to_csv("results/eval_artworks_" + miner + ".csv", sep=";")

# Continue working with the inductive-miner results below.
eval_art = pm_artworks(miner="inductive")
|
2023-12-13 15:47:47 +01:00
|
|
|
|
|
|
|
##### Clustering ######

## KMeans on the per-artwork evaluation metrics

#eval_artworks = eval_art[eval_art.nettype == "alldata"].iloc[:, range(1, 5)]
eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:, range(1, 5)]

kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks)

# Optional 2D embedding for plotting:
#from sklearn.manifold import MDS
#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks))

# Work on a copy: the original aliased eval_artworks, so assigning the
# cluster labels silently mutated the source frame (and risked a
# SettingWithCopyWarning on the sliced frame).
coord = eval_artworks.copy()
coord["clusters"] = kmeans.labels_

# Scatter plot of two metric columns, colored by cluster.
for i in coord.clusters.unique():
    #plt.scatter(coord[coord.clusters == i].iloc[:,0], coord[coord.clusters == i].iloc[:,1],
    plt.scatter(coord[coord.clusters == i].iloc[:, 1],
                coord[coord.clusters == i].iloc[:, 2],
                #plt.scatter(coord[coord.clusters == i].iloc[:,2], coord[coord.clusters == i].iloc[:,4],
                label=i)
plt.legend()
plt.show()
|
|
|
|
|
|
|
|
### Scree plot: within-cluster SSE as a function of k

inertia_per_k = {}
for k in range(1, 10):
    km = KMeans(n_clusters=k, max_iter=1000).fit(eval_artworks[["precision", "generalizability"]])
    #data["clusters"] = km.labels_
    #print(data["clusters"])
    # Inertia: sum of distances of samples to their closest cluster center.
    inertia_per_k[k] = km.inertia_

plt.figure()
plt.plot(list(inertia_per_k.keys()), list(inertia_per_k.values()))
plt.xlabel("Number of clusters")
plt.ylabel("SSE")
plt.show()
|
2024-01-09 14:34:15 +01:00
|
|
|
|