#%% # needed for shortcuts to run properly in VSCode *eyeroll*

# Clear the interactive namespace. This is what the IPython magic `%reset`
# expands to; written as a plain call (and guarded) so the file is also valid
# Python when it is run or imported outside of IPython.
try:
    get_ipython().run_line_magic("reset", "")
except NameError:
    # Not running under IPython: there is no interactive namespace to reset.
    pass

import pm4py
from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans

###### Load data and create event logs ######

dat = pd.read_csv("../data/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv", sep = ";")
dat = dat[dat.date < "2020-03-13"]
# --> only pre corona (before artworks were updated)

event_log = pm4py.format_dataframe(dat, case_id='trace', activity_key='event',
                                   timestamp_key='date.start')
event_log = event_log.rename(columns={'artwork': 'case:artwork'})
#event_log = pm4py.convert_to_event_log(dat_log) # deprecated

# Bare names below are kept for display when the cell is run interactively.
start_activities = pm4py.get_start_activities(event_log)
start_activities
end_activities = pm4py.get_end_activities(event_log)
end_activities

###### Process Mining - complete data set #####

def eval_pm(data, net, initial_marking, final_marking):
    """Calculate fitness, precision, generalizability, and simplicity for a Petri net.

    Fitness and precision use pm4py's token-based replay; the alignment-based
    variants (``pm4py.fitness_alignments`` / ``pm4py.precision_alignments``)
    take the same arguments and could be swapped in.

    Args:
        data: Event log handed to the pm4py evaluation functions.
        net: Petri net to evaluate.
        initial_marking: Initial marking of ``net``.
        final_marking: Final marking of ``net``.

    Returns:
        list: ``[fitness, precision, generalizability, simplicity]``, where
        fitness is the ``'average_trace_fitness'`` entry of the replay result.
    """
    fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
    precision = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
    # Use the evaluator modules imported at the top of the file instead of
    # reaching through the fully qualified pm4py package path.
    generalizability = generalization_evaluator.apply(data, net, initial_marking, final_marking)
    simplicity = simplicity_evaluator.apply(net)
    return [fitness['average_trace_fitness'], precision, generalizability, simplicity]


## Directly-follows graph
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
pm4py.view_dfg(dfg, start_activities, end_activities)
pm4py.save_vis_dfg(dfg, start_activities, end_activities, '../figures/processmaps/dfg_complete.png')
## Heuristics Miner
net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
h_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(
    net, im, fm,
    "../figures/processmaps/pn_heuristics_complete.png",
)

# Soundness of the heuristics net (kept for interactive inspection)
is_sound = pm4py.check_soundness(net, im, fm)

# Petri net decorated with frequencies taken from the event log
from pm4py.visualization.petri_net import visualizer as pn_visualizer
parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
gviz = pn_visualizer.apply(
    net, im, fm,
    parameters=parameters,
    variant=pn_visualizer.Variants.FREQUENCY,
    log=event_log,
)
pn_visualizer.save(gviz, "../figures/processmaps/pn_heuristics_complete_decorated.png")

# Convert the heuristics net to BPMN and show it
bpmn = pm4py.convert.convert_to_bpmn(net, im, fm)
pm4py.vis.view_bpmn(bpmn)

## Alpha Miner
net, im, fm = pm4py.discover_petri_net_alpha(event_log)
a_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(
    net, im, fm,
    "../figures/processmaps/pn_alpha_complete.png",
)

## Inductive Miner
net, im, fm = pm4py.discover_petri_net_inductive(event_log)
i_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(
    net, im, fm,
    "../figures/processmaps/pn_induction_complete.png",
)

# as process tree (does not work for heuristics miner!)
pt = pm4py.discover_process_tree_inductive(event_log)
pm4py.vis.view_process_tree(pt)

## ILP Miner
net, im, fm = pm4py.discover_petri_net_ilp(event_log)
ilp_eval = eval_pm(event_log, net, im, fm)
pm4py.vis.view_petri_net(net, im, fm)
pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_ilp_complete.png")

# One row of quality metrics per miner. Named `eval_miners` rather than `eval`
# so the builtin is not shadowed; `np.vstack` replaces the deprecated
# `np.row_stack` alias.
metric_names = ["fitness", "precision", "generalizability", "simplicity"]
eval_miners = pd.DataFrame(
    np.vstack([h_eval, a_eval, i_eval, ilp_eval]),
    columns=metric_names,
    index=["heuristics", "alpha", "inductive", "ilp"],
)
eval_miners  # displayed when run interactively

eval_miners.to_csv("results/eval_all-miners_complete.csv", sep=";")

###### Process Mining - individual artworks ######

# A single switch selects both the miner used for the reference net and the
# name of the result file, so the saved metrics can no longer be labelled with
# a different miner than the one that produced them.
artwork_miner = "heuristics"  # or "inductive"
discover_net = {
    "heuristics": pm4py.discover_petri_net_heuristics,
    "inductive": pm4py.discover_petri_net_inductive,
}[artwork_miner]

# The net is discovered once on the complete log; each artwork's sub-log is
# then evaluated against that same net.
net, im, fm = discover_net(event_log)

# Hoisted: the list of artworks does not change inside the loop.
artworks = event_log["case:artwork"].unique()

eval_art = np.empty((len(artworks), len(metric_names)))
for i, artwork in enumerate(artworks):
    subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork",
                                                  [artwork],
                                                  level="case", retain=True)
    # Alternative: discover a separate net per artwork
    #net, im, fm = pm4py.discover_petri_net_heuristics(subdata)
    eval_art[i] = eval_pm(subdata, net, im, fm)

eval_art = pd.DataFrame(eval_art, columns=metric_names, index=artworks)

eval_art.to_csv(f"results/eval_{artwork_miner}_artworks.csv", sep=";")

##### Clustering ######

## KMeans

kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_art)

#from sklearn.manifold import MDS
#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_art))

# Work on a copy: adding the label column to `eval_art` itself would turn the
# cluster labels into a feature the next time KMeans is fitted on `eval_art`.
coord = eval_art.copy()
coord["clusters"] = kmeans.labels_

for i in coord.clusters.unique():
    members = coord[coord.clusters == i]
    # Columns 1 and 2 are precision and generalizability; other column pairs
    # (e.g. 0 and 1) can be plotted by changing the indices.
    plt.scatter(members.iloc[:, 1], members.iloc[:, 2], label=i)
plt.legend()
plt.show()

### Scree plot

# Fit KMeans for k = 1..9 on precision and generalizability and record the
# inertia of each fit.
features = eval_art[["precision", "generalizability"]]

sse = {}
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(features)
    # Inertia: sum of squared distances of samples to their closest cluster center
    sse[k] = kmeans.inertia_

plt.figure()
plt.plot(list(sse), list(sse.values()))
plt.xlabel("Number of clusters")
plt.ylabel("SSE")
plt.show()

# TODO: Redo it for data pre corona, so I do not have artefacts for 504 and 505
# TODO: Create plot with artworks in it:
# https://stackoverflow.com/questions/27800307/adding-a-picture-to-plot-in-r