%reset

import pm4py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pm4py.visualization.petri_net import visualizer as pn_visualizer

parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}

###### Load data and create event logs ######

dat = pd.read_csv("results/haum/event_logfiles_2024-01-18_09-58-52.csv", sep=";")

dat = dat[dat["date.start"] < "2020-03-13"]  # only pre-corona data (before artworks were updated)
dat = dat[dat["path"] != 106098]             # exclude broken trace

event_log = pm4py.format_dataframe(dat, case_id="case", activity_key="event",
                                   timestamp_key="date.start")
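# (format_dataframe() adds the standard pm4py columns case:concept:name,
#  concept:name, and time:timestamp that the discovery algorithms expect)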

event_log.event.value_counts()
event_log.event.value_counts(normalize=True)

dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
pm4py.view_dfg(dfg, start_activities, end_activities)
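# (the directly-follows graph shows how often one event type immediately
#  follows another across all traces)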

#filtered_log = pm4py.filter_event_attribute_values(event_log, 'item', [80])

net, im, fm = pm4py.discover_petri_net_inductive(event_log)
pm4py.vis.view_petri_net(net, im, fm)

gviz = pn_visualizer.apply(net, im, fm, parameters=parameters,
                           variant=pn_visualizer.Variants.FREQUENCY,
                           log=event_log)
pn_visualizer.view(gviz)

bpmn = pm4py.convert.convert_to_bpmn(net, im, fm)
pm4py.vis.view_bpmn(bpmn)

net2, im2, fm2 = pm4py.discover_petri_net_inductive(event_log, noise_threshold=0.1)
pm4py.vis.view_petri_net(net2, im2, fm2)
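# (a noise_threshold > 0 selects the infrequent variant of the inductive
#  miner, which filters out rare behavior and typically yields a simpler net)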

def eval_pm(data, net, initial_marking, final_marking):
    """Calculate fitness, precision, and simplicity for a Petri net"""
    fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
    precision = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
    #generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net,
    #                                                                        initial_marking, final_marking)
    simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
    #return [fitness['average_trace_fitness'], precision, generalizability, simplicity]
    return [fitness["average_trace_fitness"], precision, simplicity]
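# each call returns [fitness, precision, simplicity], all in [0, 1] (higher is better)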

eval1 = eval_pm(event_log, net, im, fm)  # named eval1 rather than eval to avoid shadowing the builtin
eval2 = eval_pm(event_log, net2, im2, fm2)

# Size of the Petri net
len(net.places)
len(net.transitions)
len(net.arcs)

# Number of cases
len(event_log.case.unique())

# Number of variants
variants = pm4py.get_variants(event_log)
len(variants)

sorted_variants = dict(sorted(variants.items(), key=lambda item: item[1], reverse=True))
{k: sorted_variants[k] for k in list(sorted_variants)[:20]}  # 20 most frequent variants

# Variants when "move" events are ignored
filtered_log = event_log[event_log["event"] != "move"]
variants_no_move = pm4py.get_variants(filtered_log)
len(variants_no_move)

sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse=True))
{k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]}
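
# The two "top 20 variants" snippets above repeat the same pattern; a small
# helper (hypothetical, not part of the original analysis) could express it once:
def top_variants(variants, n=20):
    """Return the n most frequent trace variants as a dict."""
    return dict(sorted(variants.items(), key=lambda kv: kv[1], reverse=True)[:n])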

###### Navigation behavior for case ######

log_case = pm4py.format_dataframe(dat, case_id="case", activity_key="item",
                                  timestamp_key="date.start")
log_case = log_case.merge(tmp, on="item", how="left")
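# NOTE: `tmp` is not defined in this file; it is assumed to be a data frame
# with an "item" column and cluster assignments (e.g., "kcluster" and
# "hcluster") computed elsewhere in the project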

#filtered_log = pm4py.filter_event_attribute_values(log_case, "kcluster", [3])
filtered_log = log_case[log_case.hcluster == 1]

dfg, start_activities, end_activities = pm4py.discover_dfg(filtered_log)
pm4py.vis.view_dfg(dfg, start_activities, end_activities)

net, im, fm = pm4py.discover_petri_net_inductive(filtered_log)
pm4py.vis.view_petri_net(net, im, fm)

tree = pm4py.discovery.discover_process_tree_inductive(filtered_log)
pm4py.vis.view_process_tree(tree)

# One row per case/path/item combination, with per-path averages
datcase = dat[~dat.duplicated(["case", "path", "item"])]
datcase = datcase[["case", "path", "event", "item", "date.start"]]
datcase = datcase.reset_index().drop("index", axis=1)
#datcase = pd.concat([datcase, pd.get_dummies(datcase["item"], dtype="int")], axis=1)

datcase["duration"] = dat.groupby("path")["duration"].mean().tolist()
datcase["distance"] = dat.groupby("path")["distance"].mean().tolist()
datcase["scaleSize"] = dat.groupby("path")["scaleSize"].mean().tolist()
datcase["rotationDegree"] = dat.groupby("path")["rotationDegree"].mean().tolist()

datcase["item"] = [str(item).zfill(3) for item in datcase.item]  # zero-pad item IDs, e.g. 80 -> "080"
datcase = datcase.merge(xy[["item", "hcluster"]], on="item", how="left")
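# NOTE: like `tmp` above, `xy` is assumed to be defined elsewhere (a data
# frame of item features that includes the "hcluster" assignment)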

log_case = pm4py.format_dataframe(dat, case_id="case", activity_key="item",
                                  timestamp_key="date.start")

dfg, start_activities, end_activities = pm4py.discover_dfg(log_case)
pm4py.vis.view_dfg(dfg, start_activities, end_activities)
# don't know if this will eventually finish?
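# (discovering the DFG itself is a cheap counting pass; it is the Graphviz
#  layout of the very dense item-level graph that may take extremely long)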

dfg, start_activities, end_activities = pm4py.discover_dfg(log_case[log_case.hcluster == 1])
pm4py.vis.view_dfg(dfg, start_activities, end_activities)
# (assumes log_case still carries an "hcluster" column, i.e., that the merge
#  with the cluster assignments above has been applied to this log)