# mtt_haum/code/pm_navigation-behavior.py

# 127 lines
# 4.6 KiB
# Python

# NOTE: `%reset` is an IPython magic, not plain Python syntax; this script is
# presumably meant to be run interactively in IPython/Jupyter, where it clears
# the interactive namespace before the analysis starts.
%reset
import pm4py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pm4py.visualization.petri_net import visualizer as pn_visualizer
# Output format passed to the frequency variant of the Petri net visualizer.
parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}

###### Load data and create event logs ######

dat = pd.read_csv("results/haum/event_logfiles_2024-01-18_09-58-52.csv", sep = ";")

# --> only pre corona (before artworks were updated)
# `date.start` is compared against a string literal here, so this presumably
# relies on the column holding ISO-formatted date strings -- TODO confirm.
dat = dat[dat["date.start"] < "2020-03-13"]
dat = dat[dat["path"] != 106098]  # exclude broken trace

# Event log with one trace per `case` and the `event` column as activity.
event_log = pm4py.format_dataframe(dat, case_id='case', activity_key='event',
                                   timestamp_key='date.start')

# Absolute and relative frequencies of the event types (interactive inspection;
# as bare expressions they have no effect when run as a plain script).
event_log.event.value_counts()
event_log.event.value_counts(normalize=True)

# Directly-follows graph of the event types.
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
pm4py.view_dfg(dfg, start_activities, end_activities)

#filtered_log = pm4py.filter_event_attribute_values(event_log, 'item', [80])

# Petri net discovered with the inductive miner (default settings).
net, im, fm = pm4py.discover_petri_net_inductive(event_log)
pm4py.vis.view_petri_net(net, im, fm)

# The same net rendered with the FREQUENCY visualizer variant, which is given
# the event log in addition to the net.
gviz = pn_visualizer.apply(net, im, fm, parameters=parameters,
                           variant=pn_visualizer.Variants.FREQUENCY,
                           log=event_log)
pn_visualizer.view(gviz)

# BPMN representation of the discovered net.
bpmn = pm4py.convert.convert_to_bpmn(net, im, fm)
pm4py.vis.view_bpmn(bpmn)

# Second model: inductive miner with a noise threshold of 0.1.
net2, im2, fm2 = pm4py.discover_petri_net_inductive(event_log, noise_threshold=0.1)
pm4py.vis.view_petri_net(net2, im2, fm2)
def eval_pm(data, net, initial_marking, final_marking):
    """Calculate fitness, precision, and simplicity for a Petri net.

    Fitness and precision are obtained with pm4py's token-based replay of
    ``data`` on the net; simplicity is computed from the net alone.
    Generalizability is currently not computed (its call is disabled below).

    Parameters
    ----------
    data
        Event log handed to the pm4py replay functions.
    net, initial_marking, final_marking
        Petri net together with its initial and final marking.

    Returns
    -------
    list
        ``[average_trace_fitness, precision, simplicity]``
    """
    marked_net = (net, initial_marking, final_marking)

    replay_fitness = pm4py.fitness_token_based_replay(data, *marked_net)
    replay_precision = pm4py.precision_token_based_replay(data, *marked_net)
    # Disabled:
    # generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(
    #     data, net, initial_marking, final_marking)
    net_simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)

    return [
        replay_fitness['average_trace_fitness'],
        replay_precision,
        net_simplicity,
    ]
# Quality measures for the default model and for the noise-filtered model.
# NOTE(review): the name `eval` shadows the Python builtin of the same name;
# it is kept here because code outside this section may rely on it.
eval = eval_pm(event_log, net, im, fm)
eval2 = eval_pm(event_log, net2, im2, fm2)

# Size of the default Petri net (interactive inspection).
len(net.places)
len(net.transitions)
len(net.arcs)

# Number of cases
len(event_log["case"].unique())

# Number of variants
variants = pm4py.get_variants(event_log)
len(variants)

# Variants ordered by their associated value, largest first; show the top 20.
sorted_variants = {
    variant: variants[variant]
    for variant in sorted(variants, key=variants.get, reverse=True)
}
dict(list(sorted_variants.items())[:20])

# The same overview after dropping all "move" events.
filtered_log = event_log[event_log["event"] != "move"]
variants_no_move = pm4py.get_variants(filtered_log)
len(variants_no_move)

sorted_variants_no_move = {
    variant: variants_no_move[variant]
    for variant in sorted(variants_no_move, key=variants_no_move.get, reverse=True)
}
dict(list(sorted_variants_no_move.items())[:20])
###### Navigation behavior for case ######

# Event log with one trace per `case` and the `item` column as activity.
log_case = pm4py.format_dataframe(
    dat,
    case_id="case",
    activity_key="item",
    timestamp_key="date.start",
)

# NOTE(review): `tmp` is not defined anywhere in the visible part of this
# script; presumably it is a per-item table providing the `hcluster` column
# used below -- confirm where it comes from.
log_case = log_case.merge(tmp, how="left", on="item")

#filtered_log = pm4py.filter_event_attribute_values(log_case, "kcluster", [3])
filtered_log = log_case[log_case["hcluster"] == 1]

# Directly-follows graph for the rows with hcluster == 1.
# (`net`, `im`, `fm` temporarily hold the DFG and its start/end activities.)
net, im, fm = pm4py.discover_dfg(filtered_log)
pm4py.view_dfg(net, im, fm)

# Petri net for the same subset (overwrites `net`, `im`, `fm`).
net, im, fm = pm4py.discover_petri_net_inductive(filtered_log)
pm4py.view_petri_net(net, im, fm)

# Process tree for the same subset.
tree = pm4py.discover_process_tree_inductive(filtered_log)
pm4py.view_process_tree(tree)
# Keep the first row of every (case, path, item) combination and only the
# columns needed to build an event log from it.
datcase = dat[~dat.duplicated(["case", "path", "item"])]
datcase = datcase[["case", "path", "event", "item", "date.start"]]
datcase = datcase.reset_index(drop=True)

#datcase = pd.concat([datcase, pd.get_dummies(datcase["item"], dtype = "int")], axis = 1)

# Attach the per-path means by joining on the `path` key. Assigning the
# grouped means as a plain list would be positional: it only works if the
# sorted unique paths match the rows of `datcase` exactly in number and
# order, and otherwise misaligns values or fails with a length error.
path_means = dat.groupby("path")[
    ["duration", "distance", "scaleSize", "rotationDegree"]
].mean()
datcase = datcase.join(path_means, on="path")

# Items as zero-padded strings of at least three characters.
datcase["item"] = [str(item).zfill(3) for item in datcase["item"]]

# NOTE(review): `xy` is not defined anywhere in the visible part of this
# script; it must provide the columns `item` and `hcluster` -- confirm where
# it comes from and that its `item` values are zero-padded strings as well.
datcase = datcase.merge(xy[["item", "hcluster"]], on = "item", how = "left")
# Build the event log from `datcase` rather than `dat`: the filter on
# `hcluster` further down needs that column, and within this script it is only
# merged into `datcase`.
# NOTE(review): confirm that `datcase` is the intended source of this log.
log_case = pm4py.format_dataframe(datcase, case_id = "case", activity_key = "item",
                                  timestamp_key = "date.start")

# Directly-follows graph over all items.
# (`net`, `im`, `fm` hold the DFG and its start/end activities here.)
net, im, fm = pm4py.discover_dfg(log_case)
pm4py.vis.view_dfg(net, im, fm)
# don't know if this will eventually finish?

# Directly-follows graph restricted to rows with hcluster == 1.
net, im, fm = pm4py.discover_dfg(log_case[log_case.hcluster == 1])
pm4py.vis.view_dfg(net, im, fm)