%reset

import pm4py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pm4py.visualization.petri_net import visualizer as pn_visualizer

parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}

###### Load data and create event logs ######

dat = pd.read_csv("results/haum/event_logfiles_2024-01-18_09-58-52.csv", sep=";")
dat = dat[dat["date.start"] < "2020-03-13"]
dat = dat[dat["path"] != 106098]  # exclude broken trace
# --> only pre-corona data (before the artworks were updated)

event_log = pm4py.format_dataframe(dat, case_id="case", activity_key="event",
                                   timestamp_key="date.start")
event_log.event.value_counts()
event_log.event.value_counts(normalize=True)

###### Process discovery on the full log ######

dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
pm4py.view_dfg(dfg, start_activities, end_activities)

#filtered_log = pm4py.filter_event_attribute_values(event_log, "item", [80])

net, im, fm = pm4py.discover_petri_net_inductive(event_log)
pm4py.vis.view_petri_net(net, im, fm)

# Frequency-annotated Petri net
gviz = pn_visualizer.apply(net, im, fm, parameters=parameters,
                           variant=pn_visualizer.Variants.FREQUENCY, log=event_log)
pn_visualizer.view(gviz)

bpmn = pm4py.convert.convert_to_bpmn(net, im, fm)
pm4py.vis.view_bpmn(bpmn)

# Simpler model with noise filtering
net2, im2, fm2 = pm4py.discover_petri_net_inductive(event_log, noise_threshold=0.1)
pm4py.vis.view_petri_net(net2, im2, fm2)

def eval_pm(data, net, initial_marking, final_marking):
    """Calculate fitness, precision, and simplicity for a Petri net
    (generalization is currently disabled)."""
    fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
    precision = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
    #generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net,
    #                                                                        initial_marking, final_marking)
    simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
    #return [fitness["average_trace_fitness"], precision, generalizability, simplicity]
    return [fitness["average_trace_fitness"], precision, simplicity]

eval1 = eval_pm(event_log, net, im, fm)
eval2 = eval_pm(event_log, net2, im2, fm2)

# Model size
len(net.places)
len(net.transitions)
len(net.arcs)

# Number of cases
len(event_log.case.unique())

# Number of variants
variants = pm4py.get_variants(event_log)
len(variants)

sorted_variants = dict(sorted(variants.items(), key=lambda item: item[1], reverse=True))
{k: sorted_variants[k] for k in list(sorted_variants)[:20]}

# Variants without "move" events
filtered_log = event_log[event_log["event"] != "move"]
variants_no_move = pm4py.get_variants(filtered_log)
len(variants_no_move)

sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1],
                                      reverse=True))
{k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]}

###### Navigation behavior for case ######

log_case = pm4py.format_dataframe(dat, case_id="case", activity_key="item",
                                  timestamp_key="date.start")
log_case = log_case.merge(tmp, on="item", how="left")  # tmp: item-level cluster assignments (defined elsewhere)

#filtered_log = pm4py.filter_event_attribute_values(log_case, "kcluster", [3])
filtered_log = log_case[log_case.hcluster == 1]

dfg, start_activities, end_activities = pm4py.discover_dfg(filtered_log)
pm4py.vis.view_dfg(dfg, start_activities, end_activities)

net, im, fm = pm4py.discover_petri_net_inductive(filtered_log)
pm4py.vis.view_petri_net(net, im, fm)

tree = pm4py.discovery.discover_process_tree_inductive(filtered_log)
pm4py.vis.view_process_tree(tree)
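# Hedged sketch (not part of the original analysis): repeat the per-cluster DFG
# discovery above for every hcluster value instead of only cluster 1 and save the
# resulting diagrams to disk. Assumes the hcluster column attached via the merge
# with tmp and that pm4py.save_vis_dfg() is available in the installed pm4py
# version; the output file names are arbitrary placeholders.
for cl in sorted(log_case["hcluster"].dropna().unique()):
    cluster_log = log_case[log_case.hcluster == cl]
    dfg_cl, sa_cl, ea_cl = pm4py.discover_dfg(cluster_log)
    pm4py.save_vis_dfg(dfg_cl, sa_cl, ea_cl, f"dfg_hcluster_{int(cl)}.png")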
# Case-level data frame with path-level aggregates
datcase = dat[~dat.duplicated(["case", "path", "item"])]
datcase = datcase[["case", "path", "event", "item", "date.start"]]
datcase = datcase.reset_index().drop("index", axis=1)
#datcase = pd.concat([datcase, pd.get_dummies(datcase["item"], dtype="int")], axis=1)

# Path-level means of the movement features
datcase["duration"] = dat.groupby("path")["duration"].mean().tolist()
datcase["distance"] = dat.groupby("path")["distance"].mean().tolist()
datcase["scaleSize"] = dat.groupby("path")["scaleSize"].mean().tolist()
datcase["rotationDegree"] = dat.groupby("path")["rotationDegree"].mean().tolist()

datcase["item"] = [str(item).zfill(3) for item in datcase.item]
datcase = datcase.merge(xy[["item", "hcluster"]], on="item", how="left")  # xy: item features with cluster labels (defined elsewhere)

log_case = pm4py.format_dataframe(dat, case_id="case", activity_key="item",
                                  timestamp_key="date.start")
log_case = log_case.merge(tmp, on="item", how="left")  # needed again for the hcluster filter below

dfg, start_activities, end_activities = pm4py.discover_dfg(log_case)
pm4py.vis.view_dfg(dfg, start_activities, end_activities)
# don't know if this will eventually finish?

dfg, start_activities, end_activities = pm4py.discover_dfg(log_case[log_case.hcluster == 1])
pm4py.vis.view_dfg(dfg, start_activities, end_activities)
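# Hedged sketch for the runtime concern noted above: restrict the log to its most
# frequent variants before discovery so the full-log DFG stays tractable. Assumes
# pm4py.filter_variants_top_k() is available in the installed pm4py version;
# k = 20 is an arbitrary choice, not taken from the original analysis.
log_case_top = pm4py.filter_variants_top_k(log_case, 20)
dfg_top, sa_top, ea_top = pm4py.discover_dfg(log_case_top)
pm4py.vis.view_dfg(dfg_top, sa_top, ea_top)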