%reset import pm4py import pandas as pd import numpy as np import matplotlib.pyplot as plt from python_helpers import eval_pm, pn_infos ###### Load data and create event logs ###### dat = pd.read_csv("results/haum/event_logfiles_2024-01-18_09-58-52.csv", sep = ";") dat = dat[dat["date.start"] < "2020-03-13"] dat = dat[dat["path"] != 106098] # exclude broken trace # --> only pre corona (before artworks were updated) log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", timestamp_key = "date.start") ###### Infos for items ###### mdi = pd.DataFrame(columns = ["fitness", "precision", "generalizability", "simplicity", "sound", "narcs", "ntrans", "nplaces", "nvariants", "mostfreq"]) for item in log_path.item.unique().tolist(): mdi = pd.concat([mdi, pn_infos(log_path, "item", item)]) mdi = mdi.sort_index() # Export mdi.to_csv("results/haum/pn_infos_items.csv", sep = ";") # datitem = dat.groupby("item")[["duration", "distance", # "scaleSize", "rotationDegree"]].mean() # # def length_path(data): # x = data.path # return len(x.unique()) # def length_case(data): # x = data.case # return len(x.unique()) # def length_topic(data): # x = data.topic.dropna() # return len(x.unique()) # # datitem["npaths"] = dat.groupby(["item"]).apply(length_path) # datitem["ncases"] = dat.groupby(["item"]).apply(length_case) # datitem["ntopics"] = dat.groupby(["item"]).apply(length_topic) # # datitem.index = datitem.index.astype(str).str.rjust(3, "0") # datitem = datitem.sort_index() # datitem.index = mdi.index # # datitem = pd.concat([mdi, datitem], yaxis = 1)