diff --git a/code/00_pm.py b/code/00_pm.py index 7e07a36..dc31d30 100644 --- a/code/00_pm.py +++ b/code/00_pm.py @@ -2,8 +2,8 @@ %reset import pm4py -from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator -from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator +#from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator +#from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator import pandas as pd import numpy as np @@ -97,22 +97,44 @@ eval.to_csv("results/eval_all-miners_complete.csv", sep=";") net, im, fm = pm4py.discover_petri_net_heuristics(event_log) #net, im, fm = pm4py.discover_petri_net_inductive(event_log) -eval_art = np.empty((len(event_log["case:artwork"].unique()), 4)) +def pm_artworks(miner): -for i in range(len(event_log["case:artwork"].unique())): + retval = np.empty((len(event_log["case:artwork"].unique()), 4)) - subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork", - [event_log["case:artwork"].unique()[i]], - level="case", retain=True) - #net, im, fm = pm4py.discover_petri_net_heuristics(subdata) - eval_art[i] = eval_pm(subdata, net, im, fm) + if miner == "heuristics": + net, im, fm = pm4py.discover_petri_net_heuristics(event_log) + elif miner == "inductive": + net, im, fm = pm4py.discover_petri_net_inductive(event_log) + elif miner == "alpha": + net, im, fm = pm4py.discover_petri_net_alpha(event_log) + elif miner == "ilp": + net, im, fm = pm4py.discover_petri_net_ilp(event_log) -eval_art = pd.DataFrame(eval_art) -eval_art.columns = ["fitness", "precision", "generalizability", "simplicity"] -eval_art.index = event_log["case:artwork"].unique() + for i in range(len(event_log["case:artwork"].unique())): + artwork = event_log["case:artwork"].unique()[i] + subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork", + [artwork], + level="case", retain=True) + if miner == "heuristics": + subnet, subim, subfm = pm4py.discover_petri_net_heuristics(subdata) + elif miner == "inductive": + subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata) + elif miner == "alpha": + subnet, subim, subfm = pm4py.discover_petri_net_alpha(subdata) + elif miner == "ilp": + subnet, subim, subfm = pm4py.discover_petri_net_ilp(subdata) + pm4py.save_vis_petri_net(subnet, subim, subfm, + "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png") + retval[i] = eval_pm(subdata, net, im, fm) -#eval_art.to_csv("results/eval_heuristics_artworks.csv", sep=";") -eval_art.to_csv("results/eval_inductive_artworks.csv", sep=";") + retval = pd.DataFrame(retval) + retval.columns = ["fitness", "precision", "generalizability", "simplicity"] + retval.index = event_log["case:artwork"].unique() + return retval + +for miner in ["heuristics", "inductive", "alpha", "ilp"]: + eval_art = pm_artworks(miner = miner) + eval_art.to_csv("results/eval_artworks_" + miner + ".csv", sep=";") ##### Clustering ###### @@ -148,7 +170,3 @@ plt.plot(list(sse.keys()), list(sse.values())) plt.xlabel("Number of clusters") plt.ylabel("SSE") plt.show() - -# TODO: Redo it for data pre corona, so I do not have artefacts for 504 and 505 -# TODO: Create plot with artworks in it: -# https://stackoverflow.com/questions/27800307/adding-a-picture-to-plot-in-r diff --git a/code/01_clustering.R b/code/01_clustering.R index 8648934..9afff5d 100644 --- a/code/01_clustering.R +++ b/code/01_clustering.R @@ -25,9 +25,13 @@ #--------------- (1) Read evaluation data --------------- -eval_heuristics <- read.table("results/eval_heuristics_artworks.csv", header = TRUE, +eval_heuristics <- read.table("results/eval_artworks_heuristics.csv", header = TRUE, sep = ";", row.names = 1) -eval_inductive <- read.table("results/eval_inductive_artworks.csv", header = TRUE, +eval_inductive <- read.table("results/eval_artworks_inductive.csv", header = TRUE, + sep = ";", row.names = 1) +eval_alpha <- read.table("results/eval_artworks_alpha.csv", header = TRUE, + sep = ";", row.names = 1) +eval_ilp <- read.table("results/eval_artworks_ilp.csv", header = TRUE, sep = ";", row.names = 1) #--------------- (2) Clustering --------------- @@ -42,7 +46,6 @@ colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900") plot(generalizability ~ precision, eval_heuristics, pch = 16, col = colors[k1$cluster]) - ## Scree plot ks <- 1:10 @@ -58,7 +61,6 @@ k2 <- kmeans(eval_inductive, 4) plot(generalizability ~ precision, eval_inductive, pch = 16, col = colors[k2$cluster]) - ## Scree plot ks <- 1:10 @@ -68,6 +70,40 @@ for (k in ks) sse <- c(sse, kmeans(eval_inductive, k)$tot.withinss) plot(sse ~ ks, type = "l") +# Alpha Miner + +k3 <- kmeans(eval_alpha, 4) + +par(mfrow = c(2, 2)) +plot(generalizability ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster]) +plot(fitness ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster]) +plot(fitness ~ generalizability, eval_alpha, pch = 16, col = colors[k3$cluster]) + +## Scree plot + +ks <- 1:10 + +sse <- NULL +for (k in ks) sse <- c(sse, kmeans(eval_alpha, k)$tot.withinss) + +plot(sse ~ ks, type = "l") + + +# ILP Miner + +k4 <- kmeans(eval_ilp, 4) + +plot(generalizability ~ precision, eval_ilp, pch = 16, col = colors[k4$cluster]) + +## Scree plot + +ks <- 1:10 + +sse <- NULL +for (k in ks) sse <- c(sse, kmeans(eval_ilp, k)$tot.withinss) + +plot(sse ~ ks, type = "l") + #--------------- (3) Visualization with pictures --------------- library(png)