Added clustering for all miners

This commit is contained in:
Nora Wickelmaier 2023-12-21 16:44:26 +01:00
parent e2137dec04
commit 0a533d7deb
2 changed files with 76 additions and 22 deletions

View File

@ -2,8 +2,8 @@
%reset
import pm4py
from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
#from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
#from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
import pandas as pd
import numpy as np
@ -97,22 +97,44 @@ eval.to_csv("results/eval_all-miners_complete.csv", sep=";")
net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
#net, im, fm = pm4py.discover_petri_net_inductive(event_log)
eval_art = np.empty((len(event_log["case:artwork"].unique()), 4))
def pm_artworks(miner):
    """Evaluate the process model of one miner separately for each artwork.

    A Petri net is discovered on the complete ``event_log`` with the
    requested miner.  For every distinct value of ``case:artwork`` the log
    is filtered to the cases of that artwork.  A Petri net discovered on
    that subset alone is saved as a PNG (it is used for the picture only),
    and the subset is then evaluated with ``eval_pm`` against the net of the
    *complete* log.

    Parameters
    ----------
    miner : str
        One of ``"heuristics"``, ``"inductive"``, ``"alpha"`` or ``"ilp"``.

    Returns
    -------
    pandas.DataFrame
        One row per artwork (indexed by the artwork value) with the columns
        ``fitness``, ``precision``, ``generalizability`` and ``simplicity``.
        NOTE(review): this presumes ``eval_pm`` returns its four measures in
        exactly that order -- confirm against its definition.

    Raises
    ------
    ValueError
        If ``miner`` is not one of the supported names.
    """
    supported = ("heuristics", "inductive", "alpha", "ilp")
    if miner not in supported:
        # Fail early with a clear message instead of hitting an unbound
        # local variable further down.
        raise ValueError(
            "Unknown miner " + repr(miner) + "; expected one of "
            + ", ".join(supported))

    # All supported miners follow the naming scheme
    # pm4py.discover_petri_net_<miner>, so resolve the function once.
    discover = getattr(pm4py, "discover_petri_net_" + miner)

    # Reference model discovered on the complete event log.
    net, im, fm = discover(event_log)

    # Compute the distinct artworks once instead of on every iteration.
    artworks = event_log["case:artwork"].unique()
    retval = np.empty((len(artworks), 4))

    for i, artwork in enumerate(artworks):
        subdata = pm4py.filter_event_attribute_values(
            event_log, "case:artwork", [artwork],
            level="case", retain=True)

        # Net for this artwork only; saved as a picture, not evaluated.
        subnet, subim, subfm = discover(subdata)
        pm4py.save_vis_petri_net(
            subnet, subim, subfm,
            "../figures/processmaps/artworks/petrinet_" + miner + "_"
            + str(artwork).zfill(3) + ".png")

        # Evaluate the artwork's cases against the model of the full log.
        retval[i] = eval_pm(subdata, net, im, fm)

    retval = pd.DataFrame(retval)
    retval.columns = ["fitness", "precision", "generalizability", "simplicity"]
    retval.index = artworks
    return retval
#eval_art.to_csv("results/eval_heuristics_artworks.csv", sep=";")
eval_art.to_csv("results/eval_inductive_artworks.csv", sep=";")
# Run the per-artwork evaluation once for each discovery algorithm and write
# every result table to its own semicolon-separated CSV file.
for miner in ("heuristics", "inductive", "alpha", "ilp"):
    eval_art = pm_artworks(miner=miner)
    eval_art.to_csv(
        "results/eval_artworks_" + miner + ".csv",
        sep=";",
    )
##### Clustering ######
@ -148,7 +170,3 @@ plt.plot(list(sse.keys()), list(sse.values()))
plt.xlabel("Number of clusters")
plt.ylabel("SSE")
plt.show()
# TODO: Redo it for data pre corona, so I do not have artefacts for 504 and 505
# TODO: Create plot with artworks in it:
# https://stackoverflow.com/questions/27800307/adding-a-picture-to-plot-in-r

View File

@ -25,9 +25,13 @@
#--------------- (1) Read evaluation data ---------------
eval_heuristics <- read.table("results/eval_heuristics_artworks.csv", header = TRUE,
eval_heuristics <- read.table("results/eval_artworks_heuristics.csv", header = TRUE,
sep = ";", row.names = 1)
eval_inductive <- read.table("results/eval_inductive_artworks.csv", header = TRUE,
eval_inductive <- read.table("results/eval_artworks_inductive.csv", header = TRUE,
sep = ";", row.names = 1)
eval_alpha <- read.table("results/eval_artworks_alpha.csv", header = TRUE,
sep = ";", row.names = 1)
eval_ilp <- read.table("results/eval_artworks_ilp.csv", header = TRUE,
sep = ";", row.names = 1)
#--------------- (2) Clustering ---------------
@ -42,7 +46,6 @@ colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")
plot(generalizability ~ precision, eval_heuristics, pch = 16, col = colors[k1$cluster])
## Scree plot
ks <- 1:10
@ -58,7 +61,6 @@ k2 <- kmeans(eval_inductive, 4)
plot(generalizability ~ precision, eval_inductive, pch = 16, col = colors[k2$cluster])
## Scree plot
ks <- 1:10
@ -68,6 +70,40 @@ for (k in ks) sse <- c(sse, kmeans(eval_inductive, k)$tot.withinss)
plot(sse ~ ks, type = "l")
# Alpha Miner
# k-means with 4 clusters on all columns of the alpha miner evaluation table.
# NOTE(review): kmeans() starts from randomly chosen centers, so the cluster
# labels can differ between runs unless a seed is set earlier in the script
# (not visible here) -- confirm.
k3 <- kmeans(eval_alpha, 4)
# 2 x 2 panel layout: the three scatter plots and the scree plot below fill it
par(mfrow = c(2, 2))
# Pairwise scatter plots of the measures; point color = assigned cluster
plot(generalizability ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster])
plot(fitness ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster])
plot(fitness ~ generalizability, eval_alpha, pch = 16, col = colors[k3$cluster])
## Scree plot
# Total within-cluster sum of squares for k = 1, ..., 10 clusters
ks <- 1:10
sse <- NULL
for (k in ks) sse <- c(sse, kmeans(eval_alpha, k)$tot.withinss)
plot(sse ~ ks, type = "l")
# ILP Miner
# Same procedure for the ILP miner evaluation table (4 clusters)
k4 <- kmeans(eval_ilp, 4)
# NOTE(review): par(mfrow = ...) is not reset here, so these plots presumably
# continue in the panel layout set above -- confirm this is intended.
plot(generalizability ~ precision, eval_ilp, pch = 16, col = colors[k4$cluster])
## Scree plot
# Total within-cluster sum of squares for k = 1, ..., 10 clusters
ks <- 1:10
sse <- NULL
for (k in ks) sse <- c(sse, kmeans(eval_ilp, k)$tot.withinss)
plot(sse ~ ks, type = "l")
#--------------- (3) Visualization with pictures ---------------
library(png)