Added clustering for all miners

2023-12-21 16:44:26 +01:00 · 2023-12-21 16:44:26 +01:00 · 0a533d7deb
commit 0a533d7deb
parent e2137dec04
2 changed files with 76 additions and 22 deletions
--- a/code/00_pm.py
+++ b/code/00_pm.py
@@ -2,8 +2,8 @@
 %reset

 import pm4py
-from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
-from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
+#from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
+#from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator

 import pandas as pd
 import numpy as np
@@ -97,22 +97,44 @@ eval.to_csv("results/eval_all-miners_complete.csv", sep=";")
 net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
 #net, im, fm = pm4py.discover_petri_net_inductive(event_log)

-eval_art = np.empty((len(event_log["case:artwork"].unique()), 4))
+def pm_artworks(miner):
+
+    retval = np.empty((len(event_log["case:artwork"].unique()), 4))
+
+    if miner == "heuristics":
+        net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
+    elif miner == "inductive":
+        net, im, fm = pm4py.discover_petri_net_inductive(event_log)
+    elif miner == "alpha":
+        net, im, fm = pm4py.discover_petri_net_alpha(event_log)
+    elif miner == "ilp":
+        net, im, fm = pm4py.discover_petri_net_ilp(event_log)

    for i in range(len(event_log["case:artwork"].unique())):
-
+        artwork = event_log["case:artwork"].unique()[i]    
        subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork",
-                                                  [event_log["case:artwork"].unique()[i]],
+                                                    [artwork],
                                                    level="case", retain=True)
-    #net, im, fm = pm4py.discover_petri_net_heuristics(subdata)
-    eval_art[i] = eval_pm(subdata, net, im, fm)
+        if miner == "heuristics":
+            subnet, subim, subfm = pm4py.discover_petri_net_heuristics(subdata)
+        elif miner == "inductive":
+            subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata)
+        elif miner == "alpha":
+            subnet, subim, subfm = pm4py.discover_petri_net_alpha(subdata)
+        elif miner == "ilp":
+            subnet, subim, subfm = pm4py.discover_petri_net_ilp(subdata)
+        pm4py.save_vis_petri_net(subnet, subim, subfm,
+                 "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png")
+        retval[i] = eval_pm(subdata, net, im, fm)

-eval_art = pd.DataFrame(eval_art)
-eval_art.columns = ["fitness", "precision", "generalizability", "simplicity"]
-eval_art.index = event_log["case:artwork"].unique()
+    retval = pd.DataFrame(retval)
+    retval.columns = ["fitness", "precision", "generalizability", "simplicity"]
+    retval.index = event_log["case:artwork"].unique()
+    return retval

-#eval_art.to_csv("results/eval_heuristics_artworks.csv", sep=";")
-eval_art.to_csv("results/eval_inductive_artworks.csv", sep=";")
+for miner in ["heuristics", "inductive", "alpha", "ilp"]:
+    eval_art = pm_artworks(miner = miner)
+    eval_art.to_csv("results/eval_artworks_" + miner + ".csv", sep=";")


 ##### Clustering ######
@@ -148,7 +170,3 @@ plt.plot(list(sse.keys()), list(sse.values()))
 plt.xlabel("Number of clusters")
 plt.ylabel("SSE")
 plt.show()
-
-# TODO: Redo it for data pre corona, so I do not have artefacts for 504 and 505
-# TODO: Create plot with artworks in it:
-#       https://stackoverflow.com/questions/27800307/adding-a-picture-to-plot-in-r
--- a/code/01_clustering.R
+++ b/code/01_clustering.R
@@ -25,9 +25,13 @@

 #--------------- (1) Read evaluation data ---------------

-eval_heuristics <- read.table("results/eval_heuristics_artworks.csv", header = TRUE,
+eval_heuristics <- read.table("results/eval_artworks_heuristics.csv", header = TRUE,
                       sep = ";", row.names = 1)
-eval_inductive <- read.table("results/eval_inductive_artworks.csv", header = TRUE,
+eval_inductive <- read.table("results/eval_artworks_inductive.csv", header = TRUE,
+                       sep = ";", row.names = 1)
+eval_alpha <- read.table("results/eval_artworks_alpha.csv", header = TRUE,
+                       sep = ";", row.names = 1)
+eval_ilp <- read.table("results/eval_artworks_ilp.csv", header = TRUE,
                       sep = ";", row.names = 1)

 #--------------- (2) Clustering ---------------
@@ -42,7 +46,6 @@ colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")

 plot(generalizability ~ precision, eval_heuristics, pch = 16, col = colors[k1$cluster])

-
 ## Scree plot

 ks <- 1:10
@@ -58,7 +61,6 @@ k2 <- kmeans(eval_inductive, 4)

 plot(generalizability ~ precision, eval_inductive, pch = 16, col = colors[k2$cluster])

-
 ## Scree plot

 ks <- 1:10
@@ -68,6 +70,40 @@ for (k in ks) sse <- c(sse, kmeans(eval_inductive, k)$tot.withinss)

 plot(sse ~ ks, type = "l")

+# Alpha Miner
+
+k3 <- kmeans(eval_alpha, 4)
+
+par(mfrow = c(2, 2))
+plot(generalizability ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster])
+plot(fitness ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster])
+plot(fitness ~ generalizability, eval_alpha, pch = 16, col = colors[k3$cluster])
+
+## Scree plot
+
+ks <- 1:10
+
+sse <- NULL
+for (k in ks) sse <- c(sse, kmeans(eval_alpha, k)$tot.withinss)
+
+plot(sse ~ ks, type = "l")
+
+
+# ILP Miner
+
+k4 <- kmeans(eval_ilp, 4)
+
+plot(generalizability ~ precision, eval_ilp, pch = 16, col = colors[k4$cluster])
+
+## Scree plot
+
+ks <- 1:10
+
+sse <- NULL
+for (k in ks) sse <- c(sse, kmeans(eval_ilp, k)$tot.withinss)
+
+plot(sse ~ ks, type = "l")
+
 #--------------- (3) Visualization with pictures ---------------

 library(png)