Added clustering for all miners

2023-12-21 16:44:26 +01:00 · 2023-12-21 16:44:26 +01:00 · 0a533d7deb
commit 0a533d7deb
parent e2137dec04
2 changed files with 76 additions and 22 deletions
--- a/code/00_pm.py
+++ b/code/00_pm.py
@@ -2,8 +2,8 @@
 %reset
 import pm4py
-from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
+#from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
-from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
+#from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
 import pandas as pd
 import numpy as np
@@ -97,22 +97,44 @@ eval.to_csv("results/eval_all-miners_complete.csv", sep=";")
 net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
 #net, im, fm = pm4py.discover_petri_net_inductive(event_log)
-eval_art = np.empty((len(event_log["case:artwork"].unique()), 4))
+def pm_artworks(miner):
    retval = np.empty((len(event_log["case:artwork"].unique()), 4))
    if miner == "heuristics":
        net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
    elif miner == "inductive":
        net, im, fm = pm4py.discover_petri_net_inductive(event_log)
    elif miner == "alpha":
        net, im, fm = pm4py.discover_petri_net_alpha(event_log)
    elif miner == "ilp":
        net, im, fm = pm4py.discover_petri_net_ilp(event_log)
    for i in range(len(event_log["case:artwork"].unique())):
-
+        artwork = event_log["case:artwork"].unique()[i]    
        subdata = pm4py.filter_event_attribute_values(event_log, "case:artwork",
-                                                  [event_log["case:artwork"].unique()[i]],
+                                                    [artwork],
                                                    level="case", retain=True)
-    #net, im, fm = pm4py.discover_petri_net_heuristics(subdata)
+        if miner == "heuristics":
-    eval_art[i] = eval_pm(subdata, net, im, fm)
+            subnet, subim, subfm = pm4py.discover_petri_net_heuristics(subdata)
        elif miner == "inductive":
            subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata)
        elif miner == "alpha":
            subnet, subim, subfm = pm4py.discover_petri_net_alpha(subdata)
        elif miner == "ilp":
            subnet, subim, subfm = pm4py.discover_petri_net_ilp(subdata)
        pm4py.save_vis_petri_net(subnet, subim, subfm,
                 "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png")
        retval[i] = eval_pm(subdata, net, im, fm)
-eval_art = pd.DataFrame(eval_art)
+    retval = pd.DataFrame(retval)
-eval_art.columns = ["fitness", "precision", "generalizability", "simplicity"]
+    retval.columns = ["fitness", "precision", "generalizability", "simplicity"]
-eval_art.index = event_log["case:artwork"].unique()
+    retval.index = event_log["case:artwork"].unique()
    return retval
-#eval_art.to_csv("results/eval_heuristics_artworks.csv", sep=";")
+for miner in ["heuristics", "inductive", "alpha", "ilp"]:
-eval_art.to_csv("results/eval_inductive_artworks.csv", sep=";")
+    eval_art = pm_artworks(miner = miner)
    eval_art.to_csv("results/eval_artworks_" + miner + ".csv", sep=";")
 ##### Clustering ######
@@ -148,7 +170,3 @@ plt.plot(list(sse.keys()), list(sse.values()))
 plt.xlabel("Number of clusters")
 plt.ylabel("SSE")
 plt.show()
 # TODO: Redo it for data pre corona, so I do not have artefacts for 504 and 505
 # TODO: Create plot with artworks in it:
 #       https://stackoverflow.com/questions/27800307/adding-a-picture-to-plot-in-r
--- a/code/01_clustering.R
+++ b/code/01_clustering.R
@@ -25,9 +25,13 @@
 #--------------- (1) Read evaluation data ---------------
-eval_heuristics <- read.table("results/eval_heuristics_artworks.csv", header = TRUE,
+eval_heuristics <- read.table("results/eval_artworks_heuristics.csv", header = TRUE,
                       sep = ";", row.names = 1)
-eval_inductive <- read.table("results/eval_inductive_artworks.csv", header = TRUE,
+eval_inductive <- read.table("results/eval_artworks_inductive.csv", header = TRUE,
                       sep = ";", row.names = 1)
 eval_alpha <- read.table("results/eval_artworks_alpha.csv", header = TRUE,
                       sep = ";", row.names = 1)
 eval_ilp <- read.table("results/eval_artworks_ilp.csv", header = TRUE,
                       sep = ";", row.names = 1)
 #--------------- (2) Clustering ---------------
@@ -42,7 +46,6 @@ colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")
 plot(generalizability ~ precision, eval_heuristics, pch = 16, col = colors[k1$cluster])
 ## Scree plot
 ks <- 1:10
@@ -58,7 +61,6 @@ k2 <- kmeans(eval_inductive, 4)
 plot(generalizability ~ precision, eval_inductive, pch = 16, col = colors[k2$cluster])
 ## Scree plot
 ks <- 1:10
@@ -68,6 +70,40 @@ for (k in ks) sse <- c(sse, kmeans(eval_inductive, k)$tot.withinss)
 plot(sse ~ ks, type = "l")
 # Alpha Miner
 k3 <- kmeans(eval_alpha, 4)
 par(mfrow = c(2, 2))
 plot(generalizability ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster])
 plot(fitness ~ precision, eval_alpha, pch = 16, col = colors[k3$cluster])
 plot(fitness ~ generalizability, eval_alpha, pch = 16, col = colors[k3$cluster])
 ## Scree plot
 ks <- 1:10
 sse <- NULL
 for (k in ks) sse <- c(sse, kmeans(eval_alpha, k)$tot.withinss)
 plot(sse ~ ks, type = "l")
 # ILP Miner
 k4 <- kmeans(eval_ilp, 4)
 plot(generalizability ~ precision, eval_ilp, pch = 16, col = colors[k4$cluster])
 ## Scree plot
 ks <- 1:10
 sse <- NULL
 for (k in ks) sse <- c(sse, kmeans(eval_ilp, k)$tot.withinss)
 plot(sse ~ ks, type = "l")
 #--------------- (3) Visualization with pictures ---------------
 library(png)