Compare commits


No commits in common. "76aa35da3f44f6229e4de609e212cab5e558cd50" and "609e1030189698ad9177e8413eefcebaaffddae0" have entirely different histories.

3 changed files with 6 additions and 63 deletions


@@ -60,7 +60,7 @@ pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
     width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
-plot(x, y, log = "xy", xlab = "Process variants sorted by frequency",
+plot(x, y, log = "xy", xlab = "Absolute Frequency of Traces",
      ylab = "Frequency", pch = 16, col = rgb(0.262, 0.309, 0.309, 0.5))
 lines(x, pre, col = "#434F4F")
 legend("topright", paste0("Proportion of traces only occurring once: ",
@@ -73,7 +73,7 @@ pdf("../../thesis/figures/freq-traces_powerlaw_bw.pdf", height = 3.375,
     width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
-plot(x, y, log = "xy", xlab = "Process variants sorted by frequency",
+plot(x, y, log = "xy", xlab = "Absolute Frequency of Traces",
      ylab = "Frequency", pch = 16, col = rgb(0.3, 0.3, 0.3, 0.5))
 lines(x, pre, col = "#434F4F")
 legend("topright", paste0("Proportion of traces only occurring once: ",
@@ -129,7 +129,7 @@ pdf("../../thesis/figures/freq-traces_powerlaw_2019.pdf", height = 3.375,
     width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
-plot(x, y, log = "xy", xlab = "Process variants sorted by frequency",
+plot(x, y, log = "xy", xlab = "Absolute Frequency of Traces",
      ylab = "Frequency", pch = 16, col = rgb(0.262, 0.309, 0.309, 0.5))
 lines(x, pre, col = "#434F4F")
 legend("topright", paste0("Proportion of traces only occurring once: ",
@@ -142,7 +142,7 @@ pdf("../../thesis/figures/freq-traces_powerlaw_2019_bw.pdf", height = 3.375,
     width = 3.375, pointsize = 10)
 par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
-plot(x, y, log = "xy", xlab = "Process variants sorted by frequency",
+plot(x, y, log = "xy", xlab = "Absolute Frequency of Traces",
      ylab = "Frequency", pch = 16, col = rgb(0.3, 0.3, 0.3, 0.5))
 lines(x, pre, col = "#434F4F")
 legend("topright", paste0("Proportion of traces only occurring once: ",

13_pm-case-clusters.py (deleted)

@@ -1,52 +0,0 @@
# 13_pm-case-clusters.py
#
# content: (1) Load data and create event log
#          (2) Evaluation for clusters
#
# input:  results/eventlogs_2019_case-clusters.csv
# output: results/eval_case_clusters.csv
#
# last mod: 2024-04-02

import pm4py
import pandas as pd

from python_helpers import eval_pm

#--------------- (1) Load data and create event logs ---------------

dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";")

event_log = pm4py.format_dataframe(dat,
                                   case_id = "case",
                                   activity_key = "item",
                                   timestamp_key = "date.start")

#--------------- (2) Evaluation for clusters ---------------

thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]

for nt in thresholds:
    net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = nt)
    eval = pd.DataFrame(eval_pm(event_log, net, im, fm)).T
    eval.columns = ["fitness", "generalization", "simplicity"]
    #eval.columns = ["fitness", "precision", "generalization", "simplicity"]

    # Merge clusters into data frame
    for cluster in [1, 2, 3, 4, 5]:
        log_clst = pm4py.filter_event_attribute_values(event_log, "cluster", [cluster])
        net_clst, im_clst, fm_clst = pm4py.discover_petri_net_inductive(log_clst, noise_threshold = nt)
        eval_clst = pd.DataFrame(eval_pm(log_clst, net_clst, im_clst, fm_clst)).T
        eval_clst.columns = ["fitness", "generalization", "simplicity"]
        #eval_clst.columns = ["fitness", "precision", "generalization", "simplicity"]

        eval = pd.concat([eval, eval_clst])

        # Export process maps
        bpmn = pm4py.convert.convert_to_bpmn(net_clst, im_clst, fm_clst)
        pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + str(cluster) +
                                "_cases" + str(int(nt*10)).zfill(2) + ".png")

    eval.index = ["Complete", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"]
    eval.to_csv("results/eval_case_clusters_" + str(int(nt*10)).zfill(2) + ".csv", sep = ";")

python_helpers.py

@@ -5,17 +5,12 @@ import pandas as pd

 def eval_pm(data, net, initial_marking, final_marking):
     """Caculate fitness, precision, generalizability, and simplicity for petri net"""
-    print("Fitness is calculated")
     fitness = pm4py.fitness_token_based_replay(data, net, initial_marking, final_marking)
-    #print("Precision is calculated")
-    #precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
-    print("Generalizability is calculated")
+    precisison = pm4py.precision_token_based_replay(data, net, initial_marking, final_marking)
     generalizability = pm4py.algo.evaluation.generalization.algorithm.apply(data, net,
         initial_marking, final_marking)
-    print("Simplicity is calculated")
     simplicity = pm4py.algo.evaluation.simplicity.algorithm.apply(net)
-    #return [fitness['average_trace_fitness'], precisison, generalizability, simplicity]
-    return [fitness['average_trace_fitness'], generalizability, simplicity]
+    return [fitness['average_trace_fitness'], precisison, generalizability, simplicity]

 def pn_infos(log, colname, filter):
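
For context, a minimal usage sketch of the revised eval_pm, assuming the right-hand (newer) side of this compare, where the function returns four values including precision. The input file and column names are reused from the deleted 13_pm-case-clusters.py purely for illustration; they are not part of this change, and only pm4py calls already appearing in the diff are used.

import pandas as pd
import pm4py

from python_helpers import eval_pm

# Build an event log as in the deleted script (file and column names assumed).
dat = pd.read_csv("results/eventlogs_2019_case-clusters.csv", sep = ";")
event_log = pm4py.format_dataframe(dat,
                                   case_id = "case",
                                   activity_key = "item",
                                   timestamp_key = "date.start")

# Discover a Petri net and evaluate it; with the change above, eval_pm now
# returns fitness, precision, generalization, and simplicity (in that order).
net, im, fm = pm4py.discover_petri_net_inductive(event_log, noise_threshold = 0.3)
fitness, precision, generalization, simplicity = eval_pm(event_log, net, im, fm)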