Clean up scripts; improve data export
commit 6cfc19a874
parent f8c1767074
@@ -11,7 +11,7 @@
 # output: raw_logfiles_<timestamp>.csv
 #         event_logfiles_<timestamp>.csv
 #
-# last mod: 2024-01-18, NW
+# last mod: 2024-02-23, NW
 
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 
@@ -1,7 +1,25 @@
+# 03_create-petrinet.py
+#
+# content: (1) Create places and transitions
+#          (2) Sequential net
+#          (3) Concurrent net
+#
+# input: --
+# output: results/haum/conformative_petrinet_con.pnml
+#         results/processmaps/conformative_petrinet_con.png
+#         results/processmaps/conformative_bpmn_con.png
+#         results/haum/conformative_petrinet_seq.pnml
+#         results/processmaps/conformative_petrinet_seq.png
+#         results/processmaps/conformative_bpmn_seq.png
+#
+# last mod: 2024-03-06
+
 import pm4py
 from pm4py.objects.petri_net.obj import PetriNet, Marking
 from pm4py.objects.petri_net.utils import petri_utils
 
+#--------------- (1) Create places and transitions ---------------
+
 # Create places
 source = PetriNet.Place("source")
 sink = PetriNet.Place("sink")
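The hunk above introduces the script header and the first place declarations. As a minimal sketch of how such places and transitions get wired into a complete pm4py net (a toy two-step net, illustrative rather than code from this commit; only pm4py calls already used in these scripts are assumed):

import pm4py
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.petri_net.utils import petri_utils

net = PetriNet("toy_net")

# Places hold tokens; transitions carry the visible activity labels
source = PetriNet.Place("source")
p_1    = PetriNet.Place("p_1")
sink   = PetriNet.Place("sink")
for p in (source, p_1, sink):
    net.places.add(p)

t_1 = PetriNet.Transition("t_1", "flipCard")    # name, label
t_2 = PetriNet.Transition("t_2", "openTopic")
for t in (t_1, t_2):
    net.transitions.add(t)

# Arcs alternate place -> transition -> place
petri_utils.add_arc_from_to(source, t_1, net)
petri_utils.add_arc_from_to(t_1, p_1, net)
petri_utils.add_arc_from_to(p_1, t_2, net)
petri_utils.add_arc_from_to(t_2, sink, net)

# Initial marking: one token in source; final marking: one token in sink
im = Marking(); im[source] = 1
fm = Marking(); fm[sink] = 1

pm4py.write_pnml(net, im, fm, "toy_net.pnml")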
@@ -44,7 +62,8 @@ t_16 = PetriNet.Transition("t_16")
 t_17 = PetriNet.Transition("t_17")
 t_18 = PetriNet.Transition("t_18")
 
-## Sequential net
+#--------------- (2) Sequential net ---------------
 
 net_seq = PetriNet("new_petri_net")
 
 # Add places
@@ -149,7 +168,8 @@ pm4py.view_bpmn(bpmn)
 pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_seq.png")
 
 
-## Concurrent net
+#--------------- (3) Concurrent net ---------------
 
 net_con = PetriNet("new_petri_net")
 
 # Add places
@@ -1,9 +1,33 @@
+# 04_conformance-checking.py
+#
+# content: (1) Load data and create event log
+#          (2) Infos for items
+#
+# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
+#         results/haum/conformative_petrinet_con.pnml
+# output: results/processmaps/dfg_complete_python.png
+#         results/eval_all-miners_complete.csv
+#         results/eval_all-miners_clean.csv
+#         results/processmaps/petrinet_conformative.png
+#         results/processmaps/petrinet_heuristics_clean.png
+#         results/processmaps/petrinet_alpha_clean.png
+#         results/processmaps/petrinet_inductive_clean.png
+#         results/processmaps/petrinet_ilp_clean.png
+#         results/processmaps/bpmn_conformative.png
+#         results/processmaps/bpmn_inductive_clean.png
+#         results/processmaps/bpmn_ilp_clean.png
+#         results/processmaps/bpmn_alpha_clean.png
+#         results/processmaps/bpmn_heuristics_clean.png
+#
+# last mod: 2024-03-06
+
 import pm4py
 import pandas as pd
 import numpy as np
 
 from python_helpers import eval_pm, pn_infos_miner
 
-###### Load data and create event logs ######
+#--------------- (1) Load data and create event logs ---------------
 
 dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
 
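eval_pm and pn_infos_miner come from this repo's python_helpers.py, which this diff does not show. As a rough sketch of the kind of check this script runs against the hand-modeled ("conformative") net, using only pm4py's documented simplified API (the helper's actual internals may differ):

import pandas as pd
import pm4py

dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
log = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
                             timestamp_key = "date.start")

net, im, fm = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml")

# Token-based replay: how well does the modeled net cover the observed traces?
fitness   = pm4py.fitness_token_based_replay(log, net, im, fm)["log_fitness"]
precision = pm4py.precision_token_based_replay(log, net, im, fm)
print(fitness, precision)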
@@ -129,3 +153,4 @@ a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm)
 pm4py.vis.save_vis_bpmn(a_bpmn, "results/processmaps/bpmn_alpha_clean.png")
 h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm)
 pm4py.vis.save_vis_bpmn(h_bpmn, "results/processmaps/bpmn_heuristics_clean.png")
+
@@ -1,3 +1,16 @@
+# 05_check-traces.R
+#
+# content: (1) Look at broken trace
+#          (2) Function to find broken traces
+#          (3) Export data frame for analyses
+#
+# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
+#         results/haum/raw_logfiles_2024-02-21_16-07-33.csv
+# output: results/haum/eventlogs_pre-corona_cleaned.RData
+#         results/haum/eventlogs_pre-corona_cleaned.csv
+#
+# last mod: 2024-03-06
+
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 
 #--------------- (1) Look at broken trace ---------------
@@ -49,3 +62,31 @@ check <- check_traces(tmp)
 
 check[check$check, ]
 
+
+#--------------- (3) Export data frame for analyses ---------------
+
+datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard",
+                                                  "openTopic",
+                                                  "openPopup"))
+datlogs$topic <- factor(datlogs$topic)
+
+datlogs$weekdays <- factor(weekdays(datlogs$date.start),
+                           levels = c("Montag", "Dienstag", "Mittwoch",
+                                      "Donnerstag", "Freitag", "Samstag",
+                                      "Sonntag"),
+                           labels = c("Monday", "Tuesday", "Wednesday",
+                                      "Thursday", "Friday", "Saturday",
+                                      "Sunday"))
+
+# Select data pre Corona
+dat <- datlogs[as.Date(datlogs$date.start) < "2020-03-13", ]
+# Remove corrupt trace
+dat <- dat[dat$path != 106098, ]
+
+save(dat, file = "results/haum/eventlogs_pre-corona_cleaned.RData")
+
+write.table(dat,
+            file = "results/haum/eventlogs_pre-corona_cleaned.csv",
+            sep = ";",
+            quote = FALSE,
+            row.names = FALSE)
@@ -1,28 +1,37 @@
+# 06_infos-items.py
+#
+# content: (1) Load data and create event log
+#          (2) Infos for items
+#
+# input:  results/haum/eventlogs_pre-corona_cleaned.csv
+# output: results/haum/pn_infos_items.csv
+#
+# last mod: 2024-03-06
+
 import pm4py
 import pandas as pd
 import numpy as np
 
 from python_helpers import eval_pm, pn_infos
 
-###### Load data and create event logs ######
+#--------------- (1) Load data and create event logs ---------------
 
-dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
-dat = dat[dat["date.start"] < "2020-03-13"]
-# --> only pre corona (before artworks were updated)
-dat = dat[dat["path"] != 106098]
-# exclude broken trace
+dat = pd.read_csv("results/haum/eventlogs_pre-corona_cleaned.csv", sep = ";")
 
 log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
                                   timestamp_key = "date.start")
 
-###### Infos for items ######
+#--------------- (2) Infos for items ---------------
 
 eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
                                "simplicity", "sound", "narcs", "ntrans",
                                "nplaces", "nvariants", "mostfreq"])
 
 for item in log_path.item.unique().tolist():
     eval = pd.concat([eval, pn_infos(log_path, "item", item)])
 
 eval = eval.sort_index()
 
 # Export
 eval.to_csv("results/haum/pn_infos_items.csv", sep = ";")
 
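pn_infos(log_path, "item", item) also lives in python_helpers.py, outside this diff. A hypothetical per-item helper in the same spirit (the column subset and names below are assumptions based on the eval columns above):

import pandas as pd
import pm4py

def pn_infos_sketch(log, colname, value):
    # Restrict the log to one item, mine a net, and score it
    sublog = log[log[colname] == value]
    net, im, fm = pm4py.discover_petri_net_inductive(sublog)
    return pd.DataFrame({
        "fitness":   [pm4py.fitness_token_based_replay(sublog, net, im, fm)["log_fitness"]],
        "precision": [pm4py.precision_token_based_replay(sublog, net, im, fm)],
        "ntrans":    [len(net.transitions)],
        "nplaces":   [len(net.places)],
    }, index = [value])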
@@ -7,12 +7,11 @@
 #          (2) Clustering
 #          (3) Visualization with pictures
 #
-# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
+# input:  results/haum/eventlogs_pre-corona_cleaned.RData
 #         results/haum/pn_infos_items.csv
-# output: results/haum/event_logfiles_pre-corona_with-clusters.csv
+# output: results/haum/eventlogs_pre-corona_item-clusters.csv
 #
-# last mod: 2024-02-23
+# last mod: 2024-03-06
 
 
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
 
@@ -23,34 +22,16 @@ library(factoextra)
 
 #--------------- (1.1) Read log event data ---------------
 
-dat0 <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv",
-                   colClasses = c("character", "character", "POSIXct",
-                                  "POSIXct", "character", "integer",
-                                  "numeric", "character", "character",
-                                  rep("numeric", 3), "character",
-                                  "character", rep("numeric", 11),
-                                  "character", "character"),
-                   sep = ";", header = TRUE)
-dat0$event <- factor(dat0$event, levels = c("move", "flipCard", "openTopic",
-                                            "openPopup"))
-
-# TODO: Maybe look at this with complete data?
-
-# Select data pre Corona
-dat <- dat0[as.Date(dat0$date.start) < "2020-03-13", ]
-dat <- dat[dat$path != 106098, ]
+load("results/haum/eventlogs_pre-corona_cleaned.RData")
 
 #--------------- (1.2) Read infos for PM for items ---------------
 
 datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE,
                       sep = ";", row.names = 1)
 
 
 #--------------- (1.3) Extract additional infos for clustering ---------------
 
-dat_split <- split(dat, ~ path)
-
-time_minmax <- function(subdata) {
+time_minmax_ms <- function(subdata) {
   subdata$min_time <- min(subdata$timeMs.start)
   if (all(is.na(subdata$timeMs.stop))) {
     subdata$max_time <- NA
@@ -59,18 +40,18 @@ time_minmax <- function(subdata) {
   }
   subdata
 }
+# TODO: Move to helper file
 
-dat_list <- pbapply::pblapply(dat_split, time_minmax)
+# Get average duration per path
+dat_split <- split(dat, ~ path)
+dat_list <- pbapply::pblapply(dat_split, time_minmax_ms)
 dat_minmax <- dplyr::bind_rows(dat_list)
 
 datpath <- aggregate(duration ~ item + path, dat, mean, na.action = NULL)
 
 datpath$min_time <- aggregate(min_time ~ path, dat_minmax, unique, na.action = NULL)$min_time
 datpath$max_time <- aggregate(max_time ~ path, dat_minmax, unique, na.action = NULL)$max_time
+datpath$duration <- datpath$max_time - datpath$min_time
 
-datpath$duration_path <- datpath$max_time - datpath$min_time
-
-# average duration per path
 datitem$duration <- aggregate(duration ~ item, datpath, mean)$duration
 datitem$distance <- aggregate(distance ~ item, dat, mean)$distance
 datitem$scaleSize <- aggregate(scaleSize ~ item, dat, mean)$scaleSize
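The new time_minmax_ms()/aggregate() combination computes, per path, the earliest onset and latest offset and from those a duration. Since half of this pipeline is Python, the same quantity can be cross-checked with pandas (a sketch assuming the column names used here; not code from this commit):

import pandas as pd

dat = pd.read_csv("results/haum/eventlogs_pre-corona_cleaned.csv", sep = ";")

# Earliest start / latest stop per path; max_time stays NaN when all stops are missing
datpath = (dat.groupby("path")
              .agg(min_time = ("timeMs.start", "min"),
                   max_time = ("timeMs.stop", "max"))
              .reset_index())
datpath["duration"] = datpath["max_time"] - datpath["min_time"]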
@@ -89,66 +70,39 @@ df <- datitem[, c("precision", "generalizability", "nvariants", "duration",
                   "ncases", "nmoves", "nopenTopic", "nopenPopup")] |>
   scale()
 
-mat <- dist(df)
+dist_mat <- dist(df)
 
-heatmap(as.matrix(mat))
+heatmap(as.matrix(dist_mat))
 
 # Choosing best linkage method
-h1 <- hclust(mat, method = "average")
-h2 <- hclust(mat, method = "complete")
-h3 <- hclust(mat, method = "ward.D")
-h4 <- hclust(mat, method = "ward.D2")
-h5 <- hclust(mat, method = "single")
+method <- c(average = "average", single = "single", complete = "complete",
+            ward = "ward")
 
-# Cophenetic Distances, for each linkage
-c1 <- cophenetic(h1)
-c2 <- cophenetic(h2)
-c3 <- cophenetic(h3)
-c4 <- cophenetic(h4)
-c5 <- cophenetic(h5)
-
-# Correlations
-cor(mat, c1)
-cor(mat, c2)
-cor(mat, c3)
-cor(mat, c4)
-cor(mat, c5)
-# https://en.wikipedia.org/wiki/Cophenetic_correlation
-# https://stats.stackexchange.com/questions/195446/choosing-the-right-linkage-method-for-hierarchical-clustering
+hcs <- lapply(method, function(x) cluster::agnes(dist_mat, method = x))
+acs <- sapply(hcs, function(x) x$ac)
 
 # Dendrograms
-par(mfrow=c(3,2))
-plot(h1, main = "Average Linkage")
-plot(h2, main = "Complete Linkage")
-plot(h3, main = "Ward Linkage")
-plot(h4, main = "Ward 2 Linkage")
-plot(h5, main = "Single Linkage")
+par(mfrow=c(4,2))
+for (hc in hcs) plot(hc, main = "")
 
-hc <- h1
+hc <- hcs$ward
+# Note that ‘agnes(*, method="ward")’ corresponds to ‘hclust(*, "ward.D2")’
 
 k <- 4 # number of clusters
 
+mycols <- c("#78004B", "#FF6900", "#3CB4DC", "#91C86E")
+
 grp <- cutree(hc, k = k)
 datitem$grp <- grp
 
 fviz_dend(hc, k = k,
           cex = 0.5,
-          k_colors = c("#78004B", "#FF6900", "#3CB4DC", "#91C86E",
-                       "#000000", "gold", "#434F4F"),
+          k_colors = mycols,
           #type = "phylogenic",
           rect = TRUE
 )
 
-plot(hc)
-rect.hclust(hc, k=8, border="red")
-rect.hclust(hc, k=7, border="blue")
-rect.hclust(hc, k=6, border="green")
-
 p <- fviz_cluster(list(data = df, cluster = grp),
-                  palette = c("#78004B", "#FF6900", "#3CB4DC", "#91C86E",
-                              "#000000", "#434F4F", "gold"),
+                  palette = mycols,
                   ellipse.type = "convex",
                   repel = TRUE,
                   show.clust.cent = FALSE, ggtheme = theme_bw())
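The commit swaps the cophenetic-correlation comparison for cluster::agnes's agglomerative coefficient when choosing a linkage. For reference, the removed comparison translates to scipy roughly as follows (a sketch; df_scaled stands for the standardized feature matrix built above, and scipy ships no built-in agglomerative coefficient):

from scipy.cluster.hierarchy import linkage, cophenet, fcluster
from scipy.spatial.distance import pdist

y = pdist(df_scaled)                      # condensed Euclidean distance matrix
for method in ("average", "single", "complete", "ward"):
    Z = linkage(y, method = method)
    c, _ = cophenet(Z, y)                 # cophenetic correlation for this linkage
    print(method, round(c, 3))

Z = linkage(y, method = "ward")           # ward here ~ hclust "ward.D2" / agnes "ward"
grp = fcluster(Z, t = 4, criterion = "maxclust")   # analogous to cutree(hc, k = 4)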
@@ -156,14 +110,16 @@ p
 
 aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
                 ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ grp,
-          datitem, median)
+          datitem, mean)
 
+aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
+                ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ grp,
+          datitem, max)
 
 # Something like a scree plot (??)
-plot(rev(seq_along(hc$height)), hc$height, type = "l")
-points(rev(seq_along(hc$height)), hc$height, pch = 16, cex = .5)
+plot(rev(hc$height), type = "b", pch = 16, cex = .5)
 
 
 datitem$item <- sprintf("%03d",
                         as.numeric(gsub("item_([0-9]{3})", "\\1", row.names(datitem))))
@@ -179,7 +135,7 @@ vioplot::vioplot(scaleSize ~ grp, res)
 vioplot::vioplot(rotationDegree ~ grp, res)
 
 write.table(res,
-            file = "results/haum/event_logfiles_pre-corona_with-clusters.csv",
+            file = "results/haum/eventlogs_pre-corona_item-clusters.csv",
             sep = ";",
             quote = FALSE,
             row.names = FALSE)
@@ -207,8 +163,6 @@ for (cluster in sort(unique(res$grp))) {
                  file_name = paste0("results/processmaps/dfg_cluster", cluster, "_R.pdf"),
                  file_type = "pdf",
                  title = paste("DFG Cluster", cluster))
-
-
 }
 
 #--------------- (3) Visualization with pictures ---------------
@@ -217,8 +171,6 @@ library(png)
 library(jpeg)
 library(grid)
 
-colors <- c("#78004B", "#FF6900", "#3CB4DC", "#91C86E")
-
 pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 10)
 #png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 10, res = 300)
 
@@ -244,7 +196,7 @@ for (item in sprintf("%03d", as.numeric(rownames(p$data)))) {
   y <- p$data$y[sprintf("%03d", as.numeric(rownames(p$data))) == item]
 
   points(x, y,
-         col = colors[p$data$cluster[sprintf("%03d", as.numeric(rownames(p$data))) == item]],
+         col = mycols[p$data$cluster[sprintf("%03d", as.numeric(rownames(p$data))) == item]],
          cex = 9,
          pch = 15)
 
@@ -255,7 +207,7 @@ for (item in sprintf("%03d", as.numeric(rownames(p$data)))) {
                ytop = y + .2)
 
 }
-legend("topright", paste("Cluster", 1:k), col = colors, pch = 15, bty = "n")
+legend("topright", paste("Cluster", 1:k), col = mycols, pch = 15, bty = "n")
 
 dev.off()
 
@@ -1,16 +1,27 @@
+# 08_infos-clusters.py
+#
+# content: (1) Load data and create event log
+#          (2) Infos for clusters
+#          (3) Process maps for clusters
+#
+# input:  results/haum/eventlogs_pre-corona_item-clusters.csv
+# output: results/haum/pn_infos_clusters.csv
+#
+# last mod: 2024-03-06
+
 import pm4py
 import pandas as pd
 
 from python_helpers import eval_pm, pn_infos
 
-###### Load data and create event logs ######
+#--------------- (1) Load data and create event logs ---------------
 
-dat = pd.read_csv("results/haum/event_logfiles_pre-corona_with-clusters.csv", sep = ";")
+dat = pd.read_csv("results/haum/eventlogs_pre-corona_item-clusters.csv", sep = ";")
 
 log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
                                   timestamp_key = "date.start")
 
-###### Infos for clusters ######
+#--------------- (2) Infos for clusters ---------------
 
 # Merge clusters into data frame
 eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
@@ -22,12 +33,13 @@ eval = eval.sort_index()
 
 eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";")
 
-###### Process maps for clusters ######
+#--------------- (3) Process maps for clusters ---------------
 
 for cluster in log_path.grp.unique().tolist():
     subdata = log_path[log_path.grp == cluster]
-    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata)
+    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=0.5)
     pm4py.save_vis_petri_net(subnet, subim, subfm,
                              "results/processmaps/petrinet_cluster" + str(cluster).zfill(3) + ".png")
     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm)
-    pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster_" + str(cluster).zfill(3) + ".png")
+    pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster_" +
+                            str(cluster).zfill(3) + ".png")
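The added noise_threshold=0.5 switches pm4py's inductive miner to its frequency-filtering variant (IMf): directly-follows relations occurring below the threshold are dropped before the process tree is cut, which yields simpler per-cluster nets at the cost of some replay fitness; 0.0 reproduces the previous noise-free behavior. A quick before/after comparison on a single cluster (a sketch, not part of the commit):

import pm4py

sub = log_path[log_path.grp == log_path.grp.unique()[0]]
net0, im0, fm0 = pm4py.discover_petri_net_inductive(sub)                         # exact IM
net5, im5, fm5 = pm4py.discover_petri_net_inductive(sub, noise_threshold = 0.5)  # IMf
print(len(net0.transitions), "vs", len(net5.transitions))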