Working on finalizing the clustering
This commit is contained in:
parent
5cc2135c4a
commit
b29790dfc1
@ -103,11 +103,6 @@ fp_visualizer.view(gviz)
|
|||||||
|
|
||||||
efg_graph = pm4py.discover_eventually_follows_graph(event_log)
|
efg_graph = pm4py.discover_eventually_follows_graph(event_log)
|
||||||
|
|
||||||
## Directly-follows graph
|
|
||||||
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
|
|
||||||
pm4py.view_dfg(dfg, start_activities, end_activities)
|
|
||||||
pm4py.save_vis_dfg(dfg, start_activities, end_activities, "results/processmaps/dfg_complete_python.png")
|
|
||||||
|
|
||||||
## Fitting different miners
|
## Fitting different miners
|
||||||
|
|
||||||
eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
|
eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
|
||||||
@ -131,6 +126,11 @@ for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]:
|
|||||||
|
|
||||||
eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";")
|
eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";")
|
||||||
|
|
||||||
|
## Directly-follows graph
|
||||||
|
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean)
|
||||||
|
pm4py.view_dfg(dfg, start_activities, end_activities)
|
||||||
|
pm4py.save_vis_dfg(dfg, start_activities, end_activities, "results/processmaps/dfg_complete_python.png")
|
||||||
|
|
||||||
## Export petri nets
|
## Export petri nets
|
||||||
pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, "results/processmaps/petrinet_conformative.png")
|
pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, "results/processmaps/petrinet_conformative.png")
|
||||||
h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean)
|
h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean)
|
||||||
|
@ -2,7 +2,8 @@
|
|||||||
#
|
#
|
||||||
# content: (1) Look at broken trace
|
# content: (1) Look at broken trace
|
||||||
# (2) Function to find broken traces
|
# (2) Function to find broken traces
|
||||||
# (3) Export data frame for analyses
|
# (3) DFG for complete data
|
||||||
|
# (4) Export data frame for analyses
|
||||||
#
|
#
|
||||||
# input: results/haum/event_logfiles_2024-02-21_16-07-33.csv
|
# input: results/haum/event_logfiles_2024-02-21_16-07-33.csv
|
||||||
# results/haum/raw_logfiles_2024-02-21_16-07-33.csv
|
# results/haum/raw_logfiles_2024-02-21_16-07-33.csv
|
||||||
@ -62,7 +63,33 @@ check <- check_traces(tmp)
|
|||||||
|
|
||||||
check[check$check, ]
|
check[check$check, ]
|
||||||
|
|
||||||
#--------------- (3) Export data frame for analyses ---------------
|
#--------------- (3) DFG for complete data ---------------
|
||||||
|
|
||||||
|
tmp <- datlogs[datlogs$path != 106098, ]
|
||||||
|
tmp$start <- tmp$date.start
|
||||||
|
tmp$complete <- tmp$date.stop
|
||||||
|
|
||||||
|
alog <- bupaR::activitylog(tmp,
|
||||||
|
case_id = "path",
|
||||||
|
activity_id = "event",
|
||||||
|
resource_id = "item",
|
||||||
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
|
dfg <- processmapR::process_map(alog,
|
||||||
|
type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
|
||||||
|
sec_nodes = processmapR::frequency("absolute"),
|
||||||
|
type_edges = processmapR::frequency("relative", color_edges = "#FF6900"),
|
||||||
|
sec_edges = processmapR::frequency("absolute"),
|
||||||
|
rankdir = "LR",
|
||||||
|
render = FALSE)
|
||||||
|
|
||||||
|
processmapR::export_map(dfg,
|
||||||
|
file_name = paste0("results/processmaps/dfg_complete_R.pdf"),
|
||||||
|
file_type = "pdf")
|
||||||
|
|
||||||
|
rm(tmp)
|
||||||
|
|
||||||
|
#--------------- (4) Export data frame for analyses ---------------
|
||||||
|
|
||||||
datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard",
|
datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard",
|
||||||
"openTopic",
|
"openTopic",
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
# input: results/haum/eventlogs_pre-corona_cleaned.RData
|
# input: results/haum/eventlogs_pre-corona_cleaned.RData
|
||||||
# results/haum/pn_infos_items.csv
|
# results/haum/pn_infos_items.csv
|
||||||
# output: results/haum/eventlogs_pre-corona_item-clusters.csv
|
# output: results/haum/eventlogs_pre-corona_item-clusters.csv
|
||||||
|
# results/figures/dendrogram_items.pdf
|
||||||
|
# results/figures/clustering_items.pdf
|
||||||
# results/figures/clustering_artworks.pdf
|
# results/figures/clustering_artworks.pdf
|
||||||
# results/figures/clustering_artworks.png
|
# results/figures/clustering_artworks.png
|
||||||
#
|
#
|
||||||
@ -85,7 +87,7 @@ factoextra::fviz_nbclust(df, FUNcluster = factoextra::hcut, method = "silhouette
|
|||||||
|
|
||||||
gap_stat <- cluster::clusGap(df, FUNcluster = factoextra::hcut,
|
gap_stat <- cluster::clusGap(df, FUNcluster = factoextra::hcut,
|
||||||
hc_func = "agnes", hc_method = "ward",
|
hc_func = "agnes", hc_method = "ward",
|
||||||
K.max = 10)
|
K.max = 15)
|
||||||
factoextra::fviz_gap_stat(gap_stat)
|
factoextra::fviz_gap_stat(gap_stat)
|
||||||
|
|
||||||
k <- 6 # number of clusters
|
k <- 6 # number of clusters
|
||||||
@ -94,23 +96,36 @@ mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E", "Black")
|
|||||||
|
|
||||||
cluster <- cutree(hc, k = k)
|
cluster <- cutree(hc, k = k)
|
||||||
|
|
||||||
|
pdf("results/figures/dendrogram_items.pdf", width = 6.5, height = 5.5, pointsize = 10)
|
||||||
|
|
||||||
factoextra::fviz_dend(hc, k = k,
|
factoextra::fviz_dend(hc, k = k,
|
||||||
cex = 0.5,
|
cex = 0.5,
|
||||||
k_colors = mycols,
|
k_colors = mycols,
|
||||||
#type = "phylogenic",
|
#type = "phylogenic",
|
||||||
rect = TRUE
|
rect = TRUE,
|
||||||
|
main = "",
|
||||||
|
ylab = ""
|
||||||
|
#ggtheme = ggplot2::theme_bw()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
pdf("results/figures/clustering_items.pdf", width = 6.5, height = 5.5, pointsize = 10)
|
||||||
|
|
||||||
factoextra::fviz_cluster(list(data = df, cluster = cluster),
|
factoextra::fviz_cluster(list(data = df, cluster = cluster),
|
||||||
palette = mycols,
|
palette = mycols,
|
||||||
ellipse.type = "convex",
|
ellipse.type = "convex",
|
||||||
repel = TRUE,
|
repel = TRUE,
|
||||||
show.clust.cent = FALSE,
|
show.clust.cent = FALSE,
|
||||||
|
main = "",
|
||||||
ggtheme = ggplot2::theme_bw())
|
ggtheme = ggplot2::theme_bw())
|
||||||
|
|
||||||
aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
|
dev.off()
|
||||||
ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ cluster,
|
|
||||||
datitem, mean)
|
aggregate(cbind(precision, generalizability, nvariants, duration, distance,
|
||||||
|
scaleSize , rotationDegree, npaths, ncases, nmoves,
|
||||||
|
nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem,
|
||||||
|
mean)
|
||||||
|
|
||||||
aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
|
aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
|
||||||
ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ cluster,
|
ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ cluster,
|
||||||
|
@ -171,7 +171,7 @@ dattree$AvDurItemNorm <- normalize(dattree$AvDurItem)
|
|||||||
|
|
||||||
#--------------- (4) Export data frames ---------------
|
#--------------- (4) Export data frames ---------------
|
||||||
|
|
||||||
save(datcase, dattree, file = "results/haum/dataframes_case_2019.RData")
|
save(dat, datcase, dattree, file = "results/haum/dataframes_case_2019.RData")
|
||||||
|
|
||||||
write.table(datcase,
|
write.table(datcase,
|
||||||
file = "results/haum/datcase.csv",
|
file = "results/haum/datcase.csv",
|
||||||
|
@ -27,25 +27,24 @@ summary(df)
|
|||||||
dist_mat <- cluster::daisy(df, metric = "gower")
|
dist_mat <- cluster::daisy(df, metric = "gower")
|
||||||
|
|
||||||
# "Flatten" with MDS
|
# "Flatten" with MDS
|
||||||
# coor_2d <- as.data.frame(cmdscale(dist_mat, k = 2))
|
|
||||||
# coor_3d <- as.data.frame(cmdscale(dist_mat, k = 3))
|
# coor_3d <- as.data.frame(cmdscale(dist_mat, k = 3))
|
||||||
|
|
||||||
# coor_2d <- prcomp(df)$x[, 1:2]
|
|
||||||
# coor_3d <- prcomp(df)$x[, 1:3]
|
# coor_3d <- prcomp(df)$x[, 1:3]
|
||||||
|
|
||||||
coor_2d <- smacof::mds(dist_mat, ndim = 2, type = "ordinal")$conf
|
coor_3d <- smacof::mds(dist_mat, ndim = 3, type = "ordinal")$conf
|
||||||
coor_3d <- smacof::mds(dist_mat, ndim = 2, type = "ordinal")$conf
|
coor_2d <- coor_3d[, 1:2]
|
||||||
|
|
||||||
plot(coor_2d)
|
plot(coor_2d)
|
||||||
rgl::plot3d(coor_3d)
|
rgl::plot3d(coor_3d)
|
||||||
|
|
||||||
# method <- c(average = "average", single = "single", complete = "complete",
|
method <- c(average = "average", single = "single", complete = "complete",
|
||||||
# ward = "ward")
|
ward = "ward")
|
||||||
# hcs <- pbapply::pblapply(method, function(x) cluster::agnes(dist_mat, method = x))
|
hcs <- pbapply::pblapply(method, function(x) cluster::agnes(dist_mat, method = x))
|
||||||
# acs <- pbapply::pbsapply(hcs, function(x) x$ac)
|
acs <- pbapply::pbsapply(hcs, function(x) x$ac)
|
||||||
# hc <- hcs$ward
|
# average single complete ward
|
||||||
|
# 0.9881224 0.9725661 0.9937669 0.9994267
|
||||||
|
hc <- hcs$ward
|
||||||
|
|
||||||
hc <- cluster::agnes(dist_mat, method = "ward")
|
#hc <- cluster::agnes(dist_mat, method = "ward")
|
||||||
|
|
||||||
k <- 5
|
k <- 5
|
||||||
|
|
||||||
@ -59,17 +58,7 @@ plot(coor_2d, col = mycols[cluster])
|
|||||||
legend("topleft", paste("Cl", 1:4), col = mycols, pch = 21)
|
legend("topleft", paste("Cl", 1:4), col = mycols, pch = 21)
|
||||||
rgl::plot3d(coor_3d, col = mycols[cluster])
|
rgl::plot3d(coor_3d, col = mycols[cluster])
|
||||||
|
|
||||||
table(dattree[cluster == 1, "Pattern"])
|
ftable(xtabs( ~ InfocardOnly + Pattern + cluster, dattree))
|
||||||
table(dattree[cluster == 2, "Pattern"])
|
|
||||||
table(dattree[cluster == 3, "Pattern"])
|
|
||||||
table(dattree[cluster == 4, "Pattern"])
|
|
||||||
table(dattree[cluster == 5, "Pattern"])
|
|
||||||
|
|
||||||
table(dattree[cluster == 1, "InfocardOnly"])
|
|
||||||
table(dattree[cluster == 2, "InfocardOnly"])
|
|
||||||
table(dattree[cluster == 3, "InfocardOnly"])
|
|
||||||
table(dattree[cluster == 4, "InfocardOnly"])
|
|
||||||
table(dattree[cluster == 5, "InfocardOnly"])
|
|
||||||
|
|
||||||
aggregate(. ~ cluster, df, mean)
|
aggregate(. ~ cluster, df, mean)
|
||||||
|
|
||||||
@ -78,9 +67,6 @@ aggregate(cbind(duration, distance, scaleSize, rotationDegree, length, nitems,
|
|||||||
mean)
|
mean)
|
||||||
|
|
||||||
### Look at selected cases ###########################################
|
### Look at selected cases ###########################################
|
||||||
|
|
||||||
load("")
|
|
||||||
|
|
||||||
tmp <- dat
|
tmp <- dat
|
||||||
tmp$start <- tmp$date.start
|
tmp$start <- tmp$date.start
|
||||||
tmp$complete <- tmp$date.stop
|
tmp$complete <- tmp$date.stop
|
||||||
@ -133,7 +119,9 @@ c1 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
|||||||
"InfocardOnly")],
|
"InfocardOnly")],
|
||||||
method = "class")
|
method = "class")
|
||||||
|
|
||||||
|
pdf("results/figures/tree_items_rpart.pdf", height = 5, width = 15, pointsize = 10)
|
||||||
plot(partykit::as.party(c1))
|
plot(partykit::as.party(c1))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
# with conditional tree
|
# with conditional tree
|
||||||
c2 <- partykit::ctree(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
c2 <- partykit::ctree(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
||||||
@ -143,5 +131,8 @@ c2 <- partykit::ctree(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
|||||||
"Pattern",
|
"Pattern",
|
||||||
"InfocardOnly")],
|
"InfocardOnly")],
|
||||||
alpha = 0.001)
|
alpha = 0.001)
|
||||||
plot(c2)
|
|
||||||
|
pdf("results/figures/tree_items_ctree.pdf", height = 7, width = 20, pointsize = 10)
|
||||||
|
plot(c2)
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user