Updated DFGs for case clusters; exported data for tables
This commit is contained in:
parent
48a4bcbcdb
commit
42cb777d36
@ -23,7 +23,7 @@ dat <- res
|
|||||||
dat$start <- as.POSIXct(dat$date.start)
|
dat$start <- as.POSIXct(dat$date.start)
|
||||||
dat$complete <- as.POSIXct(dat$date.stop)
|
dat$complete <- as.POSIXct(dat$date.stop)
|
||||||
|
|
||||||
alog <- bupaR::activitylog(dat[dat$cluster == cluster, ],
|
alog <- bupaR::activitylog(dat[dat$cluster == 4, ],
|
||||||
case_id = "case",
|
case_id = "case",
|
||||||
activity_id = "item",
|
activity_id = "item",
|
||||||
resource_id = "path",
|
resource_id = "path",
|
||||||
@ -36,7 +36,7 @@ tab <- table(tr$absolute_frequency)
|
|||||||
|
|
||||||
tab[1] / nrow(tr)
|
tab[1] / nrow(tr)
|
||||||
|
|
||||||
alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map()
|
alog |> edeaR::filter_infrequent_flows(min_n = 5) |> processmapR::process_map()
|
||||||
|
|
||||||
#--------------- (2) Export DFGs for clusters ---------------
|
#--------------- (2) Export DFGs for clusters ---------------
|
||||||
|
|
||||||
@ -44,6 +44,7 @@ mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
|
|||||||
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")
|
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")
|
||||||
|
|
||||||
ns <- c(30, 20, 10, 5, 30)
|
ns <- c(30, 20, 10, 5, 30)
|
||||||
|
#ns <- c(20, 20, 20, 5, 20)
|
||||||
|
|
||||||
for (i in 1:5) {
|
for (i in 1:5) {
|
||||||
|
|
||||||
@ -52,7 +53,7 @@ for (i in 1:5) {
|
|||||||
activity_id = "item",
|
activity_id = "item",
|
||||||
resource_id = "path",
|
resource_id = "path",
|
||||||
timestamps = c("start", "complete"))
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
|
dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
|
||||||
type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
|
type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
|
||||||
sec_nodes = processmapR::frequency("absolute"),
|
sec_nodes = processmapR::frequency("absolute"),
|
||||||
@ -67,27 +68,30 @@ for (i in 1:5) {
|
|||||||
title = cl_names[i])
|
title = cl_names[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
# cluster 1: 50
|
# What data is used and how many traces are unique
|
||||||
# cluster 2: 30 o. 20
|
|
||||||
# cluster 3: 20 - 30
|
|
||||||
# cluster 4: 5
|
|
||||||
# cluster 5: 20
|
|
||||||
|
|
||||||
get_percent_variants <- function(log, cluster, min_n) {
|
perc_filter <- numeric(5)
|
||||||
|
perc_unqiue <- numeric(5)
|
||||||
|
|
||||||
alog <- bupaR::activitylog(log[log$cluster == cluster, ],
|
for (i in 1:5) {
|
||||||
|
|
||||||
|
alog <- bupaR::activitylog(dat[dat$cluster == i, ],
|
||||||
case_id = "case",
|
case_id = "case",
|
||||||
activity_id = "item",
|
activity_id = "item",
|
||||||
resource_id = "path",
|
resource_id = "path",
|
||||||
timestamps = c("start", "complete"))
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) /
|
perc_filter[i] <- nrow(edeaR::filter_infrequent_flows(alog, min_n = ns[i])) /
|
||||||
nrow(alog)
|
nrow(alog)
|
||||||
|
|
||||||
|
cs <- bupaR::cases(alog)
|
||||||
|
infreq_tr <- names(which(table(cs$trace) == 1))
|
||||||
|
infreq_cs <- cs$case[cs$trace %in% infreq_tr]
|
||||||
|
|
||||||
|
perc_unqiue[i] <- nrow(alog[alog$case %in% infreq_cs, ]) / nrow(alog)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
perc <- numeric(5)
|
save(ns, perc_filter, perc_unqiue,
|
||||||
|
file = "../../thesis/figures/data/dfgs_case-cluster.RData")
|
||||||
for (i in 1:5) {
|
|
||||||
perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user