# mtt_haum/code/04_modeling_haum.R
#
# 160 lines
# 5.6 KiB
# R

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

# Read data ----
# Full data set (currently disabled in favour of the small one below):
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
#                    sep = ";", header = TRUE)
dat0 <- read.table(
  file = "../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
  header = TRUE,
  sep = ";"
)

# Convert the date column to Date and both timestamp columns to POSIXct.
dat0[["date"]] <- as.Date(dat0[["date"]])
dat0[c("date.start", "date.stop")] <- lapply(
  dat0[c("date.start", "date.stop")],
  as.POSIXct
)

# Artwork ids become zero-padded, three-character strings (e.g. 54 -> "054").
dat0[["artwork"]] <- sprintf("%03d", dat0[["artwork"]])
# TODO: Write a function that closes events spanning different log files
# OR: Remove openTopic and openPopup events that do not start with a
# flipCard (AND openPopup events without openTopic event beforehand)

# Distribution of the first logged event of every trace.
table(dat0[!duplicated(dat0$trace), "event"])
# flipCard move openPopup openTopic
# 45528 247718 981 3457
proportions(table(dat0[!duplicated(dat0$trace), "event"]))

open_events <- c("openTopic", "openPopup")

# First rows of traces that start with an openTopic/openPopup event
# (kept for interactive inspection).
tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% open_events, ]

# Repeatedly drop the first row of every trace while that row is an
# openTopic/openPopup event, so that afterwards no trace starts with one.
# Removing a leading row can expose another such event, hence the loop.
dat <- dat0
pass <- 0L
repeat {
  # TRUE for rows that are the first row of their trace AND an "open" event.
  leading_open <- !duplicated(dat$trace) & dat$event %in% open_events
  if (!any(leading_open)) {
    break
  }
  dat <- dat[!leading_open, ]
  pass <- pass + 1L
  # Progress: pass number and first-event distribution after this pass.
  print(pass)
  print(table(dat[!duplicated(dat$trace), "event"]))
}
# Creating event logs
library(bupaverse)
# Rename the timestamp columns to "start" / "complete", the names passed as
# `timestamps` when the activity logs are built further down.
# The columns are located by name with match(), so the mapping stays correct
# regardless of the order in which they appear in the data.
ts_cols <- c(date.start = "start", date.stop = "complete")
stopifnot(all(names(ts_cols) %in% names(dat)))
names(dat)[match(names(ts_cols), names(dat))] <- unname(ts_cols)

# How many events share the same start timestamp?
table(table(dat$start))
# --> hmm...

# Summary of the mean event duration per trace.
summary(aggregate(duration ~ trace, dat, mean))
# Remove fragmented traces ----
# Cross-tabulate how often each event type occurs in each trace.
tab <- xtabs(~ trace + event, dat)

# Presence (TRUE/FALSE) of each relevant event type per trace. Event types that
# never occur in `dat` have no column in `tab` and are treated as absent.
event_types <- c("flipCard", "openTopic", "openPopup")
seen <- matrix(FALSE, nrow = nrow(tab), ncol = length(event_types),
               dimnames = list(rownames(tab), event_types))
observed <- intersect(event_types, colnames(tab))
seen[, observed] <- tab[, observed, drop = FALSE] > 0

flip <- seen[, "flipCard"]
topic <- seen[, "openTopic"]
popup <- seen[, "openPopup"]

# A trace is a fragment when it contains
#   - an openPopup or openTopic event but no flipCard event, or
#   - an openPopup event but no openTopic event.
is_fragment <- ((popup | topic) & !flip) | (popup & !topic)
fragments <- rownames(tab)[is_fragment]

# Keep only rows whose trace is not a fragment.
datrm <- dat[!dat$trace %in% fragments, ]
# TODO: Find trace that has flipCard --> openPopup --> openTopic
# Drop "move" events, then collect the distinct event types of every trace.
dato <- datrm[datrm$event != "move", ]

# One split() pass instead of re-filtering the data frame for every trace;
# the explicit factor levels keep traces in order of first appearance.
trace_ids <- unique(dato$trace)
tmp <- lapply(split(dato$event, factor(dato$trace, levels = trace_ids)), unique)

# Traces with exactly three distinct (non-move) event types, one row per trace.
ids <- lengths(tmp) == 3
do.call(rbind, tmp[ids])
# TODO:
# Fragmentary traces: for trace 4591 the openTopic event for topic 1 is present
# in the raw log files, but probably gets removed in close_events(). How can
# that be prevented? How can the traces and eventIds that do not match
# correctly be fixed?

# Show the first ten columns of the traces under investigation.
ct <- c(4591, 5937, 7080, 8412, 8279)
datrm[is.element(datrm$trace, ct), seq_len(10)]
### WHY?????
# Activity log over all remaining traces: one case per trace, events as
# activities, artworks as resources.
# (Alternative tried earlier: resource_id = "case".)
alog <- activitylog(
  datrm,
  case_id = "trace",
  activity_id = "event",
  resource_id = "artwork",
  timestamps = c("start", "complete")
)

# Process maps: default frequency, then two relative-frequency variants.
process_map(alog)
process_map(alog, type = frequency("relative"))
process_map(alog, type = frequency("relative_consequent"))
library(processanimateR)

# Convert the activity log to an event log once and reuse it for every
# animation below instead of repeating the conversion for each call.
elog <- to_eventlog(alog)

animate_process(elog)

# Colour palette for the artwork tokens.
# NOTE: the palette contains a few repeated values ("#666666", "#FFFF99",
# "#FF7F00"), so some artworks can end up with the same colour.
col_vector <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0",
                "#F0027F", "#BF5B17", "#666666", "#1B9E77", "#D95F02",
                "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D",
                "#666666", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C",
                "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6",
                "#6A3D9A", "#FFFF99", "#B15928", "#FBB4AE", "#B3CDE3",
                "#CCEBC5", "#DECBE4", "#FED9A6", "#FFFFCC", "#E5D8BD",
                "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8",
                "#F4CAE4", "#E6F5C9", "#FFF2AE", "#F1E2CC", "#CCCCCC",
                "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",
                "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5",
                "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
                "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA",
                "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5",
                "#D9D9D9")

# Animation of all traces with tokens coloured by artwork.
animate_process(elog, mode = "relative", jitter = 10, legend = "color",
                mapping = token_aes(color = token_scale("artwork",
                                                        scale = "ordinal",
                                                        range = col_vector)))

# Animations and a process map restricted to single artworks.
animate_process(elog[elog$artwork == "054", ])
animate_process(elog[elog$artwork == "080", ])
animate_process(elog[elog$artwork == "501", ])
process_map(alog[alog$artwork == "054", ])

# Two artworks in one animation, distinguished by token colour.
animate_process(elog[elog$artwork %in% c("080", "054"), ],
                mode = "relative", jitter = 10, legend = "color",
                mapping = token_aes(color = token_scale("artwork",
                                                        scale = "ordinal",
                                                        range = c("black", "gray"))))
# --> not sure, yet, how to interpret this...
# Build the activity log for a single artwork: one case per trace, events as
# activities, and the "case" column as resource.
# (Alternative tried earlier: case_id = "case", resource_id = "trace".)
# NOTE(review): these logs are built from `dat`, not from `datrm`, so they
# still contain the fragmented traces -- confirm that this is intended.
build_artwork_log <- function(data, artwork_id) {
  activitylog(
    data[data$artwork %in% artwork_id, ],
    case_id = "trace",
    activity_id = "event",
    resource_id = "case",
    timestamps = c("start", "complete")
  )
}

alog080 <- build_artwork_log(dat, "080")
process_map(alog080, frequency("relative"))

alog054 <- build_artwork_log(dat, "054")
process_map(alog054, frequency("relative"))