# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

# Read data ----
#
# Reads the semicolon-separated event log, converts the date columns, and
# zero-pads the artwork identifier to three characters.

# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
#                    sep = ";", header = TRUE)
dat0 <- read.table(
  "../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
  sep = ";", header = TRUE
)
dat0$date <- as.Date(dat0$date)
dat0$date.start <- as.POSIXct(dat0$date.start)
dat0$date.stop <- as.POSIXct(dat0$date.stop)
dat0$artwork <- sprintf("%03d", dat0$artwork)

# TODO: Write a function that closes events spanning different log files
# OR: Remove openTopic and OpenPopup events that do not start with a
# flipCard (AND openPopup events without openTopic event beforehand)

# Distribution of the first event of every trace
table(dat0[!duplicated(dat0$trace), "event"])
#  flipCard      move openPopup openTopic
#     45528    247718       981      3457

proportions(table(dat0[!duplicated(dat0$trace), "event"]))

# Rows that are the first event of their trace but are an openTopic/openPopup
tmp <- dat0[!duplicated(dat0$trace) &
              dat0$event %in% c("openTopic", "openPopup"), ]

# Repeatedly drop the first row of each trace while that row is an openTopic
# or openPopup event. Removing a row can expose another such row as the new
# first row of the trace, hence the iteration. The pass counter and the table
# of first events are printed after every pass, including the final pass in
# which nothing is removed.
leading_events <- c("openTopic", "openPopup")
dat <- dat0
i <- 1
repeat {
  orphaned <- !duplicated(dat$trace) & dat$event %in% leading_events
  dat <- dat[!orphaned, ]
  print(i)
  i <- i + 1
  print(table(dat[!duplicated(dat$trace), "event"]))
  if (!any(orphaned)) {
    break
  }
}

# Creating event logs ----

library(bupaverse)

# Rename the timestamp columns by name, so the mapping
# date.start -> start and date.stop -> complete does not depend on the order
# in which the two columns appear in the data frame.
stopifnot(all(c("date.start", "date.stop") %in% names(dat)))
names(dat)[match(c("date.start", "date.stop"), names(dat))] <-
  c("start", "complete")

# How many events share the same start timestamp?
table(table(dat$start))
# --> hmm...
# Summary of the mean event duration per trace
summary(aggregate(duration ~ trace, dat, mean))

# remove fragmented traces ----

# Contingency table: one row per trace, one column per event type
tab <- xtabs(~ trace + event, dat)

# A trace counts as fragmentary when it contains
#   * an openPopup or openTopic event but no flipCard event, or
#   * an openPopup event but no openTopic event.
has_flip <- tab[, "flipCard"] != 0
has_topic <- tab[, "openTopic"] != 0
has_popup <- tab[, "openPopup"] != 0
is_fragment <- ((has_popup | has_topic) & !has_flip) |
  (has_popup & !has_topic)
fragments <- rownames(tab)[is_fragment]

datrm <- dat[!dat$trace %in% fragments, ]

# TODO: Find trace that has flipCard --> openPopup --> openTopic

# Distinct non-move events per trace, in order of first appearance
dato <- datrm[datrm$event != "move", ]

trace_ids <- unique(dato$trace)
tmp <- lapply(trace_ids,
              function(x) unique(dato[dato$trace == x, "event"]))
names(tmp) <- trace_ids

# Traces that contain exactly three distinct non-move event types
ids <- lengths(tmp) == 3
do.call(rbind, tmp[ids])

# TODO:
# fragmentary traces: for 4591 openTopic for topic 1 is in the raw log files,
# but gets probably removed in close_events(); how can I prevent that? How can
# I fix the traces and eventIds that do not match correctly ???

ct <- c(4591, 5937, 7080, 8412, 8279)
datrm[datrm$trace %in% ct, 1:10]
### WHY?????
# Activity log over all remaining traces: one case per trace, the event type
# as activity, and the artwork as resource.
alog <- activitylog(
  datrm,
  case_id = "trace",
  activity_id = "event",
  #resource_id = "case",
  resource_id = "artwork",
  timestamps = c("start", "complete")
)

process_map(alog)
process_map(alog, frequency("relative"))
process_map(alog, frequency("relative_consequent"))

library(processanimateR)

# Convert once and reuse; the animations below all work on the event log.
elog <- to_eventlog(alog)

animate_process(elog)

# Colour range for the ordinal artwork scale (71 entries).
# NOTE(review): "#666666", "#FFFF99" and "#FF7F00" each occur twice, so some
# artworks may end up with the same colour -- confirm this is acceptable.
col_vector <- c(
  "#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F",
  "#BF5B17", "#666666", "#1B9E77", "#D95F02", "#7570B3", "#E7298A",
  "#66A61E", "#E6AB02", "#A6761D", "#666666", "#A6CEE3", "#1F78B4",
  "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00",
  "#CAB2D6", "#6A3D9A", "#FFFF99", "#B15928", "#FBB4AE", "#B3CDE3",
  "#CCEBC5", "#DECBE4", "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC",
  "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9",
  "#FFF2AE", "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A",
  "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999",
  "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
  "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072",
  "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9"
)

# All artworks, tokens coloured by artwork
animate_process(
  elog,
  mode = "relative",
  jitter = 10,
  legend = "color",
  mapping = token_aes(
    color = token_scale("artwork", scale = "ordinal", range = col_vector)
  )
)

# Single artworks
animate_process(elog[elog$artwork == "054", ])
animate_process(elog[elog$artwork == "080", ])
animate_process(elog[elog$artwork == "501", ])

process_map(alog[alog$artwork == "054", ])

# Artworks 080 and 054 side by side, distinguished by token colour
animate_process(
  elog[elog$artwork %in% c("080", "054"), ],
  mode = "relative",
  jitter = 10,
  legend = "color",
  mapping = token_aes(
    color = token_scale("artwork", scale = "ordinal",
                        range = c("black", "gray"))
  )
)
# --> not sure, yet, how to interpret this...
# Build the activity log for a single artwork: one case per trace, the event
# type as activity, and the `case` column as resource.
#
# data:       data frame with columns artwork, trace, event, case, start and
#             complete
# artwork_id: artwork identifier(s) to keep, compared against data$artwork
#
# Returns the object created by activitylog() for the selected rows.
artwork_activitylog <- function(data, artwork_id) {
  activitylog(
    data[data$artwork %in% artwork_id, ],
    #case_id = "case",
    case_id = "trace",
    activity_id = "event",
    #resource_id = "trace",
    resource_id = "case",
    timestamps = c("start", "complete")
  )
}

# NOTE(review): these logs are built from `dat`, not from `datrm` (the data
# with fragmentary traces removed) -- confirm that this is intended.
alog080 <- artwork_activitylog(dat, "080")
process_map(alog080, frequency("relative"))

alog054 <- artwork_activitylog(dat, "054")
process_map(alog054, frequency("relative"))