# 05_check-traces.R
#
# content: (1) Look at broken trace
#          (2) Function to find broken traces
#          (3) DFG for complete data
#          (4) Export data frame for analyses
#
# input:  results/event_logfiles_2024-02-21_16-07-33.csv
#         results/raw_logfiles_2024-02-21_16-07-33.csv
# output: results/eventlogs_pre-corona_cleaned.RData
#         results/eventlogs_pre-corona_cleaned.csv
#         ../../thesis/figures/dfg_complete_WFnet_R.pdf
#
# last mod: 2024-03-23

# NOTE(review): machine-specific working directory; all relative paths below
# are resolved against it. Consider running the script from the project root
# instead of hard-coding the path.
setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

#--------------- (1) Look at broken trace ---------------

datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv",
                     header = TRUE, sep = ";")

datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv",
                      colClasses = c("character", "character", "POSIXct",
                                     "POSIXct", "character", "integer",
                                     "numeric", "character", "character",
                                     rep("numeric", 3), "character",
                                     "character", rep("numeric", 11),
                                     "character", "character"),
                      sep = ";", header = TRUE)

# Id of the trace that is inspected here and excluded in sections (3) and (4)
corrupt_path <- 106098

artwork <- "176"
fileId  <- c("2017_06_16-13_49_00.log", "2017_06_16-13_59_00.log")
path    <- corrupt_path

# Raw log lines and derived events belonging to the broken trace
datraw[datraw$item == artwork & datraw$fileId %in% fileId, ]
datlogs[datlogs$path == path, ]

#--------------- (2) Function to find broken traces ---------------

tmp <- datlogs[datlogs$event != "move", ]

# Flag traces whose distinct events do not occur in the expected order.
#
# Only traces (paths) containing at least one "openPopup" event are checked.
# For each of them, the distinct events in order of first appearance must be
# exactly "flipCard", "openTopic", "openPopup"; otherwise the trace is
# flagged.
#
# data:    data frame with (at least) the columns `path` and `event`
# returns: data frame with one row per checked path and the columns
#          `path` and `check` (TRUE = trace deviates from expected sequence);
#          has zero rows when no trace contains "openPopup"
check_traces <- function(data) {

  expected <- c("flipCard", "openTopic", "openPopup")

  # Paths containing at least one "openPopup"
  datagg <- aggregate(event ~ path, data,
                      function(x) "openPopup" %in% x)
  paths  <- datagg$path[datagg$event]

  # Distinct (path, event) pairs; order() is stable, so within a path the
  # events keep their order of first appearance
  datcheck <- unique(data[data$path %in% paths, c("path", "event")])
  datcheck <- datcheck[order(datcheck$path), ]

  upaths <- unique(datcheck$path)

  # identical() compares length and content at once: no recycling warning
  # for traces with fewer or more than three distinct events, and never NA
  check <- vapply(upaths, function(p) {
    !identical(as.character(datcheck$event[datcheck$path == p]), expected)
  }, logical(1), USE.NAMES = FALSE)

  data.frame(path = upaths, check = check)
}

check <- check_traces(tmp)

check[check$check, ]

#--------------- (3) DFG for complete data ---------------

# Exclude the corrupt trace before building the activity log
tmp <- datlogs[datlogs$path != corrupt_path, ]
tmp$start    <- tmp$date.start
tmp$complete <- tmp$date.stop

alog <- bupaR::activitylog(tmp,
                           case_id     = "path",
                           activity_id = "event",
                           resource_id = "item",
                           timestamps  = c("start", "complete"))

dfg <- processmapR::process_map(alog,
  type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
  sec_nodes  = processmapR::frequency("absolute"),
  type_edges = processmapR::frequency("relative", color_edges = "#FF6900"),
  sec_edges  = processmapR::frequency("absolute"),
  rankdir    = "LR",
  render     = FALSE)

processmapR::export_map(dfg,
  file_name = "../../thesis/figures/dfg_complete_WFnet_R.pdf",
  file_type = "pdf")

rm(tmp)

#--------------- (4) Export data frame for analyses ---------------

datlogs$event <- factor(datlogs$event,
                        levels = c("move", "flipCard", "openTopic",
                                   "openPopup"))
datlogs$topic <- factor(datlogs$topic)

# "%u" is the ISO weekday number (1 = Monday, ..., 7 = Sunday). Unlike
# weekdays(), it does not depend on the session locale, so the factor is not
# silently filled with NA when the script runs outside a German locale.
datlogs$weekdays <- factor(format(datlogs$date.start, "%u"),
                           levels = as.character(1:7),
                           labels = c("Monday", "Tuesday", "Wednesday",
                                      "Thursday", "Friday", "Saturday",
                                      "Sunday"))

# Select data pre Corona
dat <- datlogs[as.Date(datlogs$date.start) < as.Date("2020-03-13"), ]
# Remove corrupt trace
dat <- dat[dat$path != corrupt_path, ]

save(dat, file = "results/eventlogs_pre-corona_cleaned.RData")

write.table(dat,
            file = "results/eventlogs_pre-corona_cleaned.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)