# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

library(bupaverse)

# Read data ----

# Semicolon-separated event log; column classes are given positionally, with
# the two POSIXct columns parsed as timestamps on import.
dat0 <- read.table(
  "results/haum/event_logfiles_2024-01-18_09-58-52.csv",
  colClasses = c(
    "character", "character", "POSIXct", "POSIXct", "character", "integer",
    "numeric", "character", "character", rep("numeric", 3), "character",
    "character", rep("numeric", 11), "character", "character"
  ),
  sep = ";",
  header = TRUE
)

# Fix the order of the event types
dat0$event <- factor(
  dat0$event,
  levels = c("move", "flipCard", "openTopic", "openPopup")
)

# Day of the week of each event start. weekdays() returns names in the
# current locale (the previous code only worked under a German locale and
# produced NA everywhere else), so derive the weekday from the ISO weekday
# number (1 = Monday, ..., 7 = Sunday), which does not depend on the locale.
dat0$weekdays <- factor(
  as.integer(format(dat0$date.start, "%u")),
  levels = 1:7,
  labels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
             "Saturday", "Sunday")
)

# Select data pre Corona ----

dat <- dat0[as.Date(dat0$date.start) < as.Date("2020-03-13"), ]
# Drop one specific path by its id
dat <- dat[dat$path != 106098, ]

# How many events share the same start timestamp?
# `dat$start` is only created further down (as a copy of `date.start`), so
# the original column is used here.
table(table(dat$date.start))

# Absolute and relative frequencies of the event types
table(dat$event)
proportions(table(dat$event))

# Mean duration per item, plotted as deviation from the mean of the item means
dat_dur <- aggregate(duration ~ item, dat, mean)
barplot(duration - mean(dat_dur$duration) ~ item, dat_dur,
        col = "#434F4F", las = 3)

# Investigate paths (will separate items and give clusters of artworks!)
length(unique(dat$path))

# DFGs per Cluster ----

# Copy the timestamps to the column names passed to activitylog() below
dat$start <- dat$date.start
dat$complete <- dat$date.stop

summary(aggregate(duration ~ path, dat, mean))

# Event log with paths as cases and event types as activities
alog <- activitylog(dat,
                    case_id = "path",
                    activity_id = "event",
                    resource_id = "item",
                    timestamps = c("start", "complete"))

process_map(alog,
            type_nodes = frequency("absolute"),
            sec_nodes = frequency("relative"),
            type_edges = frequency("absolute"),
            sec_edges = frequency("relative"),
            rankdir = "LR")

### Separate for items ----

# Mean that ignores missing values; used for all aggregations below
mean_na_rm <- function(x) mean(x, na.rm = TRUE)

# One row per item: mean interaction measures plus counts of distinct paths,
# cases and topics. All aggregations group `dat` by `item` with
# `na.action = NULL`, so their rows line up.
datitem <- aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ item,
                     dat, mean_na_rm, na.action = NULL)
datitem$npaths <- aggregate(path ~ item, dat,
                            function(x) length(unique(x)),
                            na.action = NULL)$path
datitem$ncases <- aggregate(case ~ item, dat,
                            function(x) length(unique(x)),
                            na.action = NULL)$case
# Number of distinct topics; NA when an item has no topic at all
datitem$ntopics <- aggregate(topic ~ item, dat,
                             function(x) {
                               if (all(is.na(x))) {
                                 NA_integer_
                               } else {
                                 length(unique(na.omit(x)))
                               }
                             },
                             na.action = NULL)$topic

set.seed(1211)

nclusters <- 6
k1 <- kmeans(datitem[, -1], nclusters)

#colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")
# One colour per cluster, so the legend matches the plotted labels
colors <- palette.colors(palette = "Okabe-Ito")[seq_len(nclusters)]

# Two-dimensional MDS configuration of the items, labelled by item and
# coloured by cluster
xy <- cmdscale(dist(datitem[, -1]))

plot(xy, type = "n")
text(xy[, 1], xy[, 2], datitem$item, col = colors[k1$cluster])
legend("topright", paste("Cluster", seq_len(nclusters)), col = colors, lty = 1)

## Scree plot

ks <- 1:10
# Total within-cluster sum of squares for k = 1, ..., 10
sse <- vapply(ks,
              function(k) kmeans(datitem[, -1], k)$tot.withinss,
              numeric(1))

plot(sse ~ ks, type = "l")

datitem$cluster <- k1$cluster

# Cluster profiles: mean of every item-level variable per cluster
datitem_agg <- aggregate(. ~ cluster, datitem[, -1], mean)

# Attach the item cluster to every event and restore the log order
dat_cl <- merge(dat, datitem[, c("item", "cluster")], by = "item", all.x = TRUE)
dat_cl <- dat_cl[order(dat_cl$fileId.start, dat_cl$date.start,
                       dat_cl$timeMs.start), ]

write.table(dat_cl,
            "results/haum/event_logfiles_with-clusters_kmeans.csv",
            sep = ";",
            row.names = FALSE)

vioplot::vioplot(datitem$duration)

vioplot::vioplot(duration ~ item, dat, las = 3)

vioplot::vioplot(duration ~ cluster, dat_cl)
vioplot::vioplot(distance ~ cluster, dat_cl)
vioplot::vioplot(scaleSize ~ cluster, dat_cl)
vioplot::vioplot(rotationDegree ~ cluster, dat_cl)

# One directly-follows graph per item cluster, exported as PDF
for (cluster in sort(unique(dat_cl$cluster))) {

  alog <- activitylog(dat_cl[dat_cl$cluster == cluster, ],
                      case_id = "path",
                      activity_id = "event",
                      resource_id = "item",
                      timestamps = c("start", "complete"))

  dfg <- process_map(alog,
                     type_nodes = frequency("relative"),
                     sec_nodes = frequency("absolute"),
                     type_edges = frequency("relative"),
                     sec_edges = frequency("absolute"),
                     rankdir = "LR",
                     render = FALSE)

  export_map(dfg,
             file_name = paste0("results/processmaps/dfg_cluster", cluster,
                                "_R.pdf"),
             file_type = "pdf",
             title = paste("DFG Cluster", cluster))
}

tmp <- dat[dat$event != "move", ]

# Flag paths whose trace deviates from flipCard -> openTopic -> openPopup.
#
# Only paths containing at least one "openPopup" event are checked. For each
# of them the distinct event types, in order of first appearance in `data`,
# are compared with the expected sequence.
#
# data: data frame with the columns `path` and `event`.
#
# Returns a data frame with one row per checked path and the columns `path`
# and `check`; `check` is TRUE when the trace is not exactly the expected
# sequence. When no path contains "openPopup", the data frame has zero rows.
check_traces <- function(data) {

  expected <- c("flipCard", "openTopic", "openPopup")

  # Paths with at least one popup
  has_popup <- aggregate(event ~ path, data,
                         function(x) "openPopup" %in% x)
  paths <- has_popup$path[has_popup$event]

  # Distinct (path, event) pairs; ordering by path keeps the order of first
  # appearance of the events within each path
  datcheck <- unique(data[data$path %in% paths, c("path", "event")])
  datcheck <- datcheck[order(datcheck$path), ]

  upaths <- unique(datcheck$path)
  events <- split(as.character(datcheck$event),
                  factor(datcheck$path, levels = upaths))

  # identical() also handles traces that are shorter or longer than the
  # expected one, without relying on recycling
  check <- vapply(events,
                  function(ev) !identical(ev, expected),
                  logical(1),
                  USE.NAMES = FALSE)

  data.frame(path = upaths, check = check)
}

check <- check_traces(tmp)

# Number of popup paths with an unexpected trace
sum(check$check)

# Event log with cases as process instances and items as activities
alog <- activitylog(dat,
                    case_id = "case",
                    activity_id = "item",
                    resource_id = "path",
                    timestamps = c("start", "complete"))

process_map(alog,
            type_nodes = frequency("absolute"),
            sec_nodes = frequency("relative"),
            type_edges = frequency("absolute"),
            sec_edges = frequency("relative"),
            rankdir = "LR")

### Separate for paths ----

# First event of every (case, path, item) combination
first_of_path <- !duplicated(dat[, c("case", "path", "item")])
datcase <- dat[first_of_path, c("case", "path", "event", "item")]

# Mean interaction measures per path. aggregate() returns its rows sorted by
# path while `datcase` is in order of first appearance, so the means are
# joined on `path` instead of being assigned by position.
path_vars <- c("duration", "distance", "scaleSize", "rotationDegree")
path_means <- aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ path,
                        dat, mean_na_rm, na.action = NULL)
datcase[path_vars] <- path_means[match(datcase$path, path_means$path),
                                 path_vars]

# datcase$ntopics <- aggregate(topic ~ path, dat,
#                              function(x) ifelse(all(is.na(x)), NA, length(unique(na.omit(x)))),
#                              na.action = NULL)$topic

# Indicator for paths that start with move
datcase$move <- as.numeric(datcase$event == "move")

# One 0/1 indicator column per item
for (item in sort(unique(datcase$item))) {
  datcase[paste0("item_", item)] <- as.numeric(datcase$item == item)
}

# Feature matrix without the id columns; keep the complete-case mask so that
# the plot labels below stay aligned with the rows of `mat`
features <- datcase[, -(1:4)]
keep <- complete.cases(features)
mat <- features[keep, ]

set.seed(1610)

nclusters <- 6
k1 <- kmeans(mat, nclusters)

#colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")
colors <- palette.colors(palette = "Okabe-Ito")[seq_len(nclusters)]

library(distances)
mat_dist <- distances(mat)

# NOTE(review): confirm that cmdscale() handles a `distances` object as
# intended; distances::distance_matrix() may be needed to obtain a `dist`.
xy <- cmdscale(mat_dist)

plot(xy, type = "n")
text(xy[, 1], xy[, 2], datcase$path[keep], col = colors[k1$cluster])
legend("topright", paste("Cluster", seq_len(nclusters)), col = colors, lty = 1)

## Scree plot

ks <- 1:10
# Computed on the path-level features (the item-level data was used here
# before, which only repeated the scree plot of the item clustering)
sse <- vapply(ks,
              function(k) kmeans(mat, k)$tot.withinss,
              numeric(1))

plot(sse ~ ks, type = "l")

# NOTE(review): `datcase` does not contain the `start`/`complete` columns
# requested as timestamps below, so the timestamps of the first event of each
# path are attached here. Confirm that this is the intended log.
datcase_log <- cbind(datcase, dat[first_of_path, c("start", "complete")])

alog <- activitylog(datcase_log,
                    case_id = "case",
                    activity_id = "item",
                    resource_id = "path",
                    timestamps = c("start", "complete"))

process_map(alog,
            type_nodes = frequency("relative"),
            sec_nodes = frequency("absolute"),
            type_edges = frequency("relative"),
            sec_edges = frequency("absolute"),
            rankdir = "LR")