2024-01-30 09:46:40 +01:00
|
|
|
# TODO: Clean me up! I am a mix of useful and useless!!!
|
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
|
|
|
|
|
|
|
library(bupaverse)
|
2023-08-14 16:57:03 +02:00
|
|
|
|
2023-09-28 15:04:59 +02:00
|
|
|
# Read data
|
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# Read the event log. Column classes are given explicitly so that the two
# timestamp columns are parsed as POSIXct.
dat0 <- read.table(
  "results/haum/event_logfiles_2024-01-18_09-58-52.csv",
  sep = ";",
  header = TRUE,
  colClasses = c(
    "character", "character", "POSIXct",
    "POSIXct", "character", "integer",
    "numeric", "character", "character",
    rep("numeric", 3), "character",
    "character", rep("numeric", 11),
    "character", "character"
  )
)

dat0$event <- factor(
  dat0$event,
  levels = c("move", "flipCard", "openTopic", "openPopup")
)

# Derive the weekday from the numeric POSIXlt field (0 = Sunday, ...,
# 6 = Saturday) instead of weekdays(): weekdays() returns locale-dependent
# names, so matching against German day names yields only NA unless LC_TIME
# is German.
dat0$weekdays <- factor(
  as.POSIXlt(dat0$date.start)$wday,
  levels = c(1:6, 0),
  labels = c("Monday", "Tuesday", "Wednesday",
             "Thursday", "Friday", "Saturday",
             "Sunday")
)
|
|
|
|
|
|
|
|
# Select data pre Corona
|
2024-01-25 17:21:18 +01:00
|
|
|
# Keep only events that started before 2020-03-13 (pre Corona)
dat <- dat0[as.Date(dat0$date.start) < "2020-03-13", ]
# Exclude path 106098
dat <- dat[dat$path != 106098, ]

# Number of events sharing the same start timestamp. Uses date.start
# directly: the `start` column is only created further down in this script,
# so dat$start is not available yet in a fresh session.
table(table(dat$date.start))

# Absolute and relative frequencies of the event types
table(dat$event)
proportions(table(dat$event))

# Mean duration per item, plotted as deviation from the mean over items
dat_dur <- aggregate(duration ~ item, dat, mean)
barplot(duration - mean(dat_dur$duration) ~ item, dat_dur,
        col = "#434F4F", las = 3)

# Investigate paths (will separate items and give clusters of artworks!)
length(unique(dat$path))
|
2024-01-25 17:21:18 +01:00
|
|
|
# DFGs per Cluster
|
|
|
|
# Timestamp columns under the names passed to activitylog() below
dat[c("start", "complete")] <- dat[c("date.start", "date.stop")]

# Distribution of the mean event duration per path
summary(aggregate(duration ~ path, dat, mean))

# Activity log: a path is a case, the event type is the activity
alog <- activitylog(
  dat,
  case_id     = "path",
  activity_id = "event",
  resource_id = "item",
  timestamps  = c("start", "complete")
)

# Process map: absolute frequencies, relative ones as secondary labels
process_map(
  alog,
  type_nodes = frequency("absolute"),
  sec_nodes  = frequency("relative"),
  type_edges = frequency("absolute"),
  sec_edges  = frequency("relative"),
  rankdir    = "LR"
)
|
|
|
|
|
|
|
|
### Separate for items
|
|
|
|
|
|
|
|
# Item-level features: mean of the four movement measures per item.
# na.action = NULL keeps rows with missing values; NAs are removed inside
# mean() instead.
datitem <- aggregate(
  cbind(duration, distance, scaleSize, rotationDegree) ~ item,
  data = dat,
  FUN = function(v) mean(v, na.rm = TRUE),
  na.action = NULL
)

# Number of distinct paths per item
datitem$npaths <- aggregate(
  path ~ item,
  data = dat,
  FUN = function(ids) length(unique(ids)),
  na.action = NULL
)[["path"]]

# Number of distinct cases per item
datitem$ncases <- aggregate(
  case ~ item,
  data = dat,
  FUN = function(ids) length(unique(ids)),
  na.action = NULL
)[["case"]]

# Number of distinct topics per item; NA when an item has no topic at all
datitem$ntopics <- aggregate(
  topic ~ item,
  data = dat,
  FUN = function(tp) {
    if (all(is.na(tp))) NA else length(unique(na.omit(tp)))
  },
  na.action = NULL
)[["topic"]]
|
|
|
|
|
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# k-means on the item-level features (first column is the item id)
set.seed(1211)

nclusters <- 6
k1 <- kmeans(datitem[, -1], centers = nclusters)

#colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")
colors <- palette.colors(palette = "Okabe-Ito")

# Two-dimensional MDS configuration of the items, coloured by cluster
xy <- cmdscale(dist(datitem[, -1]))

plot(xy, type = "n")
text(x = xy[, 1], y = xy[, 2], labels = datitem$item,
     col = colors[k1$cluster])
legend("topright", legend = paste("Cluster", 1:nclusters),
       col = colors, lty = 1)
|
|
|
|
|
|
|
|
## Scree plot

# Total within-cluster sum of squares for k = 1, ..., 10
ks <- 1:10
sse <- vapply(
  ks,
  function(k) kmeans(datitem[, -1], k)$tot.withinss,
  numeric(1)
)

plot(sse ~ ks, type = "l")
|
|
|
|
|
|
|
|
|
|
|
|
# Attach the cluster membership to the items and average per cluster
datitem$cluster <- k1$cluster
datitem_agg <- aggregate(. ~ cluster, data = datitem[, -1], FUN = mean)

# Add the item cluster to every event and restore the log order
dat_cl <- merge(dat, datitem[, c("item", "cluster")],
                by = "item", all.x = TRUE)
dat_cl <- dat_cl[with(dat_cl, order(fileId.start, date.start, timeMs.start)), ]

write.table(dat_cl,
            file = "results/haum/event_logfiles_with-clusters_kmeans.csv",
            sep = ";", row.names = FALSE)

# Distribution of durations: over items, per item, and per cluster
vioplot::vioplot(datitem$duration)
vioplot::vioplot(duration ~ item, data = dat, las = 3)

# Movement measures per cluster
vioplot::vioplot(duration ~ cluster, data = dat_cl)
vioplot::vioplot(distance ~ cluster, data = dat_cl)
vioplot::vioplot(scaleSize ~ cluster, data = dat_cl)
vioplot::vioplot(rotationDegree ~ cluster, data = dat_cl)
|
2024-01-16 09:59:23 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# Export one directly-follows graph (DFG) per item cluster as PDF
for (cl in sort(unique(dat_cl$cluster))) {

  alog <- activitylog(
    dat_cl[dat_cl$cluster == cl, ],
    case_id     = "path",
    activity_id = "event",
    resource_id = "item",
    timestamps  = c("start", "complete")
  )

  # render = FALSE: the map is not displayed, only passed on to export_map()
  dfg <- process_map(
    alog,
    type_nodes = frequency("relative"),
    sec_nodes  = frequency("absolute"),
    type_edges = frequency("relative"),
    sec_edges  = frequency("absolute"),
    rankdir    = "LR",
    render     = FALSE
  )

  export_map(
    dfg,
    file_name = paste0("results/processmaps/dfg_cluster", cl, "_R.pdf"),
    file_type = "pdf",
    title     = paste("DFG Cluster", cl)
  )

}
|
2023-09-28 15:04:59 +02:00
|
|
|
|
2023-08-14 16:57:03 +02:00
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# All events except "move". which() drops rows whose event is NA; plain
# logical indexing would turn each of them into a row consisting only of NAs.
tmp <- dat[which(dat$event != "move"), ]
|
2023-08-14 16:57:03 +02:00
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# Check the traces of all paths that contain an "openPopup" event.
#
# For every such path the distinct events (in order of first occurrence in
# `data`) are compared with the expected trace
# flipCard -> openTopic -> openPopup.
#
# Args:
#   data: data frame with the columns `path` and `event`.
#
# Returns:
#   A data frame with one row per path containing "openPopup", sorted by
#   path, and the columns
#     path  - the path identifier
#     check - TRUE if the trace deviates from the expected one, else FALSE.
#   If no path contains "openPopup", a data frame with zero rows is returned,
#   so that sum(result$check) is 0. A trace containing a missing event is
#   reported as deviating.
check_traces <- function(data) {
  expected <- c("flipCard", "openTopic", "openPopup")

  path_col <- if (is.factor(data$path)) as.character(data$path) else data$path
  events <- as.character(data$event)

  # Paths with at least one openPopup event (rows without a path id ignored)
  popup_paths <- unique(path_col[!is.na(path_col) & events %in% "openPopup"])
  if (length(popup_paths) == 0) {
    return(data.frame(path = path_col[0], check = logical(0)))
  }

  # Distinct (path, event) pairs of these paths, sorted by path; the order of
  # the events within a path is their order of first occurrence
  keep <- path_col %in% popup_paths
  datcheck <- data.frame(path = path_col[keep], event = events[keep],
                         stringsAsFactors = FALSE)
  datcheck <- datcheck[!duplicated(datcheck), ]
  datcheck <- datcheck[order(datcheck$path), ]

  # identical() compares length and content at once, so traces that are
  # shorter or longer than the expected one are handled without recycling
  paths <- unique(datcheck$path)
  traces <- split(datcheck$event, factor(datcheck$path, levels = paths))
  check <- !vapply(traces, identical, logical(1), expected, USE.NAMES = FALSE)

  data.frame(path = paths, check = check, stringsAsFactors = FALSE)
}
|
|
|
|
|
|
|
|
# Run the trace check on the events without moves
check <- check_traces(data = tmp)

# Number of paths whose trace deviates from the expected one
sum(check[["check"]])
|
2023-08-14 16:57:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-10-18 12:57:15 +02:00
|
|
|
|
2023-10-23 15:11:08 +02:00
|
|
|
# Activity log on case level: the item is the activity, the path the resource
alog <- activitylog(
  dat,
  case_id     = "case",
  activity_id = "item",
  resource_id = "path",
  timestamps  = c("start", "complete")
)

# Process map: absolute frequencies, relative ones as secondary labels
process_map(
  alog,
  type_nodes = frequency("absolute"),
  sec_nodes  = frequency("relative"),
  type_edges = frequency("absolute"),
  sec_edges  = frequency("relative"),
  rankdir    = "LR"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-01-25 17:21:18 +01:00
|
|
|
# One row per (case, path, item) combination: the first row of each
# combination in dat
datcase <- dat[!duplicated(dat[, c("case", "path", "item")]),
               c("case", "path", "event", "item")]

# Mean of the movement measures per path. The means are looked up by path id
# rather than assigned by position: aggregate() returns its rows sorted by
# path, whereas datcase keeps the row order of dat.
path_means <- aggregate(
  cbind(duration, distance, scaleSize, rotationDegree) ~ path,
  dat, function(x) mean(x, na.rm = TRUE), na.action = NULL
)
path_idx <- match(datcase$path, path_means$path)
for (measure in c("duration", "distance", "scaleSize", "rotationDegree")) {
  datcase[[measure]] <- path_means[[measure]][path_idx]
}

# datcase$ntopics <- aggregate(topic ~ path, dat,
#   function(x) ifelse(all(is.na(x)), NA, length(unique(na.omit(x)))),
#   na.action = NULL)$topic

# paths that start with move
datcase$move <- ifelse(datcase$event == "move", 1, 0)

# One 0/1 indicator column per item
for (item in sort(unique(datcase$item))) {
  datcase[paste0("item_", item)] <- ifelse(datcase$item == item, 1, 0)
}
|
|
|
|
|
|
|
|
# Feature matrix for clustering: everything but the first four columns
# (case, path, event, item), restricted to rows without missing values.
# `complete` records which rows of datcase were kept so that the plot labels
# below line up with the rows of mat.
features <- datcase[, -c(1:4)]
complete <- complete.cases(features)
mat <- features[complete, , drop = FALSE]

set.seed(1610)

nclusters <- 6
k1 <- kmeans(mat, nclusters)

#colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")
colors <- palette.colors(palette = "Okabe-Ito")[1:nclusters]

library(distances)
mat_dist <- distances(mat)

# Two-dimensional MDS configuration, labelled with the path id and coloured
# by cluster
xy <- cmdscale(mat_dist)

plot(xy, type = "n")
text(xy[, 1], xy[, 2], datcase$path[complete], col = colors[k1$cluster])
legend("topright", paste("Cluster", 1:nclusters), col = colors, lty = 1)
|
|
|
|
|
|
|
|
## Scree plot

# Total within-cluster sum of squares for k = 1, ..., 10, computed on the
# case-level feature matrix `mat` that is clustered in this section (not on
# the item-level data frame datitem)
ks <- 1:10
sse <- vapply(ks, function(k) kmeans(mat, k)$tot.withinss, numeric(1))

plot(sse ~ ks, type = "l")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Activity log built from the case-level data frame
# NOTE(review): datcase is created in this script with the columns case,
# path, event, item, the four measure means, move and the item indicators;
# no "start"/"complete" columns are added to it here - confirm that this
# call works as intended.
alog <- activitylog(
  datcase,
  case_id     = "case",
  activity_id = "item",
  resource_id = "path",
  timestamps  = c("start", "complete")
)

# Process map: relative frequencies, absolute ones as secondary labels
process_map(
  alog,
  type_nodes = frequency("relative"),
  sec_nodes  = frequency("absolute"),
  type_edges = frequency("relative"),
  sec_edges  = frequency("absolute"),
  rankdir    = "LR"
)
|
2023-08-14 16:57:03 +02:00
|
|
|
|
2024-01-30 09:46:40 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
|
|
|
|
|
|
|
# Raw log files
datraw <- read.table(
  "results/haum/raw_logfiles_2024-01-18_09-58-52.csv",
  header = TRUE,
  sep = ";"
)

# Read data
datlogs <- read.table(
  "results/haum/event_logfiles_2024-01-18_09-58-52.csv",
  colClasses = c(
    "character", "character", "POSIXct",
    "POSIXct", "character", "integer",
    "numeric", "character", "character",
    rep("numeric", 3), "character",
    "character", rep("numeric", 11),
    "character", "character"
  ),
  sep = ";",
  header = TRUE
)

# Sort the event log by log file, start date and start time
datlogs <- datlogs[
  with(datlogs, order(fileId.start, date.start, timeMs.start)),
]

# Artwork, log files and path to look at
artwork <- "176"
fileId <- c("2017_06_16-13_49_00.log", "2017_06_16-13_59_00.log")
path <- 106098

# Raw log entries of the artwork in the two log files
datraw[datraw$item == artwork & datraw$fileId %in% fileId, ]

# Events of the path in the event log
datlogs[datlogs$path == path, ]
|
|
|
|
|
|
|
|
|
|
|
|
|