mtt_haum/code/04_modeling_haum.R
2024-01-16 09:59:23 +01:00

186 lines
7.3 KiB
R

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
# Read data
dat <- read.table("results/haum/event_logfiles_2024-01-02_19-44-50.csv",
colClasses = c("character", "character", "POSIXct",
"POSIXct", "character", "integer",
"numeric", "character", "character",
rep("numeric", 3), "character",
"character", rep("numeric", 11),
"character", "character"),
sep = ";", header = TRUE)
dat$event <- factor(dat$event, levels = c("move", "flipCard", "openTopic",
"openPopup"))
dat$weekdays <- factor(weekdays(dat$date.start),
levels = c("Montag", "Dienstag", "Mittwoch",
"Donnerstag", "Freitag", "Samstag",
"Sonntag"),
labels = c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday",
"Sunday"))
# Select data pre Corona
dat <- dat[as.Date(dat$date.start) < "2020-03-13", ]
dat <- dat[dat["path"] != 81621, ]
table(dat$event)
proportions(table(dat$event))
# Investigate paths (will separate items and give clusters of artworks!)
length(unique(dat$path))
datpath <- aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~
path, dat, function(x) mean(x, na.rm = TRUE), na.action = NULL)
datpath$length <- aggregate(item ~ path, dat, length)$item
datpath$nitems <- aggregate(item ~ path, dat, function(x)
length(unique(x)), na.action = NULL)$item
datpath$ntopics <- aggregate(topic ~ path, dat,
function(x) ifelse(all(is.na(x)), NA, length(unique(na.omit(x)))),
na.action = NULL)$topic
datpath$vacation <- aggregate(vacation ~ path, dat,
function(x) ifelse(all(is.na(x)), 0, 1),
na.action = NULL)$vacation
datpath$holiday <- aggregate(holiday ~ path, dat,
function(x) ifelse(all(is.na(x)), 0, 1),
na.action = NULL)$holiday
datpath$weekend <- aggregate(weekdays ~ path, dat,
function(x) ifelse(any(x %in% c("Saturday", "Sunday")), 1, 0),
na.action = NULL)$weekdays
datpath$morning <- aggregate(date.start ~ path, dat,
function(x) ifelse(lubridate::hour(x[1]) > 13, 0, 1),
na.action = NULL)$date.start
# Investigate cases (= interactions per time intervall)
length(unique(dat$case))
datcase <- aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~
case, dat, function(x) mean(x, na.rm = TRUE), na.action = NULL)
datcase$length <- aggregate(item ~ case, dat, length)$item
datcase$nitems <- aggregate(item ~ case, dat, function(x)
length(unique(x)), na.action = NULL)$item
datcase$ntopics <- aggregate(topic ~ case, dat,
function(x) ifelse(all(is.na(x)), NA, length(unique(na.omit(x)))),
na.action = NULL)$topic
datcase$vacation <- aggregate(vacation ~ case, dat,
function(x) ifelse(all(is.na(x)), 0, 1),
na.action = NULL)$vacation
datcase$holiday <- aggregate(holiday ~ case, dat,
function(x) ifelse(all(is.na(x)), 0, 1),
na.action = NULL)$holiday
datcase$weekend <- aggregate(weekdays ~ case, dat,
function(x) ifelse(any(x %in% c("Saturday", "Sunday")), 1, 0),
na.action = NULL)$weekdays
datcase$morning <- aggregate(date.start ~ case, dat,
function(x) ifelse(lubridate::hour(x[1]) > 13, 0, 1),
na.action = NULL)$date.start
# Paths with more than one case associated
tmp <- aggregate(case ~ path, dat, function(x) length(unique(x)))
sum(tmp$case > 1)
table(tmp$case)
dat$date <- as.Date(dat$date.start)
tmp <- aggregate(date ~ path, dat, function(x) length(unique(x)))
sum(tmp$date > 1)
table(tmp$date)
tmp[tmp$date > 1, ]
for (p in tmp$path[tmp$date > 1]) {
print(dat[dat$path == p, 3:9])
cat("\n\n")
}
dat[dat$date == "2017-02-28" & dat$item == "503", ]
# Creating event logs
library(bupaverse)
dat$start <- dat$date.start
dat$complete <- dat$date.stop
table(table(dat$start))
# --> hmm...
summary(aggregate(duration ~ path, dat, mean))
alog <- activitylog(dat,
case_id = "path",
activity_id = "event",
resource_id = "item",
timestamps = c("start", "complete"))
process_map(alog,
type_nodes = frequency("absolute"),
sec_nodes = frequency("relative"),
type_edges = frequency("absolute"),
sec_edges = frequency("relative"),
rankdir = "LR")
alog2 <- activitylog(dat,
case_id = "case",
activity_id = "event",
resource_id = "item",
timestamps = c("start", "complete"))
process_map(alog2,
type_nodes = frequency("absolute"),
sec_nodes = frequency("relative"),
type_edges = frequency("absolute"),
sec_edges = frequency("relative"),
rankdir = "LR")
library(processanimateR)
animate_process(to_eventlog(alog))
col_vector <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0",
"#F0027F", "#BF5B17", "#666666", "#1B9E77", "#D95F02",
"#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D",
"#666666", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C",
"#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6",
"#6A3D9A", "#FFFF99", "#B15928", "#FBB4AE", "#B3CDE3",
"#CCEBC5", "#DECBE4", "#FED9A6", "#FFFFCC", "#E5D8BD",
"#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8",
"#F4CAE4", "#E6F5C9", "#FFF2AE", "#F1E2CC", "#CCCCCC",
"#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",
"#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5",
"#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
"#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA",
"#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5",
"#D9D9D9")
animate_process(to_eventlog(alog), mode = "relative", jitter = 10, legend = "color",
mapping = token_aes(color = token_scale("artwork",
scale = "ordinal",
range = col_vector)))
elog <- to_eventlog(alog)
animate_process(elog[elog$artwork == "054", ])
animate_process(elog[elog$artwork == "080", ])
animate_process(elog[elog$artwork == "501", ])
process_map(alog[alog$artwork == "054", ])
animate_process(elog[elog$artwork %in% c("080", "054"), ],
mode = "relative", jitter = 10, legend = "color",
mapping = token_aes(color = token_scale("artwork",
scale = "ordinal",
range = c("black", "gray"))))