mtt_haum/code/item_clustering.R

159 lines
5.5 KiB
R

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
#--------------- (1) Read data ---------------
#--------------- (1.1) Read log event data ---------------
dat0 <- read.table("results/haum/event_logfiles_2024-01-18_09-58-52.csv",
colClasses = c("character", "character", "POSIXct",
"POSIXct", "character", "integer",
"numeric", "character", "character",
rep("numeric", 3), "character",
"character", rep("numeric", 11),
"character", "character"),
sep = ";", header = TRUE)
dat0$event <- factor(dat0$event, levels = c("move", "flipCard", "openTopic",
"openPopup"))
# Select data pre Corona
dat <- dat0[as.Date(dat0$date.start) < "2020-03-13", ]
dat <- dat[dat$path != 106098, ]
#--------------- (1.2) Read infos for PM for items ---------------
datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE,
sep = ";", row.names = 1)
#--------------- (1.3) Extract additional infos for clustering ---------------
datitem$duration <- aggregate(duration ~ item, dat, mean)$duration
datitem$distance <- aggregate(distance ~ item, dat, mean)$distance
datitem$scaleSize <- aggregate(scaleSize ~ item, dat, mean)$scaleSize
datitem$rotationDegree <- aggregate(rotationDegree ~ item, dat, mean)$rotationDegree
datitem$npaths <- aggregate(path ~ item, dat, function(x) length(unique(x)))$path
datitem$ncases <- aggregate(case ~ item, dat, function(x) length(unique(x)))$case
datitem$ntopics <- aggregate(topic ~ item, dat, function(x) length(unique(x)))$topic
datitem$mostfreq_num <- as.numeric(gsub(".*: (.*)}", "\\1", datitem$mostfreq))
#--------------- (2) Clustering ---------------
df <- datitem[, c("precision", "generalizability", "nvariants",
"mostfreq_num", "duration", "distance", "scaleSize",
"rotationDegree", "npaths", "ncases", "ntopics")] |>
scale()
mat <- dist(df)
hc <- hclust(mat, method = "ward.D2")
library(factoextra)
fviz_dend(hc, cex = 0.5)
datitem$grp <- cutree(hc, k = 6)
fviz_dend(hc, k = 6,
cex = 0.5,
k_colors = c("#78004B", "#000000", "#3CB4DC", "#91C86E",
"#FF6900", "#434F4F"),
#type = "phylogenic",
rect = TRUE
)
p <- fviz_cluster(list(data = df, cluster = grp),
palette = c("#78004B", "#000000", "#3CB4DC", "#91C86E",
"#FF6900", "#434F4F"),
ellipse.type = "convex",
repel = TRUE,
show.clust.cent = FALSE, ggtheme = theme_bw())
p
aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
ncases, ntopics) ~ grp, datitem, mean)
datitem$item <- gsub("item_([0-9]{3})", "\\1", row.names(datitem))
res <- merge(dat, datitem[, c("item", "grp")], by = "item", all.x = TRUE)
res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ]
write.table(res,
file = "results/haum/event_logfiles_pre-corona_with-clusters.csv",
sep = ";",
quote = FALSE,
row.names = FALSE)
library(bupaverse)
res$start <- res$date.start
res$complete <- res$date.stop
for (cluster in sort(unique(res$grp))) {
alog <- activitylog(res[res$grp == cluster, ],
case_id = "path",
activity_id = "event",
resource_id = "item",
timestamps = c("start", "complete"))
dfg <- process_map(alog,
type_nodes = frequency("relative"),
sec_nodes = frequency("absolute"),
type_edges = frequency("relative"),
sec_edges = frequency("absolute"),
rankdir = "LR",
render = FALSE)
export_map(dfg,
file_name = paste0("results/processmaps/dfg_cluster", cluster, "_R.pdf"),
file_type = "pdf",
title = paste("DFG Cluster", cluster))
}
#--------------- (3) Visualization with pictures ---------------
library(png)
library(jpeg)
library(grid)
colors <- c("#78004B", "#000000", "#3CB4DC", "#91C86E", "#FF6900",
"#434F4F")
#pdf("results/haum/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 10)
png("results/haum/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 10, res = 300)
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))
plot(y ~ x, p$data, type = "n", ylim = c(-3.2, 3), xlim = c(-4.7, 6.4))
for (item in sprintf("%03d", as.numeric(rownames(p$data)))) {
if (item == "125") {
pic <- readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
item, "/", item, ".jpg"))
} else {
pic <- readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
item, "/", item, ".png"))
}
img <- as.raster(pic[,,1:3])
x <- p$data$x[sprintf("%03d", as.numeric(rownames(p$data))) == item]
y <- p$data$y[sprintf("%03d", as.numeric(rownames(p$data))) == item]
points(x, y,
col = colors[p$data$cluster[sprintf("%03d", as.numeric(rownames(p$data))) == item]],
cex = 9,
pch = 15)
rasterImage(img,
xleft = x - .4,
xright = x + .4,
ybottom = y - .2,
ytop = y + .2)
}
dev.off()