diff --git a/code/07_item-clustering.R b/code/07_item-clustering.R index bd0e37a..e5a7ab8 100644 --- a/code/07_item-clustering.R +++ b/code/07_item-clustering.R @@ -10,12 +10,9 @@ # input: results/haum/eventlogs_pre-corona_cleaned.RData # results/haum/pn_infos_items.csv # output: results/haum/eventlogs_pre-corona_item-clusters.csv -# results/figures/dendrogram_items.pdf -# results/figures/clustering_items.pdf -# results/figures/clustering_artworks.pdf -# results/figures/clustering_artworks.png +# ../../thesis/figures/data/clustering_items.RData # -# last mod: 2024-03-08 +# last mod: 2024-03-21 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") @@ -96,9 +93,6 @@ mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E", "Black") cluster <- cutree(hc, k = k) -pdf("results/figures/dendrogram_items.pdf", width = 6.5, height = 5.5, pointsize = 10) -# TODO: Move code for plots to /thesis/ - factoextra::fviz_dend(hc, k = k, cex = 0.5, k_colors = mycols, @@ -109,10 +103,6 @@ factoextra::fviz_dend(hc, k = k, #ggtheme = ggplot2::theme_bw() ) -dev.off() - -pdf("results/figures/clustering_items.pdf", width = 6.5, height = 5.5, pointsize = 10) - factoextra::fviz_cluster(list(data = df, cluster = cluster), palette = mycols, ellipse.type = "convex", @@ -121,8 +111,6 @@ factoextra::fviz_cluster(list(data = df, cluster = cluster), main = "", ggtheme = ggplot2::theme_bw()) -dev.off() - aggregate(cbind(precision, generalizability, nvariants, duration, distance, scaleSize , rotationDegree, npaths, ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem, @@ -151,75 +139,8 @@ write.table(res, quote = FALSE, row.names = FALSE) -# DFGs for clusters -res$start <- res$date.start -res$complete <- res$date.stop +# Save data for plots and tables -for (clst in sort(unique(res$cluster))) { - - alog <- bupaR::activitylog(res[res$cluster == clst, ], - case_id = "path", - activity_id = "event", - resource_id = "item", - 
timestamps = c("start", "complete")) - - dfg <- processmapR::process_map(alog, - type_nodes = processmapR::frequency("relative", color_scale = "Greys"), - sec_nodes = processmapR::frequency("absolute"), - type_edges = processmapR::frequency("relative", color_edges = "#FF6900"), - sec_edges = processmapR::frequency("absolute"), - rankdir = "LR", - render = FALSE) - - processmapR::export_map(dfg, - file_name = paste0("results/processmaps/dfg_items_cluster", clst, "_R.pdf"), - file_type = "pdf", - title = paste("Cluster", clst)) -} - -#--------------- (3) Visualization with pictures --------------- - -coor_2d <- cmdscale(dist_mat, k = 2) - -items <- sprintf("%03d", as.numeric(rownames(datitem))) - -pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 16) -#png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 16, res = 300) - -par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) - -plot(coor_2d, type = "n", ylim = c(-3.7, 2.6), xlim = c(-5, 10.5), - xlab = "", ylab = "") - -for (item in items) { - - if (item == "125") { - - pic <- jpeg::readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", - item, "/", item, ".jpg")) - } else { - pic <- png::readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", - item, "/", item, ".png")) - } - - img <- as.raster(pic[,,1:3]) - - x <- coor_2d[items == item, 1] - y <- coor_2d[items == item, 2] - - points(x, y, - col = mycols[cluster[items == item]], - cex = 6, - pch = 15) - - rasterImage(img, - xleft = x - .45, - xright = x + .45, - ybottom = y - .2, - ytop = y + .2) - -} -legend("topright", paste("Cluster", 1:k), col = mycols, pch = 15, bty = "n") - -dev.off() +save(hc, k, res, dist_mat, datitem, df, + file = "../../thesis/figures/data/clustering_items.RData")