# 10_user-navigation.R # # content: (1) Load data # (2) Clustering # (3) Fit tree # # input: results/dataframes_case_2019.RData # output: results/eventlogs_2019_case-clusters.csv # results/user-navigation.RData # ../../thesis/figures/data/clustering_cases.RData # # last mod: 2024-03-22 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") #--------------- (1) Load data --------------- load("results/dataframes_case_2019.RData") #--------------- (2) Clustering --------------- df <- dattree[, c("PropItems", "SearchInfo", "PropMoves", "AvDurItemNorm", "Pattern", "InfocardOnly")] summary(df) #dist_mat <- cluster::daisy(df, metric = "euclidean") dist_mat <- cluster::daisy(df, metric = "gower") # "Flatten" with MDS # coor_3d <- as.data.frame(cmdscale(dist_mat, k = 3)) # coor_3d <- prcomp(df)$x[, 1:3] coor_3d <- smacof::mds(dist_mat, ndim = 3, type = "ordinal")$conf coor_2d <- coor_3d[, 1:2] plot(coor_2d) rgl::plot3d(coor_3d) # pm <- cluster::pam(dist_mat, k = k) # cluster <- pm$clustering # --> Does not look as good as the hierarchical clustring method <- c(average = "average", single = "single", complete = "complete", ward = "ward") hcs <- pbapply::pblapply(method, function(x) cluster::agnes(dist_mat, method = x)) acs <- pbapply::pbsapply(hcs, function(x) x$ac) # average single complete ward # 0.9881224 0.9725661 0.9937669 0.9994267 hc <- hcs$ward #hc <- cluster::agnes(dist_mat, method = "ward") k <- 5 mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F") cluster <- cutree(as.hclust(hc), k = k) table(cluster) plot(coor_2d, col = mycols[cluster], pch = 16) #legend("topleft", paste("Cl", 1:5), col = mycols, pch = 21) legend("topleft", c("Scanning", "Exploring", "Flitting", "Searching", "Info"), col = mycols, bty = "n", pch = 16) rgl::plot3d(coor_3d, col = mycols[cluster]) print(ftable(xtabs( ~ InfocardOnly + Pattern + cluster, dattree)), zero = "-") aggregate(. ~ cluster, df, mean) aggregate(cbind(duration, distance, scaleSize, rotationDegree, length, nitems, nmove, nflipCard, nopenTopic, nopenPopup) ~ cluster, datcase, mean) ### Look at selected cases ########################################### tmp <- dat tmp$start <- tmp$date.start tmp$complete <- tmp$date.stop # Examples: ## Scholar: 29679 ## Star: 24456 ## Dispersion: 26000 ## only info cards: 24299 ## not only info cards: 24013 #head(dattree[dattree$Pattern == "Dispersion" & datcase$nitems == 10, ]) #head(dattree[dattree$InfocardOnly == "yes" & datcase$nitems == 3, ]) alog <- bupaR::activitylog(tmp[tmp$case == 24013, ], case_id = "case", activity_id = "item", resource_id = "path", timestamps = c("start", "complete")) processmapR::process_map(alog) rm(tmp) ###################################################################### res <- merge(dat, data.frame(case = dattree$case, cluster), by = "case", all.x = TRUE) res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ] xtabs( ~ item + cluster, res) aggregate(event ~ cluster, res, table) # Look at clusters par(mfrow = c(2, 2)) vioplot::vioplot(duration ~ cluster, res) vioplot::vioplot(distance ~ cluster, res) vioplot::vioplot(scaleSize ~ cluster, res) vioplot::vioplot(rotationDegree ~ cluster, res) aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, mean) aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median) write.table(res, file = "results/eventlogs_2019_case-clusters.csv", sep = ";", quote = FALSE, row.names = FALSE) save(res, dist_mat, hcs, acs, coor_2d, coor_3d, file = "results/user-navigation.RData") save(coor_2d, coor_3d, cluster, dattree, file = "../../thesis/figures/data/clustering_cases.RData") #--------------- (3) Fit tree --------------- c1 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves", "PropItems", "SearchInfo", "AvDurItem", "Pattern", "InfocardOnly")], method = "class") plot(partykit::as.party(c1), tp_args = list(fill = mycols, col = mycols)) # with conditional tree c2 <- partykit::ctree(as.factor(cluster) ~ ., data = dattree[, c("PropMoves", "PropItems", "SearchInfo", "AvDurItem", "Pattern", "InfocardOnly")], alpha = 0.001) plot(c2, tp_args = list(fill = mycols, col = mycols)) factoextra::fviz_dend(as.hclust(hc), k = k, cex = 0.5, k_colors = mycols, type = "phylogenic", rect = TRUE, main = "", ylab = "" #ggtheme = ggplot2::theme_bw() )