More work on case clustering
This commit is contained in:
parent
26ba7265f5
commit
29ac6c15d4
@ -10,57 +10,46 @@
|
|||||||
#
|
#
|
||||||
# last mod: 2024-03-08
|
# last mod: 2024-03-08
|
||||||
|
|
||||||
|
|
||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||||
|
|
||||||
library(bupaverse)
|
|
||||||
library(factoextra)
|
|
||||||
library(rpart)
|
|
||||||
library(partykit)
|
|
||||||
|
|
||||||
#--------------- (1) Load data ---------------
|
#--------------- (1) Load data ---------------
|
||||||
|
|
||||||
load("results/haum/dataframes_case_2019.RData")
|
load("results/haum/dataframes_case_2019.RData")
|
||||||
|
|
||||||
#--------------- (2) Clustering ---------------
|
#--------------- (2) Clustering ---------------
|
||||||
|
|
||||||
df <- dattree[, -1]
|
df <- dattree[, c("PropItems", "SearchInfo", "PropMoves", "AvDurItemNorm",
|
||||||
|
"Pattern", "InfocardOnly")]
|
||||||
|
|
||||||
summary(df)
|
summary(df)
|
||||||
|
|
||||||
# Look at collinearity
|
#dist_mat <- cluster::daisy(df, metric = "euclidean")
|
||||||
cor_mat <- cor(df)
|
|
||||||
diag(cor_mat) <- NA
|
|
||||||
heatmap(cor_mat)
|
|
||||||
|
|
||||||
#--------------- (2.2) Hierarchical clustering ---------------
|
|
||||||
|
|
||||||
dist_mat <- cluster::daisy(df, metric = "gower")
|
dist_mat <- cluster::daisy(df, metric = "gower")
|
||||||
|
|
||||||
# "Flatten" with MDS
|
# "Flatten" with MDS
|
||||||
coor_2d <- as.data.frame(cmdscale(dist_mat, k = 2))
|
# coor_2d <- as.data.frame(cmdscale(dist_mat, k = 2))
|
||||||
coor_3d <- as.data.frame(cmdscale(dist_mat, k = 3))
|
# coor_3d <- as.data.frame(cmdscale(dist_mat, k = 3))
|
||||||
# TODO: Better use MASS::isoMDS() since I am not using Euclidean distances?
|
|
||||||
|
# coor_2d <- prcomp(df)$x[, 1:2]
|
||||||
|
# coor_3d <- prcomp(df)$x[, 1:3]
|
||||||
|
|
||||||
|
coor_2d <- smacof::mds(dist_mat, ndim = 2, type = "ordinal")$conf
|
||||||
|
coor_3d <- smacof::mds(dist_mat, ndim = 2, type = "ordinal")$conf
|
||||||
|
|
||||||
plot(coor_2d)
|
plot(coor_2d)
|
||||||
rgl::plot3d(coor_3d)
|
rgl::plot3d(coor_3d)
|
||||||
|
|
||||||
method <- c(average = "average", single = "single", complete = "complete",
|
# method <- c(average = "average", single = "single", complete = "complete",
|
||||||
ward = "ward")
|
# ward = "ward")
|
||||||
|
# hcs <- pbapply::pblapply(method, function(x) cluster::agnes(dist_mat, method = x))
|
||||||
|
# acs <- pbapply::pbsapply(hcs, function(x) x$ac)
|
||||||
|
# hc <- hcs$ward
|
||||||
|
|
||||||
method <- "ward"
|
hc <- cluster::agnes(dist_mat, method = "ward")
|
||||||
|
|
||||||
hcs <- pbapply::pblapply(method, function(x) cluster::agnes(dist_mat, method = x))
|
k <- 5
|
||||||
acs <- pbapply::pbsapply(hcs, function(x) x$ac)
|
|
||||||
|
|
||||||
hc <- hcs$ward
|
mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E")
|
||||||
|
|
||||||
# Something like a scree plot (??)
|
|
||||||
plot(rev(hc$height)[1:100], type = "b", pch = 16, cex = .5)
|
|
||||||
|
|
||||||
k <- 4
|
|
||||||
|
|
||||||
mycols <- c("#78004B", "#FF6900", "#3CB4DC", "#91C86E")
|
|
||||||
|
|
||||||
cluster <- cutree(as.hclust(hc), k = k)
|
cluster <- cutree(as.hclust(hc), k = k)
|
||||||
|
|
||||||
@ -74,30 +63,35 @@ table(dattree[cluster == 1, "Pattern"])
|
|||||||
table(dattree[cluster == 2, "Pattern"])
|
table(dattree[cluster == 2, "Pattern"])
|
||||||
table(dattree[cluster == 3, "Pattern"])
|
table(dattree[cluster == 3, "Pattern"])
|
||||||
table(dattree[cluster == 4, "Pattern"])
|
table(dattree[cluster == 4, "Pattern"])
|
||||||
|
table(dattree[cluster == 5, "Pattern"])
|
||||||
|
|
||||||
table(dattree[cluster == 1, "InfocardOnly"])
|
table(dattree[cluster == 1, "InfocardOnly"])
|
||||||
table(dattree[cluster == 2, "InfocardOnly"])
|
table(dattree[cluster == 2, "InfocardOnly"])
|
||||||
table(dattree[cluster == 3, "InfocardOnly"])
|
table(dattree[cluster == 3, "InfocardOnly"])
|
||||||
table(dattree[cluster == 4, "InfocardOnly"])
|
table(dattree[cluster == 4, "InfocardOnly"])
|
||||||
|
table(dattree[cluster == 5, "InfocardOnly"])
|
||||||
|
|
||||||
aggregate(. ~ cluster, df, mean)
|
aggregate(. ~ cluster, df, mean)
|
||||||
|
|
||||||
aggregate(cbind(duration, distance, scaleSize, rotationDegree, length,
|
aggregate(cbind(duration, distance, scaleSize, rotationDegree, length, nitems,
|
||||||
nmove, nflipCard, nopenTopic, nopenPopup) ~ cluster, datcase,
|
nmove, nflipCard, nopenTopic, nopenPopup) ~ cluster, datcase,
|
||||||
mean)
|
mean)
|
||||||
|
|
||||||
### Look at selected cases ###########################################
|
### Look at selected cases ###########################################
|
||||||
|
|
||||||
|
load("")
|
||||||
|
|
||||||
tmp <- dat
|
tmp <- dat
|
||||||
tmp$start <- tmp$date.start
|
tmp$start <- tmp$date.start
|
||||||
tmp$complete <- tmp$date.stop
|
tmp$complete <- tmp$date.stop
|
||||||
|
|
||||||
alog <- activitylog(tmp[tmp$case == 24016, ],
|
alog <- bupaR::activitylog(tmp[tmp$case == 24016, ],
|
||||||
case_id = "case",
|
case_id = "case",
|
||||||
activity_id = "item",
|
activity_id = "item",
|
||||||
resource_id = "path",
|
resource_id = "path",
|
||||||
timestamps = c("start", "complete"))
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
process_map(alog)
|
processmapR::process_map(alog)
|
||||||
|
|
||||||
rm(tmp)
|
rm(tmp)
|
||||||
|
|
||||||
@ -126,16 +120,28 @@ write.table(res,
|
|||||||
quote = FALSE,
|
quote = FALSE,
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
save(res, dist_mat, hcs, acs, datcase, dattree, coor_2d, coor_3d,
|
save(res, dist_mat, hcs, acs, coor_2d, coor_3d,
|
||||||
file = "results/haum/tmp_user-navigation.RData")
|
file = "results/haum/tmp_user-navigation.RData")
|
||||||
|
|
||||||
#--------------- (3) Fit tree ---------------
|
#--------------- (3) Fit tree ---------------
|
||||||
|
|
||||||
c1 <- rpart(as.factor(cluster) ~ ., data = dattree[, -1], method = "class")
|
c1 <- rpart::rpart(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
||||||
plot(as.party(c1))
|
"PropItems",
|
||||||
|
"SearchInfo",
|
||||||
|
"AvDurItem",
|
||||||
|
"Pattern",
|
||||||
|
"InfocardOnly")],
|
||||||
|
method = "class")
|
||||||
|
|
||||||
|
plot(partykit::as.party(c1))
|
||||||
|
|
||||||
# with conditional tree
|
# with conditional tree
|
||||||
c2 <- ctree(as.factor(cluster) ~ ., data = dattree[, -1], alpha = 0)
|
c2 <- partykit::ctree(as.factor(cluster) ~ ., data = dattree[, c("PropMoves",
|
||||||
|
"PropItems",
|
||||||
|
"SearchInfo",
|
||||||
|
"AvDurItem",
|
||||||
|
"Pattern",
|
||||||
|
"InfocardOnly")],
|
||||||
|
alpha = 0.001)
|
||||||
plot(c2)
|
plot(c2)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user