mtt_haum/code/07_item-clustering.R

# 07_item-clustering.R
#
# content: (1) Read data
#           (1.1) Read log event data
#           (1.2) Read infos for PM for infos
#           (1.3) Extract additional infos for clustering
#          (2) Clustering
#          (3) Visualization with pictures
#
# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
#         results/haum/pn_infos_items.csv
# output: results/haum/event_logfiles_pre-corona_with-clusters.csv
#
# last mod: 2024-02-23


# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

library(bupaverse)
library(factoextra)

#--------------- (1) Read data ---------------

#--------------- (1.1) Read log event data ---------------

dat0 <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv",
                  colClasses = c("character", "character", "POSIXct",
                                 "POSIXct", "character", "integer",
                                 "numeric", "character", "character",
                                 rep("numeric", 3), "character",
                                 "character", rep("numeric", 11),
                                 "character", "character"),
                  sep = ";", header = TRUE)
dat0$event <- factor(dat0$event, levels = c("move", "flipCard", "openTopic",
                                          "openPopup"))

# TODO: Maybe look at this with complete data?

# Select data pre Corona
dat <- dat0[as.Date(dat0$date.start) < "2020-03-13", ]
dat <- dat[dat$path != 106098, ]

#--------------- (1.2) Read infos for PM for items ---------------

datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE,
                      sep = ";", row.names = 1)


#--------------- (1.3) Extract additional infos for clustering ---------------

dat_split <- split(dat, ~ path)

time_minmax <- function(subdata) {
  subdata$min_time <- min(subdata$timeMs.start)
    if (all(is.na(subdata$timeMs.stop))) {
      subdata$max_time <- NA
    } else {
       subdata$max_time <- max(subdata$timeMs.stop, na.rm = TRUE)
    }
  subdata
}

dat_list <- pbapply::pblapply(dat_split, time_minmax)
dat_minmax <- dplyr::bind_rows(dat_list)

datpath <- aggregate(duration ~ item + path, dat, mean, na.action = NULL)

datpath$min_time <- aggregate(min_time ~ path, dat_minmax, unique, na.action = NULL)$min_time
datpath$max_time <- aggregate(max_time ~ path, dat_minmax, unique, na.action = NULL)$max_time

datpath$duration_path <- datpath$max_time - datpath$min_time

# average duration per path
datitem$duration <- aggregate(duration ~ item, datpath, mean)$duration
datitem$distance <- aggregate(distance ~ item, dat, mean)$distance
datitem$scaleSize <- aggregate(scaleSize ~ item, dat, mean)$scaleSize
datitem$rotationDegree <- aggregate(rotationDegree ~ item, dat, mean)$rotationDegree
datitem$npaths <- aggregate(path ~ item, dat, function(x) length(unique(x)))$path
datitem$ncases <- aggregate(case ~ item, dat, function(x) length(unique(x)))$case
datitem$nmoves <- aggregate(event ~ item, dat, table)$event[,"move"]
datitem$nflipCard <- aggregate(event ~ item, dat, table)$event[,"flipCard"]
datitem$nopenTopic <- aggregate(event ~ item, dat, table)$event[,"openTopic"]
datitem$nopenPopup <- aggregate(event ~ item, dat, table)$event[,"openPopup"]

#--------------- (2) Clustering ---------------

df <- datitem[, c("precision", "generalizability", "nvariants", "duration",
                  "distance", "scaleSize", "rotationDegree", "npaths",
                  "ncases", "nmoves", "nopenTopic", "nopenPopup")] |>
      scale()

mat <- dist(df)

heatmap(as.matrix(mat))

# Choosing best linkage method
h1 <- hclust(mat, method = "average")
h2 <- hclust(mat, method = "complete")
h3 <- hclust(mat, method = "ward.D")
h4 <- hclust(mat, method = "ward.D2")
h5 <- hclust(mat, method = "single")

# Cophenetic Distances, for each linkage
c1 <- cophenetic(h1)
c2 <- cophenetic(h2)
c3 <- cophenetic(h3)
c4 <- cophenetic(h4)
c5 <- cophenetic(h5)

# Correlations
cor(mat, c1)
cor(mat, c2)
cor(mat, c3)
cor(mat, c4)
cor(mat, c5)
# https://en.wikipedia.org/wiki/Cophenetic_correlation
# https://stats.stackexchange.com/questions/195446/choosing-the-right-linkage-method-for-hierarchical-clustering

# Dendograms
par(mfrow=c(3,2))
plot(h1, main = "Average Linkage")
plot(h2, main = "Complete Linkage")
plot(h3, main = "Ward Linkage")
plot(h4, main = "Ward 2 Linkage")
plot(h5, main = "Single Linkage")


hc <- h1
# Note that ‘agnes(*, method="ward")’ corresponds to ‘hclust(*, "ward.D2")’

k <- 4    # number of clusters

grp <- cutree(hc, k = k)
datitem$grp <- grp

fviz_dend(hc, k = k,
          cex = 0.5,
          k_colors = c("#78004B", "#FF6900", "#3CB4DC", "#91C86E",
                       "#000000", "gold", "#434F4F"),
          #type = "phylogenic",
          rect = TRUE
)

plot(hc)
rect.hclust(hc, k=8, border="red")
rect.hclust(hc, k=7, border="blue")
rect.hclust(hc, k=6, border="green")

p <- fviz_cluster(list(data = df, cluster = grp),
                  palette = c("#78004B", "#FF6900", "#3CB4DC", "#91C86E",
                              "#000000", "#434F4F", "gold"),
                  ellipse.type = "convex",
                  repel = TRUE,
                  show.clust.cent = FALSE, ggtheme = theme_bw())
p

aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
                ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ grp,
          datitem, median)

# Something like a scree plot (??)
plot(rev(seq_along(hc$height)), hc$height, type = "l")
points(rev(seq_along(hc$height)), hc$height, pch = 16, cex = .5)


datitem$item <- sprintf("%03d",
                        as.numeric(gsub("item_([0-9]{3})", "\\1", row.names(datitem))))

res <- merge(dat, datitem[, c("item", "grp")], by = "item", all.x = TRUE)
res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ]

# Look at clusters
par(mfrow = c(2,2))
vioplot::vioplot(duration ~ grp, res)
vioplot::vioplot(distance ~ grp, res)
vioplot::vioplot(scaleSize ~ grp, res)
vioplot::vioplot(rotationDegree ~ grp, res)

write.table(res,
            file = "results/haum/event_logfiles_pre-corona_with-clusters.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

# DFGs for clusters
res$start <- res$date.start
res$complete <- res$date.stop

for (cluster in sort(unique(res$grp))) {

  alog <- activitylog(res[res$grp == cluster, ],
                      case_id     = "path",
                      activity_id = "event",
                      resource_id = "item",
                      timestamps  = c("start", "complete"))
  
  dfg <- process_map(alog,
                     type_nodes = frequency("relative", color_scale = "Greys"),
                     sec_nodes  = frequency("absolute"),
                     type_edges = frequency("relative", color_edges = "#FF6900"),
                     sec_edges  = frequency("absolute"),
                     rankdir    = "LR",
                     render     = FALSE)
  export_map(dfg,
             file_name = paste0("results/processmaps/dfg_cluster", cluster, "_R.pdf"),
             file_type = "pdf",
             title     = paste("DFG Cluster", cluster))


}

#--------------- (3) Visualization with pictures ---------------

library(png)
library(jpeg)
library(grid)

colors <- c("#78004B", "#FF6900", "#3CB4DC", "#91C86E")

pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 10)
#png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 10, res = 300)

par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))

plot(y ~ x, p$data, type = "n", ylim = c(-3.5, 2.8), xlim = c(-5, 10),
     xlab = "", ylab = "")

for (item in sprintf("%03d", as.numeric(rownames(p$data)))) {

  if (item == "125") {

    pic <- readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
                          item, "/", item, ".jpg"))
  } else {
    pic <- readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
                          item, "/", item, ".png"))
  }

  img <- as.raster(pic[,,1:3])

  x <- p$data$x[sprintf("%03d", as.numeric(rownames(p$data))) == item]
  y <- p$data$y[sprintf("%03d", as.numeric(rownames(p$data))) == item]

  points(x, y,
         col = colors[p$data$cluster[sprintf("%03d", as.numeric(rownames(p$data))) == item]],
         cex = 9,
         pch = 15)

  rasterImage(img,
              xleft = x - .45,
              xright = x + .45,
              ybottom = y - .2,
              ytop = y + .2)

}
legend("topright", paste("Cluster", 1:k), col = colors, pch = 15, bty = "n")

dev.off()
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								# 07_item-clustering.R
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								#
 								# content: (1) Read data
 								#           (1.1) Read log event data
 								#           (1.2) Read infos for PM for infos
 								#           (1.3) Extract additional infos for clustering
 								#          (2) Clustering
 								#          (3) Visualization with pictures
 								#
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								#         results/haum/pn_infos_items.csv
 								# output: results/haum/event_logfiles_pre-corona_with-clusters.csv
 								#
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								# last mod: 2024-02-23
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								library(bupaverse)
 								library(factoextra)
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								#--------------- (1) Read data ---------------
 								#--------------- (1.1) Read log event data ---------------
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								dat0 <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv",
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								                  colClasses = c("character", "character", "POSIXct",
 								                                 "POSIXct", "character", "integer",
 								                                 "numeric", "character", "character",
 								                                 rep("numeric", 3), "character",
 								                                 "character", rep("numeric", 11),
 								                                 "character", "character"),
 								                  sep = ";", header = TRUE)
 								dat0$event <- factor(dat0$event, levels = c("move", "flipCard", "openTopic",
 								                                          "openPopup"))
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
+								# TODO: Maybe look at this with complete data?
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								# Select data pre Corona
 								dat <- dat0[as.Date(dat0$date.start) < "2020-03-13", ]
 								dat <- dat[dat$path != 106098, ]
 								#--------------- (1.2) Read infos for PM for items ---------------
 								datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE,
 								                      sep = ";", row.names = 1)
 								#--------------- (1.3) Extract additional infos for clustering ---------------
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								dat_split <- split(dat, ~ path)
 								time_minmax <- function(subdata) {
 								  subdata$min_time <- min(subdata$timeMs.start)
 								    if (all(is.na(subdata$timeMs.stop))) {
 								      subdata$max_time <- NA
 								    } else {
 								       subdata$max_time <- max(subdata$timeMs.stop, na.rm = TRUE)
 								    }
 								  subdata
 								}
 								dat_list <- pbapply::pblapply(dat_split, time_minmax)
 								dat_minmax <- dplyr::bind_rows(dat_list)
 								datpath <- aggregate(duration ~ item + path, dat, mean, na.action = NULL)
 								datpath$min_time <- aggregate(min_time ~ path, dat_minmax, unique, na.action = NULL)$min_time
 								datpath$max_time <- aggregate(max_time ~ path, dat_minmax, unique, na.action = NULL)$max_time
 								datpath$duration_path <- datpath$max_time - datpath$min_time
 								# average duration per path
 								datitem$duration <- aggregate(duration ~ item, datpath, mean)$duration
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								datitem$distance <- aggregate(distance ~ item, dat, mean)$distance
 								datitem$scaleSize <- aggregate(scaleSize ~ item, dat, mean)$scaleSize
 								datitem$rotationDegree <- aggregate(rotationDegree ~ item, dat, mean)$rotationDegree
 								datitem$npaths <- aggregate(path ~ item, dat, function(x) length(unique(x)))$path
 								datitem$ncases <- aggregate(case ~ item, dat, function(x) length(unique(x)))$case
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								datitem$nmoves <- aggregate(event ~ item, dat, table)$event[,"move"]
 								datitem$nflipCard <- aggregate(event ~ item, dat, table)$event[,"flipCard"]
 								datitem$nopenTopic <- aggregate(event ~ item, dat, table)$event[,"openTopic"]
 								datitem$nopenPopup <- aggregate(event ~ item, dat, table)$event[,"openPopup"]
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
 								#--------------- (2) Clustering ---------------
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								df <- datitem[, c("precision", "generalizability", "nvariants", "duration",
 								                  "distance", "scaleSize", "rotationDegree", "npaths",
 								                  "ncases", "nmoves", "nopenTopic", "nopenPopup")] |>
 								      scale()
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								mat <- dist(df)
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
+								heatmap(as.matrix(mat))
 								# Choosing best linkage method
 								h1 <- hclust(mat, method = "average")
 								h2 <- hclust(mat, method = "complete")
 								h3 <- hclust(mat, method = "ward.D")
 								h4 <- hclust(mat, method = "ward.D2")
 								h5 <- hclust(mat, method = "single")
 								# Cophenetic Distances, for each linkage
 								c1 <- cophenetic(h1)
 								c2 <- cophenetic(h2)
 								c3 <- cophenetic(h3)
 								c4 <- cophenetic(h4)
 								c5 <- cophenetic(h5)
 								# Correlations
 								cor(mat, c1)
 								cor(mat, c2)
 								cor(mat, c3)
 								cor(mat, c4)
 								cor(mat, c5)
 								# https://en.wikipedia.org/wiki/Cophenetic_correlation
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								# https://stats.stackexchange.com/questions/195446/choosing-the-right-linkage-method-for-hierarchical-clustering
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
 								# Dendograms
 								par(mfrow=c(3,2))
 								plot(h1, main = "Average Linkage")
 								plot(h2, main = "Complete Linkage")
 								plot(h3, main = "Ward Linkage")
 								plot(h4, main = "Ward 2 Linkage")
 								plot(h5, main = "Single Linkage")
 								hc <- h1
 								# Note that ‘agnes(*, method="ward")’ corresponds to ‘hclust(*, "ward.D2")’
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								k <- 4    # number of clusters
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
 								grp <- cutree(hc, k = k)
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								datitem$grp <- grp
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
+								fviz_dend(hc, k = k,
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								          cex = 0.5,
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								          k_colors = c("#78004B", "#FF6900", "#3CB4DC", "#91C86E",
 								                       "#000000", "gold", "#434F4F"),
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								          #type = "phylogenic",
 								          rect = TRUE
 								)
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
+								plot(hc)
 								rect.hclust(hc, k=8, border="red")
 								rect.hclust(hc, k=7, border="blue")
 								rect.hclust(hc, k=6, border="green")
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								p <- fviz_cluster(list(data = df, cluster = grp),
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								                  palette = c("#78004B", "#FF6900", "#3CB4DC", "#91C86E",
 								                              "#000000", "#434F4F", "gold"),
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								                  ellipse.type = "convex",
 								                  repel = TRUE,
 								                  show.clust.cent = FALSE, ggtheme = theme_bw())
 								p
 								aggregate(cbind(duration, distance, scaleSize , rotationDegree, npaths,
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								                ncases, nmoves, nflipCard, nopenTopic, nopenPopup) ~ grp,
 								          datitem, median)
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
 								# Something like a scree plot (??)
 								plot(rev(seq_along(hc$height)), hc$height, type = "l")
 								points(rev(seq_along(hc$height)), hc$height, pch = 16, cex = .5)
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								datitem$item <- sprintf("%03d",
 								                        as.numeric(gsub("item_([0-9]{3})", "\\1", row.names(datitem))))
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
 								res <- merge(dat, datitem[, c("item", "grp")], by = "item", all.x = TRUE)
 								res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ]
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								# Look at clusters
-												Looked at item clustering again

											
										
										
											2024-02-07 18:00:25 +01:00
+								par(mfrow = c(2,2))
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								vioplot::vioplot(duration ~ grp, res)
 								vioplot::vioplot(distance ~ grp, res)
 								vioplot::vioplot(scaleSize ~ grp, res)
 								vioplot::vioplot(rotationDegree ~ grp, res)
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								write.table(res,
 								            file = "results/haum/event_logfiles_pre-corona_with-clusters.csv",
 								            sep = ";",
 								            quote = FALSE,
 								            row.names = FALSE)
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								# DFGs for clusters
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								res$start <- res$date.start
 								res$complete <- res$date.stop
 								for (cluster in sort(unique(res$grp))) {
 								  alog <- activitylog(res[res$grp == cluster, ],
 								                      case_id     = "path",
 								                      activity_id = "event",
 								                      resource_id = "item",
 								                      timestamps  = c("start", "complete"))
 								  dfg <- process_map(alog,
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								                     type_nodes = frequency("relative", color_scale = "Greys"),
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								                     sec_nodes  = frequency("absolute"),
-												Went over clustering and helper scripts

											
										
										
											2024-01-30 11:48:48 +01:00
+								                     type_edges = frequency("relative", color_edges = "#FF6900"),
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								                     sec_edges  = frequency("absolute"),
 								                     rankdir    = "LR",
 								                     render     = FALSE)
 								  export_map(dfg,
 								             file_name = paste0("results/processmaps/dfg_cluster", cluster, "_R.pdf"),
 								             file_type = "pdf",
 								             title     = paste("DFG Cluster", cluster))
 								}
 								#--------------- (3) Visualization with pictures ---------------
 								library(png)
 								library(jpeg)
 								library(grid)
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								colors <- c("#78004B", "#FF6900", "#3CB4DC", "#91C86E")
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 10)
 								#png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 10, res = 300)
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								plot(y ~ x, p$data, type = "n", ylim = c(-3.5, 2.8), xlim = c(-5, 10),
 								     xlab = "", ylab = "")
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
 								for (item in sprintf("%03d", as.numeric(rownames(p$data)))) {
 								  if (item == "125") {
 								    pic <- readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
 								                          item, "/", item, ".jpg"))
 								  } else {
 								    pic <- readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
 								                          item, "/", item, ".png"))
 								  }
 								  img <- as.raster(pic[,,1:3])
 								  x <- p$data$x[sprintf("%03d", as.numeric(rownames(p$data))) == item]
 								  y <- p$data$y[sprintf("%03d", as.numeric(rownames(p$data))) == item]
 								  points(x, y,
 								         col = colors[p$data$cluster[sprintf("%03d", as.numeric(rownames(p$data))) == item]],
 								         cex = 9,
 								         pch = 15)
 								  rasterImage(img,
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								              xleft = x - .45,
 								              xright = x + .45,
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
+								              ybottom = y - .2,
 								              ytop = y + .2)
 								}
-												Re-ran preprocessing and updated files; worked on user navigation behavior -- intermediate step

											
										
										
											2024-02-27 09:08:20 +01:00
+								legend("topright", paste("Cluster", 1:k), col = colors, pch = 15, bty = "n")
-												Script dump after trying out a hundred things; needs serious cleaning

											
										
										
											2024-01-25 17:21:18 +01:00
 								dev.off()