# 09_user-navigation.R
#
# content: (1) Read data
#          (2) Extract characteristics for cases
#          (3) Select features for navigation behavior
#          (4) Export data frames
#
# input:  results/haum/eventlogs_pre-corona_cleaned.RData
#         results/haum/tmp_centrality.RData
# output: results/haum/dataframes_case_2019.RData
#         results/haum/datcase.csv
#         results/haum/dattree.csv
#
# last mod: 2024-03-08

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

# Project helpers; the functions time_minmax_ms(), check_infocards() and
# normalize() used further down are not defined in this script and are
# presumably provided by this file -- TODO confirm.
source("R_helpers.R")

#--------------- (1) Read data ---------------

# Restores the saved workspace objects; the rest of the script relies on a
# data frame named `dat` being among them.
load("results/haum/eventlogs_pre-corona_cleaned.RData")

# Restrict the event log to events that started in 2019 to keep the number
# of cases manageable. The character bounds are coerced to Date by the
# comparison operators.
start_day <- as.Date(dat$date.start)
in_2019 <- start_day > "2018-12-31" & start_day < "2020-01-01"
dat <- dat[in_2019, ]
rm(start_day, in_2019)

#--------------- (2) Extract characteristics for cases ---------------

# Per-case means of the movement variables. na.action = NULL keeps rows with
# missing values in the model frame; the NAs are then dropped inside mean(),
# so a case without any observed value ends up as NaN/NA here.
datcase <- aggregate(cbind(distance, scaleSize, rotationDegree) ~ case, dat,
                     function(x) mean(x, na.rm = TRUE), na.action = NULL)

# Number of events (rows) per case
datcase$length <- aggregate(item ~ case, dat, length)$item

# Event counts per case. The tabulation is computed once and reused for every
# event type instead of re-running the same aggregate() for each column.
event_counts <- aggregate(event ~ case, dat, table)
eventtab <- event_counts["case"]
for (ev in c("move", "flipCard", "openTopic", "openPopup")) {
  eventtab[[paste0("n", ev)]] <- event_counts$event[, ev]
}

# Topic counts per case, again tabulated only once. Columns are picked by
# position, so the names below must be in the same order as the columns of
# the table (assumed to be the sorted topic levels -- TODO confirm against
# levels(dat$topic)). Cases without any topic are dropped by the default
# na.action and reappear as NA rows after the merge below.
topic_counts <- aggregate(topic ~ case, dat, table)
topictab <- topic_counts["case"]
topic_cols <- c("artist", "details", "extra_info", "komposition",
                "leben_des_kunstwerks", "licht_und_farbe", "technik", "thema")
for (j in seq_along(topic_cols)) {
  topictab[[topic_cols[j]]] <- topic_counts$topic[, j]
}

# Full outer joins so that no case is lost
datcase <- datcase |>
  merge(eventtab, by = "case", all = TRUE) |>
  merge(topictab, by = "case", all = TRUE)

rm(eventtab, topictab, event_counts, topic_counts, topic_cols, ev, j)

# Number of topic cards opened per case (NA if the case has no topic at all)
datcase$ntopiccards <- aggregate(
  topic ~ case, dat,
  function(x) if (all(is.na(x))) NA else sum(!is.na(x)),
  na.action = NULL
)$topic

# Number of distinct topics per case (NA if the case has no topic at all)
datcase$ntopics <- aggregate(
  topic ~ case, dat,
  function(x) if (all(is.na(x))) NA else length(unique(x[!is.na(x)])),
  na.action = NULL
)$topic

# Number of distinct items and distinct paths per case
datcase$nitems <- aggregate(
  item ~ case, dat,
  function(x) length(unique(x)),
  na.action = NULL
)$item
datcase$npaths <- aggregate(
  path ~ case, dat,
  function(x) length(unique(x)),
  na.action = NULL
)$path

# 0/1 indicators: 1 as soon as any event of the case has a non-missing
# vacation / holiday entry
datcase$vacation <- aggregate(
  vacation ~ case, dat,
  function(x) if (all(is.na(x))) 0 else 1,
  na.action = NULL
)$vacation
datcase$holiday <- aggregate(
  holiday ~ case, dat,
  function(x) if (all(is.na(x))) 0 else 1,
  na.action = NULL
)$holiday

# 1 if any event of the case falls on a Saturday or Sunday
# (relies on English day names in dat$weekdays)
datcase$weekend <- aggregate(
  weekdays ~ case, dat,
  function(x) if (any(x %in% c("Saturday", "Sunday"))) 1 else 0,
  na.action = NULL
)$weekdays

# 1 if the first event of the case starts before 14:00, 0 otherwise.
# ifelse() rather than if/else is kept on purpose: a missing first timestamp
# then yields NA instead of an error.
datcase$morning <- aggregate(
  date.start ~ case, dat,
  function(x) ifelse(lubridate::hour(x[1]) > 13, 0, 1),
  na.action = NULL
)$date.start

# Duration per case: time_minmax_ms() is applied to the events of each case;
# its result has to contain the columns min_time and max_time, which are
# collapsed to one value per case below.
dat_split <- split(dat, ~ case)
dat_list <- pbapply::pblapply(dat_split, time_minmax_ms)
dat_minmax <- dplyr::bind_rows(dat_list)

case_start <- aggregate(min_time ~ case, dat_minmax, unique)$min_time
case_end <- aggregate(max_time ~ case, dat_minmax, unique)$max_time
datcase$duration <- case_end - case_start
rm(case_start, case_end)

# All items except "501", "502" and "503"
artworks <- unique(dat$item)
artworks <- artworks[!artworks %in% c("501", "502", "503")]

# Per-case flag computed by check_infocards() from the case's events and the
# artwork items; it is later turned into the no/yes factor InfocardOnly.
datcase$infocardOnly <- pbapply::pbsapply(dat_split, check_infocards,
                                          artworks = artworks)

# Clean up NAs: replace missing values by a neutral default per column
# (1 for scaleSize, 0 for everything else)
na_defaults <- c(
  distance = 0,
  scaleSize = 1,
  rotationDegree = 0,
  artist = 0,
  details = 0,
  extra_info = 0,
  komposition = 0,
  leben_des_kunstwerks = 0,
  licht_und_farbe = 0,
  technik = 0,
  thema = 0,
  ntopics = 0,
  ntopiccards = 0
)
for (col in names(na_defaults)) {
  datcase[[col]] <- ifelse(is.na(datcase[[col]]),
                           na_defaults[[col]],
                           datcase[[col]])
}
rm(na_defaults, col)

#--------------- (3) Select features for navigation behavior ---------------

# Features for navigation types for MTT:
# - Scanning / Overviewing:
#   * Proportion of artworks looked at is high
#   * Duration per artwork is low: "ave_duration_item" / datcase$duration
# - Exploring:
#   * Looking at additional information is high
# - Searching / Studying:
#   * Proportion of artworks looked at is low
#   * Opening few cards:
#     datcase$nflipCard / mean(datcase$nflipCard) or
#     median(datcase$nflipCard) is low
#   * but for most cards popups are opened:
#     datcase$nopenPopup / datcase$nflipCard is high
# - Wandering / Flitting:
#   * Proportion of moves is high
#   * Duration per case is low:
#     datcase$duration / mean(datcase$duration) or median(datcase$duration)
#   * Duration per artwork is low: "ave_duration_item" / datcase$duration

# One row per case. Inside with(), the bare names are columns of datcase;
# base::length() is spelled out because datcase also has a column `length`.
dattree <- with(datcase, data.frame(
  case = case,
  # share of all items in `dat` that the case touched
  PropItems = nitems / base::length(unique(dat$item)),
  # share of events that open a topic or a popup
  SearchInfo = (nopenTopic + nopenPopup) / length,
  # share of events that are moves
  PropMoves = nmove / length,
  # distinct items per distinct path
  PathLinearity = nitems / npaths,
  # distinct paths per event
  Singularity = npaths / length
))

# The centrality values are not recomputed on every run; they are read from a
# cached file. To regenerate the cache, run the two commented lines once:
# centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat)
# save(centrality, file = "results/haum/tmp_centrality.RData")
load("results/haum/tmp_centrality.RData")

# The cache is matched to dattree by position only. Stop if it does not have
# one value per case, so a stale cache cannot be recycled silently into the
# data frame.
stopifnot(
  "cached centrality does not have one value per case in dattree" =
    NROW(centrality) == nrow(dattree)
)

dattree$BetweenCentrality <- centrality

# Average duration per item: compute the time span of every path first,
# then average those spans within each case.
dat_split <- split(
  dat[, c("item", "case", "path", "timeMs.start", "timeMs.stop")],
  ~ path
)
dat_list <- pbapply::pblapply(dat_split, time_minmax_ms)
dat_minmax <- dplyr::bind_rows(dat_list)

# One row per path: start, end, owning case and resulting duration
path_time <- aggregate(min_time ~ path, dat_minmax, unique)
path_time$max_time <- aggregate(max_time ~ path, dat_minmax, unique,
                                na.action = NULL)$max_time
path_time$case <- aggregate(case ~ path, dat_minmax, unique)$case
path_time$duration <- path_time$max_time - path_time$min_time

# Mean path duration per case
dattree$AvDurItem <- aggregate(duration ~ case, path_time, mean)$duration

rm(path_time)

# Indicator variable if table was used as info terminal only
dattree$InfocardOnly <- factor(
  datcase$infocardOnly,
  levels = 0:1,
  labels = c("no", "yes")
)

# Add navigation pattern to dattree; loosely based on Bousbia et al. (2009):
#   PathLinearity >  0.8                              -> "Scholar"
#   PathLinearity <= 0.8 and BetweenCentrality >= 0.5 -> "Star"
#   otherwise                                         -> "Dispersion"
# A missing value in a deciding quantity gives NA.
dattree$Pattern <- factor(
  ifelse(
    dattree$PathLinearity > 0.8,
    "Scholar",
    ifelse(dattree$BetweenCentrality >= 0.5, "Star", "Dispersion")
  )
)

# Rescaled copy of the average item duration, via normalize()
dattree$AvDurItemNorm <- normalize(dattree$AvDurItem)

#--------------- (4) Export data frames ---------------

# Keep the filtered event log and both case-level data frames together
save(dat, datcase, dattree, file = "results/haum/dataframes_case_2019.RData")

# Semicolon-separated, unquoted text exports of the two case-level tables
write.table(datcase,
            file = "results/haum/datcase.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

# Write dattree here (the previous version wrote datcase into dattree.csv)
write.table(dattree,
            file = "results/haum/dattree.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)