59 lines
1.9 KiB
R
59 lines
1.9 KiB
R
#' ---
#' title: "Open Questions -- Card indices"
#' author: "Nora Wickelmaier"
#' date: "`r Sys.Date()`"
#' output:
#'   html_document:
#'     number_sections: true
#'     toc: true
#' ---
|
|
|
|
#+ include = FALSE
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
|
# Load the semicolon-separated event log; the first row holds column names.
dat <- read.table("../data/event_logfiles.csv", header = TRUE, sep = ";")

# Convert the start/stop time stamps to POSIXct date-times.
dat[["date.start"]] <- as.POSIXct(dat[["date.start"]])
dat[["date.stop"]] <- as.POSIXct(dat[["date.stop"]])

# Store artwork numbers as zero-padded, three-character strings
# (e.g. 1 -> "001").
dat[["artwork"]] <- sprintf("%03d", dat[["artwork"]])
|
|
|
|
#' The following table shows an overview of the card indices. The indices
|
|
#' should have values between 0 and 5. It is unclear what the numbers mean.
|
|
|
|
# Frequency of each card index across the whole log (missing values are
# not counted by table()'s default settings).
table(dat[["card"]])
|
|
|
|
#' Number of cards for each artwork in the data set (subset from 2016)
|
|
|
|
# Distinct artwork identifiers found in the log, in sorted order.
artworks <- sort(unique(dat$artwork))

# Number of distinct card values logged for artwork `x`
# (table() leaves out missing values by default).
count <- function(x) {
  length(table(dat[which(dat$artwork == x), "card"]))
}

# Highest card index logged for artwork `x`, ignoring missing values.
max_index <- function(x) {
  max(dat[which(dat$artwork == x), "card"], na.rm = TRUE)
}

# vapply() rather than sapply(): the result type is fixed up front, so an
# empty `artworks` yields a zero-length vector instead of an empty list and
# an unexpected return value fails loudly. As with sapply(), the results are
# named by artwork because `artworks` is a character vector.
num_cards <- vapply(artworks, count, integer(1))
# numeric(1) accepts integer as well as double maxima (including the -Inf
# that max() returns when an artwork has no non-missing card value).
highest_index <- vapply(artworks, max_index, numeric(1))
|
|
|
|
#' Check how many XML-files for cards are present
|
|
|
|
# Directory that holds one sub-folder of card files per artwork.
path <- "../data/ContentEyevisit/eyevisit_cards_light"

# For every artwork, count the files in its sub-folder whose name matches
# "<artwork>_" (the pattern is a regular expression and is not anchored).
# vapply() replaces the former for loop: the result is no longer grown
# element by element, it is an integer vector even when `artworks` is empty,
# and no loop variables are left behind in the workspace.
# USE.NAMES = FALSE keeps the result unnamed, as the loop version was.
num_files <- vapply(
  artworks,
  function(artwork) {
    fnames <- list.files(
      path = file.path(path, artwork),
      pattern = paste0(artwork, "_")
    )
    length(fnames)
  },
  integer(1),
  USE.NAMES = FALSE
)
|
|
|
|
#' The table shows that each artwork has at most 6 cards (as expected).
|
|
#' This is a subset of the data, so not all cards have been opened.
|
|
|
|
# Per-artwork overview: number of distinct cards logged, highest card index
# logged, and number of card files found on disk.
cards <- data.frame(
  artwork = artworks,
  num_cards,
  highest_index,
  num_files
)

# diff = number of card files minus the highest logged card index.
# NOTE(review): if card indices start at 0, an artwork with n files has a
# largest valid index of n - 1, so diff == 0 would already point past the
# last file -- confirm how the indices are meant to be read.
cards$diff <- cards$num_files - cards$highest_index

cards
|
|
|
|
#' There are 8 or more files for a couple of artworks:
|
|
|
|
# Artworks with at least 8 card files; which() drops rows where the
# comparison is NA, just as subset() does.
cards[which(cards$num_files >= 8), ]
|
|
|
|
#' It is possible that the number indicates the index of the file
#' and not the actual card that was displayed. BUT: In many cases, there
#' are only 6 (or fewer) files, but a higher index is present...
|
|
|
|
# Artworks whose highest logged card index exceeds the number of card files;
# which() drops rows where the comparison is NA, just as subset() does.
cards[which(cards$diff < 0), ]
|
|
|