43 lines
1.4 KiB
R
43 lines
1.4 KiB
R
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light")
|
|
# Load the event log and prepare the objects filled in further down.
#
# The workspace is intentionally not cleared with rm(list = ls()): every
# object this script works with is (re)assigned below, so wiping the caller's
# global environment would only destroy unrelated objects.
dat0 <- read.table("../../event_logfiles.csv", sep = ";", header = TRUE)

if (is.null(dat0$artwork)) {
  stop("event_logfiles.csv has no 'artwork' column", call. = FALSE)
}

# Zero-pad the artwork ids to three digits; the padded ids are later used as
# folder names and file-name prefixes.
dat0$artwork <- sprintf("%03d", dat0$artwork)

# artwork names
artworks <- sort(unique(dat0$artwork))

# create data frame with file names and topics for each artwork
dat <- NULL        # one row per card file, appended per artwork
file_order <- NULL # card sources in the order the index files list them
|
|
|
|
# For every artwork folder: list its card files, read the topic declared in
# each file, and record which files the folder's index.xml refers to.
# Appends one row per card file to `dat` and the index entries (in the order
# they appear in the index files) to `file_order`.
card_rows <- vector("list", length(artworks))

for (i in seq_along(artworks)) {
  artwork <- artworks[[i]]

  fnames <- dir(
    path = artwork, pattern = paste0(artwork, "_"), full.names = TRUE
  )

  # Every card file must declare exactly one topic. Without this check a file
  # with zero or several '<card type=' lines shifts the topics against the
  # file names, and data.frame() may recycle them without complaint.
  topic <- vapply(fnames, function(fname) {
    type_line <- grep("^<card type=", trimws(readLines(fname)), value = TRUE)
    if (length(type_line) != 1L) {
      stop("expected exactly one '<card type=' line in ", fname,
           ", found ", length(type_line), call. = FALSE)
    }
    gsub("^<card type=.(.*).>$", "\\1", type_line)
  }, character(1), USE.NAMES = FALSE)

  # Card sources listed in this artwork's index file, in document order
  index <- paste(artwork, "index.xml", sep = "/")
  src_lines <- grep("^<card src=", trimws(readLines(index)), value = TRUE)
  file_order <- c(file_order, gsub("^<card src=.(.*)./>$", "\\1", src_lines))

  # Compares the paths returned by dir() (folder/file) with the src values;
  # assumes index.xml stores sources in that same form -- TODO confirm.
  in_index <- fnames %in% file_order

  # rep() keeps the frame valid (zero rows) when a folder has no card files
  card_rows[[i]] <- data.frame(
    artwork = rep(artwork, length(fnames)),
    file_name = fnames,
    in_index = in_index,
    topic = topic
  )
}

# Bind once instead of growing the data frame inside the loop
dat <- rbind(dat, do.call(rbind, card_rows))
|
|
|
|
# Quick sanity check: number of card files per artwork and per topic
table(dat[["artwork"]])
table(dat[["topic"]])
|
|
|
|
# Take only the cards that are actually displayed (i.e. referenced in an
# index.xml file), sort them in the order given by the index files, number
# them within each artwork and write the result to topics.csv.

# Drop the helper column by name rather than by position
dat2 <- dat[dat$in_index, names(dat) != "in_index"]

# Rank each remaining file by its position in `file_order`. Indexing the rows
# with order(file_order, ...) instead would apply the inverse permutation
# (correct only for alphabetical or pairwise-swapped index orders) and would
# fail whenever `file_order` and `dat2` differ in length.
dat2 <- dat2[order(match(dat2$file_name, file_order)), ]

# Running position of every card within its artwork. ave() always yields a
# vector of length nrow(dat2); sapply() over the counts would simplify to a
# matrix as soon as all artworks have the same number of cards.
dat2$index <- ave(seq_len(nrow(dat2)), dat2$artwork, FUN = seq_along)

write.table(dat2, file = "../../topics.csv", sep = ";", row.names = FALSE)
|
|
|