mtt_haum/code/02_glossar_artworks.R

38 lines
1.1 KiB
R

# Build a dictionary that maps every glossary entry to the artworks whose
# content files mention it, and save it as `glossar_dict`.
#
# Input:  ../../rawdata_logfiles.csv (semicolon separated, with header); only
#         the columns `artwork` and `popup` are used here.
# Output: ../../glossar_dict.RData holding the data frame `glossar_dict` with
#         the columns `glossar_file` (character) and `artwork` (list column;
#         one character vector of artwork names per glossary entry).
#
# All relative paths assume that the working directory is the folder that
# contains one sub-directory per artwork. Presumably this is the folder the
# original author switched to (confirm before running):
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light")

dat0 <- read.table("../../rawdata_logfiles.csv", sep = ";",
                   header = TRUE)

# artwork names: every logged artwork except the "glossar" pseudo-artwork
artworks <- as.character(unique(na.omit(dat0$artwork)))
artworks <- artworks[artworks != "glossar"]

# Glossary events are logged with artwork == "glossar"; their `popup` value is
# the string that is searched for in the artwork content files.
dat <- subset(dat0, dat0$artwork == "glossar")
glossar_files <- as.character(unique(dat$popup))
# An NA pattern would make the `any(grepl(...))` test below undefined and an
# empty pattern matches every line, so neither can be looked up meaningfully.
glossar_files <- glossar_files[!is.na(glossar_files) & nzchar(glossar_files)]

# Read the content of every artwork exactly once (instead of once per glossary
# entry). Only files whose name contains "<artwork>_" are considered; the name
# is matched literally so that regex metacharacters in it are harmless.
contents <- lapply(artworks, function(artwork) {
  fnames <- dir(path = artwork)
  fnames <- fnames[grepl(paste0(artwork, "_"), fnames, fixed = TRUE)]
  as.character(
    unlist(lapply(file.path(artwork, fnames), readLines), use.names = FALSE)
  )
})

# For every glossary entry collect the artworks that mention it in at least
# one of their files. `fixed = TRUE` treats the entry as a literal string, so
# e.g. the "." of a file extension no longer matches arbitrary characters.
hits <- lapply(glossar_files, function(glossar_file) {
  found <- vapply(
    contents,
    function(lines) any(grepl(glossar_file, lines, fixed = TRUE)),
    logical(1)
  )
  artworks[found]
})

# Long format: one row per (glossary entry, artwork) pair, ordered by glossary
# entry and then by artwork, built in one go rather than via rbind() in a loop.
x <- data.frame(
  glossar_file = rep(glossar_files, lengths(hits)),
  artwork = as.character(unlist(hits, use.names = FALSE)),
  stringsAsFactors = FALSE
)
head(x, 20)

# Collapse to one row per glossary entry. split() always returns a list, so
# `artwork` is a list column regardless of how many artworks each entry has,
# and an empty `x` simply yields an empty dictionary.
artworks_by_file <- split(x$artwork, x$glossar_file)
glossar_dict <- data.frame(glossar_file = names(artworks_by_file),
                           stringsAsFactors = FALSE)
glossar_dict$artwork <- artworks_by_file

save(glossar_dict, file = "../../glossar_dict.RData")
# TODO: Save in interoperable format