# Build a lookup of which artworks reference which glossary entries.
#
# Script path in the repository: code/02_glossar_artworks.R
#
# Inputs (paths are relative to the working directory):
#   ../../rawdata_logfiles.csv   semicolon-separated table with a header row;
#                                the columns `artwork` and `popup` are used
#   <artwork>/                   one directory per artwork; every file in it
#                                whose name matches "<artwork>_" is searched
# Output:
#   ../../glossar_artwork.RData  holds the data frame `glossar_dict` with the
#                                columns `glossar_file` and `artwork`
#
# NOTE(review): the relative paths presumably expect the working directory to
# be the "eyevisit_cards_light" folder named in the commented-out setwd() call
# below -- confirm before running.

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light")

dat0 <- read.table("../../rawdata_logfiles.csv", sep = ";", header = TRUE)

# artwork names: every non-missing value of `artwork` except "glossar"
artworks <- setdiff(
  as.character(dat0$artwork[!is.na(dat0$artwork)]),
  "glossar"
)

# Rows logged for the glossary; their `popup` values are the glossary files
dat <- subset(dat0, artwork == "glossar")

glossar_files <- unique(as.character(dat$popup))
# A missing popup would make the match below return NA (and abort an `if`),
# and an empty one would match every line, so both are dropped.
glossar_files <- glossar_files[!is.na(glossar_files) & nzchar(glossar_files)]

# Read the card files of each artwork once, instead of once per glossary
# file. Each list element holds all lines of all matching files of one artwork.
artwork_lines <- lapply(artworks, function(artwork) {
  fnames <- dir(path = artwork, pattern = paste0(artwork, "_"))
  contents <- lapply(file.path(artwork, fnames), readLines)
  as.character(unlist(contents, use.names = FALSE))
})

# One row per (glossary file, artwork) pair where the glossary file name
# occurs in at least one card file of the artwork. Rows are ordered by
# glossary file first, then by artwork, in order of first appearance.
x <- do.call(rbind, lapply(glossar_files, function(glossar_file) {
  # fixed = TRUE: the file name is matched literally, not as a regular
  # expression (otherwise "." would match any character).
  hit <- vapply(
    artwork_lines,
    function(lines) any(grepl(glossar_file, lines, fixed = TRUE)),
    logical(1)
  )
  data.frame(
    glossar_file = rep(glossar_file, sum(hit)),
    artwork = artworks[hit],
    stringsAsFactors = FALSE
  )
}))

if (is.null(x) || nrow(x) == 0) {
  warning("No glossary file was found in any artwork card file.",
          call. = FALSE)
}

head(x, 20)

# Collapse to one row per glossary file. tapply() returns a plain vector when
# every glossary file belongs to exactly one artwork and a list of character
# vectors otherwise, so the type of the `artwork` column depends on the data.
glossar_dict <- as.data.frame(tapply(x$artwork, x$glossar_file, FUN = c))
names(glossar_dict) <- "artwork"
# The glossary file names arrive as row names; move them into a column
glossar_dict$glossar_file <- rownames(glossar_dict)
rownames(glossar_dict) <- NULL
glossar_dict <- glossar_dict[, c("glossar_file", "artwork")]

save(glossar_dict, file = "../../glossar_artwork.RData")
# TODO: Save in interoperable format