From e416974906c819ebb64d70aaa4c332dc698e58bc Mon Sep 17 00:00:00 2001 From: nwickel Date: Thu, 14 Sep 2023 12:29:04 +0200 Subject: [PATCH] Fixed some stuff in artwork extraction --- code/functions.R | 5 ++++- code/overview_artworks.R | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/code/functions.R b/code/functions.R index e2972f8..857adec 100644 --- a/code/functions.R +++ b/code/functions.R @@ -377,7 +377,6 @@ add_topic <- function(data, topics) { extract_artworks <- function(artworks, files = paste0(artworks, ".xml"), path = path) { - out <- NULL i <- 1 @@ -397,6 +396,10 @@ extract_artworks <- function(artworks, files = paste0(artworks, ".xml"), names(xmllist) <- varnames xmllist[which(sapply(xmllist, is.null))] <- NA } + # remove ugly quotes + xmllist <- lapply(xmllist, function(x) gsub("„|“", "", x)) + # remove HTML tags + xmllist <- lapply(xmllist, function(x) gsub("
", " ", x)) xmldat <- as.data.frame(xmllist) xmldat$artwork <- artwork # trim white space from strings diff --git a/code/overview_artworks.R b/code/overview_artworks.R index 7a5c380..f203547 100644 --- a/code/overview_artworks.R +++ b/code/overview_artworks.R @@ -2,7 +2,7 @@ source("functions.R") -fname <- "overview_artworks.tex" +fname <- "../haum/overview_artworks.tex" if (file.exists(fname)) file.remove(fname) path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light" @@ -40,7 +40,7 @@ for (artwork in dat$artwork) { writeLines("\\end{center}", fout) writeLines("", fout) writeLines("\\noindent", fout) - #writeLines(paste0("Info: ", dat[dat$artwork == artwork, "misc"]), fout) + writeLines(paste0("Info: ", dat[dat$artwork == artwork, "misc"]), fout) writeLines("\\newpage", fout) }