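Fixed artwork extraction: strip „“ quotation marks and <br/> tags from extracted text; write the overview to ../haum/ and re-enable the "Info:" line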

This commit is contained in:
Nora Wickelmaier 2023-09-14 12:29:04 +02:00
parent 4d783e1f64
commit e416974906
2 changed files with 6 additions and 3 deletions

View File

@@ -377,7 +377,6 @@ add_topic <- function(data, topics) {
extract_artworks <- function(artworks, files = paste0(artworks, ".xml"), extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
path = path) { path = path) {
out <- NULL out <- NULL
i <- 1 i <- 1
@@ -397,6 +396,10 @@ extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
names(xmllist) <- varnames names(xmllist) <- varnames
xmllist[which(sapply(xmllist, is.null))] <- NA xmllist[which(sapply(xmllist, is.null))] <- NA
} }
# remove ugly quotes
xmllist <- lapply(xmllist, function(x) gsub("„|“", "", x))
# remove HTML tags
xmllist <- lapply(xmllist, function(x) gsub("<br/>", " ", x))
xmldat <- as.data.frame(xmllist) xmldat <- as.data.frame(xmllist)
xmldat$artwork <- artwork xmldat$artwork <- artwork
# trim white space from strings # trim white space from strings

View File

@@ -2,7 +2,7 @@
source("functions.R") source("functions.R")
fname <- "overview_artworks.tex" fname <- "../haum/overview_artworks.tex"
if (file.exists(fname)) file.remove(fname) if (file.exists(fname)) file.remove(fname)
path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light" path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light"
@@ -40,7 +40,7 @@ for (artwork in dat$artwork) {
writeLines("\\end{center}", fout) writeLines("\\end{center}", fout)
writeLines("", fout) writeLines("", fout)
writeLines("\\noindent", fout) writeLines("\\noindent", fout)
#writeLines(paste0("Info: ", dat[dat$artwork == artwork, "misc"]), fout) writeLines(paste0("Info: ", dat[dat$artwork == artwork, "misc"]), fout)
writeLines("\\newpage", fout) writeLines("\\newpage", fout)
} }