Wrote function to extract information about artworks from XML files

This commit is contained in:
Nora Wickelmaier 2023-09-13 18:34:12 +02:00
parent 11cfa6d7aa
commit 4d783e1f64
2 changed files with 87 additions and 0 deletions

View File

@ -371,4 +371,42 @@ add_topic <- function(data, topics) {
out
}
###########################################################################
# Create data frame with information on artworks
# Reads, for every artwork, the XML file <path>/<artwork>/<file> and collects
# the header entries "artist", "title", "misc" and "description".
#
# Arguments:
#   artworks  vector of artwork identifiers; each one is used as the name of
#             the sub-directory below `path` and is stored in the `artwork`
#             column of the result
#   files     XML file name(s): either a single name that is used for every
#             artwork or one name per artwork; defaults to "<artwork>.xml"
#   path      directory that contains the artwork sub-directories; defaults
#             to the current working directory
#
# Returns a data frame with one row per artwork holding the (white space
# trimmed) header entries plus the column `artwork`; header entries that are
# absent from the XML file are NA. Returns NULL if `artworks` is empty.
extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
                             path = ".") {
  # NOTE: the former default `path = path` referred to itself and failed with
  # "promise already under evaluation" whenever `path` was not supplied.
  if (length(artworks) == 0) {
    return(NULL)
  }
  if (is.factor(artworks)) {
    artworks <- as.character(artworks)
  }
  if (!length(files) %in% c(1L, length(artworks))) {
    stop("`files` must have length 1 or the same length as `artworks`",
         call. = FALSE)
  }
  # a single file name is shared by all artworks
  files <- rep_len(files, length(artworks))
  varnames <- c("artist", "title", "misc", "description")

  rows <- lapply(seq_along(artworks), function(i) {
    artwork <- artworks[[i]]
    index <- file.path(path, artwork, files[i])
    header <- XML::xmlToList(index)$header
    # necessary for missing entries: absent header fields become NA
    xmllist <- lapply(varnames, function(v) {
      value <- header[[v]]
      if (is.null(value)) NA else value
    })
    names(xmllist) <- varnames
    xmldat <- as.data.frame(xmllist)
    xmldat$artwork <- artwork
    # trim white space from strings
    for (v in varnames) {
      xmldat[[v]] <- trimws(xmldat[[v]])
    }
    xmldat
  })

  # bind once instead of growing the data frame inside the loop
  do.call(rbind, rows)
}

49
code/overview_artworks.R Normal file
View File

@ -0,0 +1,49 @@
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

# Writes "overview_artworks.tex": a LaTeX document with one page per artwork
# showing its number, artist, title and image. The artwork information is
# read with extract_artworks() from functions.R.
source("functions.R")

# start from a fresh output file
fname <- "overview_artworks.tex"
if (file.exists(fname)) file.remove(fname)

# every entry of `path` except "glossar" is treated as an artwork
path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light"
artworks <- dir(path = path)
artworks <- artworks[artworks != "glossar"]

dat <- extract_artworks(artworks, path = path)

fout <- file(fname, "a") # open in append mode

# document preamble and title
writeLines(
  c(
    "\\documentclass[a4paper,12pt]{article}",
    "\\usepackage{graphicx}",
    "\\usepackage[margin = 2cm]{geometry}",
    "\\author{Nora Wickelmaier}",
    "\\title{Overview of Artworks from MTT in HAUM}",
    "\\date{\\today}",
    "\\begin{document}",
    "\\maketitle"
  ),
  fout
)
#writeLines("\\newpage", fout)

# one section per artwork, each followed by a page break
for (artwork in dat$artwork) {
  # the image is referenced as <path>/<artwork>/<artwork>
  art_path <- paste(path, artwork, artwork, sep = "/")
  writeLines(
    c(
      paste0("\\section*{Artwork Number ", artwork, "}"),
      "",
      "\\noindent",
      paste0("Artist: ", dat[dat$artwork == artwork, "artist"]),
      "",
      "\\noindent",
      paste0("Title: ", dat[dat$artwork == artwork, "title"]),
      "",
      "\\begin{center}",
      paste0("\\includegraphics[width = 12cm]{", art_path, "}"),
      "\\end{center}",
      "",
      "\\noindent"
    ),
    fout
  )
  #writeLines(paste0("Info: ", dat[dat$artwork == artwork, "misc"]), fout)
  writeLines("\\newpage", fout)
}

writeLines("\\end{document}", fout)
close(fout)