47 lines
1.4 KiB
R
47 lines
1.4 KiB
R
#' Creating data frame with artworks and topics
#'
#' Topics are extracted from XML files and written to a data frame that
#' shows which artworks belong to which topics.
#'
#' For every artwork the index file is scanned for lines starting with
#' `<card src=`; the file name following the last `/` of such a line names a
#' card file. Each card file is read from the artwork folder and its topic is
#' taken from the line starting with `<card type=`.
#'
#' @param artworks A character vector with names of the artworks. Needs to
#'   correspond to the folder names which contain the XML files.
#' @param xmlfiles Vector of names of index files, often something like
#'   `<artwork>.xml`. Need to be in the same order as artworks!
#' @param xmlpath Path to folder where XML definitions of artworks live.
#'   May be given with or without a trailing path separator.
#' @return Data frame with the columns `artwork`, `file_name` and `topic`,
#'   one row per card file, ordered by `artwork`. `topic` is `NA` for a card
#'   file that has no `<card type=` line; if a card file has several such
#'   lines, the first one is used. A data frame with zero rows is returned
#'   when there are no artworks or no cards.
#' @export
#' @examples
#' # tbd
extract_topics <- function(artworks, xmlfiles, xmlpath) {
  # Plain character vectors: drops names/factor levels so they cannot leak
  # into the row names or column types of the result.
  artworks <- as.character(artworks)
  xmlfiles <- as.character(xmlfiles)

  if (length(xmlpath) != 1L) {
    stop("`xmlpath` must be a single path.", call. = FALSE)
  }
  if (length(xmlfiles) < length(artworks)) {
    stop("`xmlfiles` must provide one index file per artwork.", call. = FALSE)
  }

  # Index and card files must be resolved against the same base directory,
  # regardless of whether the caller passed a trailing separator.
  has_sep <- !nzchar(xmlpath) || grepl("[/\\\\]$", xmlpath)
  base_dir <- if (has_sep) xmlpath else paste0(xmlpath, "/")

  # warn = FALSE only silences the "incomplete final line" noise; a missing
  # file still reports its path.
  read_trimmed <- function(path) {
    trimws(readLines(path, warn = FALSE))
  }

  # Exactly one topic per card file keeps `topic` aligned with `file_name`.
  read_topic <- function(path) {
    hits <- grep("^<card type=", read_trimmed(path), value = TRUE)
    if (length(hits) == 0L) {
      return(NA_character_)
    }
    sub("^<card type=.(.*).>$", "\\1", hits[1L])
  }

  per_artwork <- lapply(seq_along(artworks), function(i) {
    artwork <- artworks[i]
    artwork_dir <- paste0(base_dir, artwork, "/")

    card_lines <- grep("^<card src=", read_trimmed(paste0(artwork_dir, xmlfiles[i])),
                       value = TRUE)
    fnames <- sub("^<card src=.*/(.*)./>$", "\\1", card_lines)

    topic <- vapply(paste0(artwork_dir, fnames), read_topic, character(1),
                    USE.NAMES = FALSE)

    data.frame(artwork = rep(artwork, length(fnames)),
               file_name = fnames,
               topic = topic)
  })

  if (length(per_artwork) == 0L) {
    return(data.frame(artwork = character(0),
                      file_name = character(0),
                      topic = character(0)))
  }

  out <- do.call(rbind, per_artwork)
  out <- out[order(out$artwork), ]
  rownames(out) <- NULL
  out
}
|
|
|