From a130ee596d35a4060a967cdb0b70e85542809d91 Mon Sep 17 00:00:00 2001 From: nwickel Date: Fri, 22 Sep 2023 16:16:20 +0200 Subject: [PATCH] Updated analysis files for haum and 8o8m --- code/02_preprocessing.R | 108 ------------------ code/02_preprocessing_8o8m.R | 22 ++++ code/02_preprocessing_haum.R | 23 ++++ code/03_modeling.R | 5 +- code/overview_artworks_8o8m.R | 50 ++++++++ ...ew_artworks.R => overview_artworks_haum.R} | 2 +- 6 files changed, 99 insertions(+), 111 deletions(-) delete mode 100644 code/02_preprocessing.R create mode 100644 code/02_preprocessing_8o8m.R create mode 100644 code/02_preprocessing_haum.R create mode 100644 code/overview_artworks_8o8m.R rename code/{overview_artworks.R => overview_artworks_haum.R} (95%) diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R deleted file mode 100644 index 2c67729..0000000 --- a/code/02_preprocessing.R +++ /dev/null @@ -1,108 +0,0 @@ -# TODO: This script is obsolete and needs to be updated! - -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") - -#source("functions.R") -devtools::load_all("../../../../software/mtt") - -small <- TRUE - -now <- Sys.time() - -# Read data ############################################################## -cat("########## Reading in data... ##########", "\n") - -if (small) { - dat0 <- read.table("../data/haum/rawdata_logfiles_small.csv", sep = ";", - header = TRUE) -} else { - dat0 <- read.table("../data/haum/rawdata_logfiles.csv", sep = ";", - header = TRUE) -} -dat0$date <- as.POSIXct(dat0$date) -dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) - -# Remove irrelevant events -dat <- subset(dat0, !(dat0$event %in% c("Start Application", - "Show Application"))) - -save(dat, file = paste0("tmp/dat_", ifelse(small, "small_", "full_"), - format(now, "%Y-%m-%d_%H-%M-%S"), ".RData")) - -# Add trace variable ##################################################### -cat("########## Adding trace variable... ##########", "\n") -dat1 <- add_trace(dat) - -save(dat1, file = paste("tmp/dat1", ifelse(small, "small_", "full_"), - format(now, "%Y-%m-%d_%H-%M-%S"), ".RData")) - -# Close events -cat("########## Closing events... ##########", "\n") -c1 <- close_events(dat1, "move") -cat("## --> move events closed.", "\n") -c2 <- close_events(dat1, "flipCard") -cat("## --> flipCard events closed.", "\n") -c3 <- close_events(dat1, "openTopic") -cat("## --> openTopic events closed.", "\n") -c4 <- close_events(dat1, "openPopup") -cat("## --> openPopup events closed.", "\n") -dat2 <- rbind(c1, c2, c3, c4) - -dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ] - -# Remove durations when event spans more than one log file, since they are -# not interpretable -#dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA - -# Remove all events that do not have a `date.start` -dat2 <- dat2[!is.na(dat2$date.start), ] -rownames(dat2) <- NULL - -save(dat2, file = paste("tmp/dat2", ifelse(small, "small_", "full_"), - format(now, "%Y-%m-%d_%H-%M-%S"), ".RData")) - -# Add case variable ###################################################### -cat("########## Adding case and eventId variables... ##########", "\n") -dat3 <- add_case(dat2) - -# Add event ID ########################################################### -dat3$eventId <- seq_len(nrow(dat3)) -dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case", - "trace", "glossar", "event", "artwork", - "date.start", "date.stop", "timeMs.start", - "timeMs.stop", "duration", "topicNumber", "popup", - "x.start", "y.start", "x.stop", "y.stop", - "distance", "scale.start", "scale.stop", - "scaleSize", "rotation.start", "rotation.stop", - "rotationDegree")] - -save(dat3, file = paste("tmp/dat3", ifelse(small, "small_", "full_"), - format(now, "%Y-%m-%d_%H-%M-%S"), ".RData")) - -# Add trace for move events ############################################## -cat("\n########## Adding trace variable for move events... ##########", "\n") -dat4 <- add_trace_moves(dat3) - -save(dat4, file = paste("tmp/dat4", ifelse(small, "small_", "full_"), - format(now, "%Y-%m-%d_%H-%M-%S"), ".RData")) - -# Add topics: file names and topics ###################################### -cat("########## Adding information about topics... ##########", "\n") -artworks <- unique(dat4$artwork) -# remove artworks without XML information -artworks <- artworks[!artworks %in% c("504", "505")] -topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"), - path = "../data/haum/ContentEyevisit/eyevisit_cards_light/") - -dat5 <- add_topic(dat4, topics = topics) - -save(dat5, file = paste("tmp/dat5", ifelse(small, "small_", "full_"), - format(now, "%Y-%m-%d_%H-%M-%S"), ".RData")) - -# TODO: Replace artwork with informative strings - -# Export data ############################################################ -cat("########## Exporting data frame with event logs... ##########", "\n") -write.table(dat5, "../data/haum/event_logfiles.csv", sep = ";", - row.names = FALSE) - diff --git a/code/02_preprocessing_8o8m.R b/code/02_preprocessing_8o8m.R new file mode 100644 index 0000000..434a761 --- /dev/null +++ b/code/02_preprocessing_8o8m.R @@ -0,0 +1,22 @@ +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") + +#library(mtt) +devtools::load_all("../../../../software/mtt") + +now <- format(Sys.time(), , "%Y-%m-%d_%H-%M-%S") + +folders <- dir("../data/8o8m/LogFiles/") +#folders <- "Berlin" + +# parse raw log files +datraw <- parse_logfiles(folders, path = "../data/8o8m/LogFiles/") +artworks <- unique(na.omit(datraw$artwork)) + +# convert to log events +datlogs <- create_eventlogs(datraw, xmlfiles = paste0(artworks, "_de.xml"), + xmlpath = "../data/8o8m/Content8o8m/") + +# export data +write.table(datlogs, paste0("../data/8o8m/event_logfiles_", now, ".csv"), + sep = ";", row.names = FALSE) + diff --git a/code/02_preprocessing_haum.R b/code/02_preprocessing_haum.R new file mode 100644 index 0000000..397680b --- /dev/null +++ b/code/02_preprocessing_haum.R @@ -0,0 +1,23 @@ +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") + +#library(mtt) +devtools::load_all("../../../../software/mtt") + +now <- format(Sys.time(), , "%Y-%m-%d_%H-%M-%S") + +path <- "../data/haum/LogFiles/" + +folders <- dir(path) + +# parse raw log files +datraw <- parse_logfiles(folders, path) +artworks <- unique(na.omit(datraw$artwork)) + +# convert to log events +datlogs <- create_eventlogs(datraw, xmlfiles = paste0(artworks, "_de.xml"), + xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/") + +# export data +write.table(datlogs, paste0("../data/haum/event_logfiles_", now, ".csv"), + sep = ";", row.names = FALSE) + diff --git a/code/03_modeling.R b/code/03_modeling.R index 6093c71..1a4e0f0 100644 --- a/code/03_modeling.R +++ b/code/03_modeling.R @@ -16,7 +16,7 @@ #' # Read data -dat <- read.table("../data/haum/event_logfiles.csv", sep = ";", header = TRUE) +dat <- read.table("../data/haum/event_logfiles_small.csv", sep = ";", header = TRUE) dat$date.start <- as.POSIXct(dat$date.start) dat$date.stop <- as.POSIXct(dat$date.stop) @@ -24,7 +24,8 @@ dat$date.stop <- as.POSIXct(dat$date.stop) library(bupaverse) -names(dat)[6:7] <- c("start", "complete") +names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start", + "complete") table(table(dat$start)) # --> hmm... diff --git a/code/overview_artworks_8o8m.R b/code/overview_artworks_8o8m.R new file mode 100644 index 0000000..ebe1d0c --- /dev/null +++ b/code/overview_artworks_8o8m.R @@ -0,0 +1,50 @@ +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") + +# TODO: Write an Rmd file this way and render at the end? Then put it in a +# function, so everybody can do it??? +devtools::load_all("../../../../software/mtt") + +fname <- "../overview_artworks_8o8m.tex" +if (file.exists(fname)) file.remove(fname) + +path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/8o8m/Content8o8m" +artworks <- dir(path = path) + +dat <- extract_artworks(artworks, file = paste0(artworks, "_de.xml"), path = path) + +fout <- file(fname, "a") # open in append mode + +writeLines("\\documentclass[a4paper,12pt]{article}", fout) +writeLines("\\usepackage{graphicx}", fout) +writeLines("\\usepackage[margin = 2cm]{geometry}", fout) +writeLines("\\author{Nora Wickelmaier}", fout) +writeLines("\\title{Overview of Artworks from 8 Objects 8 Museums (8o8m)}", fout) +writeLines("\\date{\\today}", fout) +writeLines("\\begin{document}", fout) +writeLines("\\maketitle", fout) +#writeLines("\\newpage", fout) + +for (artwork in dat$artwork) { + + writeLines(paste0("\\section*{Artwork Number ", artwork, "}"), fout) + writeLines("", fout) + writeLines("\\noindent", fout) + writeLines(paste0("Artist: ", dat[dat$artwork == artwork, "artist"]), fout) + writeLines("", fout) + writeLines("\\noindent", fout) + writeLines(paste0("Title: ", dat[dat$artwork == artwork, "title"]), fout) + writeLines("", fout) + + writeLines("\\begin{center}", fout) + art_path <- paste(path, artwork, artwork, sep = "/") + writeLines(paste0("\\includegraphics[width = 12cm]{", art_path, "}"), fout) + writeLines("\\end{center}", fout) + writeLines("", fout) + writeLines("\\noindent", fout) + writeLines(paste0("Info: ", dat[dat$artwork == artwork, "misc"]), fout) + writeLines("\\newpage", fout) +} + +writeLines("\\end{document}", fout) +close(fout) + diff --git a/code/overview_artworks.R b/code/overview_artworks_haum.R similarity index 95% rename from code/overview_artworks.R rename to code/overview_artworks_haum.R index f203547..1fd34b1 100644 --- a/code/overview_artworks.R +++ b/code/overview_artworks_haum.R @@ -9,7 +9,7 @@ path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesi artworks <- dir(path = path) artworks <- artworks[artworks != "glossar"] -dat <- extract_artworks(artworks, path = path) +dat <- extract_artworks(artworks, files = paste0(artworks, ".xml"), path = path) fout <- file(fname, "a") # open in append mode