# Preprocessing of the 8o8m log files:
#   1. parse the raw log files and export them as CSV,
#   2. drop rows without an artwork,
#   3. convert the remaining rows to event logs and export them as CSV,
#   4. extract the topics for all artworks from the English XML files.

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

#library(mtt)
devtools::load_all("../../../../software/mtt")

# Time stamp that tags all files exported by this run
now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")

# Input and output locations, relative to the working directory. The
# trailing slashes of the input paths are kept because the strings are
# handed on unchanged to the mtt functions.
logpath <- "../data/8o8m/LogFiles/"
xmlpath <- "../data/8o8m/Content8o8m/"
outpath <- "results/8o8m"

# write.table() does not create missing directories, so make sure the
# output folder exists before the (potentially long) parsing step starts.
dir.create(outpath, recursive = TRUE, showWarnings = FALSE)

folders <- dir(logpath)
#folders <- "Berlin"

# parse raw log files
datraw <- parse_logfiles(folders, path = logpath)
#artworks <- unique(na.omit(datraw$artwork))

# export data
write.table(datraw,
            file.path(outpath, paste0("raw_logfiles_", now, ".csv")),
            sep = ";", row.names = FALSE)

# Remove rows without an artwork before creating the event logs
#datraw[is.na(datraw$artwork), ]
datraw <- datraw[!is.na(datraw$artwork), ]
# TODO: Why is this happening?

# convert to log events
datlogs <- create_eventlogs(datraw, xmlpath = xmlpath)

# Topics are looked up in one English XML file per artwork
# ("<artwork>_en.xml")
artworks <- unique(datlogs$artwork)
topics <- extract_topics(artworks,
                         xmlfiles = paste0(artworks, "_en.xml"),
                         xmlpath = xmlpath)
# TODO: What is wrong with the German XML files that the topics are
# extracted like this? (It works fine for the English versions...)

# export data
write.table(datlogs,
            file.path(outpath, paste0("event_logfiles_", now, ".csv")),
            sep = ";", row.names = FALSE)