# mtt_haum/code/01_preprocessing_haum.R
#
# 39 lines
# 1.2 KiB
# R

# Preprocessing pipeline: parse the raw log files, convert them to event
# logs, add topic information, and export both stages as CSV files.

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
#library(mtt)
devtools::load_all("../../../../software/mtt")

# Time stamp of this run; it is appended to the names of all exported files.
now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
#now <- "2023-09-23_01-31-30"

# All paths are relative to the working directory. Trailing slashes are kept
# because the paths are passed unchanged to the functions below.
data_dir <- "../data/haum/"
path <- paste0(data_dir, "LogFiles/")
xmlpath <- paste0(data_dir, "ContentEyevisit/eyevisit_cards_light/")

# Fail early: dir() silently returns character(0) for a missing or empty
# directory (e.g., when the working directory is wrong).
folders <- dir(path)
if (length(folders) == 0L) {
  stop("No entries found in log file directory '", path, "'", call. = FALSE)
}

# parse raw log files
datraw <- parse_logfiles(folders, path)

# export raw data
write.table(datraw,
            paste0(data_dir, "raw_logfiles_", now, ".csv"),
            sep = ";", row.names = FALSE)
#save(datraw, file = paste0("../data/haum/datraw_", now, ".RData"))
#load("../data/haum/datraw_2023-09-23_01-31-30.RData")

# convert to log events
datlogs <- create_eventlogs(datraw, xmlpath = xmlpath)

# Artworks that occur in the event logs, without missing values.
artworks <- unique(na.omit(datlogs$artwork))
# NOTE(review): "504" and "505" are excluded before the topic extraction,
# presumably because no usable XML file exists for them -- confirm.
artworks <- artworks[!artworks %in% c("504", "505")]

# One XML file per remaining artwork, named "<artwork>.xml", read from xmlpath.
topics <- extract_topics(artworks,
                         xmlfiles = paste0(artworks, ".xml"),
                         xmlpath = xmlpath)
datlogs_topics <- add_topic(datlogs, topics = topics)

# export event logs including topics
write.table(datlogs_topics,
            paste0(data_dir, "event_logfiles_", now, ".csv"),
            sep = ";", row.names = FALSE)