diff --git a/code/01_preprocessing_haum.R b/code/01_preprocessing_haum.R index a840b44..8c31476 100644 --- a/code/01_preprocessing_haum.R +++ b/code/01_preprocessing_haum.R @@ -1,3 +1,19 @@ +# 01_preprocessing_haum.R +# +# content: (1) Parse raw log files +# (2) Create event logs +# (3) Add meta data +# +# input: raw log files from ../data/haum/*.log +# ../data/metadata/feiertage.csv +# ../data/metadata/schulferien_2016-2018_NI.csv +# ../data/metadata/schulferien_2019-2025_NI.csv +# output: raw_logfiles_.csv +# event_logfiles_.csv +# event_logfiles_.csv +# +# last mod: 2023-10-23, NW + # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") #library(mtt) @@ -6,20 +22,19 @@ devtools::load_all("../../../../software/mtt") now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S") #now <- "2023-09-23_01-31-30" -#--------------- Parse raw log files --------------- +#--------------- (1) Parse raw log files --------------- path <- "../data/haum/LogFiles/" folders <- dir(path) -#datraw <- parse_logfiles(folders, path) -datraw <- read.table("../data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv", - sep = ";", header = TRUE) +datraw <- parse_logfiles(folders, path) -# export data -# write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"), -# sep = ";", row.names = FALSE) +## Export data -#--------------- Create event logs --------------- +write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"), + sep = ";", row.names = FALSE) + +#--------------- (2) Create event logs --------------- datlogs <- create_eventlogs(datraw, xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/") @@ -30,12 +45,7 @@ topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"), datlogs_topics <- add_topic(datlogs, topics = topics) - -# export data -# write.table(datlogs_topics, paste0("../data/haum/event_logfiles_", now, ".csv"), -# sep = ";", row.names = FALSE) - -#--------------- Add meta data --------------- +#--------------- (3) Add meta data --------------- ## Read data for holiday @@ -83,21 +93,15 @@ for (i in seq_len(nrow(sf))) { # TODO: How to handle stateCode? There will be several for certain types of # data sets... Not important here, since I only do NI. -## Add metadata +## Merge data -# holidays dat1 <- merge(datlogs_topics, hd, by.x = "date.start", by.y = "date", all.x = TRUE) -# school vacations dat2 <- merge(dat1, sfdat, by.x = "date.start", by.y = "date", all.x = TRUE) ## Export data -write.table(dat2, - file = paste0("../data/haum/event_logfiles_small_metadata_", now, ".csv"), +write.table(dat2, paste0("../data/haum/event_logfiles_", now, ".csv"), sep = ";", row.names = FALSE) # TODO: Maybe add infos about artworks? - - - diff --git a/code/02_metadata_haum.R b/code/02_metadata_haum.R deleted file mode 100644 index 5e1f730..0000000 --- a/code/02_metadata_haum.R +++ /dev/null @@ -1,72 +0,0 @@ -# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/") - -## Read data for holiday - -hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE) -hd0$X.br. <- NULL - -hd <- hd0[hd0$Abkuerzung == "NI", ] -names(hd) <- c("state", "stateCode", "date", "holiday") -hd$date <- as.POSIXct(hd$date) - -## Read data for school vacations - -# https://ferien-api.de/#holidaysPerStateAndYear -# Data extracted (on Linux) via: -# curl https://ferien-api.de/api/v1/holidays/NI > schulferien_NI.json - -# library(jsonlite) -# -# dat <- read_json("data/metadata/schulferien_NI.json", simplify = TRUE) -# dat$slug <- NULL -# -# dat$name <- paste0(gsub("^(.*).niedersachsen.*", "\\1", dat$name), -# gsub("^.*niedersachsen [0-9]{4}(.*)", "\\1", -# dat$name)) -# -# write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";", -# row.names = FALSE, quote = FALSE) - -sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";", - header = TRUE) -sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";", - header = TRUE) -sf <- rbind(sf1, sf2) -sf$start <- as.Date(sf$start) -sf$end <- as.Date(sf$end) - -sfdat <- NULL - -for (i in seq_len(nrow(sf))) { - date <- seq(sf$start[i], sf$end[i], by = 1) - sfdat <- rbind(sfdat, data.frame(date, vacations = sf$name[i], - stateCodeVacations = sf$stateCode[i])) -} - -# TODO: How to handle stateCode? There will be several for certain types of -# data sets... Not important here, since I only do NI. - -# load (small) event log data set -#dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv", -dat <- read.table("data/haum/event_logfiles_small_2023-10-19_18-25-26.csv", - sep = ";", header = TRUE) -dat$date.start <- as.POSIXct(dat$date.start) -dat$date.stop <- as.POSIXct(dat$date.stop) -dat$artwork <- sprintf("%03d", dat$artwork) -dat$date <- as.Date(dat$date.start) - -## Add metadata - -# holidays -dat1 <- merge(dat, hd, by = "date", all.x = TRUE) -# school vacations -dat2 <- merge(dat1, sfdat, by = "date", all.x = TRUE) - -## Export data - -write.table(dat2, - file = "data/haum/event_logfiles_small_metadata_2023-10-19_18-25-26.csv", - sep = ";", row.names = FALSE) - -# TODO: Maybe add infos about artworks? - diff --git a/code/read_trans_matrix.R b/code/read_trans_matrix.R index fa09b83..aa67370 100644 --- a/code/read_trans_matrix.R +++ b/code/read_trans_matrix.R @@ -33,5 +33,5 @@ points(y.stop ~ x.stop, datmove, xlim = c(0, 3840), ylim = c(0, 2160), col = "bl points(ty ~ tx, dat, xlim = c(0, 3840), ylim = c(0, 2160), col = "red") legend("topleft", c("start", "stop", "trans"), col = c("black", "blue", "red"), pch = 21) -# --> does not seem to be a good way to compare if trans values ae valid +# --> does not seem to be a good way to compare *if* trans values are valid