# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/")
|
||
|
|
||
|
## Read data for holidays

# Holiday table; the path is relative to the current working directory.
hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE)
# Drop the column that read.table() names `X.br.` (presumably a stray `<br>`
# in the CSV header -- TODO confirm).
hd0$X.br. <- NULL

# Keep only the rows whose state abbreviation is "NI".
hd <- hd0[hd0$Abkuerzung == "NI", ]
# The renaming below is positional and relies on exactly four columns.
stopifnot(ncol(hd) == 4L)
names(hd) <- c("state", "stateCode", "date", "holiday")
# Store the key as Date, not POSIXct: the event log is merged on a Date
# column further down, and merge() looks keys up with match(), which does not
# equate a Date (days since epoch) with a POSIXct (seconds since epoch). With
# a POSIXct key the holiday columns would end up NA for every event.
hd$date <- as.Date(hd$date)
|
||
|
|
||
|
## Read data for school vacations

# Source: https://ferien-api.de/#holidaysPerStateAndYear
# Data extracted (on Linux) via:
# curl https://ferien-api.de/api/v1/holidays/NI > schulferien_NI.json

# The JSON file was converted to CSV once with the code below (kept for
# reference):
#
# library(jsonlite)
#
# dat <- read_json("data/metadata/schulferien_NI.json", simplify = TRUE)
# dat$slug <- NULL
#
# dat$name <- paste0(gsub("^(.*).niedersachsen.*", "\\1", dat$name),
#                    gsub("^.*niedersachsen [0-9]{4}(.*)", "\\1",
#                         dat$name))
#
# write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
#             row.names = FALSE, quote = FALSE)
|
||
|
|
||
|
# Read the two school vacation tables (2016-2018 and 2019-2025) and stack
# them into one table with one row per vacation period.
sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
                  header = TRUE)
sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
                  header = TRUE)
sf <- rbind(sf1, sf2)
sf$start <- as.Date(sf$start)
sf$end   <- as.Date(sf$end)

# seq() below needs non-missing, ordered bounds; fail early and explicitly
# instead of with a less obvious error from inside seq().
stopifnot(!anyNA(sf$start), !anyNA(sf$end), all(sf$end >= sf$start))

# Expand every vacation period into one row per calendar day so that it can
# be joined to the event log by date. The per-period pieces are collected in a
# list and bound once, instead of growing a data frame with rbind() in a loop.
# With zero periods the result is NULL.
sfdat <- do.call(rbind, lapply(seq_len(nrow(sf)), function(i) {
  data.frame(date = seq(sf$start[i], sf$end[i], by = 1),
             vacations = sf$name[i],
             stateCodeVacations = sf$stateCode[i])
}))
|
||
|
|
||
|
# TODO: How should stateCode be handled? Certain types of data sets will
# contain several state codes. Not important here, since only NI is used.
|
||
|
|
||
|
# Load the (small) event log data set.
dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
                  sep = ";", header = TRUE)
# Parse the timestamps; without a `tz` argument as.POSIXct() interprets the
# strings in the session time zone.
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop  <- as.POSIXct(dat$date.stop)
# Zero-pad the artwork id to three digits ("%03d" requires a numeric column).
dat$artwork <- sprintf("%03d", dat$artwork)
# Calendar day of the event start, used as the join key for the metadata.
# as.Date() on POSIXct defaults to tz = "UTC", which would move events that
# start shortly after local midnight to the previous day; tz = "" keeps the
# same (session) time zone that was used for parsing above.
dat$date <- as.Date(dat$date.start, tz = "")
|
||
|
|
||
|
## Add metadata

# Both joins are left joins on `date`: every event log row is kept, and rows
# without a matching holiday / vacation day get NA in the added columns.

# holidays
dat1 <- merge(dat, hd, by.x = "date", by.y = "date",
              all.x = TRUE, all.y = FALSE)
# school vacations
dat2 <- merge(dat1, sfdat, by.x = "date", by.y = "date",
              all.x = TRUE, all.y = FALSE)
|
||
|
|
||
|
## Export data

# Write the event log enriched with holiday and vacation columns as a
# semicolon-separated file without row names.
write.table(
  x = dat2,
  file = "data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
  row.names = FALSE,
  sep = ";"
)
|
||
|
|
||
|
# TODO: Maybe add infos about artworks?
|
||
|
|