Removed variable stateCode from final data set

This commit is contained in:
Nora Wickelmaier 2024-01-02 15:42:48 +01:00
parent 16fa9c4f2c
commit 34f15da955
1 changed file with 9 additions and 16 deletions

View File

@ -10,9 +10,8 @@
# ../data/metadata/schulferien_2019-2025_NI.csv
# output: raw_logfiles_<timestamp>.csv
# event_logfiles_<timestamp>.csv
# event_logfiles_<timestamp>.csv
#
# last mod: 2023-10-23, NW
# last mod: 2024-01-01, NW
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
@ -42,15 +41,15 @@ write.table(datraw, paste0("results/haum/raw_logfiles_", now, ".csv"),
datlogs <- create_eventlogs(datraw,
#xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/",
glossar = FALSE, save = FALSE)
glossar = FALSE)
# 6,064 glossary entries that could not be matched have been removed
# 2,136,715 move events without any change have been removed
items <- unique(datlogs$item)
topics <- extract_topics(items, xmlfiles = paste0(items, ".xml"),
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
# items <- unique(datlogs$item)
# topics <- extract_topics(items, xmlfiles = paste0(items, ".xml"),
# xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
# Indices for topics:
# 0 artist
@ -80,6 +79,7 @@ hd <- hd0[hd0$Abkuerzung == "NI", ]
names(hd) <- c("state", "stateCode", "date", "holiday")
hd$date <- as.POSIXct(hd$date)
hd$state <- NULL
hd$stateCode <- NULL
## Read data for school vacations
@ -111,12 +111,11 @@ sfdat <- NULL
for (i in seq_len(nrow(sf))) {
date <- seq(sf$start[i], sf$end[i], by = 1)
sfdat <- rbind(sfdat, data.frame(date, vacations = sf$name[i],
sfdat <- rbind(sfdat, data.frame(date, vacation = sf$name[i],
stateCode = sf$stateCode[i]))
}
# TODO: How to handle stateCode? There will be several for certain types of
# data sets... Not important here, since I only do NI.
sfdat$stateCode <- NULL
## Merge data
@ -124,11 +123,7 @@ datlogs$date <- as.Date(datlogs$date.start)
dat1 <- merge(datlogs, hd, by.x = "date", by.y = "date", all.x = TRUE)
dat2 <- merge(dat1, sfdat, by.x = "date", by.y = "date", all.x = TRUE)
dat2$stateCode <- dat2$stateCode.y
dat2$stateCode <- ifelse(!is.na(dat2$stateCode.x),
dat2$stateCode.x, dat2$stateCode.y)
dat2$stateCode.x <- NULL
dat2$stateCode.y <- NULL
dat2$date <- NULL
## Export data
@ -136,5 +131,3 @@ dat2$date <- NULL
write.table(dat2, paste0("results/haum/event_logfiles_", now, ".csv"),
sep = ";", row.names = FALSE)
# TODO: Maybe add infos about artworks?