From aec52e7683412ba86d3a525cb2b0ae2a0400a5e6 Mon Sep 17 00:00:00 2001 From: nwickel Date: Thu, 31 Aug 2023 16:03:21 +0200 Subject: [PATCH] Removed code for merge and join; deleted some TODOs --- code/02_preprocessing.R | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R index a6438b6..e66394d 100644 --- a/code/02_preprocessing.R +++ b/code/02_preprocessing.R @@ -186,7 +186,6 @@ for (art in aws) { # select artwork head(dat2[, c("artwork", "event", "trace")], 50) tail(dat2[, c("artwork", "event", "trace")], 50) -# TODO: How to handle popups from glossar??? rm(aws, i, j, last_event, art) @@ -246,9 +245,6 @@ table(is.na(dat2[dat2$glossar == 1, "trace"])) # How many glossar_files are only associated with one artwork? lut[sapply(lut$artwork, length) == 1, "glossar_file"] -# TODO: Fill in the ones that are associated with one artwork -# --> Can't come up with something -- maybe ask AK??? - single <- lut[sapply(lut$artwork, length) == 1, "glossar_file"] tmp <- subset(dat2, is.na(dat2$trace))$popup inside <- unique(tmp[tmp %in% lut[sapply(lut$artwork, length) == 1, "glossar_file"]]) @@ -284,8 +280,6 @@ dat2[14110:14130, ] # TODO: Integrate for loop into for loop above -# TODO: "glossar" entry should be changed to the corresponding artwork - # TODO: For now: Exclude not matched glossar entries df <- subset(dat2, !is.na(dat2$trace)) @@ -434,38 +428,8 @@ dat_openPopup <- openPopup_wide[, c("fileid.start", "fileid.stop", "event", rm(num_start, openPopup_wide) -# Merge all -# system.time({ -# dat_all <- merge(dat_trans, dat_flipCard, all = TRUE) -# dat_all <- merge(dat_all, dat_openTopic, all = TRUE) -# dat_all <- merge(dat_all, dat_openPopup, all = TRUE) -# }) -# -# # check -# nrow(dat_all) == (nrow(dat_trans) + nrow(dat_flipCard) + -# nrow(dat_openTopic) + nrow(dat_openPopup)) -# -# dat_all <- dat_all[order(dat_all$date.start), ] -# rownames(dat_all) <- NULL -# - -# TODO: from here on NA... WHY?? -# dat_all[19426:19435, ] - # TODO: Should card maybe also be filled in for "openPopup"? -# dat_all2 <- dplyr::full_join(dat_trans, dat_flipCard) -# dat_all2 <- dplyr::full_join(dat_all, dat_openTopic) -# dat_all2 <- dplyr::full_join(dat_all, dat_openPopup) -# -# nrow(dat_all2) == (nrow(dat_trans) + nrow(dat_flipCard) + -# nrow(dat_openTopic) + nrow(dat_openPopup)) -# -# dat_all2 <- dat_all2[order(dat_all2$date.start), ] -# rownames(dat_all2) <- NULL -# TODO: --> same result - but faster. Need it? -# --> Would hate to depend on dplyr... - #' ## Use `rbind()` instead... # --> unbeatable in terms of time!