Removed code for merge and join; deleted some TODOs

This commit is contained in:
Nora Wickelmaier 2023-08-31 16:03:21 +02:00
parent 2c4f48531a
commit aec52e7683

View File

@@ -186,7 +186,6 @@ for (art in aws) { # select artwork
head(dat2[, c("artwork", "event", "trace")], 50) head(dat2[, c("artwork", "event", "trace")], 50)
tail(dat2[, c("artwork", "event", "trace")], 50) tail(dat2[, c("artwork", "event", "trace")], 50)
# TODO: How to handle popups from glossar???
rm(aws, i, j, last_event, art) rm(aws, i, j, last_event, art)
@@ -246,9 +245,6 @@ table(is.na(dat2[dat2$glossar == 1, "trace"]))
# How many glossar_files are only associated with one artwork? # How many glossar_files are only associated with one artwork?
lut[sapply(lut$artwork, length) == 1, "glossar_file"] lut[sapply(lut$artwork, length) == 1, "glossar_file"]
# TODO: Fill in the ones that are associated with one artwork
# --> Can't come up with something -- maybe ask AK???
single <- lut[sapply(lut$artwork, length) == 1, "glossar_file"] single <- lut[sapply(lut$artwork, length) == 1, "glossar_file"]
tmp <- subset(dat2, is.na(dat2$trace))$popup tmp <- subset(dat2, is.na(dat2$trace))$popup
inside <- unique(tmp[tmp %in% lut[sapply(lut$artwork, length) == 1, "glossar_file"]]) inside <- unique(tmp[tmp %in% lut[sapply(lut$artwork, length) == 1, "glossar_file"]])
@@ -284,8 +280,6 @@ dat2[14110:14130, ]
# TODO: Integrate for loop into for loop above # TODO: Integrate for loop into for loop above
# TODO: "glossar" entry should be changed to the corresponding artwork
# TODO: For now: Exclude not matched glossar entries # TODO: For now: Exclude not matched glossar entries
df <- subset(dat2, !is.na(dat2$trace)) df <- subset(dat2, !is.na(dat2$trace))
@@ -434,38 +428,8 @@ dat_openPopup <- openPopup_wide[, c("fileid.start", "fileid.stop", "event",
rm(num_start, openPopup_wide) rm(num_start, openPopup_wide)
# Merge all
# system.time({
# dat_all <- merge(dat_trans, dat_flipCard, all = TRUE)
# dat_all <- merge(dat_all, dat_openTopic, all = TRUE)
# dat_all <- merge(dat_all, dat_openPopup, all = TRUE)
# })
#
# # check
# nrow(dat_all) == (nrow(dat_trans) + nrow(dat_flipCard) +
# nrow(dat_openTopic) + nrow(dat_openPopup))
#
# dat_all <- dat_all[order(dat_all$date.start), ]
# rownames(dat_all) <- NULL
#
# TODO: from here on NA... WHY??
# dat_all[19426:19435, ]
# TODO: Should card maybe also be filled in for "openPopup"? # TODO: Should card maybe also be filled in for "openPopup"?
# dat_all2 <- dplyr::full_join(dat_trans, dat_flipCard)
# dat_all2 <- dplyr::full_join(dat_all, dat_openTopic)
# dat_all2 <- dplyr::full_join(dat_all, dat_openPopup)
#
# nrow(dat_all2) == (nrow(dat_trans) + nrow(dat_flipCard) +
# nrow(dat_openTopic) + nrow(dat_openPopup))
#
# dat_all2 <- dat_all2[order(dat_all2$date.start), ]
# rownames(dat_all2) <- NULL
# TODO: --> same result - but faster. Need it?
# --> Would hate to depend on dplyr...
#' ## Use `rbind()` instead... #' ## Use `rbind()` instead...
# --> unbeatable in terms of time! # --> unbeatable in terms of time!