mtt/R/add_path.R

226 lines
7.3 KiB
R

###########################################################################
add_path_items <- function(subdata) {
pbapply::pboptions(style = 3, char = "=")
subdata_glossar <- subdata[subdata$item == "glossar", ]
subdata_glossar$path <- NA
items <- unique(subdata$item)[unique(subdata$item) != "glossar"]
path_per_item <- function(subdata, item) {
subsubdata <- subdata[subdata$item == item, ]
last_event <- subsubdata$event[1]
subsubdata$path <- NA
for (i in 1:nrow(subsubdata)) {
if (last_event == "Show Info") {
subsubdata$path[i] <- i
j <- i
status <- "path_started"
} else if (last_event == "Show Front") {
subsubdata$path[i] <- j
status <- "path_ended"
} else if (!(last_event %in% c("Show Info", "Show Front")) &
status == "path_started") {
subsubdata$path[i] <- j
}
if (i <= nrow(subsubdata)) {
last_event <- subsubdata$event[i + 1]
}
}
subsubdata$path <- ifelse(is.na(subsubdata$path), NA,
paste0(item, "_", subsubdata$path))
subsubdata
}
subdata_list <- pbapply::pblapply(items, path_per_item, subdata = subdata)
subdata <- dplyr::bind_rows(subdata_list)
# Remove popup events that occur after item has been closed
subdata <- subdata[!is.na(subdata$path), ]
# Add glossar events back in
subdata <- rbind(subdata, subdata_glossar)
# Bring back in time order
subdata <- subdata[order(subdata$fileId, subdata$date), ]
# Make path a consecutive number
subdata$path <- as.numeric(factor(subdata$path, levels = unique(subdata$path)))
rownames(subdata) <- NULL
subdata
}
###########################################################################
add_path_glossar <- function(subdata, xmlpath) {
# TODO: I think this needs to be completely redone
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3)
cat("\n\n########## Creating glossar dictionary ##########", "\n")
items <- unique(subdata$item[subdata$item != "glossar"])
glossar_files <- unique(subdata[subdata$item == "glossar", "popup"])
lut <- create_glossardict(items, glossar_files, xmlpath = xmlpath)
inside <- glossar_files[glossar_files %in%
names(lut[sapply(lut, length) == 1])]
single_art <- unlist(lut[names(lut) %in% inside])
m <- 1
for (file in names(lut)) {
cat("\n\nAdding path variable for glossar entry", file,
paste0("(", m, "/", length(lut), ")"), "\n")
item_list <- unlist(lut[names(lut) == file])
for (i in seq_len(nrow(subdata))) {
if (subdata$event[i] == "Show Info" |
(subdata$event[i] == "Artwork/OpenCard" &
subdata$item[i] %in% single_art)) {
current_item <- subdata[i, "item"]
j <- i
k <- i
} else {
current_item <- current_item
}
if (subdata$event[i] == "Show Front" & subdata$item[i] == current_item) {
# make sure item has not been closed, yet!
k <- i
}
if (subdata$item[i] == "glossar" &
(current_item %in% item_list) &
subdata$popup[i] == file & (j - k == 0)) {
subdata[i, "path"] <- subdata[j, "path"]
subdata[i, "item"] <- current_item
}
utils::setTxtProgressBar(pb, i)
}
m <- m + 1
}
# Exclude not matched glossar entries
cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
sum(is.na(subdata[subdata$glossar == 1, "path"])), "entries",
fill = TRUE)
subset(subdata, !is.na(subdata$path))
}
###########################################################################
add_path <- function(data, xmlpath, glossar) {
data$path <- NA
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
subdata2 <- add_path_items(subdata2)
if (glossar) {
subdata2 <- add_path_glossar(subdata2, xmlpath)
} else {
subdata2 <- subdata2[subdata2$glossar != 1, ]
}
out <- rbind(subdata1, subdata2)
out <- out[order(out$fileId, out$date, out$timeMs), ]
out
}
###########################################################################
# Add path for moves
add_path_moves <- function(data, cutoff) {
pbapply::pboptions(style = 3, char = "=")
subdata_item <- split(data, ~ item)
subdata_path <- pbapply::pblapply(subdata_item,
add_path_subdata, cutoff = cutoff)
out <- dplyr::bind_rows(subdata_path)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
# Make path a consecutive number
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
rownames(out) <- NULL
out
}
add_path_subdata <- function(subdata, cutoff) {
index_flipCard <- which(subdata$event == "flipCard")
current_item <- unique(subdata$item)
for (j in seq_along(index_flipCard)) {
# forwards pass
if (j < max(seq_along(index_flipCard))) {
for (i in seq(index_flipCard[j], index_flipCard[j + 1])) {
if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) {
timediff <- difftime(subdata$date.start[i],
subdata$date.stop[index_flipCard[j]],
units = "secs")
if (timediff <= cutoff){
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
} else {
for (i in seq(index_flipCard[j], nrow(subdata))) {
if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) {
timediff <- difftime(subdata$date.start[i],
subdata$date.stop[index_flipCard[j]],
units = "secs")
if (timediff <= cutoff) {
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
}
# backwards pass
if (j > min(seq_along(index_flipCard))) {
for (i in seq(index_flipCard[j - 1], index_flipCard[j])) {
if (grepl("mv", subdata$path[i])) {
timediff <- difftime(subdata$date.start[index_flipCard[j]],
subdata$date.stop[i],
units = "secs")
if (timediff <= cutoff){
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
}
}
# fix moves with same path and timediff > cutoff
subdata_moves <- split(subdata, ~ path)
check_moves <- function(subsubdata, cutoff) {
if (any(grepl("mv", subsubdata$path))) {
for (i in seq_len(nrow(subsubdata) - 1)) {
timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i],
units = "secs")
if (timediff > cutoff) {
subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_")
} else {
subsubdata$path[i + 1] <- subsubdata$path[i]
}
}
}
subsubdata
}
subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff)
subdata <- dplyr::bind_rows(subdata_path)
subdata
}