Changed how path variable for move events is created

This commit is contained in:
Nora Wickelmaier 2024-01-16 18:51:13 +01:00
parent 8bedadd18e
commit 8233b151d9
2 changed files with 93 additions and 58 deletions

View File

@ -1,5 +1,8 @@
###########################################################################
add_path_items <- function(subdata) {
pbapply::pboptions(style = 3, char = "=")
subdata_glossar <- subdata[subdata$item == "glossar", ]
subdata_glossar$path <- NA
@ -52,6 +55,7 @@ add_path_items <- function(subdata) {
###########################################################################
add_path_glossar <- function(subdata, xmlpath) {
# TODO: I think this needs to be completely redone
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3)
@ -129,62 +133,93 @@ add_path <- function(data, xmlpath, glossar) {
# Add path for moves
# NOTE(review): this span reads like a diff rendering whose +/- markers were
# stripped: two definitions of add_path_moves with different signatures follow
# each other and two implementations are interleaved below. Presumably the
# one-argument form is the removed version and the (data, cutoff) form is the
# added one -- TODO confirm against the repository.
#
# Both variants split `data` into subsets, call add_path_subdata() on every
# subset, bind the results together again and return them ordered by
# fileId.start, date.start and timeMs.start.
#
# data:   data frame; the code reads the columns path, item, case,
#         fileId.start, date.start and timeMs.start.
# cutoff: passed through unchanged to add_path_subdata().
add_path_moves <- function(data) {
add_path_moves <- function(data, cutoff) {
# Set the progress bar style and fill character for the pblapply() calls.
pbapply::pboptions(style = 3, char = "=")
# Largest non-missing path value; only the variant that passes `max_path`
# below uses it.
path_max <- max(data$path, na.rm = TRUE)
# One data frame per item; consumed by the call that passes `cutoff`.
subdata_item <- split(data, ~ item)
#subdata_art <- split(data, ~ item)
# One data frame per case; each of them is split by item further down.
subdata_case <- split(data, ~ case)
#subdata_list <- split(data, ~ item + case)
# --> does not work with complete data set
cat("Splitting data...", "\n")
# Split every case subset by item, then flatten the nested list by one level
# so that each element is a single data frame.
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item)
subdata_list <- unlist(subdata_list, recursive = FALSE)
cat("Adding path...", "\n")
# Variant 1: path_max is incremented in the enclosing function environment
# (via <<-) before each subset is processed, so every call receives a
# different max_path value.
subdata_path <- pbapply::pblapply(subdata_list,
function(x) {
path_max <<- path_max + 1
add_path_subdata(x, max_path = path_max)
}
)
# Variant 2: process the per-item subsets and hand `cutoff` to
# add_path_subdata(). As written, this assignment overwrites the result of
# the call above.
subdata_path <- pbapply::pblapply(subdata_item,
add_path_subdata, cutoff = cutoff)
# Recombine the subsets and sort the rows by file, start date and start time.
out <- dplyr::bind_rows(subdata_path)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
# Make path a consecutive number
# (numbered in order of first appearance in the sorted data; rows with a
# missing path stay NA)
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
# Drop the row names left over from splitting and reordering.
rownames(out) <- NULL
out
}
# NOTE(review): like the function above, this span interleaves two versions of
# add_path_subdata (signatures `(subdata, max_path)` and `(subdata, cutoff)`)
# without diff markers, so the braces do not balance as shown. Presumably the
# max_path variant is the removed one -- TODO confirm against the repository.
#
# Assigns a value to `path` for rows whose event is "move".
#
# subdata:  data frame; the code reads the columns event, item, path,
#           date.start and date.stop.
# max_path: (max_path variant) value written to `path` when every path in
#           `subdata` is missing.
# cutoff:   (cutoff variant) threshold in seconds that the time differences
#           computed below are compared against.
# Returns `subdata` with an updated `path` column.
add_path_subdata <- function(subdata, max_path) {
# max_path variant: branch on how many distinct non-missing paths exist.
if (nrow(subdata) != 0) {
# Exactly one known path: every move row receives it.
if (length(stats::na.omit(unique(subdata$path))) == 1) {
subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path))
# Several known paths: walk the rows; a move in the first row takes the first
# non-missing path.
} else if (length(stats::na.omit(unique(subdata$path))) > 1) {
for (i in 1:nrow(subdata)) {
if (subdata$event[i] == "move") {
if (i == 1) {
subdata$path[i] <- stats::na.omit(unique(subdata$path))[1]
# cutoff variant starts here.
add_path_subdata <- function(subdata, cutoff) {
# Row positions of the flipCard events; they delimit the ranges scanned below.
index_flipCard <- which(subdata$event == "flipCard")
# Used as prefix of the generated "<item>_mv_<j>" labels.
# NOTE(review): presumably a single value because the caller passes per-item
# subsets -- verify.
current_item <- unique(subdata$item)
for (j in seq_along(index_flipCard)) {
# forwards pass
# Not the last flipCard: scan the rows from this flipCard up to and including
# the next one.
if (j < max(seq_along(index_flipCard))) {
for (i in seq(index_flipCard[j], index_flipCard[j + 1])) {
# Only move rows are touched, and only if this flipCard has a stop date.
if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) {
# Seconds between the end of the flipCard and the start of the move.
timediff <- difftime(subdata$date.start[i],
subdata$date.stop[index_flipCard[j]],
units = "secs")
# Within the cutoff the move takes over the path of the flipCard.
if (timediff <= cutoff){
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
# Two assignments appear here: copying the previous row's path and writing an
# "<item>_mv_<j>" label. Presumably the first is the removed line and the
# second its replacement -- TODO confirm.
subdata$path[i] <- subdata$path[i - 1]
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
# max_path variant: no path known at all, every row receives max_path.
} else if (all(is.na(subdata$path))) {
for (i in 1:nrow(subdata)) {
subdata$path[i] <- max_path
# cutoff variant, last flipCard: scan from this flipCard to the last row and
# apply the same rule as above.
} else {
for (i in seq(index_flipCard[j], nrow(subdata))) {
if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) {
timediff <- difftime(subdata$date.start[i],
subdata$date.stop[index_flipCard[j]],
units = "secs")
if (timediff <= cutoff) {
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
}
# backwards pass
# Not the first flipCard: revisit the rows between the previous flipCard and
# this one.
if (j > min(seq_along(index_flipCard))) {
for (i in seq(index_flipCard[j - 1], index_flipCard[j])) {
# Only rows whose path contains "mv" are reconsidered.
# NOTE(review): presumably these are the labels generated in the forwards
# pass; an item name containing "mv" would match as well -- verify.
if (grepl("mv", subdata$path[i])) {
# Seconds between the end of this row and the start of the current flipCard.
timediff <- difftime(subdata$date.start[index_flipCard[j]],
subdata$date.stop[i],
units = "secs")
# Close enough to the following flipCard: take over its path; otherwise
# relabel the row with the index of the current flipCard.
if (timediff <= cutoff){
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
}
# max_path variant: empty input only triggers a warning.
} else {
warning("subdata has nrow = 0")
}
# fix moves with same path and timediff > cutoff
# NOTE(review): split() drops rows whose grouping value is NA, so rows that
# still have a missing path at this point are not part of the result --
# confirm that this is intended.
subdata_moves <- split(subdata, ~ path)
# Helper: within one path group that contains "mv" paths, compare each row
# with its successor. A gap larger than cutoff gives the successor the path
# "<path of row i>_<i>_new"; otherwise the successor copies the path of row i.
# Groups without "mv" paths are returned unchanged.
check_moves <- function(subsubdata, cutoff) {
if (any(grepl("mv", subsubdata$path))) {
for (i in seq_len(nrow(subsubdata) - 1)) {
timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i],
units = "secs")
if (timediff > cutoff) {
subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_")
} else {
subsubdata$path[i + 1] <- subsubdata$path[i]
}
}
}
subsubdata
}
# Apply the helper to every path group and stack the groups again.
subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff)
subdata <- dplyr::bind_rows(subdata_path)
subdata
}

View File

@ -54,10 +54,14 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
# Add path for move events ##############################################
cat("\n\n########## Adding path variable for move events... ##########", "\n")
dat3 <- add_path_moves(dat2)
# Add case variable ######################################################
cat("\n########## Adding case and eventId variables... ##########", "\n\n")
dat3 <- add_case(dat2, cutoff = case_cutoff)
dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start",
cat("\n########## Adding case variable... ##########", "\n\n")
dat4 <- add_case(dat3, cutoff = case_cutoff)
dat4 <- dat4[, c("fileId.start", "fileId.stop", "date.start",
"date.stop", "folder", "case", "path", "glossar",
"event", "item", "timeMs.start", "timeMs.stop",
"duration", "topic", "popup", "x.start", "y.start",
@ -65,11 +69,6 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
"scale.stop", "scaleSize", "rotation.start",
"rotation.stop", "rotationDegree")]
# Add path for move events ##############################################
cat("\n\n########## Adding path variable for move events... ##########", "\n")
dat4 <- add_path_moves(dat3)
# Fix durations that span more than one log file #########################
levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop)))
dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId)
@ -92,20 +91,22 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat4$fIdDiff <- NULL
# Remove fragmented paths ###############################################
tab <- stats::xtabs( ~ path + event, dat4)
# tab <- stats::xtabs( ~ path + event, dat4)
fragments <- NULL
# fragments <- NULL
for (i in seq_len(nrow(tab))) {
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
fragments <- c(fragments, rownames(tab)[i])
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
fragments <- c(fragments, rownames(tab)[i])
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
fragments <- c(fragments, rownames(tab)[i])
}
}
dat5 <- dat4[!dat4$path %in% fragments, ]
# for (i in seq_len(nrow(tab))) {
# if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
# fragments <- c(fragments, rownames(tab)[i])
# } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
# fragments <- c(fragments, rownames(tab)[i])
# } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
# fragments <- c(fragments, rownames(tab)[i])
# }
# }
# dat5 <- dat4[!dat4$path %in% fragments, ]
# TODO: Decide if I want this or not - are all these log errors?
dat5 <- dat4
if (glossar) {
# Check for wrong order of events: flipCard -> openPopup -> openTopic
@ -126,8 +127,7 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat7$glossar <- NULL
}
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "../data/tmp_intermediate-df.RData")
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "results/tmp_intermediate-df.RData")
dat7
}