Changed how path variable for move events is created

This commit is contained in:
Nora Wickelmaier 2024-01-16 18:51:13 +01:00
parent 8bedadd18e
commit 8233b151d9
2 changed files with 93 additions and 58 deletions

View File

@ -1,5 +1,8 @@
########################################################################### ###########################################################################
add_path_items <- function(subdata) { add_path_items <- function(subdata) {
pbapply::pboptions(style = 3, char = "=")
subdata_glossar <- subdata[subdata$item == "glossar", ] subdata_glossar <- subdata[subdata$item == "glossar", ]
subdata_glossar$path <- NA subdata_glossar$path <- NA
@ -52,6 +55,7 @@ add_path_items <- function(subdata) {
########################################################################### ###########################################################################
add_path_glossar <- function(subdata, xmlpath) { add_path_glossar <- function(subdata, xmlpath) {
# TODO: I think this needs to be completely redone
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
style = 3) style = 3)
@ -129,62 +133,93 @@ add_path <- function(data, xmlpath, glossar) {
# Add path for moves # Add path for moves
add_path_moves <- function(data) { add_path_moves <- function(data, cutoff) {
pbapply::pboptions(style = 3, char = "=") pbapply::pboptions(style = 3, char = "=")
path_max <- max(data$path, na.rm = TRUE) subdata_item <- split(data, ~ item)
#subdata_art <- split(data, ~ item) subdata_path <- pbapply::pblapply(subdata_item,
subdata_case <- split(data, ~ case) add_path_subdata, cutoff = cutoff)
#subdata_list <- split(data, ~ item + case)
# --> does not work with complete data set
cat("Splitting data...", "\n")
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item)
subdata_list <- unlist(subdata_list, recursive = FALSE)
cat("Adding path...", "\n")
subdata_path <- pbapply::pblapply(subdata_list,
function(x) {
path_max <<- path_max + 1
add_path_subdata(x, max_path = path_max)
}
)
out <- dplyr::bind_rows(subdata_path) out <- dplyr::bind_rows(subdata_path)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
# Make path a consecutive number
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
rownames(out) <- NULL rownames(out) <- NULL
out out
} }
add_path_subdata <- function(subdata, max_path) { add_path_subdata <- function(subdata, cutoff) {
index_flipCard <- which(subdata$event == "flipCard")
if (nrow(subdata) != 0) { current_item <- unique(subdata$item)
for (j in seq_along(index_flipCard)) {
if (length(stats::na.omit(unique(subdata$path))) == 1) { # forwards pass
subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path)) if (j < max(seq_along(index_flipCard))) {
} else if (length(stats::na.omit(unique(subdata$path))) > 1) { for (i in seq(index_flipCard[j], index_flipCard[j + 1])) {
for (i in 1:nrow(subdata)) { if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) {
if (subdata$event[i] == "move") { timediff <- difftime(subdata$date.start[i],
if (i == 1) { subdata$date.stop[index_flipCard[j]],
subdata$path[i] <- stats::na.omit(unique(subdata$path))[1] units = "secs")
if (timediff <= cutoff){
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else { } else {
subdata$path[i] <- subdata$path[i - 1] subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
} }
} }
} }
} else if (all(is.na(subdata$path))) { } else {
for (i in 1:nrow(subdata)) { for (i in seq(index_flipCard[j], nrow(subdata))) {
subdata$path[i] <- max_path if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) {
timediff <- difftime(subdata$date.start[i],
subdata$date.stop[index_flipCard[j]],
units = "secs")
if (timediff <= cutoff) {
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
}
}
# backwards pass
if (j > min(seq_along(index_flipCard))) {
for (i in seq(index_flipCard[j - 1], index_flipCard[j])) {
if (grepl("mv", subdata$path[i])) {
timediff <- difftime(subdata$date.start[index_flipCard[j]],
subdata$date.stop[i],
units = "secs")
if (timediff <= cutoff){
subdata$path[i] <- subdata$path[index_flipCard[j]]
} else {
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
}
}
} }
} }
} else {
warning("subdata has nrow = 0")
} }
# fix moves with same path and timediff > cutoff
subdata_moves <- split(subdata, ~ path)
check_moves <- function(subsubdata, cutoff) {
if (any(grepl("mv", subsubdata$path))) {
for (i in seq_len(nrow(subsubdata) - 1)) {
timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i],
units = "secs")
if (timediff > cutoff) {
subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_")
} else {
subsubdata$path[i + 1] <- subsubdata$path[i]
}
}
}
subsubdata
}
subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff)
subdata <- dplyr::bind_rows(subdata_path)
subdata subdata
} }

View File

@ -54,10 +54,14 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
# Add path for move events ##############################################
cat("\n\n########## Adding path variable for move events... ##########", "\n")
dat3 <- add_path_moves(dat2)
# Add case variable ###################################################### # Add case variable ######################################################
cat("\n########## Adding case and eventId variables... ##########", "\n\n") cat("\n########## Adding case variable... ##########", "\n\n")
dat3 <- add_case(dat2, cutoff = case_cutoff) dat4 <- add_case(dat3, cutoff = case_cutoff)
dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start", dat4 <- dat4[, c("fileId.start", "fileId.stop", "date.start",
"date.stop", "folder", "case", "path", "glossar", "date.stop", "folder", "case", "path", "glossar",
"event", "item", "timeMs.start", "timeMs.stop", "event", "item", "timeMs.start", "timeMs.stop",
"duration", "topic", "popup", "x.start", "y.start", "duration", "topic", "popup", "x.start", "y.start",
@ -65,11 +69,6 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
"scale.stop", "scaleSize", "rotation.start", "scale.stop", "scaleSize", "rotation.start",
"rotation.stop", "rotationDegree")] "rotation.stop", "rotationDegree")]
# Add path for move events ##############################################
cat("\n\n########## Adding path variable for move events... ##########", "\n")
dat4 <- add_path_moves(dat3)
# Fix durations that span more than one log file ######################### # Fix durations that span more than one log file #########################
levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop))) levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop)))
dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId) dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId)
@ -92,20 +91,22 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat4$fIdDiff <- NULL dat4$fIdDiff <- NULL
# Remove fragmented paths ############################################### # Remove fragmented paths ###############################################
tab <- stats::xtabs( ~ path + event, dat4) # tab <- stats::xtabs( ~ path + event, dat4)
fragments <- NULL # fragments <- NULL
for (i in seq_len(nrow(tab))) { # for (i in seq_len(nrow(tab))) {
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) { # if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
fragments <- c(fragments, rownames(tab)[i]) # fragments <- c(fragments, rownames(tab)[i])
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) { # } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
fragments <- c(fragments, rownames(tab)[i]) # fragments <- c(fragments, rownames(tab)[i])
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) { # } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
fragments <- c(fragments, rownames(tab)[i]) # fragments <- c(fragments, rownames(tab)[i])
} # }
} # }
dat5 <- dat4[!dat4$path %in% fragments, ] # dat5 <- dat4[!dat4$path %in% fragments, ]
# TODO: Decide if I want this or not - are all these log errors?
dat5 <- dat4
if (glossar) { if (glossar) {
# Check for wrong order of events: flipCard -> openPopup -> openTopic # Check for wrong order of events: flipCard -> openPopup -> openTopic
@ -126,8 +127,7 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
dat7$glossar <- NULL dat7$glossar <- NULL
} }
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "../data/tmp_intermediate-df.RData") if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "results/tmp_intermediate-df.RData")
dat7 dat7
} }