Changed how path variable for move events is created
This commit is contained in:
parent
8bedadd18e
commit
10d7a44ff9
107
R/add_path.R
107
R/add_path.R
@ -1,5 +1,8 @@
|
||||
###########################################################################
|
||||
add_path_items <- function(subdata) {
|
||||
|
||||
pbapply::pboptions(style = 3, char = "=")
|
||||
|
||||
subdata_glossar <- subdata[subdata$item == "glossar", ]
|
||||
subdata_glossar$path <- NA
|
||||
|
||||
@ -52,6 +55,7 @@ add_path_items <- function(subdata) {
|
||||
|
||||
###########################################################################
|
||||
add_path_glossar <- function(subdata, xmlpath) {
|
||||
# TODO: I think this needs to be completely redone
|
||||
|
||||
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
|
||||
style = 3)
|
||||
@ -129,62 +133,93 @@ add_path <- function(data, xmlpath, glossar) {
|
||||
|
||||
# Add path for moves
|
||||
|
||||
add_path_moves <- function(data) {
|
||||
add_path_moves <- function(data, cutoff) {
|
||||
|
||||
pbapply::pboptions(style = 3, char = "=")
|
||||
|
||||
path_max <- max(data$path, na.rm = TRUE)
|
||||
subdata_item <- split(data, ~ item)
|
||||
|
||||
#subdata_art <- split(data, ~ item)
|
||||
subdata_case <- split(data, ~ case)
|
||||
|
||||
#subdata_list <- split(data, ~ item + case)
|
||||
# --> does not work with complete data set
|
||||
cat("Splitting data...", "\n")
|
||||
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item)
|
||||
subdata_list <- unlist(subdata_list, recursive = FALSE)
|
||||
|
||||
cat("Adding path...", "\n")
|
||||
subdata_path <- pbapply::pblapply(subdata_list,
|
||||
function(x) {
|
||||
path_max <<- path_max + 1
|
||||
add_path_subdata(x, max_path = path_max)
|
||||
}
|
||||
)
|
||||
subdata_path <- pbapply::pblapply(subdata_item,
|
||||
add_path_subdata, cutoff = cutoff)
|
||||
|
||||
out <- dplyr::bind_rows(subdata_path)
|
||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
||||
# Make path a consecutive number
|
||||
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
|
||||
rownames(out) <- NULL
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
add_path_subdata <- function(subdata, max_path) {
|
||||
|
||||
if (nrow(subdata) != 0) {
|
||||
|
||||
if (length(stats::na.omit(unique(subdata$path))) == 1) {
|
||||
subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path))
|
||||
} else if (length(stats::na.omit(unique(subdata$path))) > 1) {
|
||||
for (i in 1:nrow(subdata)) {
|
||||
if (subdata$event[i] == "move") {
|
||||
if (i == 1) {
|
||||
subdata$path[i] <- stats::na.omit(unique(subdata$path))[1]
|
||||
add_path_subdata <- function(subdata, cutoff) {
|
||||
index_flipCard <- which(subdata$event == "flipCard")
|
||||
current_item <- unique(subdata$item)
|
||||
for (j in seq_along(index_flipCard)) {
|
||||
# forwards pass
|
||||
if (j < max(seq_along(index_flipCard))) {
|
||||
for (i in seq(index_flipCard[j], index_flipCard[j + 1])) {
|
||||
if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) {
|
||||
timediff <- difftime(subdata$date.start[i],
|
||||
subdata$date.stop[index_flipCard[j]],
|
||||
units = "secs")
|
||||
if (timediff <= cutoff){
|
||||
subdata$path[i] <- subdata$path[index_flipCard[j]]
|
||||
} else {
|
||||
subdata$path[i] <- subdata$path[i - 1]
|
||||
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (all(is.na(subdata$path))) {
|
||||
for (i in 1:nrow(subdata)) {
|
||||
subdata$path[i] <- max_path
|
||||
} else {
|
||||
for (i in seq(index_flipCard[j], nrow(subdata))) {
|
||||
if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) {
|
||||
timediff <- difftime(subdata$date.start[i],
|
||||
subdata$date.stop[index_flipCard[j]],
|
||||
units = "secs")
|
||||
if (timediff <= cutoff) {
|
||||
subdata$path[i] <- subdata$path[index_flipCard[j]]
|
||||
} else {
|
||||
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# backwards pass
|
||||
if (j > min(seq_along(index_flipCard))) {
|
||||
for (i in seq(index_flipCard[j - 1], index_flipCard[j])) {
|
||||
if (grepl("mv", subdata$path[i])) {
|
||||
timediff <- difftime(subdata$date.start[index_flipCard[j]],
|
||||
subdata$date.stop[i],
|
||||
units = "secs")
|
||||
if (timediff <= cutoff){
|
||||
subdata$path[i] <- subdata$path[index_flipCard[j]]
|
||||
} else {
|
||||
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
warning("subdata has nrow = 0")
|
||||
}
|
||||
# fix moves with same path and timediff > cutoff
|
||||
subdata_moves <- split(subdata, ~ path)
|
||||
|
||||
check_moves <- function(subsubdata, cutoff) {
|
||||
if (any(grepl("mv", subsubdata$path))) {
|
||||
for (i in seq_len(nrow(subsubdata) - 1)) {
|
||||
timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i],
|
||||
units = "secs")
|
||||
if (timediff > cutoff) {
|
||||
subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_")
|
||||
} else {
|
||||
subsubdata$path[i + 1] <- subsubdata$path[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
subsubdata
|
||||
}
|
||||
|
||||
subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff)
|
||||
subdata <- dplyr::bind_rows(subdata_path)
|
||||
subdata
|
||||
}
|
||||
|
||||
|
@ -54,10 +54,14 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
||||
|
||||
dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
|
||||
|
||||
# Add path for move events ##############################################
|
||||
cat("\n\n########## Adding path variable for move events... ##########", "\n")
|
||||
dat3 <- add_path_moves(dat2, cutoff = case_cutoff)
|
||||
|
||||
# Add case variable ######################################################
|
||||
cat("\n########## Adding case and eventId variables... ##########", "\n\n")
|
||||
dat3 <- add_case(dat2, cutoff = case_cutoff)
|
||||
dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start",
|
||||
cat("\n########## Adding case variable... ##########", "\n\n")
|
||||
dat4 <- add_case(dat3, cutoff = case_cutoff)
|
||||
dat4 <- dat4[, c("fileId.start", "fileId.stop", "date.start",
|
||||
"date.stop", "folder", "case", "path", "glossar",
|
||||
"event", "item", "timeMs.start", "timeMs.stop",
|
||||
"duration", "topic", "popup", "x.start", "y.start",
|
||||
@ -65,11 +69,6 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
||||
"scale.stop", "scaleSize", "rotation.start",
|
||||
"rotation.stop", "rotationDegree")]
|
||||
|
||||
# Add path for move events ##############################################
|
||||
cat("\n\n########## Adding path variable for move events... ##########", "\n")
|
||||
dat4 <- add_path_moves(dat3)
|
||||
|
||||
|
||||
# Fix durations that span more than one log file #########################
|
||||
levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop)))
|
||||
dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId)
|
||||
@ -92,20 +91,22 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
||||
dat4$fIdDiff <- NULL
|
||||
|
||||
# Remove fragmented paths ###############################################
|
||||
tab <- stats::xtabs( ~ path + event, dat4)
|
||||
# tab <- stats::xtabs( ~ path + event, dat4)
|
||||
|
||||
fragments <- NULL
|
||||
# fragments <- NULL
|
||||
|
||||
for (i in seq_len(nrow(tab))) {
|
||||
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
}
|
||||
}
|
||||
dat5 <- dat4[!dat4$path %in% fragments, ]
|
||||
# for (i in seq_len(nrow(tab))) {
|
||||
# if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
# fragments <- c(fragments, rownames(tab)[i])
|
||||
# } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
# fragments <- c(fragments, rownames(tab)[i])
|
||||
# } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
||||
# fragments <- c(fragments, rownames(tab)[i])
|
||||
# }
|
||||
# }
|
||||
# dat5 <- dat4[!dat4$path %in% fragments, ]
|
||||
# TODO: Decide if I want this or not - are all these log errors?
|
||||
dat5 <- dat4
|
||||
|
||||
if (glossar) {
|
||||
# Check for wrong order of events: flipCard -> openPopup -> openTopic
|
||||
@ -126,8 +127,7 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
||||
dat7$glossar <- NULL
|
||||
}
|
||||
|
||||
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "../data/tmp_intermediate-df.RData")
|
||||
|
||||
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "results/tmp_intermediate-df.RData")
|
||||
dat7
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user