Changed how path variable for move events is created
This commit is contained in:
parent
8bedadd18e
commit
8233b151d9
107
R/add_path.R
107
R/add_path.R
@ -1,5 +1,8 @@
|
|||||||
###########################################################################
|
###########################################################################
|
||||||
add_path_items <- function(subdata) {
|
add_path_items <- function(subdata) {
|
||||||
|
|
||||||
|
pbapply::pboptions(style = 3, char = "=")
|
||||||
|
|
||||||
subdata_glossar <- subdata[subdata$item == "glossar", ]
|
subdata_glossar <- subdata[subdata$item == "glossar", ]
|
||||||
subdata_glossar$path <- NA
|
subdata_glossar$path <- NA
|
||||||
|
|
||||||
@ -52,6 +55,7 @@ add_path_items <- function(subdata) {
|
|||||||
|
|
||||||
###########################################################################
|
###########################################################################
|
||||||
add_path_glossar <- function(subdata, xmlpath) {
|
add_path_glossar <- function(subdata, xmlpath) {
|
||||||
|
# TODO: I think this needs to be completely redone
|
||||||
|
|
||||||
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
|
pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
|
||||||
style = 3)
|
style = 3)
|
||||||
@ -129,62 +133,93 @@ add_path <- function(data, xmlpath, glossar) {
|
|||||||
|
|
||||||
# Add path for moves
|
# Add path for moves
|
||||||
|
|
||||||
add_path_moves <- function(data) {
|
add_path_moves <- function(data, cutoff) {
|
||||||
|
|
||||||
pbapply::pboptions(style = 3, char = "=")
|
pbapply::pboptions(style = 3, char = "=")
|
||||||
|
|
||||||
path_max <- max(data$path, na.rm = TRUE)
|
subdata_item <- split(data, ~ item)
|
||||||
|
|
||||||
#subdata_art <- split(data, ~ item)
|
subdata_path <- pbapply::pblapply(subdata_item,
|
||||||
subdata_case <- split(data, ~ case)
|
add_path_subdata, cutoff = cutoff)
|
||||||
|
|
||||||
#subdata_list <- split(data, ~ item + case)
|
|
||||||
# --> does not work with complete data set
|
|
||||||
cat("Splitting data...", "\n")
|
|
||||||
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item)
|
|
||||||
subdata_list <- unlist(subdata_list, recursive = FALSE)
|
|
||||||
|
|
||||||
cat("Adding path...", "\n")
|
|
||||||
subdata_path <- pbapply::pblapply(subdata_list,
|
|
||||||
function(x) {
|
|
||||||
path_max <<- path_max + 1
|
|
||||||
add_path_subdata(x, max_path = path_max)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
out <- dplyr::bind_rows(subdata_path)
|
out <- dplyr::bind_rows(subdata_path)
|
||||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
||||||
|
# Make path a consecutive number
|
||||||
|
out$path <- as.numeric(factor(out$path, levels = unique(out$path)))
|
||||||
rownames(out) <- NULL
|
rownames(out) <- NULL
|
||||||
|
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
add_path_subdata <- function(subdata, max_path) {
|
add_path_subdata <- function(subdata, cutoff) {
|
||||||
|
index_flipCard <- which(subdata$event == "flipCard")
|
||||||
if (nrow(subdata) != 0) {
|
current_item <- unique(subdata$item)
|
||||||
|
for (j in seq_along(index_flipCard)) {
|
||||||
if (length(stats::na.omit(unique(subdata$path))) == 1) {
|
# forwards pass
|
||||||
subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path))
|
if (j < max(seq_along(index_flipCard))) {
|
||||||
} else if (length(stats::na.omit(unique(subdata$path))) > 1) {
|
for (i in seq(index_flipCard[j], index_flipCard[j + 1])) {
|
||||||
for (i in 1:nrow(subdata)) {
|
if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) {
|
||||||
if (subdata$event[i] == "move") {
|
timediff <- difftime(subdata$date.start[i],
|
||||||
if (i == 1) {
|
subdata$date.stop[index_flipCard[j]],
|
||||||
subdata$path[i] <- stats::na.omit(unique(subdata$path))[1]
|
units = "secs")
|
||||||
|
if (timediff <= cutoff){
|
||||||
|
subdata$path[i] <- subdata$path[index_flipCard[j]]
|
||||||
} else {
|
} else {
|
||||||
subdata$path[i] <- subdata$path[i - 1]
|
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (all(is.na(subdata$path))) {
|
} else {
|
||||||
for (i in 1:nrow(subdata)) {
|
for (i in seq(index_flipCard[j], nrow(subdata))) {
|
||||||
subdata$path[i] <- max_path
|
if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) {
|
||||||
|
timediff <- difftime(subdata$date.start[i],
|
||||||
|
subdata$date.stop[index_flipCard[j]],
|
||||||
|
units = "secs")
|
||||||
|
if (timediff <= cutoff) {
|
||||||
|
subdata$path[i] <- subdata$path[index_flipCard[j]]
|
||||||
|
} else {
|
||||||
|
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# backwards pass
|
||||||
|
if (j > min(seq_along(index_flipCard))) {
|
||||||
|
for (i in seq(index_flipCard[j - 1], index_flipCard[j])) {
|
||||||
|
if (grepl("mv", subdata$path[i])) {
|
||||||
|
timediff <- difftime(subdata$date.start[index_flipCard[j]],
|
||||||
|
subdata$date.stop[i],
|
||||||
|
units = "secs")
|
||||||
|
if (timediff <= cutoff){
|
||||||
|
subdata$path[i] <- subdata$path[index_flipCard[j]]
|
||||||
|
} else {
|
||||||
|
subdata$path[i] <- paste(current_item, "mv", j, sep = "_")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# fix moves with same path and timediff > cutoff
|
||||||
|
subdata_moves <- split(subdata, ~ path)
|
||||||
|
|
||||||
|
check_moves <- function(subsubdata, cutoff) {
|
||||||
|
if (any(grepl("mv", subsubdata$path))) {
|
||||||
|
for (i in seq_len(nrow(subsubdata) - 1)) {
|
||||||
|
timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i],
|
||||||
|
units = "secs")
|
||||||
|
if (timediff > cutoff) {
|
||||||
|
subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_")
|
||||||
|
} else {
|
||||||
|
subsubdata$path[i + 1] <- subsubdata$path[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
subsubdata
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff)
|
||||||
warning("subdata has nrow = 0")
|
subdata <- dplyr::bind_rows(subdata_path)
|
||||||
}
|
|
||||||
subdata
|
subdata
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,10 +54,14 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
|||||||
|
|
||||||
dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
|
dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
|
||||||
|
|
||||||
|
# Add path for move events ##############################################
|
||||||
|
cat("\n\n########## Adding path variable for move events... ##########", "\n")
|
||||||
|
dat3 <- add_path_moves(dat2)
|
||||||
|
|
||||||
# Add case variable ######################################################
|
# Add case variable ######################################################
|
||||||
cat("\n########## Adding case and eventId variables... ##########", "\n\n")
|
cat("\n########## Adding case variable... ##########", "\n\n")
|
||||||
dat3 <- add_case(dat2, cutoff = case_cutoff)
|
dat4 <- add_case(dat3, cutoff = case_cutoff)
|
||||||
dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start",
|
dat4 <- dat4[, c("fileId.start", "fileId.stop", "date.start",
|
||||||
"date.stop", "folder", "case", "path", "glossar",
|
"date.stop", "folder", "case", "path", "glossar",
|
||||||
"event", "item", "timeMs.start", "timeMs.stop",
|
"event", "item", "timeMs.start", "timeMs.stop",
|
||||||
"duration", "topic", "popup", "x.start", "y.start",
|
"duration", "topic", "popup", "x.start", "y.start",
|
||||||
@ -65,11 +69,6 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
|||||||
"scale.stop", "scaleSize", "rotation.start",
|
"scale.stop", "scaleSize", "rotation.start",
|
||||||
"rotation.stop", "rotationDegree")]
|
"rotation.stop", "rotationDegree")]
|
||||||
|
|
||||||
# Add path for move events ##############################################
|
|
||||||
cat("\n\n########## Adding path variable for move events... ##########", "\n")
|
|
||||||
dat4 <- add_path_moves(dat3)
|
|
||||||
|
|
||||||
|
|
||||||
# Fix durations that span more than one log file #########################
|
# Fix durations that span more than one log file #########################
|
||||||
levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop)))
|
levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop)))
|
||||||
dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId)
|
dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId)
|
||||||
@ -92,20 +91,22 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
|||||||
dat4$fIdDiff <- NULL
|
dat4$fIdDiff <- NULL
|
||||||
|
|
||||||
# Remove fragmented paths ###############################################
|
# Remove fragmented paths ###############################################
|
||||||
tab <- stats::xtabs( ~ path + event, dat4)
|
# tab <- stats::xtabs( ~ path + event, dat4)
|
||||||
|
|
||||||
fragments <- NULL
|
# fragments <- NULL
|
||||||
|
|
||||||
for (i in seq_len(nrow(tab))) {
|
# for (i in seq_len(nrow(tab))) {
|
||||||
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
# if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
||||||
fragments <- c(fragments, rownames(tab)[i])
|
# fragments <- c(fragments, rownames(tab)[i])
|
||||||
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
# } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
||||||
fragments <- c(fragments, rownames(tab)[i])
|
# fragments <- c(fragments, rownames(tab)[i])
|
||||||
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
# } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
||||||
fragments <- c(fragments, rownames(tab)[i])
|
# fragments <- c(fragments, rownames(tab)[i])
|
||||||
}
|
# }
|
||||||
}
|
# }
|
||||||
dat5 <- dat4[!dat4$path %in% fragments, ]
|
# dat5 <- dat4[!dat4$path %in% fragments, ]
|
||||||
|
# TODO: Decide if I want this or not - are all these log errors?
|
||||||
|
dat5 <- dat4
|
||||||
|
|
||||||
if (glossar) {
|
if (glossar) {
|
||||||
# Check for wrong order of events: flipCard -> openPopup -> openTopic
|
# Check for wrong order of events: flipCard -> openPopup -> openTopic
|
||||||
@ -126,8 +127,7 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
|
|||||||
dat7$glossar <- NULL
|
dat7$glossar <- NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "../data/tmp_intermediate-df.RData")
|
if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "results/tmp_intermediate-df.RData")
|
||||||
|
|
||||||
dat7
|
dat7
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user