Debugging and refactoring of code that closes events spanning more than one log file; tests on complete data set
This commit is contained in:
parent
2340e081ff
commit
64241bf5f9
@ -73,10 +73,18 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Remove eventIds associated with more than one trace, usually logging
|
# Remove eventIds associated with more than one trace, usually logging
|
||||||
# errors that cannot be resolved
|
# errors that cannot be resolved for openTopic or openPopup
|
||||||
corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace,
|
if (event %in% c("openTopic", "openPopup")) {
|
||||||
subdata) != 0) != 1))
|
# corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace,
|
||||||
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
|
# subdata) != 0) != 1))
|
||||||
|
# --> does not run on complete data set
|
||||||
|
|
||||||
|
subdata_eid <- split(subdata, ~ eventId)
|
||||||
|
tmp <- sapply(subdata_eid, function(x) length(stats::xtabs( ~ trace, x)))
|
||||||
|
corrupt_eventIds <- names(tmp[tmp > 1])
|
||||||
|
|
||||||
|
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
|
||||||
|
}
|
||||||
|
|
||||||
# if (event == "flipCard") {
|
# if (event == "flipCard") {
|
||||||
# subdata$eventId <- subdata$trace
|
# subdata$eventId <- subdata$trace
|
||||||
@ -96,10 +104,9 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
|||||||
|
|
||||||
# Add start and stop variables that get lost because events span more
|
# Add start and stop variables that get lost because events span more
|
||||||
# than one log file
|
# than one log file
|
||||||
subdata_split_wide <- lapply(subdata_split_wide, add_variables, ncol = ncol,
|
data_wide <-
|
||||||
event = event)
|
lapply(subdata_split_wide, add_variables, ncol = ncol, event = event) |>
|
||||||
|
dplyr::bind_rows()
|
||||||
data_wide <- dplyr::bind_rows(subdata_split_wide)
|
|
||||||
|
|
||||||
select <- is.na(data_wide$date.start) | is.na(data_wide$date.stop)
|
select <- is.na(data_wide$date.start) | is.na(data_wide$date.stop)
|
||||||
correct <- data_wide[!select, ]
|
correct <- data_wide[!select, ]
|
||||||
@ -108,8 +115,11 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
|||||||
corrupt$identi <- "corrupt"
|
corrupt$identi <- "corrupt"
|
||||||
|
|
||||||
# Close events spanning more than one log file
|
# Close events spanning more than one log file
|
||||||
fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
|
cat("\n########## Closing start/stop events spanning several log files...",
|
||||||
close_open_eventIds, df = corrupt))
|
"\n")
|
||||||
|
fixed <-
|
||||||
|
pbapply::pblapply(unique(corrupt$eventId), close_open_eventIds, df = corrupt) |>
|
||||||
|
dplyr::bind_rows()
|
||||||
|
|
||||||
data_wide <- rbind(correct, fixed)
|
data_wide <- rbind(correct, fixed)
|
||||||
|
|
||||||
@ -136,37 +146,15 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
|||||||
data_wide$rotationDegree != 0 &
|
data_wide$rotationDegree != 0 &
|
||||||
data_wide$scaleSize != 1, ]
|
data_wide$scaleSize != 1, ]
|
||||||
d2 <- nrow(data_wide)
|
d2 <- nrow(data_wide)
|
||||||
cat(paste("INFORMATION:", d1 - d2,
|
cat(paste("\nINFORMATION:", d1 - d2,
|
||||||
"lines containing move events were removed since they did",
|
"lines containing move events were removed since they did",
|
||||||
"\nnot contain any change"), fill = TRUE)
|
"\nnot contain any change"), fill = TRUE)
|
||||||
}
|
}
|
||||||
data_wide
|
|
||||||
}
|
}
|
||||||
|
|
||||||
data_wide <- data_wide[order(data_wide$fileId.start,
|
out <- data_wide[order(data_wide$fileId.start,
|
||||||
data_wide$date.start,
|
data_wide$date.start,
|
||||||
data_wide$timeMs.start), ]
|
data_wide$timeMs.start),
|
||||||
|
|
||||||
# Fix durations that span more than one log file
|
|
||||||
if (event != "move") {
|
|
||||||
tab <- colSums(stats::xtabs( ~ fileId + trace, subdata) != 0)
|
|
||||||
number_logfiles <- data.frame(trace = names(tab), nlogfile = tab)
|
|
||||||
data_wide <- merge(data_wide, number_logfiles, by = "trace", all.x = TRUE)
|
|
||||||
data_wide$duration[data_wide$identi == "corrupt"] <-
|
|
||||||
(data_wide$nlogfile[data_wide$identi == "corrupt"] - 1) * 600000 -
|
|
||||||
data_wide$timeMs.start[data_wide$identi == "corrupt"] +
|
|
||||||
data_wide$timeMs.stop[data_wide$identi == "corrupt"]
|
|
||||||
# TODO: This assumes that no log files are skipped
|
|
||||||
# --> Is this assumption really valid??
|
|
||||||
} else {
|
|
||||||
data_wide$duration[data_wide$identi == "corrupt"] <-
|
|
||||||
600000 -
|
|
||||||
data_wide$timeMs.start[data_wide$identi == "corrupt"] +
|
|
||||||
data_wide$timeMs.stop[data_wide$identi == "corrupt"]
|
|
||||||
} # there should be no movements spanning more than two log files!
|
|
||||||
|
|
||||||
out <- data_wide[# !apply(data_wide, 1, function(x) all(is.na(x))),
|
|
||||||
# remove all NA rows
|
|
||||||
c("fileId.start", "fileId.stop", "folder", "event",
|
c("fileId.start", "fileId.stop", "folder", "event",
|
||||||
"artwork", "trace", "glossar", "date.start",
|
"artwork", "trace", "glossar", "date.start",
|
||||||
"date.stop", "timeMs.start", "timeMs.stop",
|
"date.stop", "timeMs.start", "timeMs.stop",
|
||||||
|
@ -85,8 +85,24 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves
|
|||||||
cat("\n\n########## Adding trace variable for move events... ##########", "\n")
|
cat("\n\n########## Adding trace variable for move events... ##########", "\n")
|
||||||
dat4 <- add_trace_moves(dat3)
|
dat4 <- add_trace_moves(dat3)
|
||||||
|
|
||||||
|
|
||||||
|
# Fix durations that span more than one log file #########################
|
||||||
|
levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop)))
|
||||||
|
dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId)
|
||||||
|
dat4$fIdNum.stop <- factor(dat4$fileId.stop, levels = levels_fId)
|
||||||
|
dat4$fIdNum.start <- as.numeric(dat4$fIdNum.start)
|
||||||
|
dat4$fIdNum.stop <- as.numeric(dat4$fIdNum.stop)
|
||||||
|
|
||||||
|
dat4$fIdDiff <- dat4$fIdNum.stop - dat4$fIdNum.start
|
||||||
|
|
||||||
|
# Remove moves where stop is before start
|
||||||
|
dat4 <- dat4[which(dat4$fIdDiff > 0), ]
|
||||||
|
|
||||||
|
dat4$duration[dat4$fIdDiff > 0] <- dat4$fIdDiff * 600000 -
|
||||||
|
dat4$timeMs.start + dat4$timeMs.stop
|
||||||
|
|
||||||
# Remove fragmented traces ###############################################
|
# Remove fragmented traces ###############################################
|
||||||
tab <- xtabs( ~ trace + event, dat4)
|
tab <- stats::xtabs( ~ trace + event, dat4)
|
||||||
|
|
||||||
fragments <- NULL
|
fragments <- NULL
|
||||||
|
|
||||||
@ -100,7 +116,6 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
dat5 <- dat4[!dat4$trace %in% fragments, ]
|
dat5 <- dat4[!dat4$trace %in% fragments, ]
|
||||||
# TODO: Should be tested more thouroughly on complete data set
|
|
||||||
|
|
||||||
dat5
|
dat5
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user