Debugging; added rm_nochange_moves as argument
This commit is contained in:
parent
d264e0e267
commit
daadb7a691
@ -135,7 +135,7 @@ add_trace_moves <- function(data) {
|
||||
)
|
||||
|
||||
out <- dplyr::bind_rows(subdata_trace)
|
||||
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
|
||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
||||
rownames(out) <- NULL
|
||||
|
||||
# Make trace a consecutive number
|
||||
|
321
R/close_events.R
321
R/close_events.R
@ -1,6 +1,7 @@
|
||||
###########################################################################
|
||||
|
||||
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
|
||||
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup"),
|
||||
rm_nochange_moves) {
|
||||
|
||||
event <- match.arg(event)
|
||||
|
||||
@ -38,36 +39,53 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
||||
)
|
||||
|
||||
subdata <- subset(data, data$event %in% actions)
|
||||
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
|
||||
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date,
|
||||
subdata$timeMs), ]
|
||||
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
|
||||
num_start <- diff(c(0, which(subdata$event == actions[2])))
|
||||
if (utils::tail(subdata, 1)$time == "start") {
|
||||
num_start <- c(num_start, 1)
|
||||
}
|
||||
subdata$eventId <- rep(seq_along(num_start), num_start)
|
||||
subdata$eventId <- as.character(rep(seq_along(num_start), num_start))
|
||||
|
||||
# remove start and stop events following directly each other
|
||||
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")],
|
||||
fromLast = TRUE), ]
|
||||
if (event == "move") {
|
||||
# Remove start events following directly each other for move events
|
||||
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")],
|
||||
fromLast = TRUE), ]
|
||||
} # there are so many that this is too time inefficient when done as below
|
||||
|
||||
# Remove stop events following directly each other
|
||||
id_stop <- which(subdata$event == actions[2])
|
||||
id_rm_stop <- id_stop[diff(id_stop) == 1]
|
||||
if (length(id_rm_stop) != 0) {
|
||||
subdata <- subdata[-(id_rm_stop + 1), ]
|
||||
}
|
||||
|
||||
# remove eventIds associated with more than one trace, usually logging
|
||||
# errors that I cannot resolve
|
||||
corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace, subdata) != 0) != 1))
|
||||
pbapply::pboptions(style = 3, char = "=")
|
||||
|
||||
if (event != "move") {
|
||||
# Fix eventIds for start events following each other
|
||||
cat("\n########## Checking unclosed start events. This can take awhile...",
|
||||
"\n")
|
||||
subdata_list <- pbapply::pblapply(unique(subdata$eventId),
|
||||
fix_start_eventIds, df = subdata)
|
||||
subdata <- dplyr::bind_rows(subdata_list)
|
||||
}
|
||||
|
||||
# Remove eventIds associated with more than one trace, usually logging
|
||||
# errors that cannot be resolved
|
||||
corrupt_eventIds <- names(which(rowSums(stats::xtabs( ~ eventId + trace,
|
||||
subdata) != 0) != 1))
|
||||
subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]
|
||||
|
||||
if (event == "flipCard") {
|
||||
subdata$eventId <- subdata$trace
|
||||
}
|
||||
# if (event == "flipCard") {
|
||||
# subdata$eventId <- subdata$trace
|
||||
# }
|
||||
|
||||
subdata_split <- split(subdata, ~ fileId)
|
||||
|
||||
pbapply::pboptions(style = 3, char = "=")
|
||||
|
||||
cat("\n########## Closing start/stop events. This can take awhile...",
|
||||
"\n")
|
||||
subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape,
|
||||
direction = "wide",
|
||||
idvar = idvar,
|
||||
@ -76,98 +94,8 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
||||
|
||||
#which(sapply(subdata_split_wide, ncol) != ncol)
|
||||
|
||||
# fix log files with *only* start or *only* stop events
|
||||
add_variables <- function(data_split_wide, ncol,
|
||||
event = c("move", "flipCard", "openTopic", "openPopup")) {
|
||||
|
||||
if (ncol(data_split_wide) != ncol) {
|
||||
if (!any(grepl("start", names(data_split_wide)))) {
|
||||
data_split_wide$fileId.start <- NA
|
||||
data_split_wide$date.start <- NA
|
||||
data_split_wide$timeMs.start <- NA
|
||||
data_split_wide$x.start <- NA
|
||||
data_split_wide$y.start <- NA
|
||||
data_split_wide$scale.start <- NA
|
||||
data_split_wide$rotation.start <- NA
|
||||
|
||||
event <- match.arg(event)
|
||||
|
||||
switch(event,
|
||||
"move" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"glossar", "eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"date.stop",
|
||||
"timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop",
|
||||
"rotation.stop")]
|
||||
},
|
||||
"flipCard" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"glossar", "trace",
|
||||
"eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"date.stop",
|
||||
"timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop",
|
||||
"rotation.stop")]
|
||||
},
|
||||
"openTopic" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"topicNumber",
|
||||
"glossar", "trace",
|
||||
"eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"date.stop",
|
||||
"timeMs.stop",
|
||||
"x.stop", "y.stop",
|
||||
"scale.stop",
|
||||
"rotation.stop")]
|
||||
},
|
||||
"openPopup" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"popup", "glossar",
|
||||
"trace", "eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"date.stop",
|
||||
"timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop",
|
||||
"rotation.stop")]
|
||||
}
|
||||
)
|
||||
} else if (!any(grepl("stop", names(data_split_wide)))) {
|
||||
data_split_wide$fileId.stop <- NA
|
||||
data_split_wide$date.stop <- NA
|
||||
data_split_wide$timeMs.stop <- NA
|
||||
data_split_wide$x.stop <- NA
|
||||
data_split_wide$y.stop <- NA
|
||||
data_split_wide$scale.stop <- NA
|
||||
data_split_wide$rotation.stop <- NA
|
||||
}
|
||||
}
|
||||
data_split_wide
|
||||
}
|
||||
|
||||
# Add start and stop variables that get lost because events span more
|
||||
# than one log file
|
||||
subdata_split_wide <- lapply(subdata_split_wide, add_variables, ncol = ncol,
|
||||
event = event)
|
||||
|
||||
@ -179,24 +107,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
||||
corrupt <- data_wide[select, ]
|
||||
corrupt$identi <- "corrupt"
|
||||
|
||||
close_open_eventIds <- function(df, eventId) {
|
||||
dfid <- df[df$eventId == eventId, ]
|
||||
dfid <- dfid[!is.na(dfid$eventId), ]
|
||||
dfid <- dfid[order(dfid$fileId.start), ]
|
||||
if (nrow(dfid) == 2) {
|
||||
out <- dfid[1, ]
|
||||
out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
|
||||
"rotation.stop")] <-
|
||||
dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop", "rotation.stop")]
|
||||
} else if (nrow(dfid) > 2) {
|
||||
stop("More than two rows for open eventIds. Something is wrong!")
|
||||
} else {
|
||||
out <- dfid
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
# Close events spanning more than one log file
|
||||
fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
|
||||
close_open_eventIds, df = corrupt))
|
||||
|
||||
@ -217,21 +128,26 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
||||
data_wide$rotationDegree <- data_wide$rotation.stop -
|
||||
data_wide$rotation.start
|
||||
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
|
||||
# remove moves without any change
|
||||
move_wide <- data_wide[data_wide$distance != 0 &
|
||||
data_wide$rotationDegree != 0 &
|
||||
data_wide$scaleSize != 1, ]
|
||||
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
|
||||
"lines containing move events were removed since they did",
|
||||
"\nnot contain any change"), fill = TRUE)
|
||||
data_wide <- move_wide
|
||||
|
||||
# Remove moves without any change
|
||||
if (rm_nochange_moves) {
|
||||
d1 <- nrow(data_wide)
|
||||
data_wide <- data_wide[data_wide$distance != 0 &
|
||||
data_wide$rotationDegree != 0 &
|
||||
data_wide$scaleSize != 1, ]
|
||||
d2 <- nrow(data_wide)
|
||||
cat(paste("INFORMATION:", d1 - d2,
|
||||
"lines containing move events were removed since they did",
|
||||
"\nnot contain any change"), fill = TRUE)
|
||||
}
|
||||
data_wide
|
||||
}
|
||||
|
||||
data_wide <- data_wide[order(data_wide$fileId.start,
|
||||
data_wide$date.start,
|
||||
data_wide$timeMs.start), ]
|
||||
|
||||
# fix durations that span more than one log file
|
||||
# Fix durations that span more than one log file
|
||||
if (event != "move") {
|
||||
tab <- colSums(stats::xtabs( ~ fileId + trace, subdata) != 0)
|
||||
number_logfiles <- data.frame(trace = names(tab), nlogfile = tab)
|
||||
@ -262,3 +178,142 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
||||
out
|
||||
}
|
||||
|
||||
###########################################################################
|
||||
|
||||
# Fix eventIds for start events following each other
|
||||
|
||||
fix_start_eventIds <- function(df, eventId) {
|
||||
dfid <- df[df$eventId == eventId, ]
|
||||
if (nrow(dfid) > 2) {
|
||||
dfid.start <- dfid[dfid$time == "start", ]
|
||||
dfid$eventId[dfid$time == "start"] <-
|
||||
c(paste(utils::head(dfid.start$eventId, nrow(dfid.start) - 1),
|
||||
1:(nrow(dfid.start) - 1), sep = ":"),
|
||||
utils::tail(dfid.start$eventId, 1))
|
||||
}
|
||||
dfid
|
||||
}
|
||||
|
||||
###########################################################################
|
||||
|
||||
# Add start and stop variables that get lost because events span more
|
||||
# than one log file
|
||||
|
||||
add_variables <- function(data_split_wide, ncol,
|
||||
event = c("move", "flipCard", "openTopic",
|
||||
"openPopup")) {
|
||||
|
||||
if (ncol(data_split_wide) != ncol) {
|
||||
if (!any(grepl("start", names(data_split_wide)))) {
|
||||
data_split_wide$fileId.start <- NA
|
||||
data_split_wide$date.start <- NA
|
||||
data_split_wide$timeMs.start <- NA
|
||||
data_split_wide$x.start <- NA
|
||||
data_split_wide$y.start <- NA
|
||||
data_split_wide$scale.start <- NA
|
||||
data_split_wide$rotation.start <- NA
|
||||
|
||||
event <- match.arg(event)
|
||||
|
||||
switch(event,
|
||||
"move" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"glossar", "eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"fileId.stop",
|
||||
"date.stop",
|
||||
"timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop",
|
||||
"rotation.stop")]
|
||||
},
|
||||
"flipCard" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"glossar", "trace",
|
||||
"eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"fileId.stop",
|
||||
"date.stop",
|
||||
"timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop",
|
||||
"rotation.stop")]
|
||||
},
|
||||
"openTopic" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"topicNumber",
|
||||
"glossar", "trace",
|
||||
"eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"fileId.stop",
|
||||
"date.stop",
|
||||
"timeMs.stop",
|
||||
"x.stop", "y.stop",
|
||||
"scale.stop",
|
||||
"rotation.stop")]
|
||||
},
|
||||
"openPopup" = {
|
||||
data_split_wide <- data_split_wide[, c("folder", "artwork",
|
||||
"popup", "glossar",
|
||||
"trace", "eventId",
|
||||
"fileId.start",
|
||||
"date.start",
|
||||
"timeMs.start",
|
||||
"x.start", "y.start",
|
||||
"scale.start",
|
||||
"rotation.start",
|
||||
"fileId.stop",
|
||||
"date.stop",
|
||||
"timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop",
|
||||
"rotation.stop")]
|
||||
}
|
||||
)
|
||||
} else if (!any(grepl("stop", names(data_split_wide)))) {
|
||||
data_split_wide$fileId.stop <- NA
|
||||
data_split_wide$date.stop <- NA
|
||||
data_split_wide$timeMs.stop <- NA
|
||||
data_split_wide$x.stop <- NA
|
||||
data_split_wide$y.stop <- NA
|
||||
data_split_wide$scale.stop <- NA
|
||||
data_split_wide$rotation.stop <- NA
|
||||
}
|
||||
}
|
||||
data_split_wide
|
||||
}
|
||||
|
||||
###########################################################################
|
||||
|
||||
# Close events spanning more than one log file
|
||||
|
||||
close_open_eventIds <- function(df, eventId) {
|
||||
dfid <- df[df$eventId == eventId, ]
|
||||
dfid <- dfid[!is.na(dfid$eventId), ]
|
||||
dfid <- dfid[order(dfid$fileId.start), ]
|
||||
if (nrow(dfid) == 2) {
|
||||
out <- dfid[1, ]
|
||||
out[, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop", "y.stop", "scale.stop",
|
||||
"rotation.stop")] <-
|
||||
dfid[2, c("fileId.stop", "date.stop", "timeMs.stop", "x.stop",
|
||||
"y.stop", "scale.stop", "rotation.stop")]
|
||||
} else if (nrow(dfid) > 2) {
|
||||
stop("More than two rows for open eventIds. Something is wrong!")
|
||||
} else {
|
||||
out <- dfid
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
|
@ -6,11 +6,16 @@
|
||||
#' @param data Data frame of raw log files created with `parse_logfiles()`.
|
||||
#' See `?parse_logfiles` for more details.
|
||||
#' @param xmlpath Path to folder where XML definitions of artworks live.
|
||||
#' @param case_cutoff Number in seconds how long time interval between
|
||||
#' different cases should be.
|
||||
#' @param rm_nochange_moves Logical. Should move events that record no
|
||||
#' change, meaning distance and rotationDegree are 0 and scaleSize is 1, be
|
||||
#' removed. Default is TRUE.
|
||||
#' @return Data frame.
|
||||
#' @export
|
||||
#' @examples
|
||||
#' # tbd
|
||||
create_eventlogs <- function(data, xmlpath) {
|
||||
create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves = TRUE) {
|
||||
|
||||
if (!lubridate::is.POSIXt(data$date)){
|
||||
cat("########## Converting variable `date` to POSIXct ##########", "\n")
|
||||
@ -40,47 +45,46 @@ create_eventlogs <- function(data, xmlpath) {
|
||||
|
||||
# Close events
|
||||
cat("\n\n########## Closing events... ##########", "\n")
|
||||
c1 <- close_events(dat1, "move")
|
||||
c1 <- close_events(dat1, "move", rm_nochange_moves = rm_nochange_moves)
|
||||
cat("## --> move events closed.", "\n")
|
||||
c2 <- close_events(dat1, "flipCard")
|
||||
c2 <- close_events(dat1, "flipCard", rm_nochange_moves = rm_nochange_moves)
|
||||
cat("## --> flipCard events closed.", "\n")
|
||||
c3 <- close_events(dat1, "openTopic")
|
||||
c3 <- close_events(dat1, "openTopic", rm_nochange_moves = rm_nochange_moves)
|
||||
cat("## --> openTopic events closed.", "\n")
|
||||
c4 <- close_events(dat1, "openPopup")
|
||||
c4 <- close_events(dat1, "openPopup", rm_nochange_moves = rm_nochange_moves)
|
||||
cat("## --> openPopup events closed.", "\n")
|
||||
dat2 <- rbind(c1, c2, c3, c4)
|
||||
|
||||
dat2 <- dat2[order(dat2$fileId, dat2$date.start, dat2$timeMs.start), ]
|
||||
dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
|
||||
|
||||
# Remove all events that do not have a `date.start`
|
||||
d1 <- nrow(dat2)
|
||||
dat2 <- dat2[!is.na(dat2$date.start), ]
|
||||
d2 <- nrow(dat2)
|
||||
if(d1 > d2) {
|
||||
warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n"))
|
||||
}
|
||||
# d1 <- nrow(dat2)
|
||||
# dat2 <- dat2[!is.na(dat2$date.start), ]
|
||||
# d2 <- nrow(dat2)
|
||||
# if(d1 > d2) {
|
||||
# warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n"))
|
||||
# }
|
||||
|
||||
rownames(dat2) <- NULL
|
||||
# rownames(dat2) <- NULL
|
||||
|
||||
# Add case variable ######################################################
|
||||
cat("\n########## Adding case and eventId variables... ##########", "\n\n")
|
||||
dat3 <- add_case(dat2)
|
||||
dat3 <- add_case(dat2, cutoff = case_cutoff)
|
||||
|
||||
# Add event ID ###########################################################
|
||||
dat3$eventId <- seq_len(nrow(dat3))
|
||||
dat3 <- dat3[, c("fileId", "folder", "eventId", "case",
|
||||
"trace", "glossar", "event", "artwork",
|
||||
dat3 <- dat3[, c("folder", "eventId", "case", "trace", "glossar",
|
||||
"event", "artwork", "fileId.start", "fileId.stop",
|
||||
"date.start", "date.stop", "timeMs.start",
|
||||
"timeMs.stop", "duration", "topicNumber", "popup",
|
||||
"x.start", "y.start", "x.stop", "y.stop",
|
||||
"distance", "scale.start", "scale.stop",
|
||||
"scaleSize", "rotation.start", "rotation.stop",
|
||||
"rotationDegree")]
|
||||
"x.start", "y.start", "x.stop", "y.stop", "distance",
|
||||
"scale.start", "scale.stop", "scaleSize",
|
||||
"rotation.start", "rotation.stop", "rotationDegree")]
|
||||
|
||||
# Add trace for move events ##############################################
|
||||
cat("\n\n########## Adding trace variable for move events... ##########", "\n")
|
||||
dat4 <- add_trace_moves(dat3)
|
||||
|
||||
dat4
|
||||
}
|
||||
|
||||
|
||||
|
@ -85,7 +85,7 @@ add_topic <- function(data, topics) {
|
||||
#out <- do.call(rbind, dat_topic)
|
||||
out <- dplyr::bind_rows(dat_topic)
|
||||
out$topicIndex <- as.numeric(out$topicIndex)
|
||||
out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
|
||||
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
|
||||
rownames(out) <- NULL
|
||||
out
|
||||
}
|
||||
|
@ -4,13 +4,20 @@
|
||||
\alias{create_eventlogs}
|
||||
\title{Creating log events from raw log files.}
|
||||
\usage{
|
||||
create_eventlogs(data, xmlpath)
|
||||
create_eventlogs(data, xmlpath, case_cutoff = 20, rm_nochange_moves = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
|
||||
See \code{?parse_logfiles} for more details.}
|
||||
|
||||
\item{xmlpath}{Path to folder where XML definitions of artworks live.}
|
||||
|
||||
\item{case_cutoff}{Number in seconds how long time interval between
|
||||
different cases should be.}
|
||||
|
||||
\item{rm_nochange_moves}{Logical. Should move events that record no
|
||||
change, meaning distance and rotationDegree are 0 and scaleSize is 1, be
|
||||
removed. Default is TRUE.}
|
||||
}
|
||||
\value{
|
||||
Data frame.
|
||||
|
Loading…
Reference in New Issue
Block a user