Optimized close_events() so it runs on complete data set, uses dplyr at the moment

This commit is contained in:
2023-09-16 16:29:24 +02:00
parent 1f55608ebf
commit b242eaddf3
2 changed files with 49 additions and 12 deletions
+32 -12
View File
@@ -122,21 +122,25 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
actions <- c("Transform start", "Transform stop")
idvar <- c("eventId", "artwork", "glossar")
drop <- c("popup", "topicNumber", "trace", "event")
ncol <- 17
} else if (event == "flipCard") {
actions <- c("Show Info", "Show Front")
idvar <- c("trace", "artwork", "glossar")
drop <- c("popup", "topicNumber", "eventId", "event")
ncol <- 17
} else if (event == "openTopic") {
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
idvar <- c("eventId", "trace", "glossar", "artwork", "topicNumber")
drop <- c("popup", "event")
ncol <- 19
} else if (event == "openPopup") {
actions <- c("ShowPopup", "HidePopup")
idvar <- c("eventId", "trace", "glossar", "artwork", "popup")
drop <- c("topicNumber", "event")
ncol <- 19
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
} else {
@@ -149,11 +153,10 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
num_start <- diff(c(0, which(subdata$event == actions[2])))
if (tail(subdata, 1)$time == "start") {
num_start <- c(num_start, 1)
}
subdata$eventId <- rep(seq_along(num_start), num_start)
# If last event is start event, it needs to be fixed:
# num_start <- c(num_start, 1)
# TODO: Needs to be caught in a function
# --> not anymore - still relevant???
if (event == "move") {
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
@@ -162,19 +165,36 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
subdata <- subdata[-(id_rm_stop + 1), ]
}
subdata_split <- split(subdata, subdata$fileId)
suppressWarnings(
data_wide <- reshape(subdata, direction = "wide",
idvar = idvar,
timevar = "time",
drop = drop)
subdata_split_wide <- lapply(subdata_split, reshape,
direction = "wide",
idvar = idvar,
timevar = "time",
drop = drop)
)
# suppressWarnings(
# data_wide <- reshape(subdata, direction = "wide",
# idvar = idvar,
# timevar = "time",
# drop = drop)
# )
# TODO: Suppress warnings? Better with tryCatch()?
# there is a pathological entry which gets deleted...
# df[df$trace == 4595, ]
# --> artwork 046 popup selene.xml gets opened twice
# TODO: This runs for quite some time
# --> Is this more efficient with tidyr::pivot_wider?
# remove entries with only start or stop events since they do not have
# all columns
subdata_split_wide <-
subdata_split_wide[-which(sapply(subdata_split_wide, ncol) != ncol)]
#data_wide <- do.call(rbind, subdata_split_wide)
# TODO: This runs quite some time
# --> There is a more efficient function in dplyr, which would also allow
# to keep the file IDs with only start or stop or a single entry...
data_wide <- dplyr::bind_rows(subdata_split_wide)
for (d in drop) data_wide[d] <- NA
@@ -194,8 +214,8 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
# remove moves without any change
move_wide <- data_wide[data_wide$distance != 0 &
data_wide$rotationDegree != 0 &
data_wide$scaleSize != 1, ]
data_wide$rotationDegree != 0 &
data_wide$scaleSize != 1, ]
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
"lines containing move events were removed since they did",
"\nnot contain any change"), fill = TRUE)