###########################################################################

# Pair the start and stop log entries of one event type into one row per
# event ("closed" events).
#
# Arguments:
#   data   Data frame of raw log entries. The function reads the columns
#          fileId, folder, date, timeMs, event, artwork, trace, glossar,
#          popup, topicNumber, x, y, scale and rotation.
#   event  Event type to close; one of "move", "flipCard", "openTopic" or
#          "openPopup" (default: "move").
#
# Value:
#   Data frame with one row per event and the columns listed in `out_cols`
#   below: start/stop file, date, time, position, scale and rotation plus the
#   derived duration, distance, scaleSize and rotationDegree. A zero-row data
#   frame with the same columns is returned when `data` holds no usable
#   entries of the requested type.
#
# Side effects:
#   Shows a pbapply progress bar (the pbapply options are restored on exit)
#   and, for "move", prints the number of removed no-change moves with cat().
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {

  event <- match.arg(event)

  # Per event type: the raw labels of the (start, stop) actions, the columns
  # identifying one event in the wide format, and the columns left out of the
  # reshape (they are re-added as NA further down).
  spec <- switch(event,
    "move" = list(
      actions = c("Transform start", "Transform stop"),
      idvar   = c("folder", "eventId", "artwork", "glossar"),
      drop    = c("popup", "topicNumber", "trace", "event")
    ),
    "flipCard" = list(
      actions = c("Show Info", "Show Front"),
      idvar   = c("folder", "trace", "eventId", "artwork", "glossar"),
      drop    = c("popup", "topicNumber", "event")
    ),
    "openTopic" = list(
      actions = c("Artwork/OpenCard", "Artwork/CloseCard"),
      idvar   = c("folder", "eventId", "trace", "glossar",
                  "artwork", "topicNumber"),
      drop    = c("popup", "event")
    ),
    "openPopup" = list(
      actions = c("ShowPopup", "HidePopup"),
      idvar   = c("folder", "eventId", "trace", "glossar",
                  "artwork", "popup"),
      drop    = c("topicNumber", "event")
      # TODO: Should topicNumber maybe also be filled in for "openPopup"?
    )
  )
  actions   <- spec$actions
  idvar     <- spec$idvar
  drop_cols <- spec$drop

  # Columns that exist once per time point after the reshape to wide format
  state_vars <- c("fileId", "date", "timeMs", "x", "y", "scale", "rotation")
  start_cols <- paste0(state_vars, ".start")
  stop_cols  <- paste0(state_vars, ".stop")

  out_cols <- c("fileId.start", "fileId.stop", "folder", "event",
                "artwork", "trace", "glossar", "date.start",
                "date.stop", "timeMs.start", "timeMs.stop",
                "duration", "topicNumber", "popup", "x.start",
                "y.start", "x.stop", "y.stop", "distance",
                "scale.start", "scale.stop", "scaleSize",
                "rotation.start", "rotation.stop", "rotationDegree")

  # Zero-row result with the regular output columns
  empty_result <- function() {
    as.data.frame(matrix(NA, nrow = 0L, ncol = length(out_cols),
                         dimnames = list(NULL, out_cols)))
  }

  subdata <- data[data$event %in% actions, ]
  if (nrow(subdata) == 0L) {
    return(empty_result())
  }

  subdata <- subdata[order(subdata$artwork, subdata$popup,
                           subdata$date, subdata$timeMs), ]
  subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")

  # Every stop entry closes one eventId: all entries up to and including a
  # stop share an id. Entries after the last stop (events that are still
  # open) form one final id, however many of them there are.
  num_start <- diff(c(0, which(subdata$event == actions[2])))
  n_open    <- nrow(subdata) - sum(num_start)
  if (n_open > 0) {
    num_start <- c(num_start, n_open)
  }
  subdata$eventId <- rep(seq_along(num_start), num_start)

  # remove start and stop events following directly each other
  subdata <- subdata[!duplicated(subdata[, c("event", "eventId")],
                                 fromLast = TRUE), ]
  id_stop <- which(subdata$event == actions[2])
  # which() is essential: diff() is one element shorter than id_stop, and a
  # too-short *logical* index would be recycled and could select the last stop
  id_rm_stop <- id_stop[which(diff(id_stop) == 1)]
  if (length(id_rm_stop) > 0) {
    subdata <- subdata[-(id_rm_stop + 1), ]
  }

  # remove eventIds associated with more than one trace, usually logging
  # errors that I cannot resolve
  traces_per_id    <- rowSums(stats::xtabs(~ eventId + trace, subdata) != 0)
  corrupt_eventIds <- names(which(traces_per_id != 1))
  subdata <- subdata[!subdata$eventId %in% corrupt_eventIds, ]

  if (event == "flipCard") {
    subdata$eventId <- subdata$trace
  }

  if (nrow(subdata) == 0L) {
    return(empty_result())
  }

  # Reshape each log file separately to one row per event
  subdata_split <- split(subdata, subdata$fileId, drop = TRUE)

  old_pbopts <- pbapply::pboptions(style = 3, char = "=")
  on.exit(pbapply::pboptions(old_pbopts), add = TRUE)

  subdata_split_wide <- pbapply::pblapply(subdata_split, stats::reshape,
                                          direction = "wide",
                                          idvar = idvar,
                                          timevar = "time",
                                          drop = drop_cols)

  # fix log files with *only* start or *only* stop events: add the missing
  # half as NA columns while keeping every existing column (in particular
  # fileId.stop, which is needed when events are paired across files)
  add_missing_columns <- function(wide) {
    for (col in setdiff(c(start_cols, stop_cols), names(wide))) {
      wide[[col]] <- rep(NA, nrow(wide))
    }
    wide
  }
  subdata_split_wide <- lapply(subdata_split_wide, add_missing_columns)

  data_wide <- dplyr::bind_rows(subdata_split_wide)

  # Rows lacking a start or a stop within their own log file are "corrupt"
  is_open <- is.na(data_wide$date.start) | is.na(data_wide$date.stop)
  data_wide$identi <- ifelse(is_open, "corrupt", "correct")
  correct <- data_wide[!is_open, ]
  corrupt <- data_wide[is_open, ]

  # Merge the two half rows (start in one file, stop in another) of one
  # eventId into a single row; a lone half row is returned unchanged.
  close_open_eventIds <- function(df, eventId) {
    dfid <- df[which(df$eventId == eventId), ]
    # rows with a start sort first, NA fileId.start (stop only) sorts last
    dfid <- dfid[order(dfid$fileId.start), ]
    if (nrow(dfid) > 2) {
      stop("More than two rows for open eventIds. Something is wrong!")
    }
    if (nrow(dfid) < 2) {
      return(dfid)
    }
    out <- dfid[1, ]
    out[, stop_cols] <- dfid[2, stop_cols]
    out
  }

  fixed <- dplyr::bind_rows(lapply(unique(corrupt$eventId),
                                   close_open_eventIds, df = corrupt))

  data_wide <- dplyr::bind_rows(correct, fixed)
  n_rows    <- nrow(data_wide)

  # Re-add the columns left out of the reshape, plus the derived measures
  for (d in drop_cols) {
    data_wide[[d]] <- rep(NA, n_rows)
  }
  data_wide$distance       <- rep(NA, n_rows)
  data_wide$scaleSize      <- rep(NA, n_rows)
  data_wide$rotationDegree <- rep(NA, n_rows)

  data_wide$event    <- rep(event, n_rows)
  data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start

  if (event == "move") {
    # Euclidean distance between start and stop position
    data_wide$distance <- sqrt((data_wide$x.stop - data_wide$x.start)^2 +
                               (data_wide$y.stop - data_wide$y.start)^2)
    data_wide$rotationDegree <- data_wide$rotation.stop -
      data_wide$rotation.start
    data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start

    # remove moves without any change, i.e. keep a move as soon as position,
    # rotation *or* scale differs; rows where this cannot be determined
    # (unpaired events, NA values) are kept as they are
    changed <- data_wide$distance != 0 |
      data_wide$rotationDegree != 0 |
      data_wide$scaleSize != 1
    move_wide <- data_wide[is.na(changed) | changed, ]
    cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
              "lines containing move events were removed since they did",
              "\nnot contain any change"), fill = TRUE)
    data_wide <- move_wide
  }

  # fix durations that span more than one log file
  # NOTE(review): 600000 is presumably the length of one log file in ms
  # (10 minutes) -- confirm against the logging setup
  logfile_ms <- 600000
  idx <- which(data_wide$identi == "corrupt")
  if (event != "move") {
    # number of log files in which each trace occurs
    tab <- colSums(stats::xtabs(~ fileId + trace, subdata) != 0)
    nlogfile <- unname(tab[match(as.character(data_wide$trace), names(tab))])
    data_wide$duration[idx] <- (nlogfile[idx] - 1) * logfile_ms -
      data_wide$timeMs.start[idx] + data_wide$timeMs.stop[idx]
    # TODO: This assumes that no log files are skipped
    # --> Is this assumption really valid??
  } else {
    # there should be no movements spanning more than two log files!
    data_wide$duration[idx] <- logfile_ms -
      data_wide$timeMs.start[idx] + data_wide$timeMs.stop[idx]
  }

  # Chronological order; done last so that no later step can undo it
  data_wide <- data_wide[order(data_wide$fileId.start,
                               data_wide$date.start,
                               data_wide$timeMs.start), ]

  out <- data_wide[, out_cols]
  rownames(out) <- NULL
  out
}