Fixed add_trace_moves() so it works with complete data set; first version that creates log event file for complete data set without crashing

This commit is contained in:
Nora Wickelmaier 2023-09-19 09:19:50 +02:00
parent 5ab190a4d8
commit fb9db8b908
2 changed files with 25 additions and 17 deletions

View File

@ -13,13 +13,13 @@ dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
dat <- subset(dat0, !(dat0$event %in% c("Start Application",
"Show Application")))
#save(dat, file = "tmp/dat.RData")
save(dat, file = "tmp/dat.RData")
# Add trace variable #####################################################
cat("########## Adding trace variable... ##########", "\n")
dat1 <- add_trace(dat)
#save(dat1, file = "tmp/dat1.RData")
save(dat1, file = "tmp/dat1.RData")
# Close events
cat("########## Closing events... ##########", "\n")
@ -44,7 +44,7 @@ dat2 <- dat2[!is.na(dat2$date.start), ]
rownames(dat2) <- NULL
# TODO: Throw warning about this
#save(dat2, file = "tmp/dat2.RData")
save(dat2, file = "tmp/dat2.RData")
# Add case variable ######################################################
cat("########## Adding case and eventId variables... ##########", "\n")
@ -61,23 +61,25 @@ dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
"scaleSize", "rotation.start", "rotation.stop",
"rotationDegree")]
#save(dat3, file = "tmp/dat3.RData")
save(dat3, file = "tmp/dat3.RData")
# Add trace for move events ##############################################
cat("########## Adding trace variable for move events... ##########", "\n")
cat("\n########## Adding trace variable for move events... ##########", "\n")
dat4 <- add_trace_moves(dat3)
#save(dat4, file = "tmp/dat4.RData")
save(dat4, file = "tmp/dat4.RData")
# Add topics: file names and topics ######################################
cat("########## Adding information about topics... ##########", "\n")
artworks <- unique(dat4$artwork)
# remove artworks without XML information
artworks <- artworks[!artworks %in% c("504", "505")]
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
path = "../data/ContentEyevisit/eyevisit_cards_light/")
dat5 <- add_topic(dat4, topics = topics)
#save(dat5, file = "tmp/dat5.RData")
save(dat5, file = "tmp/dat5.RData")
# TODO: Replace artwork with informative strings

View File

@ -262,6 +262,7 @@ add_case <- function(data, cutoff = 20) {
data$timediff <- NULL
data
}
# TODO: Is this faster with lapply()?
###########################################################################
@ -269,14 +270,20 @@ add_case <- function(data, cutoff = 20) {
add_trace_moves <- function(data) {
cases <- unique(data$case)
artworks <- unique(data$artwork)[unique(data$artwork) != "glossar"]
pbapply::pboptions(style = 3, char = "=")
trace_max <- max(data$trace, na.rm = TRUE)
subdata_list <- split(data, ~ artwork + case)
subdata_list <- subdata_list[which(sapply(subdata_list, nrow) != 0)]
#subdata_art <- split(data, ~ artwork)
subdata_case <- split(data, ~ case)
pbapply::pboptions(style = 3, char = "=")
#subdata_list <- split(data, ~ artwork + case)
# --> does not work with complete data set
cat("Splitting data...", "\n")
subdata_list <- pbapply::pblapply(subdata_case, split, f = ~artwork)
subdata_list <- unlist(subdata_list, recursive = FALSE)
cat("Adding trace...", "\n")
subdata_trace <- pbapply::pblapply(subdata_list,
function(x) {
trace_max <<- trace_max + 1
@ -322,9 +329,6 @@ add_trace_subdata <- function(subdata, max_trace) {
subdata
}
#system.time(dat4a <- add_trace_moves2(dat3))
#system.time(dat4b <- add_trace_moves(dat3))
###########################################################################
@ -383,6 +387,7 @@ extract_topics <- function(artworks, pattern, path) {
add_topic <- function(data, topics) {
artworks <- unique(data$artwork)
tab_art <- lapply(artworks,
function(x) names(table(data$topicNumber[data$artwork == x])))
names(tab_art) <- artworks
@ -412,8 +417,9 @@ add_topic <- function(data, topics) {
dat_topic <- lapply(dat_label, set_topic)
out <- do.call(rbind, dat_topic)
out <- out[order(out$date.start, out$fileId.start), ]
#out <- do.call(rbind, dat_topic)
out <- dplyr::bind_rows(dat_topic)
out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
rownames(out) <- NULL
out
}