Worked on TODOs
This commit is contained in:
		
							parent
							
								
									fb9db8b908
								
							
						
					
					
						commit
						9645bc62f1
					
				@ -483,9 +483,16 @@ two raw log files cannot be closed and will then be removed from the data
 | 
			
		||||
set. The functions warns about this, but it is a random process getting rid
 | 
			
		||||
of these data and seems therefore not like a systematic problem. Another
 | 
			
		||||
reason why this is not bad, is that durations cannot be calculated for
 | 
			
		||||
events across log files, because the time stamps do not increase over
 | 
			
		||||
events across log files anyways, because the time stamps do not increase
 | 
			
		||||
systematically over log files (see above).
 | 
			
		||||
 | 
			
		||||
I meant to put the lists back together with `do.call(rbind, some_list)` but
 | 
			
		||||
this can also not handle big data sets. I therefore switched to
 | 
			
		||||
`dplyr::bind_rows(some_ist)` which is really fast and was developed
 | 
			
		||||
especially for this purpose. It means, that I have to depend on the dplyr
 | 
			
		||||
package (which I am not a big fan of, since I meant to keep the package
 | 
			
		||||
self-contained).
 | 
			
		||||
 | 
			
		||||
# Reading list
 | 
			
		||||
 | 
			
		||||
* @Arizmendi2022 [--]
 | 
			
		||||
 | 
			
		||||
@ -2,10 +2,18 @@
 | 
			
		||||
 | 
			
		||||
source("functions.R")
 | 
			
		||||
 | 
			
		||||
small <- TRUE
 | 
			
		||||
 | 
			
		||||
# Read data ##############################################################
 | 
			
		||||
cat("########## Reading in data... ##########", "\n")
 | 
			
		||||
dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";",
 | 
			
		||||
                   header = TRUE)
 | 
			
		||||
 | 
			
		||||
if (small) {
 | 
			
		||||
  dat0 <- read.table("../data/rawdata_logfiles_small.csv", sep = ";",
 | 
			
		||||
                     header = TRUE)
 | 
			
		||||
} else {
 | 
			
		||||
  dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";",
 | 
			
		||||
                     header = TRUE)
 | 
			
		||||
}
 | 
			
		||||
dat0$date <- as.POSIXct(dat0$date)
 | 
			
		||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
 | 
			
		||||
 | 
			
		||||
@ -13,13 +21,15 @@ dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
 | 
			
		||||
dat <- subset(dat0, !(dat0$event %in% c("Start Application",
 | 
			
		||||
                                        "Show Application")))
 | 
			
		||||
 | 
			
		||||
save(dat, file = "tmp/dat.RData")
 | 
			
		||||
save(dat, file = paste0("tmp/dat_", ifelse(small, "small_", "full_"),
 | 
			
		||||
                       as.numeric(Sys.time()), ".RData"))
 | 
			
		||||
 | 
			
		||||
# Add trace variable #####################################################
 | 
			
		||||
cat("########## Adding trace variable... ##########", "\n")
 | 
			
		||||
dat1 <- add_trace(dat)
 | 
			
		||||
 | 
			
		||||
save(dat1, file = "tmp/dat1.RData")
 | 
			
		||||
save(dat1, file = paste("tmp/dat1", ifelse(small, "small_", "full_"),
 | 
			
		||||
                       as.numeric(Sys.time()), ".RData"))
 | 
			
		||||
 | 
			
		||||
# Close events
 | 
			
		||||
cat("########## Closing events... ##########", "\n")
 | 
			
		||||
@ -44,7 +54,8 @@ dat2 <- dat2[!is.na(dat2$date.start), ]
 | 
			
		||||
rownames(dat2) <- NULL
 | 
			
		||||
# TODO: Throw warning about this
 | 
			
		||||
 | 
			
		||||
save(dat2, file = "tmp/dat2.RData")
 | 
			
		||||
save(dat2, file = paste("tmp/dat2", ifelse(small, "small_", "full_"),
 | 
			
		||||
                       as.numeric(Sys.time()), ".RData"))
 | 
			
		||||
 | 
			
		||||
# Add case variable ######################################################
 | 
			
		||||
cat("########## Adding case and eventId variables... ##########", "\n")
 | 
			
		||||
@ -61,13 +72,15 @@ dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
 | 
			
		||||
                 "scaleSize", "rotation.start", "rotation.stop",
 | 
			
		||||
                 "rotationDegree")]
 | 
			
		||||
 | 
			
		||||
save(dat3, file = "tmp/dat3.RData")
 | 
			
		||||
save(dat3, file = paste("tmp/dat3", ifelse(small, "small_", "full_"),
 | 
			
		||||
                       as.numeric(Sys.time()), ".RData"))
 | 
			
		||||
 | 
			
		||||
# Add trace for move events ##############################################
 | 
			
		||||
cat("\n########## Adding trace variable for move events... ##########", "\n")
 | 
			
		||||
dat4 <- add_trace_moves(dat3)
 | 
			
		||||
 | 
			
		||||
save(dat4, file = "tmp/dat4.RData")
 | 
			
		||||
save(dat4, file = paste("tmp/dat4", ifelse(small, "small_", "full_"),
 | 
			
		||||
                       as.numeric(Sys.time()), ".RData"))
 | 
			
		||||
 | 
			
		||||
# Add topics: file names and topics ######################################
 | 
			
		||||
cat("########## Adding information about topics... ##########", "\n")
 | 
			
		||||
@ -79,7 +92,8 @@ topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
 | 
			
		||||
 | 
			
		||||
dat5 <- add_topic(dat4, topics = topics)
 | 
			
		||||
 | 
			
		||||
save(dat5, file = "tmp/dat5.RData")
 | 
			
		||||
save(dat5, file = paste("tmp/dat5", ifelse(small, "small_", "full_"),
 | 
			
		||||
                       as.numeric(Sys.time()), ".RData"))
 | 
			
		||||
 | 
			
		||||
# TODO: Replace artwork with informative strings
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -118,42 +118,43 @@ add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
 | 
			
		||||
###########################################################################
 | 
			
		||||
 | 
			
		||||
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
 | 
			
		||||
# TODO: How do I set default vector and partial matching of arguments?
 | 
			
		||||
  # --> `macht.arg()` and `pmatch()`
 | 
			
		||||
 | 
			
		||||
  if (event == "move") {
 | 
			
		||||
    actions <- c("Transform start", "Transform stop")
 | 
			
		||||
    idvar   <- c("eventId", "artwork", "glossar")
 | 
			
		||||
    drop    <- c("popup", "topicNumber", "trace", "event")
 | 
			
		||||
    ncol    <- 17
 | 
			
		||||
  event <- match.arg(event)
 | 
			
		||||
 | 
			
		||||
  } else if (event == "flipCard") {
 | 
			
		||||
    actions <- c("Show Info", "Show Front")
 | 
			
		||||
    idvar   <- c("trace", "artwork", "glossar")
 | 
			
		||||
    drop    <- c("popup", "topicNumber", "eventId", "event")
 | 
			
		||||
    ncol    <- 17
 | 
			
		||||
  switch(event,
 | 
			
		||||
    "move" = {
 | 
			
		||||
      actions <- c("Transform start", "Transform stop")
 | 
			
		||||
      idvar   <- c("fileId", "eventId", "artwork", "glossar")
 | 
			
		||||
      drop    <- c("popup", "topicNumber", "trace", "event")
 | 
			
		||||
      ncol    <- 16
 | 
			
		||||
 | 
			
		||||
  } else if (event == "openTopic") {
 | 
			
		||||
    actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
 | 
			
		||||
    idvar   <- c("eventId", "trace", "glossar", "artwork", "topicNumber")
 | 
			
		||||
    drop    <- c("popup", "event")
 | 
			
		||||
    ncol    <- 19
 | 
			
		||||
    },
 | 
			
		||||
    "flipCard" = {
 | 
			
		||||
      actions <- c("Show Info", "Show Front")
 | 
			
		||||
      idvar   <- c("fileId", "trace", "artwork", "glossar")
 | 
			
		||||
      drop    <- c("popup", "topicNumber", "eventId", "event")
 | 
			
		||||
      ncol    <- 16
 | 
			
		||||
 | 
			
		||||
  } else if (event == "openPopup") {
 | 
			
		||||
    actions <- c("ShowPopup", "HidePopup")
 | 
			
		||||
    idvar   <- c("eventId", "trace", "glossar", "artwork", "popup")
 | 
			
		||||
    drop    <- c("topicNumber", "event")
 | 
			
		||||
    ncol    <- 19
 | 
			
		||||
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
 | 
			
		||||
    },
 | 
			
		||||
    "openTopic" = {
 | 
			
		||||
      actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
 | 
			
		||||
      idvar   <- c("fileId", "eventId", "trace", "glossar", "artwork",
 | 
			
		||||
                   "topicNumber")
 | 
			
		||||
      drop    <- c("popup", "event")
 | 
			
		||||
      ncol    <- 18
 | 
			
		||||
 | 
			
		||||
  } else {
 | 
			
		||||
    stop("`event` must be one of 'move', 'flipCard', 'openTopic',
 | 
			
		||||
         'openPopup'.")
 | 
			
		||||
  }
 | 
			
		||||
# TODO: `fileId` should now maybe go back into `idvar`
 | 
			
		||||
    },
 | 
			
		||||
    "openPopup" = {
 | 
			
		||||
      actions <- c("ShowPopup", "HidePopup")
 | 
			
		||||
      idvar   <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
 | 
			
		||||
      drop    <- c("topicNumber", "event")
 | 
			
		||||
      ncol    <- 18
 | 
			
		||||
#   TODO: Should topicNumber maybe also be filled in for "openPopup"?
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  )
 | 
			
		||||
 | 
			
		||||
  subdata <- subset(data, data$event %in% actions)
 | 
			
		||||
  #subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ]
 | 
			
		||||
  subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
 | 
			
		||||
  subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
 | 
			
		||||
  num_start <- diff(c(0, which(subdata$event == actions[2])))
 | 
			
		||||
@ -172,36 +173,26 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
 | 
			
		||||
  subdata_split <- split(subdata, ~ fileId)
 | 
			
		||||
 | 
			
		||||
  pbapply::pboptions(style = 3, char = "=")
 | 
			
		||||
  suppressWarnings(
 | 
			
		||||
 | 
			
		||||
  subdata_split_wide <- pbapply::pblapply(subdata_split, reshape,
 | 
			
		||||
                               direction = "wide",
 | 
			
		||||
                               idvar = idvar,
 | 
			
		||||
                               timevar = "time",
 | 
			
		||||
                               drop = drop)
 | 
			
		||||
  )
 | 
			
		||||
#  suppressWarnings(
 | 
			
		||||
#    data_wide <- reshape(subdata, direction = "wide",
 | 
			
		||||
#                         idvar = idvar,
 | 
			
		||||
#                         timevar = "time",
 | 
			
		||||
#                         drop = drop)
 | 
			
		||||
#  )
 | 
			
		||||
# TODO: Suppress warnings? Better with tryCatch()?
 | 
			
		||||
# there is a pathological entry which gets deleted...
 | 
			
		||||
# df[df$trace == 4595, ]
 | 
			
		||||
# --> artwork 046 popup selene.xml gets opened twice
 | 
			
		||||
 | 
			
		||||
  # remove entries with only start or stop events since they do not have
 | 
			
		||||
  # all columns
 | 
			
		||||
  subdata_split_wide <-
 | 
			
		||||
    subdata_split_wide[-which(sapply(subdata_split_wide, ncol) != ncol)]
 | 
			
		||||
  ids <- which(sapply(subdata_split_wide, ncol) != ncol)
 | 
			
		||||
  if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
 | 
			
		||||
 | 
			
		||||
  #data_wide <- do.call(rbind, subdata_split_wide)
 | 
			
		||||
# TODO: This runs quite some time
 | 
			
		||||
# --> There is a more efficient function in dplyr, which would also allow
 | 
			
		||||
# to keep the file IDs with only start or stop or a single entry...
 | 
			
		||||
  data_wide <- dplyr::bind_rows(subdata_split_wide)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  for (d in drop) data_wide[d] <- NA
 | 
			
		||||
  data_wide$distance        <- NA
 | 
			
		||||
  data_wide$scaleSize       <- NA
 | 
			
		||||
@ -227,13 +218,13 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
 | 
			
		||||
    data_wide <- move_wide
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  out <- data_wide[, c("fileId.start", "fileId.stop", "event", "artwork",
 | 
			
		||||
                       "trace", "glossar", "date.start", "date.stop",
 | 
			
		||||
                       "timeMs.start", "timeMs.stop", "duration",
 | 
			
		||||
                       "topicNumber", "popup", "x.start", "y.start",
 | 
			
		||||
                       "x.stop", "y.stop", "distance", "scale.start",
 | 
			
		||||
                       "scale.stop", "scaleSize", "rotation.start",
 | 
			
		||||
                       "rotation.stop", "rotationDegree")]
 | 
			
		||||
  out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
 | 
			
		||||
                       "date.start", "date.stop", "timeMs.start",
 | 
			
		||||
                       "timeMs.stop", "duration", "topicNumber", "popup",
 | 
			
		||||
                       "x.start", "y.start", "x.stop", "y.stop",
 | 
			
		||||
                       "distance", "scale.start", "scale.stop",
 | 
			
		||||
                       "scaleSize", "rotation.start", "rotation.stop",
 | 
			
		||||
                       "rotationDegree")]
 | 
			
		||||
  rownames(out) <- NULL
 | 
			
		||||
  out
 | 
			
		||||
}
 | 
			
		||||
@ -262,7 +253,6 @@ add_case <- function(data, cutoff = 20) {
 | 
			
		||||
  data$timediff <- NULL
 | 
			
		||||
  data
 | 
			
		||||
}
 | 
			
		||||
# TODO: Is this faster with lapply()?
 | 
			
		||||
 | 
			
		||||
###########################################################################
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user