Added progress bars to parse_logfiles() and did some debugging
This commit is contained in:
		
							parent
							
								
									17e8e39cc3
								
							
						
					
					
						commit
						b9185a5645
					
				@ -34,7 +34,7 @@ create_eventlogs <- function(data) {
 | 
			
		||||
  cat("## --> openPopup events closed.", "\n")
 | 
			
		||||
  dat2 <- rbind(c1, c2, c3, c4)
 | 
			
		||||
  
 | 
			
		||||
  dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
 | 
			
		||||
  dat2 <- dat2[order(dat2$fileId, dat2$date.start, dat2$timeMs.start), ]
 | 
			
		||||
 | 
			
		||||
  # Remove all events that do not have a `date.start`
 | 
			
		||||
  d1 <- nrow(dat2)
 | 
			
		||||
@ -52,7 +52,7 @@ create_eventlogs <- function(data) {
 | 
			
		||||
  
 | 
			
		||||
  # Add event ID ###########################################################
 | 
			
		||||
  dat3$eventId <- seq_len(nrow(dat3))
 | 
			
		||||
  dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
 | 
			
		||||
  dat3 <- dat3[, c("fileId", "eventId", "case",
 | 
			
		||||
                   "trace", "glossar", "event", "artwork",
 | 
			
		||||
                   "date.start", "date.stop", "timeMs.start",
 | 
			
		||||
                   "timeMs.stop", "duration", "topicNumber", "popup",
 | 
			
		||||
 | 
			
		||||
@ -282,7 +282,7 @@ add_trace_moves <- function(data) {
 | 
			
		||||
      )
 | 
			
		||||
 | 
			
		||||
  out <- dplyr::bind_rows(subdata_trace)
 | 
			
		||||
  out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
 | 
			
		||||
  out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
 | 
			
		||||
  rownames(out) <- NULL
 | 
			
		||||
 | 
			
		||||
  # Make trace a consecutive number
 | 
			
		||||
@ -409,7 +409,7 @@ add_topic <- function(data, topics) {
 | 
			
		||||
 | 
			
		||||
  #out <- do.call(rbind, dat_topic)
 | 
			
		||||
  out <- dplyr::bind_rows(dat_topic)
 | 
			
		||||
  out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
 | 
			
		||||
  out <- out[order(out$fileId, out$date.start, out$timeMs.start), ]
 | 
			
		||||
  rownames(out) <- NULL
 | 
			
		||||
  out
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -13,9 +13,10 @@
 | 
			
		||||
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
 | 
			
		||||
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
 | 
			
		||||
#' # leftpad_fnames(fnames)
 | 
			
		||||
leftpad_fnames <- function(x, dirpaths) {
 | 
			
		||||
leftpad_fnames <- function(fnames) {
 | 
			
		||||
 | 
			
		||||
  z <- gsub(paste0(dirpaths, "/"), "\\1", x)
 | 
			
		||||
  z <- sapply(fnames, function(x) tail(strsplit(x, "/")[[1]], 1),
 | 
			
		||||
              USE.NAMES = FALSE)
 | 
			
		||||
  ys <- strsplit(z, "_")
 | 
			
		||||
 | 
			
		||||
  res <- NULL
 | 
			
		||||
@ -28,7 +29,6 @@ leftpad_fnames <- function(x, dirpaths) {
 | 
			
		||||
    e4 <- sprintf("%02d", as.numeric(y2[2]))
 | 
			
		||||
    e5 <- sprintf("%02d", as.numeric(y[4]))
 | 
			
		||||
    e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
 | 
			
		||||
    e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
 | 
			
		||||
 | 
			
		||||
    res <- c(res,
 | 
			
		||||
             paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
 | 
			
		||||
@ -36,7 +36,7 @@ leftpad_fnames <- function(x, dirpaths) {
 | 
			
		||||
  res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#' Creating data frame for raw log files.
 | 
			
		||||
#' Creating data frame from raw log files.
 | 
			
		||||
#'
 | 
			
		||||
#' Creates a data frame or CSV file from raw log files from a
 | 
			
		||||
#' Multi-Touch-Table at the IWM.
 | 
			
		||||
@ -58,11 +58,12 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
 | 
			
		||||
  # TODO: This is not very intutitive
 | 
			
		||||
  fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
 | 
			
		||||
  
 | 
			
		||||
  cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
 | 
			
		||||
  suppressWarnings(
 | 
			
		||||
    logs <- lapply(fnames, readLines)
 | 
			
		||||
    logs <- pbapply::pblapply(fnames, readLines)
 | 
			
		||||
  )
 | 
			
		||||
  nlog <- sapply(logs, length)
 | 
			
		||||
  dat <- data.frame(fileId = rep(leftpad_fnames(fnames, dirpaths), nlog),
 | 
			
		||||
  dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
 | 
			
		||||
                    logs = unlist(logs))
 | 
			
		||||
  
 | 
			
		||||
  # Remove corrupt lines
 | 
			
		||||
@ -75,40 +76,50 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  # Extract relevant infos
 | 
			
		||||
  date <- sapply(dat$logs, gsub,
 | 
			
		||||
  cat("\n########## Parsing individual data parts... ##########", "\n")
 | 
			
		||||
  cat("\n Extract dates...", "\n\n")
 | 
			
		||||
  date <- pbapply::pbsapply(dat$logs, gsub,
 | 
			
		||||
                 pattern = "^\\[(.*)\\], \\[.*$",
 | 
			
		||||
                 replacement = "\\1",
 | 
			
		||||
                 USE.NAMES = FALSE)
 | 
			
		||||
  
 | 
			
		||||
  timestamp <- sapply(dat$logs, gsub,
 | 
			
		||||
  cat("\n Extract timestamps...", "\n\n")
 | 
			
		||||
  timestamp <- pbapply::pbsapply(dat$logs, gsub,
 | 
			
		||||
                      pattern = "^\\[.*\\], \\[(.*)\\].*$",
 | 
			
		||||
                      replacement = "\\1",
 | 
			
		||||
                      USE.NAMES = FALSE)
 | 
			
		||||
  
 | 
			
		||||
  action <- sapply(dat$logs, gsub,
 | 
			
		||||
  cat("\n Extract events...", "\n\n")
 | 
			
		||||
  action <- pbapply::pbsapply(dat$logs, gsub,
 | 
			
		||||
                   pattern = "^.*EyeVisit, (.*):*.*$",
 | 
			
		||||
                   replacement = "\\1",
 | 
			
		||||
                   USE.NAMES = FALSE)
 | 
			
		||||
  
 | 
			
		||||
  events <- sapply(strsplit(action, ":"), function(x) x[1])
 | 
			
		||||
  cat("\n Parse separate events...", "\n\n")
 | 
			
		||||
  events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1])
 | 
			
		||||
  
 | 
			
		||||
  topics <- sapply(strsplit(action, ":"), function(x) x[2])
 | 
			
		||||
  cat("\n Extract topics...", "\n\n")
 | 
			
		||||
  topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2])
 | 
			
		||||
  
 | 
			
		||||
  cat("\n Extract move information...", "\n\n")
 | 
			
		||||
  suppressWarnings(
 | 
			
		||||
    moves <- apply(do.call(rbind,
 | 
			
		||||
    moves <- pbapply::pbapply(do.call(rbind,
 | 
			
		||||
                     strsplit(sapply(strsplit(action, ":"),
 | 
			
		||||
                                     function(x) x[3]), ",")), 2,
 | 
			
		||||
                   as.numeric)
 | 
			
		||||
  )
 | 
			
		||||
  # ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
 | 
			
		||||
  
 | 
			
		||||
  card_action <- trimws(sapply(strsplit(action, ":"),
 | 
			
		||||
  cat("\n Extract popups...", "\n\n")
 | 
			
		||||
  card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"),
 | 
			
		||||
                               function(x) x[3])[grep("Artwork", events)])
 | 
			
		||||
  
 | 
			
		||||
  card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4]))
 | 
			
		||||
  card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"),
 | 
			
		||||
                                       function(x) x[4]))
 | 
			
		||||
  
 | 
			
		||||
  events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
 | 
			
		||||
  
 | 
			
		||||
  cat("\n Transform timestamps to ms...", "\n\n")
 | 
			
		||||
  ts_elements <- strsplit(timestamp, ":")
 | 
			
		||||
  time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
 | 
			
		||||
             as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
 | 
			
		||||
@ -130,8 +141,9 @@ parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
 | 
			
		||||
  
 | 
			
		||||
  # Export data
 | 
			
		||||
  if (save) {
 | 
			
		||||
    cat("Saving data...", "\n\n")
 | 
			
		||||
    utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
 | 
			
		||||
    cat(paste0("INFORMATION: Data file", file, "has been written to ", getwd(), "\n"))
 | 
			
		||||
    cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n"))
 | 
			
		||||
  } else {
 | 
			
		||||
    return(dat)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user