Debugging and refactoring; glossar is an argument now; create glossar dictionary was moved to add_trace_glossar
This commit is contained in:
		
							parent
							
								
									ee35481130
								
							
						
					
					
						commit
						3786ae4b42
					
				@ -33,17 +33,15 @@ add_trace_artworks <- function(subdata) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
###########################################################################
 | 
			
		||||
add_trace_glossar <- function(subdata, glossar_dict) {
 | 
			
		||||
add_trace_glossar <- function(subdata, xmlpath) {
 | 
			
		||||
 | 
			
		||||
  pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA,
 | 
			
		||||
                       style = 3)
 | 
			
		||||
 | 
			
		||||
  # Fix glossar entries (find corresponding artworks and fill in trace)
 | 
			
		||||
  glossar_files <- unique(subdata[subdata$artwork == "glossar", "popup"])
 | 
			
		||||
 | 
			
		||||
  # load lookup table for artworks and glossar files
 | 
			
		||||
  lut <- glossar_dict[names(glossar_dict) %in% glossar_files]
 | 
			
		||||
 | 
			
		||||
  cat("\n\n########## Creating glossar dictionary ##########", "\n")
 | 
			
		||||
  artworks      <- unique(subdata$artwork[subdata$artwork != "glossar"])
 | 
			
		||||
  glossar_files <- unique(dat[dat$artwork == "glossar", "popup"])
 | 
			
		||||
  lut           <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath)
 | 
			
		||||
  inside        <- glossar_files[glossar_files %in%
 | 
			
		||||
                                 names(lut[sapply(lut, length) == 1])]
 | 
			
		||||
  single_art    <- unlist(lut[names(lut) %in% inside])
 | 
			
		||||
@ -90,7 +88,7 @@ add_trace_glossar <- function(subdata, glossar_dict) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
###########################################################################
 | 
			
		||||
add_trace <- function(data, glossar_dict) {
 | 
			
		||||
add_trace <- function(data, xmlpath, glossar) {
 | 
			
		||||
 | 
			
		||||
  data$trace <- NA
 | 
			
		||||
  subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
 | 
			
		||||
@ -98,8 +96,10 @@ add_trace <- function(data, glossar_dict) {
 | 
			
		||||
 | 
			
		||||
  subdata2 <- add_trace_artworks(subdata2)
 | 
			
		||||
 | 
			
		||||
  if ("glossar" %in% unique(subdata2$artwork)) {
 | 
			
		||||
    subdata2 <- add_trace_glossar(subdata2, glossar_dict)
 | 
			
		||||
  if (glossar) {
 | 
			
		||||
    subdata2 <- add_trace_glossar(subdata2, xmlpath)
 | 
			
		||||
  } else {
 | 
			
		||||
    subdata2 <- subdata2[subdata2$glossar != 1, ]
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  out <- rbind(subdata1, subdata2)
 | 
			
		||||
 | 
			
		||||
@ -11,42 +11,33 @@
 | 
			
		||||
#' @param rm_nochange_moves Logical. Should move events that record no
 | 
			
		||||
#' change, meaning distance and rotationDegree are 0 and scaleSize is 1, be
 | 
			
		||||
#' removed. Default is TRUE.
 | 
			
		||||
#' @param glossar Logical indicating of glossar folder is present and if it
 | 
			
		||||
#' @param glossar Logical indicating if glossar folder is present and if it
 | 
			
		||||
#' should be taken into account when preprocessing raw log files. Default
 | 
			
		||||
#' is FALSE.
 | 
			
		||||
#' is TRUE.
 | 
			
		||||
#' @return Data frame.
 | 
			
		||||
#' @export
 | 
			
		||||
#' @examples
 | 
			
		||||
#' # tbd
 | 
			
		||||
create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves = TRUE,
 | 
			
		||||
                             glossar = FALSE) {
 | 
			
		||||
create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20,
 | 
			
		||||
                             rm_nochange_moves = TRUE, glossar = FALSE) {
 | 
			
		||||
 | 
			
		||||
  if (!lubridate::is.POSIXt(data$date)){
 | 
			
		||||
    cat("########## Converting variable `date` to POSIXct ##########", "\n")
 | 
			
		||||
    data$date <- as.POSIXct(data$date)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!glossar & is.null(xmlpath)) {
 | 
			
		||||
    stop("xmlpath is not specified and glossar = TRUE. Please enter the path to folder where XML definitions of artworks live.")
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # Remove irrelevant events
 | 
			
		||||
  dat <- subset(data, !(data$event %in% c("Start Application",
 | 
			
		||||
                                          "Show Application")))
 | 
			
		||||
 | 
			
		||||
  artworks <- unique(stats::na.omit(dat$artwork))
 | 
			
		||||
 | 
			
		||||
  # Create glossar dictionary ##############################################
 | 
			
		||||
  if (glossar) {
 | 
			
		||||
  dat$glossar <- ifelse(dat$artwork == "glossar", 1, 0)
 | 
			
		||||
 | 
			
		||||
    cat("\n########## Creating glossar dictionary ##########", "\n")
 | 
			
		||||
    artworks <- artworks[artworks != "glossar"]
 | 
			
		||||
    glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
 | 
			
		||||
    glossar_dict <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath)
 | 
			
		||||
  } else {
 | 
			
		||||
    glossar_dict <- NULL
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # Add trace variable #####################################################
 | 
			
		||||
  cat("\n########## Adding trace variable... ##########", "\n")
 | 
			
		||||
  dat1 <- add_trace(dat, glossar_dict)
 | 
			
		||||
  dat1 <- add_trace(dat, xmlpath = xmlpath, glossar = glossar)
 | 
			
		||||
 | 
			
		||||
  # Close events
 | 
			
		||||
  cat("\n\n########## Closing events... ##########", "\n")
 | 
			
		||||
@ -62,16 +53,6 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves
 | 
			
		||||
 | 
			
		||||
  dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]
 | 
			
		||||
 | 
			
		||||
  # Remove all events that do not have a `date.start`
 | 
			
		||||
  # d1 <- nrow(dat2)
 | 
			
		||||
  # dat2 <- dat2[!is.na(dat2$date.start), ]
 | 
			
		||||
  # d2 <- nrow(dat2)
 | 
			
		||||
  # if(d1 > d2) {
 | 
			
		||||
  #   warning(paste0(d1-d2, " lines that do not contain a start event have been removed. This can happen when events span over more than one log file.\n"))
 | 
			
		||||
  # }
 | 
			
		||||
 | 
			
		||||
  # rownames(dat2) <- NULL
 | 
			
		||||
 | 
			
		||||
  # Add case variable ######################################################
 | 
			
		||||
  cat("\n########## Adding case and eventId variables... ##########", "\n\n")
 | 
			
		||||
  dat3 <- add_case(dat2, cutoff = case_cutoff)
 | 
			
		||||
@ -100,11 +81,13 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves
 | 
			
		||||
 | 
			
		||||
  dat4$fIdDiff      <- dat4$fIdNum.stop - dat4$fIdNum.start
 | 
			
		||||
 | 
			
		||||
  # Remove moves where stop is before start
 | 
			
		||||
  dat4 <- dat4[which(dat4$fIdDiff > 0), ]
 | 
			
		||||
  # Remove moves where stop is before start (3)
 | 
			
		||||
  dat4 <- dat4[dat4$fIdDiff >= 0 | is.na(dat4$fIdDiff), ]
 | 
			
		||||
 | 
			
		||||
  dat4$duration[dat4$fIdDiff > 0] <- dat4$fIdDiff * 600000 -
 | 
			
		||||
    dat4$timeMs.start + dat4$timeMs.stop
 | 
			
		||||
  dat4$duration[which(dat4$fIdDiff > 0)] <-
 | 
			
		||||
    dat4$fIdDiff[which(dat4$fIdDiff > 0)] * 600000 -
 | 
			
		||||
    dat4$timeMs.start[which(dat4$fIdDiff > 0)] +
 | 
			
		||||
    dat4$timeMs.stop[which(dat4$fIdDiff > 0)]
 | 
			
		||||
 | 
			
		||||
  # Remove fragmented traces ###############################################
 | 
			
		||||
  tab <- stats::xtabs( ~ trace + event, dat4)
 | 
			
		||||
@ -122,8 +105,26 @@ create_eventlogs <- function(data, xmlpath, case_cutoff = 20, rm_nochange_moves
 | 
			
		||||
  }
 | 
			
		||||
  dat5 <- dat4[!dat4$trace %in% fragments, ]
 | 
			
		||||
 | 
			
		||||
  if (!glossar) dat5$glossar <- NULL
 | 
			
		||||
  if (glossar) {
 | 
			
		||||
    # Check for wrong order of events: flipCard -> openPopup -> openTopic
 | 
			
		||||
    dat5_split  <- split(dat5[dat5$event != "move", ], ~ trace)
 | 
			
		||||
    event_list  <- lapply(dat5_split, function(x) unique(x$event))
 | 
			
		||||
 | 
			
		||||
  dat5
 | 
			
		||||
    ids         <- sapply(event_list, length) == 3
 | 
			
		||||
    event_dat   <- as.data.frame(do.call(rbind, event_list[ids]))
 | 
			
		||||
    names(event_dat) <- c("flipCard", "openTopic", "openPopup")
 | 
			
		||||
 | 
			
		||||
    frag_ids    <- which(event_dat$openTopic == "openPopup")
 | 
			
		||||
    dat6        <- dat5[dat5$trace %in% rownames(event_dat)[frag_ids], ]
 | 
			
		||||
    dat6b       <- dat6[!dat6$glossar == 1, ]
 | 
			
		||||
    dat7        <- rbind(dat5[!dat5$trace %in% rownames(event_dat)[frag_ids], ],
 | 
			
		||||
                         dat6b)
 | 
			
		||||
    # TODO: Check me!
 | 
			
		||||
  } else {
 | 
			
		||||
    dat7 <- dat5
 | 
			
		||||
    dat7$glossar <- NULL
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  dat7
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user