Changed how path variable for move events is created
This commit is contained in:
		
							parent
							
								
									8bedadd18e
								
							
						
					
					
						commit
						8233b151d9
					
				
							
								
								
									
										107
									
								
								R/add_path.R
									
									
									
									
									
								
							
							
						
						
									
										107
									
								
								R/add_path.R
									
									
									
									
									
								
							| @ -1,5 +1,8 @@ | ||||
| ########################################################################### | ||||
| add_path_items <- function(subdata) { | ||||
| 
 | ||||
|   pbapply::pboptions(style = 3, char = "=") | ||||
| 
 | ||||
|   subdata_glossar <- subdata[subdata$item == "glossar", ] | ||||
|   subdata_glossar$path <- NA | ||||
| 
 | ||||
| @ -52,6 +55,7 @@ add_path_items <- function(subdata) { | ||||
| 
 | ||||
| ########################################################################### | ||||
| add_path_glossar <- function(subdata, xmlpath) { | ||||
| # TODO: I think this needs to be completely redone | ||||
| 
 | ||||
|   pb <- utils::txtProgressBar(min = 0, max = nrow(subdata), initial = NA, | ||||
|                        style = 3) | ||||
| @ -129,62 +133,93 @@ add_path <- function(data, xmlpath, glossar) { | ||||
| 
 | ||||
| # Add path for moves | ||||
| 
 | ||||
| add_path_moves <- function(data) { | ||||
| add_path_moves <- function(data, cutoff) { | ||||
| 
 | ||||
|   pbapply::pboptions(style = 3, char = "=") | ||||
| 
 | ||||
|   path_max <- max(data$path, na.rm = TRUE) | ||||
|   subdata_item <- split(data, ~ item) | ||||
| 
 | ||||
|   #subdata_art <- split(data, ~ item) | ||||
|   subdata_case <- split(data, ~ case) | ||||
| 
 | ||||
|   #subdata_list <- split(data, ~ item + case) | ||||
|   # --> does not work with complete data set | ||||
|   cat("Splitting data...", "\n") | ||||
|   subdata_list <- pbapply::pblapply(subdata_case, split, f = ~item) | ||||
|   subdata_list <- unlist(subdata_list, recursive = FALSE) | ||||
| 
 | ||||
|   cat("Adding path...", "\n") | ||||
|   subdata_path <- pbapply::pblapply(subdata_list, | ||||
|         function(x) { | ||||
|           path_max <<- path_max + 1 | ||||
|           add_path_subdata(x, max_path = path_max) | ||||
|         } | ||||
|       ) | ||||
|   subdata_path <- pbapply::pblapply(subdata_item, | ||||
|           add_path_subdata, cutoff = cutoff) | ||||
| 
 | ||||
|   out <- dplyr::bind_rows(subdata_path) | ||||
|   out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ] | ||||
|   # Make path a consecutive number | ||||
|   out$path <- as.numeric(factor(out$path, levels = unique(out$path))) | ||||
|   rownames(out) <- NULL | ||||
| 
 | ||||
|   out | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| add_path_subdata <- function(subdata, max_path) { | ||||
| 
 | ||||
|   if (nrow(subdata) != 0) { | ||||
| 
 | ||||
|     if (length(stats::na.omit(unique(subdata$path))) == 1) { | ||||
|       subdata[subdata$event == "move", "path"] <- stats::na.omit(unique(subdata$path)) | ||||
|     } else if (length(stats::na.omit(unique(subdata$path))) > 1) { | ||||
|       for (i in 1:nrow(subdata)) { | ||||
|         if (subdata$event[i] == "move") { | ||||
|           if (i == 1) { | ||||
|             subdata$path[i] <- stats::na.omit(unique(subdata$path))[1] | ||||
| add_path_subdata <- function(subdata, cutoff) { | ||||
|   index_flipCard <- which(subdata$event == "flipCard") | ||||
|   current_item <- unique(subdata$item) | ||||
|   for (j in seq_along(index_flipCard)) { | ||||
|     # forwards pass | ||||
|     if (j < max(seq_along(index_flipCard))) { | ||||
|       for (i in seq(index_flipCard[j], index_flipCard[j + 1])) { | ||||
|         if (subdata$event[i] == "move" & !is.na(subdata$date.stop[index_flipCard[j]])) { | ||||
|             timediff <- difftime(subdata$date.start[i], | ||||
|                                  subdata$date.stop[index_flipCard[j]], | ||||
|                                  units = "secs") | ||||
|           if (timediff <= cutoff){ | ||||
|             subdata$path[i] <- subdata$path[index_flipCard[j]] | ||||
|           } else { | ||||
|             subdata$path[i] <- subdata$path[i - 1] | ||||
|             subdata$path[i] <- paste(current_item, "mv", j, sep = "_") | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } else if (all(is.na(subdata$path))) { | ||||
|       for (i in 1:nrow(subdata)) { | ||||
|         subdata$path[i] <- max_path | ||||
|     } else { | ||||
|       for (i in seq(index_flipCard[j], nrow(subdata))) { | ||||
|         if (subdata$event[i] == "move" & (!is.na(subdata$date.stop[index_flipCard[j]]))) { | ||||
|             timediff <- difftime(subdata$date.start[i], | ||||
|                                  subdata$date.stop[index_flipCard[j]], | ||||
|                                  units = "secs") | ||||
|           if (timediff <= cutoff) { | ||||
|             subdata$path[i] <- subdata$path[index_flipCard[j]] | ||||
|           } else { | ||||
|             subdata$path[i] <- paste(current_item, "mv", j, sep = "_") | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     # backwards pass | ||||
|     if (j > min(seq_along(index_flipCard))) { | ||||
|       for (i in seq(index_flipCard[j - 1], index_flipCard[j])) { | ||||
|         if (grepl("mv", subdata$path[i])) { | ||||
|           timediff <- difftime(subdata$date.start[index_flipCard[j]], | ||||
|                                subdata$date.stop[i], | ||||
|                                units = "secs") | ||||
|           if (timediff <= cutoff){ | ||||
|             subdata$path[i] <- subdata$path[index_flipCard[j]] | ||||
|           } else { | ||||
|             subdata$path[i] <- paste(current_item, "mv", j, sep = "_") | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   # fix moves with same path and timediff > cutoff | ||||
|   subdata_moves <- split(subdata, ~ path) | ||||
| 
 | ||||
|   check_moves <- function(subsubdata, cutoff) { | ||||
|     if (any(grepl("mv", subsubdata$path))) { | ||||
|       for (i in seq_len(nrow(subsubdata) - 1)) { | ||||
|         timediff <- difftime(subsubdata$date.start[i + 1], subsubdata$date.stop[i], | ||||
|                              units = "secs") | ||||
|         if (timediff > cutoff) { | ||||
|           subsubdata$path[i + 1] <- paste(subsubdata$path[i], i, "new", sep = "_") | ||||
|         } else { | ||||
|           subsubdata$path[i + 1] <- subsubdata$path[i] | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   subsubdata | ||||
|   } | ||||
| 
 | ||||
|   } else { | ||||
|     warning("subdata has nrow = 0") | ||||
|   } | ||||
|   subdata_path <- lapply(subdata_moves, check_moves, cutoff = cutoff) | ||||
|   subdata <- dplyr::bind_rows(subdata_path) | ||||
|   subdata | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -54,10 +54,14 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, | ||||
| 
 | ||||
|   dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] | ||||
| 
 | ||||
|   # Add path for move events ############################################## | ||||
|   cat("\n\n########## Adding path variable for move events... ##########", "\n") | ||||
|   dat3 <- add_path_moves(dat2) | ||||
| 
 | ||||
|   # Add case variable ###################################################### | ||||
|   cat("\n########## Adding case and eventId variables... ##########", "\n\n") | ||||
|   dat3 <- add_case(dat2, cutoff = case_cutoff) | ||||
|   dat3 <- dat3[, c("fileId.start", "fileId.stop", "date.start", | ||||
|   cat("\n########## Adding case variable... ##########", "\n\n") | ||||
|   dat4 <- add_case(dat3, cutoff = case_cutoff) | ||||
|   dat4 <- dat4[, c("fileId.start", "fileId.stop", "date.start", | ||||
|                    "date.stop", "folder", "case", "path", "glossar", | ||||
|                    "event", "item", "timeMs.start", "timeMs.stop", | ||||
|                    "duration", "topic", "popup", "x.start", "y.start", | ||||
| @ -65,11 +69,6 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, | ||||
|                    "scale.stop", "scaleSize", "rotation.start", | ||||
|                    "rotation.stop", "rotationDegree")] | ||||
| 
 | ||||
|   # Add path for move events ############################################## | ||||
|   cat("\n\n########## Adding path variable for move events... ##########", "\n") | ||||
|   dat4 <- add_path_moves(dat3) | ||||
| 
 | ||||
| 
 | ||||
|   # Fix durations that span more than one log file ######################### | ||||
|   levels_fId <- sort(unique(c(dat4$fileId.start, dat4$fileId.stop))) | ||||
|   dat4$fIdNum.start <- factor(dat4$fileId.start, levels = levels_fId) | ||||
| @ -92,20 +91,22 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, | ||||
|   dat4$fIdDiff      <- NULL | ||||
| 
 | ||||
|   # Remove fragmented paths ############################################### | ||||
|   tab <- stats::xtabs( ~ path + event, dat4) | ||||
|   # tab <- stats::xtabs( ~ path + event, dat4) | ||||
| 
 | ||||
|   fragments <- NULL | ||||
|   # fragments <- NULL | ||||
| 
 | ||||
|   for (i in seq_len(nrow(tab))) { | ||||
|     if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) { | ||||
|       fragments <- c(fragments, rownames(tab)[i]) | ||||
|     } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) { | ||||
|       fragments <- c(fragments, rownames(tab)[i]) | ||||
|     } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) { | ||||
|       fragments <- c(fragments, rownames(tab)[i]) | ||||
|     } | ||||
|   } | ||||
|   dat5 <- dat4[!dat4$path %in% fragments, ] | ||||
|   # for (i in seq_len(nrow(tab))) { | ||||
|   #   if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) { | ||||
|   #     fragments <- c(fragments, rownames(tab)[i]) | ||||
|   #   } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) { | ||||
|   #     fragments <- c(fragments, rownames(tab)[i]) | ||||
|   #   } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) { | ||||
|   #     fragments <- c(fragments, rownames(tab)[i]) | ||||
|   #   } | ||||
|   # } | ||||
|   # dat5 <- dat4[!dat4$path %in% fragments, ] | ||||
|   # TODO: Decide if I want this or not - are all these log errors? | ||||
|   dat5 <- dat4 | ||||
| 
 | ||||
|   if (glossar) { | ||||
|     # Check for wrong order of events: flipCard -> openPopup -> openTopic | ||||
| @ -126,8 +127,7 @@ create_eventlogs <- function(data, xmlpath = NULL, case_cutoff = 20, | ||||
|     dat7$glossar <- NULL | ||||
|   } | ||||
| 
 | ||||
|   if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "../data/tmp_intermediate-df.RData") | ||||
| 
 | ||||
|   if (save) save(dat, dat1, dat2, dat3, dat4, dat5, dat7, file = "results/tmp_intermediate-df.RData") | ||||
|   dat7 | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user