###########################################################################

# Add case variable

# Assign a case number to every row of `data`.
#
# Rows are grouped by `path`. For each path the earliest `date.start` and the
# latest non-missing `date.stop` are determined. Paths are then walked in the
# order produced by `split()`: whenever the gap between the latest stop of one
# path and the earliest start of the next path exceeds `cutoff` seconds, a new
# case begins. A gap that cannot be computed (missing times) never starts a
# new case.
#
# Arguments:
#   data    Data frame with the columns `path`, `date.start`, `date.stop`,
#           `fileId.start` and `timeMs.start`.
#   cutoff  Largest gap, in seconds, that still keeps two consecutive paths
#           in the same case (default 20).
#
# Returns `data` with an additional integer column `case`, ordered by
# `fileId.start`, `date.start` and `timeMs.start`. Rows with a missing `path`
# receive `NA` as their case.
add_case <- function(data, cutoff = 20) {
  # TODO: What is the best choice for the cutoff here?

  if (!"path" %in% names(data)) {
    stop("`data` must contain a `path` column.", call. = FALSE)
  }

  # Nothing to group (no rows, or no row with a usable path)
  if (all(is.na(data$path))) {
    data$case <- rep(NA_integer_, nrow(data))
    return(
      data[order(data$fileId.start, data$date.start, data$timeMs.start), ]
    )
  }

  # Progress bar settings are global; put them back when the function exits
  old_pbopts <- pbapply::pboptions(style = 3, char = "=")
  on.exit(pbapply::pboptions(old_pbopts), add = TRUE)

  # drop = TRUE skips unused factor levels, which would give empty groups
  dat_split <- split(data, ~ path, drop = TRUE)

  # One summary row per path: first start and last non-missing stop
  path_times <- function(subdata) {
    stop_times <- subdata$date.stop
    # Plain if/else instead of ifelse(): ifelse() strips the date-time class
    last_stop <- if (all(is.na(stop_times))) {
      stop_times[1]
    } else {
      max(stop_times, na.rm = TRUE)
    }
    data.frame(
      path = subdata$path[1],
      min_time = min(subdata$date.start),
      max_time = last_stop,
      stringsAsFactors = FALSE
    )
  }

  dat_case <- dplyr::bind_rows(pbapply::pblapply(dat_split, path_times))
  n_paths <- nrow(dat_case)

  # gap[i]: seconds from the last stop of path i to the first start of
  # path i + 1
  gap <- as.numeric(
    difftime(dat_case$min_time[-1], dat_case$max_time[-n_paths],
             units = "secs")
  )

  # A new case starts after every gap larger than the cutoff; unknown gaps
  # keep the current case
  new_case <- gap > cutoff
  new_case[is.na(new_case)] <- FALSE
  dat_case$case <- cumsum(c(1L, as.integer(new_case)))

  # Sorting by path first keeps the order of rows that tie in the final sort
  data <- data[order(data$path), ]
  # Look cases up by path instead of relying on positional alignment
  data$case <- dat_case$case[match(data$path, dat_case$path)]

  data <- data[order(data$fileId.start, data$date.start, data$timeMs.start), ]
  data
}
|
|
|