########################################################################### # Add trace variable add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") { data$trace <- NA subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ] subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ] last_event <- subdata2$event[1] aws <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"] for (art in aws) { for (i in 1:nrow(subdata2)) { if (last_event == "Show Info" & subdata2$artwork[i] == art) { subdata2$trace[i] <- i j <- i } else if (last_event == "Show Front" & subdata2$artwork[i] == art) { subdata2$trace[i] <- j } else if (!(last_event %in% c("Show Info", "Show Front")) & subdata2$artwork[i] == art) { subdata2$trace[i] <- j } if (i <= nrow(subdata2)) { last_event <- subdata2$event[i + 1] } } } # Fix glossar entries (find corresponding artworks and fill in trace) glossar_files <- unique(subdata2[subdata2$artwork == "glossar", "popup"]) # load lookup table for artworks and glossar files load(glossar_dict) lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ] head(subdata2[, c("artwork", "event", "popup", "trace")], 20) inside <- glossar_files[glossar_files %in% lut[sapply(lut$artwork, length) == 1, "glossar_file"]] single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"]) for (file in lut$glossar_file) { artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"]) for (i in seq_len(nrow(subdata2))) { if (subdata2$event[i] == "Show Info" | (subdata2$event[i] == "Artwork/OpenCard" & subdata2$artwork[i] %in% single_art)) { current_artwork <- subdata2[i, "artwork"] j <- i k <- i } else { current_artwork <- current_artwork } if (subdata2$event[i] == "Show Front" & subdata2$artwork[i] == current_artwork) { # make sure artwork has not been closed, yet! k <- i } if (subdata2$artwork[i] == "glossar" & (current_artwork %in% artwork_list) & subdata2$popup[i] == file & (j - k == 0)) { subdata2[i, "trace"] <- subdata2[j, "trace"] subdata2[i, "artwork"] <- current_artwork } } } # Exclude not matched glossar entries cat("INFORMATION: glossar entries that are not matched will be removed:", sum(is.na(subdata2[subdata2$glossar == 1, "trace"])), "entries", #proportions(table(is.na(subdata2[subdata2$glossar == 1, "trace"]))), fill = TRUE) subdata2 <- subset(subdata2, !is.na(subdata2$trace)) # REMEMBER: It can never be 100% correct, since it is always possible # that several cards are open and that they link to the same glossar # entry # dat2[14110:14130, ] # dat2[dat2$glossar == 1, ] out <- rbind(subdata1, subdata2) out <- out[order(out$fileId, out$date, out$timeMs), ] out } ########################################################################### close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) { if (event == "move") { actions <- c("Transform start", "Transform stop") idvar <- c("eventId", "artwork", "glossar") drop <- c("popup", "topicNumber", "trace", "event") } else if (event == "flipCard") { actions <- c("Show Info", "Show Front") idvar <- c("trace", "artwork", "glossar") drop <- c("popup", "topicNumber", "eventId", "event") } else if (event == "openTopic") { actions <- c("Artwork/OpenCard", "Artwork/CloseCard") idvar <- c("eventId", "trace", "glossar", "artwork", "topicNumber") drop <- c("popup", "event") } else if (event == "openPopup") { actions <- c("ShowPopup", "HidePopup") idvar <- c("eventId", "trace", "glossar", "artwork", "popup") drop <- c("topicNumber", "event") # TODO: Should topicNumber maybe also be filled in for "openPopup"? } else { stop("`event` must be one of 'move', 'flipCard', 'openTopic', 'openPopup'.") } subdata <- subset(data, data$event %in% actions) #subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ] subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ] subdata$time <- ifelse(subdata$event == actions[1], "start", "stop") num_start <- diff(c(0, which(subdata$event == actions[2]))) subdata$eventId <- rep(seq_along(num_start), num_start) # If last event is start event, it needs to be fixed: # num_start <- c(num_start, 1) # TODO: Needs to be caught in a function # --> not anymore - still relevant??? if (event == "move") { subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ] id_stop <- which(subdata$event == actions[2]) id_rm_stop <- id_stop[diff(id_stop) == 1] subdata <- subdata[-(id_rm_stop + 1), ] } suppressWarnings( data_wide <- reshape(subdata, direction = "wide", idvar = idvar, timevar = "time", drop = drop) ) # TODO: Suppress warnings? Better with tryCatch()? # there is a pathological entry which gets deleted... # df[df$trace == 4595, ] # --> artwork 046 popup selene.xml gets opened twice # TODO: This runs for quite some time # --> Is this more efficient with tidyr::pivot_wider? for (d in drop) data_wide[d] <- NA data_wide$distance <- NA data_wide$scaleSize <- NA data_wide$rotationDegree <- NA data_wide$event <- event data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start if (event == "move") { data_wide$distance <- apply( data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1, function(x) dist(matrix(x, 2, 2, byrow = TRUE))) data_wide$rotationDegree <- data_wide$rotation.stop - data_wide$rotation.start data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start # remove moves without any change move_wide <- data_wide[data_wide$distance != 0 & data_wide$rotationDegree != 0 & data_wide$scaleSize != 1, ] cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide), "lines containing move events were removed since they did", "\nnot contain any change"), fill = TRUE) data_wide <- move_wide } out <- data_wide[, c("fileId.start", "fileId.stop", "event", "artwork", "trace", "glossar", "date.start", "date.stop", "timeMs.start", "timeMs.stop", "duration", "topicNumber", "popup", "x.start", "y.start", "x.stop", "y.stop", "distance", "scale.start", "scale.stop", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] rownames(out) <- NULL out } ########################################################################### # Add case variable add_case <- function(data, cutoff = 20) { # TODO: What is the best choice for the cutoff here? data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start))) data$case <- NA j <- 1 for (i in seq_len(nrow(data))) { if (data$timediff[i] <= cutoff) { data$case[i] <- j } else { j <- j + 1 data$case[i] <- j } } data$timediff <- NULL data } ########################################################################### # Add trace for moves add_trace_moves <- function(data) { cases <- unique(data$case) aws <- unique(data$artwork)[unique(data$artwork) != "glossar"] max_trace <- max(data$trace, na.rm = TRUE) + 1 out <- NULL for (case in cases) { for (art in aws) { tmp <- data[data$case == case & data$artwork == art, ] if (nrow(tmp) != 0) { if (length(na.omit(unique(tmp$trace))) == 1) { tmp[tmp$event == "move", "trace"] <- na.omit(unique(tmp$trace)) } else if (length(na.omit(unique(tmp$trace))) > 1) { for (i in 1:nrow(tmp)) { if (tmp$event[i] == "move") { if (i == 1) { tmp$trace[i] <- na.omit(unique(tmp$trace))[1] } else { tmp$trace[i] <- tmp$trace[i - 1] } } } } else if (all(is.na(tmp$trace))) { for (i in 1:nrow(tmp)) { if (tmp$event[i] == "move") { tmp$trace[i] <- max_trace } } } max_trace <- max_trace + 1 } if (nrow(tmp) > 0) { out <- rbind(out, tmp) } } } out <- out[order(out$date.start, out$fileId.start), ] rownames(out) <- NULL # Make trace a consecutive number out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace))) out } # TODO: Get rid of the loops # --> This takes forever...