diff --git a/code/01_parse-logfiles.R b/code/01_parse-logfiles.R index c05cea9..32b19c1 100644 --- a/code/01_parse-logfiles.R +++ b/code/01_parse-logfiles.R @@ -33,8 +33,8 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE) #' Choose which folders with raw log files should be included: -#folders <- "all" -folders <- "_2016b" +folders <- "all" +#folders <- "_2016b" dirpaths <- paste0("../data/haum_logs_2016-2023/", folders) @@ -64,7 +64,8 @@ leftpad_fnames <- function(x) { e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) - res <- c(res, paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log")) + res <- c(res, + paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log")) } res } @@ -72,7 +73,8 @@ leftpad_fnames <- function(x) { logs <- lapply(fnames, readLines) nlog <- sapply(logs, length) -dat <- data.frame(fileid = rep(leftpad_fnames(fnames), nlog), logs = unlist(logs)) +dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog), + logs = unlist(logs)) head(dat$logs) #' Remove corrupted lines @@ -95,7 +97,8 @@ d1 <- dim(dat)[1] dat <- subset(dat, dat$logs != "") d2 <- dim(dat)[1] -#' The files contain `r d1-d2` corrupt lines that were remooved from the data. +#' The files contain `r d1-d2` corrupt lines that were remooved from the +#' data. #' #' ### Extract relevant infos @@ -139,16 +142,16 @@ time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + # TODO: Maybe change to simple gsub()... # --> This is theoretically sound but a lot of lines for just removing ":" -dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") -dat$time_ms <- time_ms -dat$event <- events -dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1])) -dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2]) -dat$card <- card -dat$x <- moves[,1] -dat$y <- moves[,2] -dat$scale <- moves[,3] -dat$rotation <- moves[,4] +dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") +dat$timeMs <- time_ms +dat$event <- events +dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1])) +dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2]) +dat$topicNumber <- card +dat$x <- moves[,1] +dat$y <- moves[,2] +dat$scale <- moves[,3] +dat$rotation <- moves[,4] dat$logs <- NULL # remove original log files from data so file becomes smaller @@ -157,14 +160,14 @@ str(dat) head(dat, 20) -# sort by fileid, since reading in by file names does not make sense because of +# sort by fileId, since reading in by file names does not make sense because of # missing left zero padding -dat <- dat[order(dat$fileid, dat$date, dat$time_ms), ] +dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ] ## TODO: Replace artwork and popup numbers with informative strings #' ### Save data frame -write.table(dat, "../data/rawdata_logfiles_small.csv", +write.table(dat, "../data/rawdata_logfiles.csv", sep = ";", quote = FALSE, row.names = FALSE) diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R index 880b4bd..89991ea 100644 --- a/code/02_preprocessing.R +++ b/code/02_preprocessing.R @@ -40,10 +40,10 @@ rownames(dat1) <- NULL # Create event ID for closing events num_start <- diff(c(0, which(dat1$event == "Transform stop"))) -dat1$eventid <- rep(seq_along(num_start), num_start) -head(dat1[, c("event", "eventid")], 25) +dat1$eventId <- rep(seq_along(num_start), num_start) +head(dat1[, c("event", "eventId")], 25) -table(table(dat1$eventid)) +table(table(dat1$eventId)) # 1 2 3 4 5 6 7 8 10 11 # 70 78435 5153 842 222 66 18 14 3 1 # --> compare to table(num_start)! @@ -52,17 +52,8 @@ table(table(dat1$eventid)) num_stop <- c(diff(c(0, which(dat1$event == "Transform start")))) table(num_stop) -# TODO: Do I still need this? -# dat1$eventrep <- rep(num_start, num_start) -# dat1$dupl <- duplicated(dat1[, c("event", "eventid")]) # keep first -# dat1$dupl <- duplicated(dat1[, c("event", "eventid")], fromLast = TRUE) # keep last -# dat1[dat1$eventrep == 10, ] -# dat1$dupl <- NULL -# dat1$eventrep <- NULL - - # remove duplicated "Transform start" events -dat1 <- dat1[!duplicated(dat1[, c("event", "eventid")]), ] +dat1 <- dat1[!duplicated(dat1[, c("event", "eventId")]), ] # remove duplicated "Transform stop" events id_stop <- which(dat1$event == "Transform stop") @@ -74,36 +65,30 @@ dat1 <- dat1[-(id_rm_stop + 1), ] dat1$time <- ifelse(dat1$event == "Transform start", "start", "stop") trans_wide <- reshape(dat1, direction = "wide", - idvar = c("eventid", "artwork", "glossar"), + idvar = c("eventId", "artwork", "glossar"), timevar = "time", - drop = c("popup", "card", "event") + drop = c("popup", "topicNumber", "event") ) -# TODO: Should `card` remain? Or maybe rather topic? -# --> Rethink when you add topics, maybe card -> topicNumber? - # TODO: This runs for quite some time # --> Is this more efficient with tidyr::pivot_wider? -# --> when fileid is part of the reshape, it does not work correctly, since +# --> when fileId is part of the reshape, it does not work correctly, since # we sometimes have a start - stop event that is recorded in two separate -# log files, BUT: after finding out, that `time_ms` changes for each log -# file, I want to exclude those cases, so `fileid` has to be included!!! +# log files, BUT: after finding out, that `timeMs` changes for each log +# file, I want to exclude those cases, so `fileId` has to be included!!! -# check how often an eventid is associated with two fileids -nrow(subset(trans_wide, trans_wide$fileid.start != trans_wide$fileid.stop)) - -# TODO: exclude from data set ?? -# trans_wide <- subset(trans_wide, trans_wide$fileid.start != trans_wide$fileid.stop) +# check how often an eventId is associated with two fileIds +nrow(subset(trans_wide, trans_wide$fileId.start != trans_wide$fileId.stop)) # which(is.na(trans_wide$date.start)) trans_wide$event <- "move" -trans_wide$eventid <- NULL +trans_wide$eventId <- NULL rownames(trans_wide) <- NULL -trans_wide$duration <- trans_wide$time_ms.stop - trans_wide$time_ms.start +trans_wide$duration <- trans_wide$timeMs.stop - trans_wide$timeMs.start #trans_wide$duration <- trans_wide$date.stop - trans_wide$date.start # only seconds - not fine grained enough trans_wide$distance <- apply( @@ -114,19 +99,19 @@ trans_wide$rotationDegree <- trans_wide$rotation.stop - trans_wide$scaleSize <- trans_wide$scale.stop / trans_wide$scale.start trans_wide$trace <- NA -trans_wide$card <- NA +trans_wide$topicNumber <- NA trans_wide$popup <- NA dat_trans <- trans_wide[trans_wide$distance != 0 & trans_wide$rotationDegree != 0 & trans_wide$scaleSize != 1, - c("fileid.start", "fileid.stop", "event", "artwork", + c("fileId.start", "fileId.stop", "event", "artwork", "trace", "glossar", "date.start", "date.stop", - "time_ms.start", "time_ms.stop", "duration", - "card", "popup", "x.start", "y.start", "x.stop", - "y.stop", "distance", "scale.start", "scale.stop", - "scaleSize", "rotation.start", "rotation.stop", - "rotationDegree")] + "timeMs.start", "timeMs.stop", "duration", + "topicNumber", "popup", "x.start", "y.start", + "x.stop", "y.stop", "distance", "scale.start", + "scale.stop", "scaleSize", "rotation.start", + "rotation.stop", "rotationDegree")] 1 - nrow(dat_trans) / nrow(trans_wide) # removes almost 2/3 of the data (for small data set) @@ -166,9 +151,6 @@ for (art in aws) { # select artwork } } -head(dat2[, c("artwork", "event", "trace")], 50) -tail(dat2[, c("artwork", "event", "trace")], 50) - rm(aws, i, j, last_event, art) #' ## Fix glossar entries (find corresponding artworks and fill in trace) @@ -181,6 +163,11 @@ lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ] head(dat2[, c("artwork", "event", "popup", "trace")], 20) +inside <- glossar_files[glossar_files %in% + lut[sapply(lut$artwork, length) == 1, + "glossar_file"]] +single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"]) + for (file in lut$glossar_file) { @@ -188,7 +175,9 @@ for (file in lut$glossar_file) { for (i in seq_len(nrow(dat2))) { - if (dat2$event[i] == "Show Info") { + if (dat2$event[i] == "Show Info" | + (dat2$event[i] == "Artwork/OpenCard" & + dat2$artwork[i] %in% single_art)) { current_artwork <- dat2[i, "artwork"] j <- i @@ -207,67 +196,32 @@ for (file in lut$glossar_file) { if (dat2$artwork[i] == "glossar" & (current_artwork %in% artwork_list) & - dat2$popup[i] == file & (j-k == 0)) { + dat2$popup[i] == file & (j - k == 0)) { - dat2[i, "trace"] <- dat2[j, "trace"] + dat2[i, "trace"] <- dat2[j, "trace"] dat2[i, "artwork"] <- current_artwork } } } -# --> finds about half of the glossar entries for the small data set... -table(is.na(dat2[dat2$glossar == 1, "trace"])) +# --> finds a bit more than half of the glossar entries for the small data +# set... +proportions(table(is.na(dat2[dat2$glossar == 1, "trace"]))) -# REMEMBER: It can never bo 100% correct, since it is always possible that +# REMEMBER: It can never be 100% correct, since it is always possible that # several cards are open and that they link to the same glossar entry -# How many glossar_files are only associated with one artwork? -lut[sapply(lut$artwork, length) == 1, "glossar_file"] - -single <- lut[sapply(lut$artwork, length) == 1, "glossar_file"] -tmp <- subset(dat2, is.na(dat2$trace))$popup -inside <- unique(tmp[tmp %in% lut[sapply(lut$artwork, length) == 1, "glossar_file"]]) -single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"]) -tmp_lut <- data.frame(glossar_file = sort(inside), artwork = single_art) - - -dat2[dat2$glossar == 1, c("artwork", "popup", "glossar", "trace")] - -for (file in tmp_lut$glossar_file) { - - for (i in seq_len(nrow(dat2))) { - - if (dat2$event[i] == "Artwork/OpenCard" & dat2$artwork[i] %in% tmp_lut$artwork) { - - current_artwork <- dat2[i, "artwork"] - j <- i - - } - - if (dat2$artwork[i] == "glossar" & - dat2$popup[i] == file) { - - dat2[i, "trace"] <- dat2[j, "trace"] - dat2[i, "artwork"] <- current_artwork - - } - } -} - - dat2[14110:14130, ] +dat2[dat2$glossar == 1, ] -# TODO: Integrate for-loop into for-loop above - - +# Exclude not matched glossar entries df <- subset(dat2, !is.na(dat2$trace)) -# TODO: For now: Exclude not matched glossar entries df <- df[order(df$trace), ] rownames(df) <- NULL rm(lut, current_artwork, file, glossar_dict, i, j, k, artwork_list, - glossar_files) + glossar_files, inside, single_art) #' ## Close flipCard @@ -278,23 +232,23 @@ dat3$time <- ifelse(dat3$event == "Show Info", "start", "stop") flipCard_wide <- reshape(dat3, direction = "wide", idvar = c("trace", "artwork", "glossar"), timevar = "time", - drop = c("popup", "card")) + drop = c("popup", "topicNumber")) flipCard_wide$event <- "flipCard" -flipCard_wide$duration <- flipCard_wide$time_ms.stop - - flipCard_wide$time_ms.start +flipCard_wide$duration <- flipCard_wide$timeMs.stop - + flipCard_wide$timeMs.start -flipCard_wide$card <- NA +flipCard_wide$topicNumber <- NA flipCard_wide$popup <- NA flipCard_wide$distance <- NA flipCard_wide$scaleSize <- NA flipCard_wide$rotationDegree <- NA -dat_flipCard <- flipCard_wide[, c("fileid.start", "fileid.stop", "event", +dat_flipCard <- flipCard_wide[, c("fileId.start", "fileId.stop", "event", "artwork", "trace", "glossar", "date.start", "date.stop", - "time_ms.start", "time_ms.stop", - "duration", "card", "popup", "x.start", - "y.start", "x.stop", "y.stop", + "timeMs.start", "timeMs.stop", + "duration", "topicNumber", "popup", + "x.start", "y.start", "x.stop", "y.stop", "distance", "scale.start", "scale.stop", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] @@ -308,33 +262,33 @@ dat4 <- dat4[order(dat4$artwork, dat4$date), ] rownames(dat4) <- NULL num_start <- diff(c(0, which(dat4$event == "Artwork/CloseCard"))) -dat4$eventid <- rep(seq_along(num_start), num_start) +dat4$eventId <- rep(seq_along(num_start), num_start) dat4$time <- ifelse(dat4$event == "Artwork/OpenCard", "start", "stop") openTopic_wide <- reshape(dat4, direction = "wide", - idvar = c("eventid", "trace", "glossar", "artwork", "card"), + idvar = c("eventId", "trace", "glossar", "artwork", "topicNumber"), timevar = "time", drop = "popup") openTopic_wide$event <- "openTopic" -openTopic_wide$duration <- openTopic_wide$time_ms.stop - - openTopic_wide$time_ms.start +openTopic_wide$duration <- openTopic_wide$timeMs.stop - + openTopic_wide$timeMs.start openTopic_wide$popup <- NA openTopic_wide$distance <- NA openTopic_wide$scaleSize <- NA openTopic_wide$rotationDegree <- NA -dat_openTopic <- openTopic_wide[, c("fileid.start", "fileid.stop", "event", +dat_openTopic <- openTopic_wide[, c("fileId.start", "fileId.stop", "event", "artwork", "trace", "glossar", "date.start", "date.stop", - "time_ms.start", "time_ms.stop", - "duration", "card", "popup", "x.start", - "y.start", "x.stop", "y.stop", - "distance", "scale.start", + "timeMs.start", "timeMs.stop", + "duration", "topicNumber", "popup", + "x.start", "y.start", "x.stop", + "y.stop", "distance", "scale.start", "scale.stop", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] -# TODO: card should have a unique identifier for each artwork +# TODO: topicNumber should have a unique identifier for each artwork rm(openTopic_wide, num_start) @@ -347,42 +301,45 @@ rownames(dat5) <- NULL num_start <- diff(c(0, which(dat5$event == "HidePopup"))) # last event is "ShowPopup"! Needs to be fixed # num_start <- c(num_start, 1) -# TODO: Needs to be caught in a function --> not anymore - still relevant??? +# TODO: Needs to be caught in a function +# --> not anymore - still relevant??? -dat5$eventid <- rep(seq_along(num_start), num_start) +dat5$eventId <- rep(seq_along(num_start), num_start) dat5$time <- ifelse(dat5$event == "ShowPopup", "start", "stop") openPopup_wide <- reshape(dat5, direction = "wide", - idvar = c("eventid", "trace", "glossar", "artwork", "popup"), - timevar = "time", drop = "card") + idvar = c("eventId", "trace", "glossar", + "artwork", "popup"), + timevar = "time", + drop = "topicNumber") # there is a pathological entry which gets deleted... # df[df$trace == 4595, ] # --> artwork 046 popup selene.xml gets opened twice openPopup_wide$event <- "openPopup" -openPopup_wide$duration <- openPopup_wide$time_ms.stop - - openPopup_wide$time_ms.start +openPopup_wide$duration <- openPopup_wide$timeMs.stop - + openPopup_wide$timeMs.start -openPopup_wide$card <- NA +openPopup_wide$topicNumber <- NA openPopup_wide$distance <- NA openPopup_wide$scaleSize <- NA openPopup_wide$rotationDegree <- NA -dat_openPopup <- openPopup_wide[, c("fileid.start", "fileid.stop", "event", +dat_openPopup <- openPopup_wide[, c("fileId.start", "fileId.stop", "event", "artwork", "trace", "glossar", "date.start", "date.stop", - "time_ms.start", "time_ms.stop", - "duration", "card", "popup", "x.start", - "y.start", "x.stop", "y.stop", - "distance", "scale.start", + "timeMs.start", "timeMs.stop", + "duration", "topicNumber", "popup", + "x.start", "y.start", "x.stop", + "y.stop", "distance", "scale.start", "scale.stop", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] rm(num_start, openPopup_wide) -# TODO: Should card maybe also be filled in for "openPopup"? +# TODO: Should topicNumber maybe also be filled in for "openPopup"? #' ## Merge data sets for different events @@ -408,7 +365,7 @@ dat[31000:31019,] # this one e.g. # Remove durations when event spans more than one log file, since they are # not interpretable -dat_all[which(dat_all$fileid.start != dat_all$fileid.stop), "duration"] <- NA +dat_all[which(dat_all$fileId.start != dat_all$fileId.stop), "duration"] <- NA # sort by `start.date` dat_all <- dat_all[order(dat_all$date.start), ] @@ -445,12 +402,12 @@ head(dat_all[, c("event", "artwork", "trace", "date.start", "timediff", "case")] #' ## Add event ID -dat_all$eventid <- seq_len(nrow(dat_all)) +dat_all$eventId <- seq_len(nrow(dat_all)) -dat_all <- dat_all[, c("fileid.start", "fileid.stop", "eventid", "case", +dat_all <- dat_all[, c("fileId.start", "fileId.stop", "eventId", "case", "trace", "glossar", "event", "artwork", - "date.start", "date.stop", "time_ms.start", - "time_ms.stop", "duration", "card", "popup", + "date.start", "date.stop", "timeMs.start", + "timeMs.stop", "duration", "topicNumber", "popup", "x.start", "y.start", "x.stop", "y.stop", "distance", "scale.start", "scale.stop", "scaleSize", "rotation.start", "rotation.stop", @@ -510,12 +467,10 @@ out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace))) #' # Fill in topics topics <- read.table("../data/topics.csv", sep = ";", header = TRUE) -# TODO: +# TODO: Add topics to data frame #' # Export data write.table(out, "../data/event_logfiles.csv", sep = ";", row.names = FALSE) -# TODO: Write function for closing events - diff --git a/code/functions.R b/code/functions.R new file mode 100644 index 0000000..9b0b165 --- /dev/null +++ b/code/functions.R @@ -0,0 +1,283 @@ +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") + +# Read data + +dat0 <- read.table("../data/rawdata_logfiles_small.csv", sep = ";", + header = TRUE) + + dat0$date <- as.POSIXct(dat0$date) + dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) + # remove irrelevant events + dat <- subset(dat0, !(dat0$event %in% c("Start Application", + "Show Application"))) + +# Close move events +close_moves <- function(data) { + + # close move events + dat1 <- data[data$event %in% c("Transform start", "Transform stop"), ] + dat1 <- dat1[order(dat1$artwork, dat1$date), ] + num_start <- diff(c(0, which(dat1$event == "Transform stop"))) + dat1$eventId <- rep(seq_along(num_start), num_start) + + # remove duplicated "Transform start" events + dat1 <- dat1[!duplicated(dat1[, c("event", "eventId")]), ] + + # remove duplicated "Transform stop" events + id_stop <- which(dat1$event == "Transform stop") + id_rm_stop <- id_stop[diff(id_stop) == 1] + + dat1 <- dat1[-(id_rm_stop + 1), ] + + # transform to wide data format + dat1$time <- ifelse(dat1$event == "Transform start", "start", "stop") + + trans_wide <- reshape(dat1, direction = "wide", + idvar = c("eventId", "artwork", "glossar"), + timevar = "time", + drop = c("popup", "topicNumber", "event") + ) + trans_wide$event <- "move" + trans_wide$eventId <- NULL + + trans_wide$duration <- trans_wide$timeMs.stop - trans_wide$timeMs.start + trans_wide$distance <- apply( + trans_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1, + function(x) dist(matrix(x, 2, 2, byrow = TRUE))) + trans_wide$rotationDegree <- trans_wide$rotation.stop - + trans_wide$rotation.start + trans_wide$scaleSize <- trans_wide$scale.stop / trans_wide$scale.start + + trans_wide$trace <- NA + trans_wide$topicNumber <- NA + trans_wide$popup <- NA + + dat_trans <- trans_wide[trans_wide$distance != 0 & + trans_wide$rotationDegree != 0 & + trans_wide$scaleSize != 1, + c("fileId.start", "fileId.stop", "event", "artwork", + "trace", "glossar", "date.start", "date.stop", + "timeMs.start", "timeMs.stop", "duration", + "topicNumber", "popup", "x.start", "y.start", + "x.stop", "y.stop", "distance", "scale.start", + "scale.stop", "scaleSize", "rotation.start", + "rotation.stop", "rotationDegree")] + rownames(dat_trans) <- NULL + cat(paste("INFORMATION:", nrow(trans_wide) - nrow(dat_trans), + "lines containing move events were removed since they did", + "\nnot contain any change"), fill = TRUE) + dat_trans +} + + +dat1 <- close_moves(dat) +# TODO: Integrate this function into close_events? + +########################################################################### + +# Add trace variable +add_trace <- function(data) { + + dat2 <- data[!data$event %in% c("Transform start", "Transform stop"), ] + + dat2$trace <- NA + last_event <- dat2$event[1] + aws <- unique(dat2$artwork)[unique(dat2$artwork) != "glossar"] + # + for (art in aws) { # select artwork + + for (i in 1:nrow(dat2)) { # go through rows + + if (last_event == "Show Info" & dat2$artwork[i] == art) { + dat2$trace[i] <- i + j <- i + + } else if (last_event == "Show Front" & dat2$artwork[i] == art) { + dat2$trace[i] <- j + + } else if (!(last_event %in% c("Show Info", "Show Front")) & + dat2$artwork[i] == art) { + dat2$trace[i] <- j + } + + if (i <= nrow(dat2)) { + last_event <- dat2$event[i + 1] + } + } + } + dat2 +} + + +add_trace2 <- function(data, glossar_dict = "../data/glossar_dict.RData") { + + data$trace <- NA + dat1 <- data[data$event %in% c("Transform start", "Transform stop"), ] + dat2 <- data[!data$event %in% c("Transform start", "Transform stop"), ] + + last_event <- dat2$event[1] + aws <- unique(dat2$artwork)[unique(dat2$artwork) != "glossar"] + # + for (art in aws) { # select artwork + + for (i in 1:nrow(dat2)) { # go through rows + + if (last_event == "Show Info" & dat2$artwork[i] == art) { + dat2$trace[i] <- i + j <- i + + } else if (last_event == "Show Front" & dat2$artwork[i] == art) { + dat2$trace[i] <- j + + } else if (!(last_event %in% c("Show Info", "Show Front")) & + dat2$artwork[i] == art) { + dat2$trace[i] <- j + } + + if (i <= nrow(dat2)) { + last_event <- dat2$event[i + 1] + } + } + } + + glossar_files <- unique(dat2[dat2$artwork == "glossar", "popup"]) + + # load lookup table for artworks and glossar files + load(glossar_dict) + lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ] + + head(dat2[, c("artwork", "event", "popup", "trace")], 20) + + inside <- glossar_files[glossar_files %in% + lut[sapply(lut$artwork, length) == 1, + "glossar_file"]] + single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"]) + + + for (file in lut$glossar_file) { + + artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"]) + + for (i in seq_len(nrow(dat2))) { + + if (dat2$event[i] == "Show Info" | + (dat2$event[i] == "Artwork/OpenCard" & + dat2$artwork[i] %in% single_art)) { + + current_artwork <- dat2[i, "artwork"] + j <- i + k <- i + + } else { + + current_artwork <- current_artwork + + } + + if (dat2$event[i] == "Show Front" & dat2$artwork[i] == current_artwork) { + # make sure artwork has not been closed, yet! + k <- i + } + + if (dat2$artwork[i] == "glossar" & + (current_artwork %in% artwork_list) & + dat2$popup[i] == file & (j - k == 0)) { + + dat2[i, "trace"] <- dat2[j, "trace"] + dat2[i, "artwork"] <- current_artwork + + } + } + } + + cat(proportions(table(is.na(dat2[dat2$glossar == 1, "trace"]))), fill = TRUE) + + out <- rbind(dat1, dat2) + out <- out[order(out$fileId, out$date, out$timeMs), ] + out +} + + +tmp <- add_trace2(dat) + +########################################################################### + +close_events <- function(data, event = c("flipCard", "openTopic", "openPopup")) { + + if (event == "flipCard") { + subdata <- subset(data, data$event %in% c("Show Info", "Show Front")) + subdata$time <- ifelse(subdata$event == "Show Info", "start", "stop") + subdata$eventId <- NA + idvar <- c("trace", "artwork", "glossar") + drop <- c("popup", "topicNumber") + + } else if (event == "openTopic") { + subdata <- subset(data, data$event %in% c("Artwork/OpenCard", "Artwork/CloseCard")) + subdata$time <- ifelse(subdata$event == "Artwork/OpenCard", "start", "stop") + num_start <- diff(c(0, which(subdata$event == "Artwork/CloseCard"))) + subdata$eventId <- rep(seq_along(num_start), num_start) + idvar <- c("eventId", "trace", "glossar", "artwork", "topicNumber") + drop <- "popup" + + } else if (event == "openPopup") { + subdata <- subset(data, data$event %in% c("ShowPopup", "HidePopup")) + subdata$time <- ifelse(subdata$event == "ShowPopup", "start", "stop") + num_start <- diff(c(0, which(subdata$event == "HidePopup"))) + subdata$eventId <- rep(seq_along(num_start), num_start) + idvar <- c("eventId", "trace", "glossar", "artwork", "popup") + drop <- "topicNumber" + } + + data_wide <- reshape(subdata, direction = "wide", + idvar = idvar, + timevar = "time", + drop = drop) + data_wide$event <- event + data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start + + for (d in drop) data_wide[d] <- NA + data_wide$distance <- NA + data_wide$scaleSize <- NA + data_wide$rotationDegree <- NA + + out <- data_wide[, c("fileId.start", "fileId.stop", "event", "artwork", + "trace", "glossar", "date.start", "date.stop", + "timeMs.start", "timeMs.stop", "duration", + "topicNumber", "popup", "x.start", "y.start", + "x.stop", "y.stop", "distance", "scale.start", + "scale.stop", "scaleSize", "rotation.start", + "rotation.stop", "rotationDegree")] + out + # TODO: Suppress warnings? +} + +tmp <- rbind(close_moves(dat), + close_events(df, "flipCard"), + close_events(df, "openTopic"), + close_events(df, "openPopup")) + +# 'data.frame': 38607 obs. of 24 variables: +# $ fileId.start : chr "2016_11_15-12_32_57.log" "2016_11_15-14_42_57.log" "2016_11_15-14_42_57.log" "2016_11_16-12_31_32.log" ... +# $ fileId.stop : chr "2016_11_15-12_32_57.log" "2016_11_15-14_42_57.log" "2016_11_15-14_42_57.log" "2016_11_16-12_31_32.log" ... +# $ event : chr "move" "move" "move" "move" ... +# $ artwork : chr "001" "001" "001" "001" ... +# $ trace : int NA NA NA NA NA NA NA NA NA NA ... +# $ glossar : num 0 0 0 0 0 0 0 0 0 0 ... +# $ date.start : POSIXct, format: "2016-12-15 12:39:49" "2016-12-15 14:49:37" ... +# $ date.stop : POSIXct, format: "2016-12-15 12:39:49" "2016-12-15 14:49:40" ... +# $ timeMs.start : int 412141 400777 554506 384312 406277 533864 548467 549396 158632 194982 ... +# $ timeMs.stop : int 412474 403784 556633 388313 407994 538185 549088 551116 160343 197099 ... +# $ duration : int 333 3007 2127 4001 1717 4321 621 1720 1711 2117 ... +# $ topicNumber : int NA NA NA NA NA NA NA NA NA NA ... +# $ popup : chr NA NA NA NA ... +# $ x.start : num 531 235 470 326 326 ... +# $ y.start : num 1221 734 2090 747 747 ... +# $ x.stop : num 513 360 1492 256 2459 ... +# $ y.stop : num 1212 809 1687 643 1430 ... +# $ distance : num 19.8 146.6 1098.5 125.2 2239.4 ... +# $ scale.start : num 0.8 0.301 0.8 0.301 0.301 ... +# $ scale.stop : num 0.8 0.331 0.822 0.391 0.397 ... +# $ scaleSize : num 1 1.1 1.03 1.3 1.32 ... +# $ rotation.start: num 116 116 90 116 116 ... +# $ rotation.stop : num 116.3 89.6 2.8 86.1 125.8 ... +# $ rotationDegree: num 0.00245 -26.72397 -87.19711 -30.14456 9.49951 ...