diff --git a/README.md b/README.md index 030d192..50f3082 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,19 @@ ## Datenverständnis -* Welche Einheit haben x und y? Pixel? -* Welche Einheit hat scale? -* rotation wirklich degree? +* Welche Einheit haben x und y? Pixel? --> yes +* Welche Einheit hat scale? --> some kind if bit, does not matter, when + calculating a ratio +* rotation wirklich degree? --> yes * Nach welchem Zeitintervall resettet sich der Tisch wieder in die - Ausgangskonfiguration? + Ausgangskonfiguration? --> PM needs to look it up ## Tisch-Software * Gibt es Doku für die Bilder, die über die xml files hinausgeht? Sowas wie ein Manual oder ähnliches? * Gibt es evtl. irgendwo noch ein Tablet mit der Anwendung drauf? -* Was bedeuten die Farben der Topic Cards? +* Was bedeuten die Farben der Topic Cards? --> sieht man in den xml files ## Event Logs @@ -64,6 +65,97 @@ made. ## Weird behavior of `time_ms` and neg. `duration`values +I think the negative duration values happen, when an event starts in one +log file and completes in another one. The variable `time_ms` seems to be +continuous within one log file but not over several log files. + +```{r} +dat_all[which(dat_all$duration < 0), ][1:5, 1:10] + +# flipCard +## trace 56 +dat3[dat3$trace == 56,] + +dat[dat$fileid == "2016_11_15-11_12_57.log" & dat$date == "2016-12-15 11:17:26", ] +dat[dat$fileid == "2016_11_15-11_42_57.log" & dat$date == "2016-12-15 11:46:19", ] + +#dat[309:1405, ] + +tmp <- dat[300:1405, ] +tmp[tmp$artwork == "051", ] +## -> was closed correctly, but does it belong together? + + +## trace 61 +dat3[dat3$trace == 61,] + +dat[dat$fileid == "2016_11_15-11_12_57.log" & dat$date == "2016-12-15 11:17:52", ] +dat[dat$fileid == "2016_11_15-11_42_57.log" & dat$date == "2016-12-15 11:46:19", ] + +tmp <- dat[350:1408, ] +tmp[tmp$artwork == "057", ] +## -> was closed correctly, but does it belong together? + + +# openTopic +dat_all[which(dat_all$duration < 0), ][100:105, 1:10] + +# trace 2052 +dat4[dat4$trace == 2052,] + +dat[dat$fileid == "2016_11_17-14_12_10.log" & dat$date == "2016-12-17 14:21:51", ] +dat[dat$fileid == "2016_11_17-14_22_10.log" & dat$date == "2016-12-17 14:22:25", ] + +tmp <- dat[23801:23950, ] +tmp[tmp$artwork == "502", ] + +plot(time_ms ~ as.factor(fileid), dat[1:5000,]) +``` + +The boxplot shows that we have a continuous range of values within one log +file but that `time_ms` does not increase over log files. + +Since it seems not possible to fix this in a consistent way, I will set +negative durations to `NA`. I will keep `time_ms.start` and `time_ms.stop` +in the data frame, so it is clear why there are no durations. Maybe it +would also be useful to keep `logfileid.start` and `logfileid.stop` in the +data? Maybe just for proof checking this theory... + +Part of it was that timestamps that are part of the log file names are not +zero-left-padded. But this fixed only three `move` events, since it only +fixed irregularities *within* one log file. + +```{r} +table(dat_all[dat_all$duration < 0, "event"]) + +# flipCard move openPopup openTopic +# 562 100 34 284 + + +dat[dat$event %in% c("Transform start", "Transform stop"), ][1100:1300,] +# --> got fixed by left padding... but only three all together!! + +dat_all[735, ] + +## what it looked like before left padding +# 1422 ../data/haum_logs_2016-2023/_2016b/2016_11_15-12_2_57.log 2016-12-15 12:12:56 599671 Transform start 076 076.xml NA 2092.25 2008.00 0.3000000 13.26874254 +# 1423 ../data/haum_logs_2016-2023/_2016b/2016_11_15-12_12_57.log 2016-12-15 12:12:57 621 Transform start 076 076.xml NA 2092.25 2008.00 0.3000000 13.26523465 +# 1424 ../data/haum_logs_2016-2023/_2016b/2016_11_15-12_12_57.log 2016-12-15 12:12:57 677 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997736 13.26239605 +# 1425 ../data/haum_logs_2016-2023/_2016b/2016_11_15-12_12_57.log 2016-12-15 12:12:57 774 Transform start 076 076.xml NA 2092.25 2008.00 0.2999345 13.26239605 +# 1426 ../data/haum_logs_2016-2023/_2016b/2016_11_15-12_12_57.log 2016-12-15 12:12:57 850 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997107 13.26223362 +# 1427 ../data/haum_logs_2016-2023/_2016b/2016_11_15-12_2_57.log 2016-12-15 12:12:57 599916 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997771 13.26523465 + +## what it looks like now +# 1422 2016_11_15-12_02_57.log 2016-12-15 12:12:56 599671 Transform start 076 076.xml NA 2092.25 2008.00 0.3000000 13.26874254 +# 1423 2016_11_15-12_02_57.log 2016-12-15 12:12:57 599916 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997771 13.26523465 +# 1424 2016_11_15-12_12_57.log 2016-12-15 12:12:57 621 Transform start 076 076.xml NA 2092.25 2008.00 0.3000000 13.26523465 +# 1425 2016_11_15-12_12_57.log 2016-12-15 12:12:57 677 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997736 13.26239605 +# 1426 2016_11_15-12_12_57.log 2016-12-15 12:12:57 774 Transform start 076 076.xml NA 2092.25 2008.00 0.2999345 13.26239605 +# 1427 2016_11_15-12_12_57.log 2016-12-15 12:12:57 850 Transform stop 076 076.xml NA 2092.25 2008.00 0.2997107 13.26223362 +``` ## Events that only close (`date.start` is NA) @@ -83,7 +175,6 @@ made. - # Reading list * @Arizmendi2022 [$-$] diff --git a/code/01_parse-logfiles.R b/code/01_parse-logfiles.R index 340e022..c05cea9 100644 --- a/code/01_parse-logfiles.R +++ b/code/01_parse-logfiles.R @@ -2,7 +2,7 @@ #' title: "Preprocessing raw log files" #' author: "Nora Wickelmaier" #' date: "`r Sys.Date()`" -#' output: +#' output: #' html_document: #' default #' pdf_document: @@ -14,10 +14,10 @@ # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") #+ setup, include = FALSE -knitr::opts_chunk$set(warning = FALSE, message = FALSE) +knitr::opts_chunk$set(warning = FALSE, message = FALSE) #' The following events can be extracted from the log files: -#' +#' #' ``` #' LogEntry classes: #' TRANSFORM_START: "Transform start" --> "Transformation Start" in Tool @@ -42,9 +42,37 @@ fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) length(fnames) head(fnames) +# Need to left pad file names. If I do not do this, the sorting of the +# timestamps will be off and I get negative durations later on since the +# wrong events get closed. + + +leftpad_fnames <- function(x) { + + z <- gsub(paste0(dirpaths, "/"), "\\1", x) + ys <- strsplit(z, "_") + + res <- NULL + + for (y in ys) { + y2 <- unlist(strsplit(y[3], "-")) + e1 <- y[1] + e2 <- sprintf("%02d", as.numeric(y[2])) + e3 <- sprintf("%02d", as.numeric(y2[1])) + e4 <- sprintf("%02d", as.numeric(y2[2])) + e5 <- sprintf("%02d", as.numeric(y[4])) + e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) + e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5]))) + + res <- c(res, paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log")) + } + res +} + + logs <- lapply(fnames, readLines) nlog <- sapply(logs, length) -dat <- data.frame(fileid = rep(fnames, nlog), logs = unlist(logs)) +dat <- data.frame(fileid = rep(leftpad_fnames(fnames), nlog), logs = unlist(logs)) head(dat$logs) #' Remove corrupted lines @@ -59,7 +87,7 @@ head(dat$logs) # From LogEntry.as: # //pm: inserted this check to account for some broken logfiles -# if (metaData[1] == null){ +# if (metaData[1] == null){ # trace("corrupt line... still do not know how these came to happen."); # corrupt lines are "" and need to be removed @@ -73,12 +101,12 @@ d2 <- dim(dat)[1] #' ### Extract relevant infos date <- sapply(dat$logs, gsub, - pattern = "^\\[(.*)\\], \\[.*$", + pattern = "^\\[(.*)\\], \\[.*$", replacement = "\\1", USE.NAMES = FALSE) timestamp <- sapply(dat$logs, gsub, - pattern = "^\\[.*\\], \\[(.*)\\].*$", + pattern = "^\\[.*\\], \\[(.*)\\].*$", replacement = "\\1", USE.NAMES = FALSE) @@ -108,6 +136,8 @@ ts_elements <- strsplit(timestamp, ":") time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) + as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 + as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60 +# TODO: Maybe change to simple gsub()... +# --> This is theoretically sound but a lot of lines for just removing ":" dat$date <- lubridate::parse_date_time(date, "bdyHMSOp") dat$time_ms <- time_ms @@ -125,11 +155,11 @@ dat$logs <- NULL str(dat) -head(dat[, 2:ncol(dat)], 20) +head(dat, 20) -# sort by date, since sorting by file names does not make sense because of +# sort by fileid, since reading in by file names does not make sense because of # missing left zero padding -dat <- dat[order(dat$date), ] +dat <- dat[order(dat$fileid, dat$date, dat$time_ms), ] ## TODO: Replace artwork and popup numbers with informative strings diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R index 58e2640..04151e3 100644 --- a/code/02_preprocessing.R +++ b/code/02_preprocessing.R @@ -43,46 +43,46 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application", ######## #' Do it for Transform events first -tmp <- dat[dat$event %in% c("Transform start", "Transform stop"), ] -tmp <- tmp[order(tmp$artwork, tmp$date), ] -rownames(tmp) <- NULL +dat1 <- dat[dat$event %in% c("Transform start", "Transform stop"), ] +dat1 <- dat1[order(dat1$artwork, dat1$date), ] +rownames(dat1) <- NULL # Create event ID for closing events -num_start <- diff(c(0, which(tmp$event == "Transform stop"))) -tmp$eventid <- rep(seq_along(num_start), num_start) -head(tmp[, c("event", "eventid")], 25) +num_start <- diff(c(0, which(dat1$event == "Transform stop"))) +dat1$eventid <- rep(seq_along(num_start), num_start) +head(dat1[, c("event", "eventid")], 25) -table(table(tmp$eventid)) +table(table(dat1$eventid)) # 1 2 3 4 5 6 7 8 10 11 # 73 78429 5156 842 222 66 18 14 3 1 # --> compare to table(num_start)! # Find out how often "Transform stop" follows each other -num_stop <- c(diff(c(0, which(tmp$event == "Transform start")))) +num_stop <- c(diff(c(0, which(dat1$event == "Transform start")))) table(num_stop) -tmp$eventrep <- rep(num_start, num_start) -tmp$dupl <- duplicated(tmp[, c("event", "eventid")]) # keep first -tmp$dupl <- duplicated(tmp[, c("event", "eventid")], fromLast = TRUE) # keep last -tmp[tmp$eventrep == 10, ] +dat1$eventrep <- rep(num_start, num_start) +dat1$dupl <- duplicated(dat1[, c("event", "eventid")]) # keep first +dat1$dupl <- duplicated(dat1[, c("event", "eventid")], fromLast = TRUE) # keep last +dat1[dat1$eventrep == 10, ] -tmp$dupl <- NULL -tmp$eventrep <- NULL +dat1$dupl <- NULL +dat1$eventrep <- NULL # remove duplicated "Transform start" events -tmp <- tmp[!duplicated(tmp[, c("event", "eventid")]), ] +dat1 <- dat1[!duplicated(dat1[, c("event", "eventid")]), ] # remove duplicated "Transform stop" events -id_stop <- which(tmp$event == "Transform stop") +id_stop <- which(dat1$event == "Transform stop") id_rm_stop <- id_stop[diff(id_stop) == 1] -tmp <- tmp[-(id_rm_stop + 1), ] +dat1 <- dat1[-(id_rm_stop + 1), ] # transform to wide data format -tmp$event <- ifelse(tmp$event == "Transform start", "start", "stop") +dat1$event <- ifelse(dat1$event == "Transform start", "start", "stop") -trans_wide <- reshape(tmp, direction = "wide", +trans_wide <- reshape(dat1, direction = "wide", idvar = c("eventid", "artwork"), timevar = "event", drop = c("fileid", "popup", "card") ) @@ -100,14 +100,14 @@ trans_wide$eventid <- NULL rownames(trans_wide) <- NULL trans_wide$duration <- trans_wide$time_ms.stop - trans_wide$time_ms.start -#trans_wide$duration2 <- trans_wide$date.stop - trans_wide$date.start +#trans_wide$duration <- trans_wide$date.stop - trans_wide$date.start # only seconds - not fine grained enough trans_wide$distance <- apply( trans_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1, function(x) dist(matrix(x, 2, 2, byrow = TRUE))) trans_wide$rotationDegree <- trans_wide$rotation.stop - trans_wide$rotation.start -trans_wide$scaleSize <- trans_wide$scale.stop - trans_wide$scale.start +trans_wide$scaleSize <- trans_wide$scale.stop / trans_wide$scale.start trans_wide$trace <- NA trans_wide$card <- NA @@ -115,7 +115,7 @@ trans_wide$popup <- NA dat_trans <- trans_wide[trans_wide$distance != 0 & trans_wide$rotationDegree != 0 & - trans_wide$scaleSize != 0, + trans_wide$scaleSize != 1, c("event", "artwork", "trace", "date.start", "date.stop", "time_ms.start", "time_ms.stop", "duration", "card", "popup", @@ -125,7 +125,7 @@ dat_trans <- trans_wide[trans_wide$distance != 0 & "rotationDegree")] # removes almost 2/3 of the data (for small data set) -rm(tmp, id_rm_stop, id_stop, trans_wide, num_start, num_stop) +rm(id_rm_stop, id_stop, trans_wide, num_start, num_stop) summary(dat_trans) @@ -138,56 +138,53 @@ plot(time_ms.stop ~ time_ms.start, dat_trans, plot(date.stop ~ date.start, dat_trans[1:1000,], type = "b") -# TODO: How to handle duration < 0 -# --> Replace with NA for now... -dat_trans[dat_trans$duration < 0, "duration"] <- NA #' # Close other events -tmp <- dat[!dat$event %in% c("Transform start", "Transform stop"), ] -tmp$x <- NULL -tmp$y <- NULL -tmp$scale <- NULL -tmp$rotation <- NULL -rownames(tmp) <- NULL +dat2 <- dat[!dat$event %in% c("Transform start", "Transform stop"), ] +dat2$x <- NULL +dat2$y <- NULL +dat2$scale <- NULL +dat2$rotation <- NULL +rownames(dat2) <- NULL # Create event ID for closing events -# num_start <- diff(c(0, which(tmp$event == "Show Front"))) -# tmp$trace <- rep(seq_along(num_start), num_start) -# head(tmp[, c("artwork", "event", "trace")], 50) +# num_start <- diff(c(0, which(dat2$event == "Show Front"))) +# dat2$trace <- rep(seq_along(num_start), num_start) +# head(dat2[, c("artwork", "event", "trace")], 50) # --> does not work because of glossar entries... can't sort by artwork -tmp$trace <- NA -last_event <- tmp$event[1] -aws <- unique(tmp$artwork)[unique(tmp$artwork) != "glossar"] +dat2$trace <- NA +last_event <- dat2$event[1] +aws <- unique(dat2$artwork)[unique(dat2$artwork) != "glossar"] # for (art in aws) { # select artwork - for (i in 1:nrow(tmp)) { # go through rows + for (i in 1:nrow(dat2)) { # go through rows - if (last_event == "Show Info" & tmp$artwork[i] == art) { - tmp$trace[i] <- i + if (last_event == "Show Info" & dat2$artwork[i] == art) { + dat2$trace[i] <- i j <- i - } else if (last_event == "Show Front" & tmp$artwork[i] == art) { - tmp$trace[i] <- j + } else if (last_event == "Show Front" & dat2$artwork[i] == art) { + dat2$trace[i] <- j } else if (!(last_event %in% c("Show Info", "Show Front")) & - tmp$artwork[i] == art) { - tmp$trace[i] <- j + dat2$artwork[i] == art) { + dat2$trace[i] <- j } - if (i <= nrow(tmp)) { - last_event <- tmp$event[i + 1] + if (i <= nrow(dat2)) { + last_event <- dat2$event[i + 1] } } } -head(tmp[, c("artwork", "event", "trace")], 50) -tail(tmp[, c("artwork", "event", "trace")], 50) +head(dat2[, c("artwork", "event", "trace")], 50) +tail(dat2[, c("artwork", "event", "trace")], 50) # TODO: How to handle popups from glossar??? rm(aws, i, j, last_event, art) @@ -196,13 +193,13 @@ rm(aws, i, j, last_event, art) ### Find artwork for glossar entry -glossar_files <- unique(tmp[tmp$artwork == "glossar", "popup"]) +glossar_files <- unique(dat2[dat2$artwork == "glossar", "popup"]) # load lookup table for artworks and glossar files load("../data/glossar_dict.RData") lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ] -head(tmp[, c("artwork", "event", "popup", "trace")], 20) +head(dat2[, c("artwork", "event", "popup", "trace")], 20) #df <- NULL @@ -211,11 +208,11 @@ for (file in lut$glossar_file) { artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"]) - for (i in seq_len(nrow(tmp))) { + for (i in seq_len(nrow(dat2))) { - if (tmp$event[i] == "Show Info") { + if (dat2$event[i] == "Show Info") { - current_artwork <- tmp[i, "artwork"] + current_artwork <- dat2[i, "artwork"] j <- i k <- i @@ -225,25 +222,25 @@ for (file in lut$glossar_file) { } - if (tmp$event[i] == "Show Front" & tmp$artwork[i] == current_artwork) { + if (dat2$event[i] == "Show Front" & dat2$artwork[i] == current_artwork) { # make sure artwork has not been closed, yet! k <- i } - if (tmp$artwork[i] == "glossar" & + if (dat2$artwork[i] == "glossar" & (current_artwork %in% artwork_list) & - tmp$popup[i] == file & (j-k == 0)) { + dat2$popup[i] == file & (j-k == 0)) { #df <- rbind(df, data.frame(file, current_artwork, i, j)) - tmp[i, "trace"] <- tmp[j, "trace"] + dat2[i, "trace"] <- dat2[j, "trace"] } } } -# dim(tmp[is.na(tmp$trace), ]) +# dim(dat2[is.na(dat2$trace), ]) # --> finds about half of the glossar entries for the small data set... -# tmp[apply(df[, c("j", "i")], 1, c), c("artwork", "event", "popup", "trace")] +# dat2[apply(df[, c("j", "i")], 1, c), c("artwork", "event", "popup", "trace")] # REMEMBER: It can never bo 100% correct, since it is always possible that # several cards are open and that they link to the same glossar entry @@ -261,28 +258,28 @@ lut[sapply(lut$artwork, length) == 1, "glossar_file"] # # artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"]) # -# for (i in seq_len(nrow(tmp))) { +# for (i in seq_len(nrow(dat2))) { # -# if (tmp$event[i] == "Show Info") { +# if (dat2$event[i] == "Show Info") { # # artworks <- NULL -# current_artwork <- tmp[i, "artwork"] +# current_artwork <- dat2[i, "artwork"] # j <- i # # } else { # # print(current_artwork) -# artworks <- c(artworks, tmp[i, "artwork"]) +# artworks <- c(artworks, dat2[i, "artwork"]) # print(artworks) # # } # -# # if (tmp$artwork[i] == "glossar" & +# # if (dat2$artwork[i] == "glossar" & # # (current_artwork %in% artwork_list) & -# # tmp$popup[i] == file) { +# # dat2$popup[i] == file) { # # # # #df <- rbind(df, data.frame(file, current_artwork, i, j)) -# # tmp[i, "trace"] <- tmp[j, "trace"] +# # dat2[i, "trace"] <- dat2[j, "trace"] # # # } # } @@ -296,20 +293,20 @@ lut[sapply(lut$artwork, length) == 1, "glossar_file"] # TODO: For now: Exclude not matched glossar entries -df <- subset(tmp, !is.na(tmp$trace)) +df <- subset(dat2, !is.na(dat2$trace)) df <- df[order(df$trace), ] rownames(df) <- NULL -rm(tmp, lut, current_artwork, file, glossar_dict, i, j, k, artwork_list, +rm(lut, current_artwork, file, glossar_dict, i, j, k, artwork_list, glossar_files) #' ## Close flipCard -tmp <- subset(df, df$event %in% c("Show Info", "Show Front")) +dat3 <- subset(df, df$event %in% c("Show Info", "Show Front")) -tmp$event <- ifelse(tmp$event == "Show Info", "start", "stop") +dat3$event <- ifelse(dat3$event == "Show Info", "start", "stop") -flipCard_wide <- reshape(tmp, direction = "wide", +flipCard_wide <- reshape(dat3, direction = "wide", idvar = c("trace", "artwork"), timevar = "event", drop = c("fileid", "popup", "card")) @@ -317,10 +314,6 @@ flipCard_wide$event <- "flipCard" flipCard_wide$duration <- flipCard_wide$time_ms.stop - flipCard_wide$time_ms.start -# TODO: How to handle duration < 0 -# --> Replace with NA for now... -flipCard_wide$duration <- ifelse(flipCard_wide$duration < 0, - NA, flipCard_wide$duration) flipCard_wide$card <- NA flipCard_wide$popup <- NA @@ -345,30 +338,26 @@ dat_flipCard <- flipCard_wide[, c("event", "artwork", "trace", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] -rm(tmp, flipCard_wide) +rm(flipCard_wide) #' ## Close openTopic -tmp <- subset(df, df$event %in% c("Artwork/OpenCard", "Artwork/CloseCard")) -tmp <- tmp[order(tmp$artwork, tmp$date), ] -rownames(tmp) <- NULL +dat4 <- subset(df, df$event %in% c("Artwork/OpenCard", "Artwork/CloseCard")) +dat4 <- dat4[order(dat4$artwork, dat4$date), ] +rownames(dat4) <- NULL -num_start <- diff(c(0, which(tmp$event == "Artwork/CloseCard"))) -tmp$eventid <- rep(seq_along(num_start), num_start) +num_start <- diff(c(0, which(dat4$event == "Artwork/CloseCard"))) +dat4$eventid <- rep(seq_along(num_start), num_start) -tmp$event <- ifelse(tmp$event == "Artwork/OpenCard", "start", "stop") +dat4$event <- ifelse(dat4$event == "Artwork/OpenCard", "start", "stop") -openTopic_wide <- reshape(tmp, direction = "wide", +openTopic_wide <- reshape(dat4, direction = "wide", idvar = c("eventid", "trace", "artwork", "card"), timevar = "event", drop = c("fileid", "popup")) openTopic_wide$event <- "openTopic" openTopic_wide$duration <- openTopic_wide$time_ms.stop - openTopic_wide$time_ms.start -openTopic_wide$duration <- ifelse(openTopic_wide$duration < 0, - NA, openTopic_wide$duration) -# TODO: How to handle duration < 0 -# --> Replace with NA for now... openTopic_wide$popup <- NA openTopic_wide$x.start <- NA @@ -393,23 +382,23 @@ dat_openTopic <- openTopic_wide[, c("event", "artwork", "trace", "rotation.stop", "rotationDegree")] # TODO: card should have a unique identifier for each artwork -rm(openTopic_wide, num_start, tmp) +rm(openTopic_wide, num_start) #' ## close openPopup -tmp <- subset(df, df$event %in% c("ShowPopup", "HidePopup")) -tmp <- tmp[order(tmp$artwork, tmp$date), ] -rownames(tmp) <- NULL +dat5 <- subset(df, df$event %in% c("ShowPopup", "HidePopup")) +dat5 <- dat5[order(dat5$artwork, dat5$date), ] +rownames(dat5) <- NULL -num_start <- diff(c(0, which(tmp$event == "HidePopup"))) +num_start <- diff(c(0, which(dat5$event == "HidePopup"))) # last event is "ShowPopup"! Needs to be fixed num_start <- c(num_start, 1) # TODO: Needs to be caught in a function -tmp$eventid <- rep(seq_along(num_start), num_start) +dat5$eventid <- rep(seq_along(num_start), num_start) -tmp$event <- ifelse(tmp$event == "ShowPopup", "start", "stop") +dat5$event <- ifelse(dat5$event == "ShowPopup", "start", "stop") -openPopup_wide <- reshape(tmp, direction = "wide", +openPopup_wide <- reshape(dat5, direction = "wide", idvar = c("eventid", "trace", "artwork", "popup"), timevar = "event", drop = c("fileid", "card")) # there is a pathological entry which gets deleted... @@ -423,10 +412,6 @@ openPopup_wide$event <- "openPopup" openPopup_wide$duration <- openPopup_wide$time_ms.stop - openPopup_wide$time_ms.start -openPopup_wide$duration <- ifelse(openPopup_wide$duration < 0, - NA, openPopup_wide$duration) -# TODO: How to handle duration < 0 -# --> Replace with NA for now... openPopup_wide$card <- NA openPopup_wide$x.start <- NA @@ -449,7 +434,7 @@ dat_openPopup <- openPopup_wide[, c("event", "artwork", "trace", "distance", "scale.start", "scale.stop", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] -rm(num_start, openPopup_wide, tmp) +rm(num_start, openPopup_wide) # Merge all @@ -609,6 +594,12 @@ rownames(dat_all) <- NULL # Make `trace` a consecutive number dat_all$trace <- as.numeric(as.factor(dat_all$trace)) + +# TODO: How to handle duration < 0 +# --> Replace with NA for now... +dat_all$duration <- ifelse(dat_all$duration < 0, NA, dat_all$duration) + + #' # Export data write.table(dat_all, "../data/event_logfiles.csv", @@ -630,3 +621,6 @@ write.table(dat_all, "../data/event_logfiles.csv", # TODO: Write function for closing events + + +