First version of event log data set; script is a complete mess
This commit is contained in:
parent
f736058e33
commit
626db38617
@ -44,12 +44,19 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application",
|
|||||||
|
|
||||||
#' # Close events
|
#' # Close events
|
||||||
|
|
||||||
#' Do it for Tranform events first
|
# TODO:
|
||||||
|
# Should every "Show front" be the beginning of a new trace?
|
||||||
|
# Should Transform events be handled separately and then be "added" again
|
||||||
|
# by timestamp?
|
||||||
|
|
||||||
|
########
|
||||||
|
|
||||||
|
#' Do it for Transform events first
|
||||||
tmp <- dat[dat$event %in% c("Transform start", "Transform stop"), ]
|
tmp <- dat[dat$event %in% c("Transform start", "Transform stop"), ]
|
||||||
tmp <- tmp[order(tmp$artwork, tmp$date), ]
|
tmp <- tmp[order(tmp$artwork, tmp$date), ]
|
||||||
rownames(tmp) <- NULL
|
rownames(tmp) <- NULL
|
||||||
|
|
||||||
# Find out how often "Transform start" follows each other
|
# Create event ID for closing events
|
||||||
num_start <- diff(c(0, which(tmp$event == "Transform stop")))
|
num_start <- diff(c(0, which(tmp$event == "Transform stop")))
|
||||||
tmp$eventid <- rep(seq_along(num_start), num_start)
|
tmp$eventid <- rep(seq_along(num_start), num_start)
|
||||||
head(tmp[, c("event", "eventid")], 25)
|
head(tmp[, c("event", "eventid")], 25)
|
||||||
@ -88,8 +95,6 @@ trans_wide <- reshape(tmp, direction = "wide",
|
|||||||
idvar = c("eventid", "artwork"),
|
idvar = c("eventid", "artwork"),
|
||||||
timevar = "event", drop = c("fileid", "popup", "card")
|
timevar = "event", drop = c("fileid", "popup", "card")
|
||||||
)
|
)
|
||||||
|
|
||||||
rownames(trans_wide) <- NULL
|
|
||||||
# --> when fileid is part of the reshape, it does not work correctly, since
|
# --> when fileid is part of the reshape, it does not work correctly, since
|
||||||
# we sometimes have a start - stop event that is recorded in two separate
|
# we sometimes have a start - stop event that is recorded in two separate
|
||||||
# log files
|
# log files
|
||||||
@ -97,67 +102,297 @@ rownames(trans_wide) <- NULL
|
|||||||
|
|
||||||
# which(is.na(trans_wide$date.start))
|
# which(is.na(trans_wide$date.start))
|
||||||
|
|
||||||
|
trans_wide$event <- "move"
|
||||||
|
trans_wide$eventid <- NULL
|
||||||
|
|
||||||
|
rownames(trans_wide) <- NULL
|
||||||
|
|
||||||
trans_wide$duration <- trans_wide$time_ms.stop - trans_wide$time_ms.start
|
trans_wide$duration <- trans_wide$time_ms.stop - trans_wide$time_ms.start
|
||||||
trans_wide$distance <- apply(trans_wide[, c("x.start", "y.start", "x.stop", "y.stop")],
|
#trans_wide$duration2 <- trans_wide$date.stop - trans_wide$date.start
|
||||||
1, function(x) dist(matrix(x, 2, 2, byrow = TRUE)))
|
# only seconds - not fine grained enough
|
||||||
trans_wide$rotationDegree <- trans_wide$rotation.stop - trans_wide$rotation.start
|
trans_wide$distance <- apply(
|
||||||
|
trans_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
|
||||||
|
function(x) dist(matrix(x, 2, 2, byrow = TRUE)))
|
||||||
|
trans_wide$rotationDegree <- trans_wide$rotation.stop -
|
||||||
|
trans_wide$rotation.start
|
||||||
trans_wide$scaleSize <- trans_wide$scale.stop - trans_wide$scale.start
|
trans_wide$scaleSize <- trans_wide$scale.stop - trans_wide$scale.start
|
||||||
|
|
||||||
trans_wide <- trans_wide[trans_wide$distance != 0 &
|
dat_trans <- trans_wide[trans_wide$distance != 0 &
|
||||||
trans_wide$rotationDegree != 0 &
|
trans_wide$rotationDegree != 0 &
|
||||||
trans_wide$scaleSize != 0, ]
|
trans_wide$scaleSize != 0, ]
|
||||||
# removes almost 2/3 of the data (for small data set)
|
# removes almost 2/3 of the data (for small data set)
|
||||||
|
|
||||||
|
dat_trans <- dat_trans[, c("event", "artwork",
|
||||||
|
"date.start", "date.stop",
|
||||||
|
"time_ms.start", "time_ms.stop", "duration",
|
||||||
|
"x.start", "y.start", "x.stop", "y.stop", "distance",
|
||||||
|
"scale.start", "scale.stop", "scaleSize",
|
||||||
|
"rotation.start", "rotation.stop", "rotationDegree")]
|
||||||
|
|
||||||
# TODO: How do I handle popups from glossar???
|
summary(dat_trans)
|
||||||
|
|
||||||
|
# TODO: Phillip fragen was mit `time_ms` schief läuft... Hat er eine
|
||||||
|
# Erklärung dafür?
|
||||||
|
#plot(time_ms.stop ~ time_ms.start, dat_trans, type = "b")
|
||||||
|
plot(time_ms.stop ~ time_ms.start, dat_trans, col = rgb(red = 0, green = 0, blue = 0, alpha = 0.2))
|
||||||
|
|
||||||
|
plot(date.stop ~ date.start, dat_trans[1:1000,], type = "b")
|
||||||
|
|
||||||
|
# TODO: Wie mit duration < 0 umgehen? Einfach auf NA setzen? For now...
|
||||||
|
dat_trans[dat_trans$duration < 0, "duration"] <- NA
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#' # Close other events
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Should every "Show front" be the beginning of a new trace?
|
|
||||||
# Should Transform events be handled separately and then be "added" again
|
|
||||||
# by timestamp?
|
|
||||||
|
|
||||||
########
|
|
||||||
tmp <- dat[!dat$event %in% c("Transform start", "Transform stop"), ]
|
tmp <- dat[!dat$event %in% c("Transform start", "Transform stop"), ]
|
||||||
|
tmp$x <- NULL
|
||||||
|
tmp$y <- NULL
|
||||||
|
tmp$scale <- NULL
|
||||||
|
tmp$rotation <- NULL
|
||||||
rownames(tmp) <- NULL
|
rownames(tmp) <- NULL
|
||||||
|
|
||||||
|
# Create event ID for closing events
|
||||||
|
# num_start <- diff(c(0, which(tmp$event == "Show Front")))
|
||||||
|
# tmp$trace <- rep(seq_along(num_start), num_start)
|
||||||
|
# head(tmp[, c("artwork", "event", "trace")], 50)
|
||||||
|
# TODO: How do I handle popups from glossar???
|
||||||
|
# --> does not work because of glossar entries... can't sort by artwork
|
||||||
|
|
||||||
|
|
||||||
tmp$trace <- NA
|
tmp$trace <- NA
|
||||||
last_event <- tmp$event[1]
|
last_event <- tmp$event[1]
|
||||||
|
aws <- unique(tmp$artwork)[unique(tmp$artwork) != "glossar"]
|
||||||
|
#
|
||||||
|
for (art in aws) { # select artwork
|
||||||
|
|
||||||
for (art in unique(tmp$artwork)) {
|
for (i in 1:nrow(tmp)) { # go through rows
|
||||||
|
|
||||||
for (i in 1:nrow(tmp)) {
|
if (last_event == "Show Info" & tmp$artwork[i] == art) {
|
||||||
|
tmp$trace[i] <- i
|
||||||
|
j <- i
|
||||||
|
|
||||||
if (last_event == "Show Info" & (tmp$artwork[i] == art |
|
} else if (last_event == "Show Front" & tmp$artwork[i] == art) {
|
||||||
tmp$artwork[i] == "glossar")) {
|
tmp$trace[i] <- j
|
||||||
tmp$trace[i] <- "start"
|
|
||||||
} else if (last_event == "Show Front" & (tmp$artwork[i] == art |
|
} else if (!(last_event %in% c("Show Info", "Show Front")) &
|
||||||
tmp$artwork[i] == "glossar")) {
|
tmp$artwork[i] == art) {
|
||||||
tmp$trace[i] <- "stop"
|
tmp$trace[i] <- j
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i <= nrow(tmp)) {
|
||||||
last_event <- tmp$event[i + 1]
|
last_event <- tmp$event[i + 1]
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
head(tmp[, c("artwork", "event", "trace")], 50)
|
||||||
head(tmp[4:ncol(tmp)], 50)
|
tail(tmp[, c("artwork", "event", "trace")], 50)
|
||||||
# TODO: Great job! You used a for-loop to rename "Show info" and "Show
|
|
||||||
# front" to "start" and "stop" ;)
|
|
||||||
|
|
||||||
|
|
||||||
|
## Fix glossar entries
|
||||||
|
|
||||||
|
### Find artwork for glossar entry
|
||||||
|
|
||||||
|
tmp2 <- tmp[tmp$artwork == "glossar", ]
|
||||||
|
|
||||||
|
glossar_files <- unique(tmp2$popup)
|
||||||
|
|
||||||
|
# load lookup table for artworks and glossar files
|
||||||
|
load("../data/glossar_dict.RData")
|
||||||
|
lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
|
||||||
|
# TODO: Find last "Artwork/OpenCard" that matches possible artwork
|
||||||
|
|
||||||
|
head(tmp[, c("artwork", "event", "popup", "trace")], 20)
|
||||||
|
|
||||||
|
|
||||||
|
#df <- NULL
|
||||||
|
|
||||||
|
for (file in lut$glossar_file) {
|
||||||
|
|
||||||
|
artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
||||||
|
|
||||||
|
for (i in seq_len(nrow(tmp))) {
|
||||||
|
|
||||||
|
if (tmp$event[i] == "Show Info") {
|
||||||
|
|
||||||
|
current_artwork <- tmp[i, "artwork"]
|
||||||
|
j <- i
|
||||||
|
k <- i
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
current_artwork <- current_artwork
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tmp$event[i] == "Show Front" & tmp$artwork[i] == current_artwork) {
|
||||||
|
# make sure artwork has not been closed, yet!
|
||||||
|
k <- i
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tmp$artwork[i] == "glossar" &
|
||||||
|
(current_artwork %in% artwork_list) &
|
||||||
|
tmp$popup[i] == file & (j-k == 0)) {
|
||||||
|
|
||||||
|
#df <- rbind(df, data.frame(file, current_artwork, i, j))
|
||||||
|
tmp[i, "trace"] <- tmp[j, "trace"]
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# dim(tmp[is.na(tmp$trace), ])
|
||||||
|
# --> finds about half of the glossar entries for the small data set...
|
||||||
|
|
||||||
|
# tmp[apply(df[, c("j", "i")], 1, c), c("artwork", "event", "popup", "trace")]
|
||||||
|
|
||||||
|
# REMEMBER: It can never bo 100% correct, since it is always possible that
|
||||||
|
# several cards are open and that they link at the same glossar entry
|
||||||
|
|
||||||
|
# Fill in the glossar_files that are only associated with one artwork
|
||||||
|
# How many glossar_files are only associated with one artwork?
|
||||||
|
lut[sapply(lut$artwork, length) == 1, "glossar_file"]
|
||||||
|
|
||||||
|
# TODO: Can I fill in the ones that are only associated with one artwork
|
||||||
|
# easily? I can't come up with something --> maybe ask AK???
|
||||||
|
|
||||||
|
# TODO: How can I check if one of the former "Show Infos" is the correct
|
||||||
|
# one? I can't come up with something --> maybe ask AK???
|
||||||
|
|
||||||
|
# for (file in lut$glossar_file) {
|
||||||
|
#
|
||||||
|
# artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
||||||
|
#
|
||||||
|
# for (i in seq_len(nrow(tmp))) {
|
||||||
|
#
|
||||||
|
# if (tmp$event[i] == "Show Info") {
|
||||||
|
#
|
||||||
|
# artworks <- NULL
|
||||||
|
# current_artwork <- tmp[i, "artwork"]
|
||||||
|
# j <- i
|
||||||
|
#
|
||||||
|
# } else {
|
||||||
|
#
|
||||||
|
# print(current_artwork)
|
||||||
|
# artworks <- c(artworks, tmp[i, "artwork"])
|
||||||
|
# print(artworks)
|
||||||
|
#
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# # if (tmp$artwork[i] == "glossar" &
|
||||||
|
# # (current_artwork %in% artwork_list) &
|
||||||
|
# # tmp$popup[i] == file) {
|
||||||
|
# #
|
||||||
|
# # #df <- rbind(df, data.frame(file, current_artwork, i, j))
|
||||||
|
# # tmp[i, "trace"] <- tmp[j, "trace"]
|
||||||
|
#
|
||||||
|
# # }
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
|
||||||
|
# correct: 17940
|
||||||
|
# incorrect: 17963
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: For now: Exclude not matched glossar entries
|
||||||
|
|
||||||
|
df <- subset(tmp, !is.na(tmp$trace))
|
||||||
|
df <- df[order(df$trace), ]
|
||||||
|
rownames(df) <- NULL
|
||||||
|
|
||||||
|
|
||||||
|
#' ## Close flipCard
|
||||||
|
|
||||||
|
tmp <- subset(df, df$event %in% c("Show Info", "Show Front"))
|
||||||
|
|
||||||
|
tmp$event <- ifelse(tmp$event == "Show Info", "start", "stop")
|
||||||
|
|
||||||
|
dat_flipCard <- reshape(tmp, direction = "wide",
|
||||||
|
idvar = c("trace", "artwork"),
|
||||||
|
timevar = "event", drop = c("fileid", "popup", "card"))
|
||||||
|
dat_flipCard$event <- "flipCard"
|
||||||
|
dat_flipCard$duration <- dat_flipCard$time_ms.stop - dat_flipCard$time_ms.start
|
||||||
|
|
||||||
|
dat_flipCard$duration <- ifelse(dat_flipCard$duration < 0, NA, dat_flipCard$duration)
|
||||||
|
# TODO: Wie mit duration < 0 umgehen? Einfach auf NA setzen? For now...
|
||||||
|
|
||||||
|
dat_flipCard <- dat_flipCard[, c("event", "artwork", "trace",
|
||||||
|
"date.start", "date.stop",
|
||||||
|
"time_ms.start", "time_ms.stop", "duration")]
|
||||||
|
|
||||||
|
|
||||||
|
#' ## Close openTopic
|
||||||
|
|
||||||
|
tmp <- subset(df, df$event %in% c("Artwork/OpenCard", "Artwork/CloseCard"))
|
||||||
|
tmp <- tmp[order(tmp$artwork, tmp$date), ]
|
||||||
|
rownames(tmp) <- NULL
|
||||||
|
|
||||||
|
num_start <- diff(c(0, which(tmp$event == "Artwork/CloseCard")))
|
||||||
|
tmp$eventid <- rep(seq_along(num_start), num_start)
|
||||||
|
|
||||||
|
tmp$event <- ifelse(tmp$event == "Artwork/OpenCard", "start", "stop")
|
||||||
|
|
||||||
|
dat_openTopic <- reshape(tmp, direction = "wide",
|
||||||
|
idvar = c("eventid", "trace", "artwork", "card"),
|
||||||
|
timevar = "event", drop = c("fileid", "popup"))
|
||||||
|
dat_openTopic$event <- "openTopic"
|
||||||
|
dat_openTopic$duration <- dat_openTopic$time_ms.stop - dat_openTopic$time_ms.start
|
||||||
|
|
||||||
|
dat_openTopic$duration <- ifelse(dat_openTopic$duration < 0, NA, dat_openTopic$duration)
|
||||||
|
# TODO: Wie mit duration < 0 umgehen? Einfach auf NA setzen? For now...
|
||||||
|
|
||||||
|
dat_openTopic <- dat_openTopic[, c("event", "artwork", "card", "trace",
|
||||||
|
"date.start", "date.stop",
|
||||||
|
"time_ms.start", "time_ms.stop", "duration")]
|
||||||
|
# TODO: card should have a unique identifier for each artwork!
|
||||||
|
|
||||||
|
#' ## close openPopup
|
||||||
|
tmp <- subset(df, df$event %in% c("ShowPopup", "HidePopup"))
|
||||||
|
tmp <- tmp[order(tmp$artwork, tmp$date), ]
|
||||||
|
rownames(tmp) <- NULL
|
||||||
|
|
||||||
|
num_start <- diff(c(0, which(tmp$event == "HidePopup")))
|
||||||
|
# last event is "ShowPopup"! Needs to be fixed TODO: Needs to be cought in
|
||||||
|
# a function!
|
||||||
|
num_start <- c(num_start, 1)
|
||||||
|
|
||||||
|
tmp$eventid <- rep(seq_along(num_start), num_start)
|
||||||
|
|
||||||
|
tmp$event <- ifelse(tmp$event == "ShowPopup", "start", "stop")
|
||||||
|
|
||||||
|
dat_openPopup <- reshape(tmp, direction = "wide",
|
||||||
|
idvar = c("eventid", "trace", "artwork", "popup"),
|
||||||
|
timevar = "event", drop = c("fileid", "card"))
|
||||||
|
# there is a pathological entry which gets deleted...
|
||||||
|
# df[df$trace == 4595, ]
|
||||||
|
|
||||||
|
# TODO: Some correct entries are not closed:
|
||||||
|
df[df$trace == 1843, ]
|
||||||
|
# WHY???
|
||||||
|
|
||||||
|
dat_openPopup$event <- "openPopup"
|
||||||
|
dat_openPopup$duration <- dat_openPopup$time_ms.stop - dat_openPopup$time_ms.start
|
||||||
|
|
||||||
|
dat_openPopup$duration <- ifelse(dat_openPopup$duration < 0, NA, dat_openPopup$duration)
|
||||||
|
# TODO: Wie mit duration < 0 umgehen? Einfach auf NA setzen? For now...
|
||||||
|
|
||||||
|
dat_openPopup <- dat_openPopup[, c("event", "artwork", "popup", "trace",
|
||||||
|
"date.start", "date.stop",
|
||||||
|
"time_ms.start", "time_ms.stop", "duration")]
|
||||||
|
|
||||||
|
# Merge all
|
||||||
|
dat_all <- merge(dat_trans, dat_flipCard, all = TRUE)
|
||||||
|
dat_all <- merge(dat_all, dat_openTopic, all = TRUE)
|
||||||
|
dat_all <- merge(dat_all, dat_openPopup, all = TRUE)
|
||||||
|
|
||||||
|
# check
|
||||||
|
nrow(dat_all) == (nrow(dat_trans) + nrow(dat_flipCard) +
|
||||||
|
nrow(dat_openTopic) + nrow(dat_openPopup))
|
||||||
|
|
||||||
|
dat_all <- dat_all[order(dat_all$date.start), ]
|
||||||
|
rownames(dat_all) <- NULL
|
||||||
|
|
||||||
|
# TODO: Should card maybe also be filled in for "openPopup"?
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user