375 lines
12 KiB
R
375 lines
12 KiB
R
###########################################################################
|
|
|
|
# Add trace variable
|
|
|
|
add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
|
|
|
|
data$trace <- NA
|
|
subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
|
|
subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
|
|
|
|
last_event <- subdata2$event[1]
|
|
aws <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"]
|
|
|
|
for (art in aws) {
|
|
|
|
for (i in 1:nrow(subdata2)) {
|
|
|
|
if (last_event == "Show Info" & subdata2$artwork[i] == art) {
|
|
subdata2$trace[i] <- i
|
|
j <- i
|
|
|
|
} else if (last_event == "Show Front" & subdata2$artwork[i] == art) {
|
|
subdata2$trace[i] <- j
|
|
|
|
} else if (!(last_event %in% c("Show Info", "Show Front")) &
|
|
subdata2$artwork[i] == art) {
|
|
subdata2$trace[i] <- j
|
|
}
|
|
|
|
if (i <= nrow(subdata2)) {
|
|
last_event <- subdata2$event[i + 1]
|
|
}
|
|
}
|
|
}
|
|
|
|
# Fix glossar entries (find corresponding artworks and fill in trace)
|
|
glossar_files <- unique(subdata2[subdata2$artwork == "glossar", "popup"])
|
|
|
|
# load lookup table for artworks and glossar files
|
|
load(glossar_dict)
|
|
lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
|
|
|
|
inside <- glossar_files[glossar_files %in%
|
|
lut[sapply(lut$artwork, length) == 1,
|
|
"glossar_file"]]
|
|
single_art <- unlist(lut[lut$glossar_file %in% inside, "artwork"])
|
|
|
|
for (file in lut$glossar_file) {
|
|
|
|
artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
|
|
|
|
for (i in seq_len(nrow(subdata2))) {
|
|
|
|
if (subdata2$event[i] == "Show Info" |
|
|
(subdata2$event[i] == "Artwork/OpenCard" &
|
|
subdata2$artwork[i] %in% single_art)) {
|
|
|
|
current_artwork <- subdata2[i, "artwork"]
|
|
j <- i
|
|
k <- i
|
|
|
|
} else {
|
|
|
|
current_artwork <- current_artwork
|
|
|
|
}
|
|
|
|
if (subdata2$event[i] == "Show Front" & subdata2$artwork[i] == current_artwork) {
|
|
# make sure artwork has not been closed, yet!
|
|
k <- i
|
|
}
|
|
|
|
if (subdata2$artwork[i] == "glossar" &
|
|
(current_artwork %in% artwork_list) &
|
|
subdata2$popup[i] == file & (j - k == 0)) {
|
|
|
|
subdata2[i, "trace"] <- subdata2[j, "trace"]
|
|
subdata2[i, "artwork"] <- current_artwork
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
# Exclude not matched glossar entries
|
|
cat("INFORMATION: glossar entries that are not matched will be removed:",
|
|
sum(is.na(subdata2[subdata2$glossar == 1, "trace"])), "entries",
|
|
#proportions(table(is.na(subdata2[subdata2$glossar == 1, "trace"]))),
|
|
fill = TRUE)
|
|
subdata2 <- subset(subdata2, !is.na(subdata2$trace))
|
|
# REMEMBER: It can never be 100% correct, since it is always possible
|
|
# that several cards are open and that they link to the same glossar
|
|
# entry
|
|
|
|
# dat2[14110:14130, ]
|
|
# dat2[dat2$glossar == 1, ]
|
|
|
|
out <- rbind(subdata1, subdata2)
|
|
out <- out[order(out$fileId, out$date, out$timeMs), ]
|
|
out
|
|
}
|
|
|
|
###########################################################################
|
|
|
|
close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
|
|
|
|
if (event == "move") {
|
|
actions <- c("Transform start", "Transform stop")
|
|
idvar <- c("eventId", "artwork", "glossar")
|
|
drop <- c("popup", "topicNumber", "trace", "event")
|
|
|
|
} else if (event == "flipCard") {
|
|
actions <- c("Show Info", "Show Front")
|
|
idvar <- c("trace", "artwork", "glossar")
|
|
drop <- c("popup", "topicNumber", "eventId", "event")
|
|
|
|
} else if (event == "openTopic") {
|
|
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
|
|
idvar <- c("eventId", "trace", "glossar", "artwork", "topicNumber")
|
|
drop <- c("popup", "event")
|
|
|
|
} else if (event == "openPopup") {
|
|
actions <- c("ShowPopup", "HidePopup")
|
|
idvar <- c("eventId", "trace", "glossar", "artwork", "popup")
|
|
drop <- c("topicNumber", "event")
|
|
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
|
|
|
|
} else {
|
|
stop("`event` must be one of 'move', 'flipCard', 'openTopic',
|
|
'openPopup'.")
|
|
}
|
|
|
|
subdata <- subset(data, data$event %in% actions)
|
|
#subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ]
|
|
subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
|
|
subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
|
|
num_start <- diff(c(0, which(subdata$event == actions[2])))
|
|
subdata$eventId <- rep(seq_along(num_start), num_start)
|
|
# If last event is start event, it needs to be fixed:
|
|
# num_start <- c(num_start, 1)
|
|
# TODO: Needs to be caught in a function
|
|
# --> not anymore - still relevant???
|
|
|
|
if (event == "move") {
|
|
subdata <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
|
|
id_stop <- which(subdata$event == actions[2])
|
|
id_rm_stop <- id_stop[diff(id_stop) == 1]
|
|
subdata <- subdata[-(id_rm_stop + 1), ]
|
|
}
|
|
|
|
suppressWarnings(
|
|
data_wide <- reshape(subdata, direction = "wide",
|
|
idvar = idvar,
|
|
timevar = "time",
|
|
drop = drop)
|
|
)
|
|
# TODO: Suppress warnings? Better with tryCatch()?
|
|
# there is a pathological entry which gets deleted...
|
|
# df[df$trace == 4595, ]
|
|
# --> artwork 046 popup selene.xml gets opened twice
|
|
|
|
# TODO: This runs for quite some time
|
|
# --> Is this more efficient with tidyr::pivot_wider?
|
|
|
|
|
|
for (d in drop) data_wide[d] <- NA
|
|
data_wide$distance <- NA
|
|
data_wide$scaleSize <- NA
|
|
data_wide$rotationDegree <- NA
|
|
|
|
data_wide$event <- event
|
|
data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
|
|
|
|
if (event == "move") {
|
|
data_wide$distance <- apply(
|
|
data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
|
|
function(x) dist(matrix(x, 2, 2, byrow = TRUE)))
|
|
data_wide$rotationDegree <- data_wide$rotation.stop -
|
|
data_wide$rotation.start
|
|
data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
|
|
# remove moves without any change
|
|
move_wide <- data_wide[data_wide$distance != 0 &
|
|
data_wide$rotationDegree != 0 &
|
|
data_wide$scaleSize != 1, ]
|
|
cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
|
|
"lines containing move events were removed since they did",
|
|
"\nnot contain any change"), fill = TRUE)
|
|
data_wide <- move_wide
|
|
}
|
|
|
|
out <- data_wide[, c("fileId.start", "fileId.stop", "event", "artwork",
|
|
"trace", "glossar", "date.start", "date.stop",
|
|
"timeMs.start", "timeMs.stop", "duration",
|
|
"topicNumber", "popup", "x.start", "y.start",
|
|
"x.stop", "y.stop", "distance", "scale.start",
|
|
"scale.stop", "scaleSize", "rotation.start",
|
|
"rotation.stop", "rotationDegree")]
|
|
rownames(out) <- NULL
|
|
out
|
|
}
|
|
|
|
###########################################################################
|
|
|
|
# Add case variable
|
|
|
|
add_case <- function(data, cutoff = 20) {
|
|
# TODO: What is the best choice for the cutoff here?
|
|
|
|
data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
|
|
|
|
data$case <- NA
|
|
j <- 1
|
|
|
|
for (i in seq_len(nrow(data))) {
|
|
if (data$timediff[i] <= cutoff) {
|
|
data$case[i] <- j
|
|
} else {
|
|
j <- j + 1
|
|
data$case[i] <- j
|
|
}
|
|
}
|
|
data$timediff <- NULL
|
|
data
|
|
}
|
|
|
|
###########################################################################
|
|
|
|
# Add trace for moves
|
|
|
|
add_trace_moves <- function(data) {
|
|
|
|
cases <- unique(data$case)
|
|
aws <- unique(data$artwork)[unique(data$artwork) != "glossar"]
|
|
max_trace <- max(data$trace, na.rm = TRUE) + 1
|
|
out <- NULL
|
|
|
|
for (case in cases) {
|
|
for (art in aws) {
|
|
tmp <- data[data$case == case & data$artwork == art, ]
|
|
if (nrow(tmp) != 0) {
|
|
|
|
if (length(na.omit(unique(tmp$trace))) == 1) {
|
|
tmp[tmp$event == "move", "trace"] <- na.omit(unique(tmp$trace))
|
|
} else if (length(na.omit(unique(tmp$trace))) > 1) {
|
|
for (i in 1:nrow(tmp)) {
|
|
if (tmp$event[i] == "move") {
|
|
if (i == 1) {
|
|
tmp$trace[i] <- na.omit(unique(tmp$trace))[1]
|
|
} else {
|
|
tmp$trace[i] <- tmp$trace[i - 1]
|
|
}
|
|
}
|
|
}
|
|
} else if (all(is.na(tmp$trace))) {
|
|
for (i in 1:nrow(tmp)) {
|
|
if (tmp$event[i] == "move") {
|
|
tmp$trace[i] <- max_trace
|
|
}
|
|
}
|
|
}
|
|
max_trace <- max_trace + 1
|
|
}
|
|
if (nrow(tmp) > 0) {
|
|
out <- rbind(out, tmp)
|
|
}
|
|
}
|
|
}
|
|
out <- out[order(out$date.start, out$fileId.start), ]
|
|
rownames(out) <- NULL
|
|
|
|
# Make trace a consecutive number
|
|
out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace)))
|
|
out
|
|
}
|
|
# TODO: Get rid of the loops
|
|
# --> This takes forever...
|
|
|
|
###########################################################################
|
|
|
|
# Create data frame with file names and topics for each artwork
|
|
|
|
extract_topics <- function(artworks, pattern, path) {
|
|
|
|
dat <- NULL
|
|
file_order <- NULL
|
|
i <- 1
|
|
|
|
for (artwork in artworks) {
|
|
|
|
if (length(pattern) == 1) {
|
|
index_file <- pattern
|
|
} else {
|
|
index_file <- pattern[i]
|
|
}
|
|
|
|
fnames <- dir(pattern = paste0(artwork, "_"),
|
|
path = paste(path, artwork, sep = "/"))
|
|
topic <- NULL
|
|
for (fname in fnames) {
|
|
suppressWarnings(
|
|
topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
|
|
grep("^<card type=",
|
|
trimws(readLines(paste(path, artwork, fname, sep = "/"))),
|
|
value = T)))
|
|
)
|
|
|
|
}
|
|
index <- paste(path, artwork, index_file, sep = "/")
|
|
suppressWarnings(
|
|
file_order <- c(file_order, gsub("^<card src=.*/(.*)./>$", "\\1",
|
|
grep("^<card src=", trimws(readLines(index)),
|
|
value = TRUE)))
|
|
)
|
|
in_index <- fnames %in% file_order
|
|
dat <- rbind(dat, data.frame(artwork, file_name = fnames, in_index, topic))
|
|
i <- i + 1
|
|
}
|
|
|
|
# take only the ones that are actually displayed and sort in the same order
|
|
# as indicated in index.html
|
|
out <- dat[dat$in_index, -3]
|
|
out <- out[order(file_order, out$file_name), ]
|
|
rownames(out) <- NULL
|
|
|
|
out$index <- unlist(sapply(table(out$artwork), seq_len))
|
|
out
|
|
}
|
|
|
|
###########################################################################
|
|
|
|
# Add topics: file names and topics
|
|
|
|
add_topic <- function(data, topics) {
|
|
|
|
tab_art <- lapply(artworks,
|
|
function(x) names(table(data$topicNumber[data$artwork == x])))
|
|
names(tab_art) <- artworks
|
|
|
|
tab_index <- lapply(tab_art, seq_along)
|
|
|
|
dat_split <- split(data, data$artwork)
|
|
|
|
set_label <- function(x) {
|
|
art <- unique(x$artwork)
|
|
x$topicIndex <- factor(x$topicNumber, labels = tab_index[[art]])
|
|
x
|
|
}
|
|
|
|
dat_label <- lapply(dat_split, set_label)
|
|
|
|
set_topic <- function(x) {
|
|
art <- unique(x$artwork)
|
|
labels_file <- topics[topics$artwork == art,
|
|
"file_name"][as.numeric(levels(x$topicIndex))]
|
|
x$topicFile <- as.character(factor(x$topicIndex, labels = labels_file))
|
|
labels_topic <- topics[topics$artwork == art,
|
|
"topic"][as.numeric(levels(x$topicIndex))]
|
|
x$topic <- as.character(factor(x$topicIndex, labels = labels_topic))
|
|
x
|
|
}
|
|
|
|
dat_topic <- lapply(dat_label, set_topic)
|
|
|
|
# table(dat_topic[["501"]]$topicNumber)
|
|
# table(dat_topic[["501"]]$topicIndex)
|
|
# table(dat_topic[["501"]]$topicFile)
|
|
# table(dat_topic[["501"]]$topic)
|
|
|
|
out <- do.call(rbind, dat_topic)
|
|
out <- out[order(out$date.start, out$fileId.start), ]
|
|
rownames(out) <- NULL
|
|
out
|
|
}
|
|
|
|
|