Code now also works for 8o8m; adjusted handling of glossar folder, added folder variable, etc.
This commit is contained in:
parent
7f6e967f7c
commit
85d3cfc2fa
@ -98,7 +98,9 @@ add_trace <- function(data, glossar_dict) {
|
|||||||
|
|
||||||
subdata2 <- add_trace_artworks(subdata2)
|
subdata2 <- add_trace_artworks(subdata2)
|
||||||
|
|
||||||
subdata2 <- add_trace_glossar(subdata2, glossar_dict)
|
if ("glossar" %in% unique(subdata2$artwork)) {
|
||||||
|
subdata2 <- add_trace_glossar(subdata2, glossar_dict)
|
||||||
|
}
|
||||||
|
|
||||||
out <- rbind(subdata1, subdata2)
|
out <- rbind(subdata1, subdata2)
|
||||||
out <- out[order(out$fileId, out$date, out$timeMs), ]
|
out <- out[order(out$fileId, out$date, out$timeMs), ]
|
||||||
|
@ -7,29 +7,30 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
|||||||
switch(event,
|
switch(event,
|
||||||
"move" = {
|
"move" = {
|
||||||
actions <- c("Transform start", "Transform stop")
|
actions <- c("Transform start", "Transform stop")
|
||||||
idvar <- c("fileId", "eventId", "artwork", "glossar")
|
idvar <- c("fileId", "folder", "eventId", "artwork", "glossar")
|
||||||
drop <- c("popup", "topicNumber", "trace", "event")
|
drop <- c("popup", "topicNumber", "trace", "event")
|
||||||
ncol <- 16
|
ncol <- 16
|
||||||
|
|
||||||
},
|
},
|
||||||
"flipCard" = {
|
"flipCard" = {
|
||||||
actions <- c("Show Info", "Show Front")
|
actions <- c("Show Info", "Show Front")
|
||||||
idvar <- c("fileId", "trace", "artwork", "glossar")
|
idvar <- c("fileId", "folder", "trace", "artwork", "glossar")
|
||||||
drop <- c("popup", "topicNumber", "eventId", "event")
|
drop <- c("popup", "topicNumber", "eventId", "event")
|
||||||
ncol <- 16
|
ncol <- 16
|
||||||
|
|
||||||
},
|
},
|
||||||
"openTopic" = {
|
"openTopic" = {
|
||||||
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
|
actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
|
||||||
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork",
|
idvar <- c("fileId", "folder", "eventId", "trace", "glossar",
|
||||||
"topicNumber")
|
"artwork", "topicNumber")
|
||||||
drop <- c("popup", "event")
|
drop <- c("popup", "event")
|
||||||
ncol <- 18
|
ncol <- 18
|
||||||
|
|
||||||
},
|
},
|
||||||
"openPopup" = {
|
"openPopup" = {
|
||||||
actions <- c("ShowPopup", "HidePopup")
|
actions <- c("ShowPopup", "HidePopup")
|
||||||
idvar <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
|
idvar <- c("fileId", "folder", "eventId", "trace", "glossar",
|
||||||
|
"artwork", "popup")
|
||||||
drop <- c("topicNumber", "event")
|
drop <- c("topicNumber", "event")
|
||||||
ncol <- 18
|
ncol <- 18
|
||||||
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
|
# TODO: Should topicNumber maybe also be filled in for "openPopup"?
|
||||||
@ -101,13 +102,13 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
|
|||||||
data_wide <- move_wide
|
data_wide <- move_wide
|
||||||
}
|
}
|
||||||
|
|
||||||
out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
|
out <- data_wide[, c("fileId", "folder", "event", "artwork", "trace",
|
||||||
"date.start", "date.stop", "timeMs.start",
|
"glossar", "date.start", "date.stop",
|
||||||
"timeMs.stop", "duration", "topicNumber", "popup",
|
"timeMs.start", "timeMs.stop", "duration",
|
||||||
"x.start", "y.start", "x.stop", "y.stop",
|
"topicNumber", "popup", "x.start", "y.start",
|
||||||
"distance", "scale.start", "scale.stop",
|
"x.stop", "y.stop", "distance", "scale.start",
|
||||||
"scaleSize", "rotation.start", "rotation.stop",
|
"scale.stop", "scaleSize", "rotation.start",
|
||||||
"rotationDegree")]
|
"rotation.stop", "rotationDegree")]
|
||||||
rownames(out) <- NULL
|
rownames(out) <- NULL
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
@ -5,12 +5,14 @@
|
|||||||
#'
|
#'
|
||||||
#' @param data Data frame of raw log files created with `parse_logfiles()`.
|
#' @param data Data frame of raw log files created with `parse_logfiles()`.
|
||||||
#' See `?parse_logfiles` for more details.
|
#' See `?parse_logfiles` for more details.
|
||||||
|
#' @param xmlfiles Vector of names of index files, often something like
|
||||||
|
#' `<artwork>.xml`.
|
||||||
#' @param xmlpath Path to folder where XML definitions of artworks live.
|
#' @param xmlpath Path to folder where XML definitions of artworks live.
|
||||||
#' @return Data frame.
|
#' @return Data frame.
|
||||||
#' @export
|
#' @export
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # tbd
|
#' # tbd
|
||||||
create_eventlogs <- function(data, xmlpath) {
|
create_eventlogs <- function(data, xmlfiles, xmlpath) {
|
||||||
|
|
||||||
if (!lubridate::is.POSIXt(data$date)){
|
if (!lubridate::is.POSIXt(data$date)){
|
||||||
cat("########## Convertion variable `date` to POSIXct ##########", "\n")
|
cat("########## Convertion variable `date` to POSIXct ##########", "\n")
|
||||||
@ -25,16 +27,21 @@ create_eventlogs <- function(data, xmlpath) {
|
|||||||
# Create glossar dictionary ##############################################
|
# Create glossar dictionary ##############################################
|
||||||
cat("\n########## Creating glossar dictionary ##########", "\n")
|
cat("\n########## Creating glossar dictionary ##########", "\n")
|
||||||
artworks <- unique(stats::na.omit(dat$artwork))
|
artworks <- unique(stats::na.omit(dat$artwork))
|
||||||
artworks <- artworks[artworks != "glossar"]
|
|
||||||
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
|
if ("glossar" %in% artworks) {
|
||||||
glossar_dict <- create_glossardict(artworks, glossar_files, path = xmlpath)
|
artworks <- artworks[artworks != "glossar"]
|
||||||
|
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
|
||||||
|
glossar_dict <- create_glossardict(artworks, glossar_files, xmlpath = xmlpath)
|
||||||
|
} else {
|
||||||
|
glossar_dict <- NULL
|
||||||
|
}
|
||||||
|
|
||||||
# Add trace variable #####################################################
|
# Add trace variable #####################################################
|
||||||
cat("\n########## Adding trace variable... ##########", "\n")
|
cat("\n########## Adding trace variable... ##########", "\n")
|
||||||
dat1 <- add_trace(dat, glossar_dict)
|
dat1 <- add_trace(dat, glossar_dict)
|
||||||
|
|
||||||
# Close events
|
# Close events
|
||||||
cat("\n########## Closing events... ##########", "\n")
|
cat("\n\n########## Closing events... ##########", "\n")
|
||||||
c1 <- close_events(dat1, "move")
|
c1 <- close_events(dat1, "move")
|
||||||
cat("## --> move events closed.", "\n")
|
cat("## --> move events closed.", "\n")
|
||||||
c2 <- close_events(dat1, "flipCard")
|
c2 <- close_events(dat1, "flipCard")
|
||||||
@ -63,7 +70,7 @@ create_eventlogs <- function(data, xmlpath) {
|
|||||||
|
|
||||||
# Add event ID ###########################################################
|
# Add event ID ###########################################################
|
||||||
dat3$eventId <- seq_len(nrow(dat3))
|
dat3$eventId <- seq_len(nrow(dat3))
|
||||||
dat3 <- dat3[, c("fileId", "eventId", "case",
|
dat3 <- dat3[, c("fileId", "folder", "eventId", "case",
|
||||||
"trace", "glossar", "event", "artwork",
|
"trace", "glossar", "event", "artwork",
|
||||||
"date.start", "date.stop", "timeMs.start",
|
"date.start", "date.stop", "timeMs.start",
|
||||||
"timeMs.stop", "duration", "topicNumber", "popup",
|
"timeMs.stop", "duration", "topicNumber", "popup",
|
||||||
@ -78,13 +85,13 @@ create_eventlogs <- function(data, xmlpath) {
|
|||||||
|
|
||||||
# Add topics: file names and topics ######################################
|
# Add topics: file names and topics ######################################
|
||||||
cat("\n########## Adding information about topics... ##########", "\n\n")
|
cat("\n########## Adding information about topics... ##########", "\n\n")
|
||||||
artworks <- unique(dat4$artwork)
|
|
||||||
# remove artworks without XML information
|
# remove artworks without XML information
|
||||||
artworks <- artworks[!artworks %in% c("504", "505")]
|
#artworks <- artworks[!artworks %in% c("504", "505")]
|
||||||
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
|
# TODO: This is hardcoded! Remove it!
|
||||||
path = xmlpath)
|
topics <- extract_topics(artworks, xmlfiles = xmlfiles, xmlpath = xmlpath)
|
||||||
|
|
||||||
dat5 <- add_topic(dat4, topics = topics)
|
dat5 <- add_topic(dat4, topics = topics)
|
||||||
dat5
|
dat5
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
create_glossardict <- function(artworks, glossar_files, path) {
|
create_glossardict <- function(artworks, glossar_files, xmlpath) {
|
||||||
|
|
||||||
x <- NULL
|
x <- NULL
|
||||||
|
|
||||||
for (glossar_file in glossar_files) {
|
for (glossar_file in glossar_files) {
|
||||||
for (artwork in artworks) {
|
for (artwork in artworks) {
|
||||||
fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(path, artwork))
|
fnames <- dir(pattern = paste0(artwork, "_"), path = paste0(xmlpath, artwork))
|
||||||
for (fname in fnames) {
|
for (fname in fnames) {
|
||||||
suppressWarnings(
|
suppressWarnings(
|
||||||
lines <- readLines(paste0(path, artwork, "/", fname))
|
lines <- readLines(paste0(xmlpath, artwork, "/", fname))
|
||||||
)
|
)
|
||||||
if (any(grepl(glossar_file, lines))) {
|
if (any(grepl(glossar_file, lines))) {
|
||||||
x <- rbind(x, data.frame(glossar_file, artwork))
|
x <- rbind(x, data.frame(glossar_file, artwork))
|
||||||
|
42
R/helper.R
42
R/helper.R
@ -93,50 +93,37 @@ add_trace_subdata <- function(subdata, max_trace) {
|
|||||||
|
|
||||||
# Create data frame with file names and topics for each artwork
|
# Create data frame with file names and topics for each artwork
|
||||||
|
|
||||||
extract_topics <- function(artworks, pattern, path) {
|
extract_topics <- function(artworks, xmlfiles, xmlpath) {
|
||||||
|
|
||||||
dat <- NULL
|
out <- NULL
|
||||||
file_order <- NULL
|
|
||||||
i <- 1
|
i <- 1
|
||||||
|
|
||||||
for (artwork in artworks) {
|
for (artwork in artworks) {
|
||||||
|
|
||||||
if (length(pattern) == 1) {
|
index_file <- paste0(xmlpath, artwork, "/", xmlfiles[i])
|
||||||
index_file <- pattern
|
suppressWarnings(
|
||||||
} else {
|
fnames <- gsub("^<card src=.*/(.*)./>$", "\\1",
|
||||||
index_file <- pattern[i]
|
grep("^<card src=", trimws(readLines(index_file)),
|
||||||
}
|
value = TRUE))
|
||||||
|
)
|
||||||
|
|
||||||
fnames <- dir(pattern = paste0(artwork, "_"),
|
|
||||||
path = paste(path, artwork, sep = "/"))
|
|
||||||
topic <- NULL
|
topic <- NULL
|
||||||
for (fname in fnames) {
|
for (fname in fnames) {
|
||||||
suppressWarnings(
|
suppressWarnings(
|
||||||
topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
|
topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
|
||||||
grep("^<card type=",
|
grep("^<card type=",
|
||||||
trimws(readLines(paste(path, artwork, fname, sep = "/"))),
|
trimws(readLines(paste(xmlpath, artwork, fname, sep = "/"))),
|
||||||
value = T)))
|
value = T)))
|
||||||
)
|
)
|
||||||
|
|
||||||
}
|
}
|
||||||
index <- paste(path, artwork, index_file, sep = "/")
|
out <- rbind(out, data.frame(artwork, file_name = fnames, topic))
|
||||||
suppressWarnings(
|
|
||||||
file_order <- c(file_order, gsub("^<card src=.*/(.*)./>$", "\\1",
|
|
||||||
grep("^<card src=", trimws(readLines(index)),
|
|
||||||
value = TRUE)))
|
|
||||||
)
|
|
||||||
in_index <- fnames %in% file_order
|
|
||||||
dat <- rbind(dat, data.frame(artwork, file_name = fnames, in_index, topic))
|
|
||||||
i <- i + 1
|
i <- i + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# take only the ones that are actually displayed and sort in the same order
|
out <- out[order(out$artwork), ]
|
||||||
# as indicated in index.html
|
|
||||||
out <- dat[dat$in_index, -3]
|
|
||||||
out <- out[order(file_order, out$file_name), ]
|
|
||||||
rownames(out) <- NULL
|
rownames(out) <- NULL
|
||||||
|
|
||||||
out$index <- unlist(sapply(table(out$artwork), seq_len))
|
out$index <- unlist(lapply(table(out$artwork), seq_len))
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,8 +175,7 @@ add_topic <- function(data, topics) {
|
|||||||
|
|
||||||
# Create data frame with information on artworks
|
# Create data frame with information on artworks
|
||||||
|
|
||||||
extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
|
extract_artworks <- function(artworks, files, xmlpath) {
|
||||||
path = path) {
|
|
||||||
out <- NULL
|
out <- NULL
|
||||||
i <- 1
|
i <- 1
|
||||||
|
|
||||||
@ -201,7 +187,7 @@ extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
|
|||||||
index_file <- files[i]
|
index_file <- files[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
index <- paste(path, artwork, index_file, sep = "/")
|
index <- paste(xmlpath, artwork, index_file, sep = "/")
|
||||||
varnames <- c("artist", "title", "misc", "description")
|
varnames <- c("artist", "title", "misc", "description")
|
||||||
xmllist <- XML::xmlToList(index)$header[varnames]
|
xmllist <- XML::xmlToList(index)$header[varnames]
|
||||||
|
|
||||||
|
@ -1,3 +1,122 @@
|
|||||||
|
#' Creating data frame from raw log files.
|
||||||
|
#'
|
||||||
|
#' Creates a data frame or CSV file from raw log files from a
|
||||||
|
#' Multi-Touch-Table at the IWM.
|
||||||
|
#'
|
||||||
|
#' @param folders A character vector of folder names that contain the raw
|
||||||
|
#' log files from the Multi-Touch-Table at the IWM.
|
||||||
|
#' @param path A path to the folder that contains the folders specified in
|
||||||
|
#' first argument. Needs to end in a "/"!
|
||||||
|
# TODO: How to catch this?
|
||||||
|
#' @param file Name of the file where parsed log files should be saved.
|
||||||
|
#' Default is "rawdata_logfiles.csv".
|
||||||
|
#' @param save Logical. If data frame should be returned by the function or
|
||||||
|
#' saved. Default is TRUE.
|
||||||
|
#' @return A data frame or NULL.
|
||||||
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/")
|
||||||
|
parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
||||||
|
save = FALSE) {
|
||||||
|
|
||||||
|
pbapply::pboptions(style = 3, char = "=")
|
||||||
|
|
||||||
|
# TODO: This is not very intutitive
|
||||||
|
fnames <- dir(paste0(path, folders), pattern = "*.log", full.names = TRUE)
|
||||||
|
|
||||||
|
cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
|
||||||
|
suppressWarnings(
|
||||||
|
logs <- pbapply::pblapply(fnames, readLines)
|
||||||
|
)
|
||||||
|
nlog <- sapply(logs, length)
|
||||||
|
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
|
||||||
|
logs = unlist(logs))
|
||||||
|
dat$folder <- rep(sapply(strsplit(fnames, "/"),
|
||||||
|
function(x) utils::tail(x, 2)[1]), nlog)
|
||||||
|
|
||||||
|
# Remove corrupt lines
|
||||||
|
d1 <- nrow(dat)
|
||||||
|
dat <- subset(dat, dat$logs != "")
|
||||||
|
d2 <- nrow(dat)
|
||||||
|
|
||||||
|
if(d1 > d2) {
|
||||||
|
warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract relevant infos
|
||||||
|
cat("\n########## Parsing individual data parts... ##########", "\n")
|
||||||
|
cat("\n Extract dates...", "\n\n")
|
||||||
|
date <- pbapply::pbsapply(dat$logs, gsub,
|
||||||
|
pattern = "^\\[(.*)\\], \\[.*$",
|
||||||
|
replacement = "\\1",
|
||||||
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
cat("\n Extract timestamps...", "\n\n")
|
||||||
|
timestamp <- pbapply::pbsapply(dat$logs, gsub,
|
||||||
|
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
||||||
|
replacement = "\\1",
|
||||||
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
cat("\n Extract events...", "\n\n")
|
||||||
|
action <- pbapply::pbsapply(dat$logs, gsub,
|
||||||
|
pattern = "^.*EyeVisit, (.*):*.*$",
|
||||||
|
replacement = "\\1",
|
||||||
|
USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
cat("\n Parse separate events...", "\n\n")
|
||||||
|
events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1])
|
||||||
|
|
||||||
|
cat("\n Extract topics...", "\n\n")
|
||||||
|
topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2])
|
||||||
|
|
||||||
|
cat("\n Extract move information...", "\n\n")
|
||||||
|
suppressWarnings(
|
||||||
|
moves <- pbapply::pbapply(do.call(rbind,
|
||||||
|
strsplit(sapply(strsplit(action, ":"),
|
||||||
|
function(x) x[3]), ",")), 2,
|
||||||
|
as.numeric)
|
||||||
|
)
|
||||||
|
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
||||||
|
|
||||||
|
cat("\n Extract popups...", "\n\n")
|
||||||
|
card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"),
|
||||||
|
function(x) x[3])[grep("Artwork", events)])
|
||||||
|
|
||||||
|
card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"),
|
||||||
|
function(x) x[4]))
|
||||||
|
|
||||||
|
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
||||||
|
|
||||||
|
cat("\n Transform timestamps to ms...", "\n\n")
|
||||||
|
ts_elements <- strsplit(timestamp, ":")
|
||||||
|
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
||||||
|
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
||||||
|
as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60
|
||||||
|
|
||||||
|
dat$date <- lubridate::parse_date_time(date, "bdyHMSOp")
|
||||||
|
dat$timeMs <- time_ms
|
||||||
|
dat$event <- events
|
||||||
|
dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1]))
|
||||||
|
dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2])
|
||||||
|
dat$topicNumber <- card
|
||||||
|
dat$x <- moves[,1]
|
||||||
|
dat$y <- moves[,2]
|
||||||
|
dat$scale <- moves[,3]
|
||||||
|
dat$rotation <- moves[,4]
|
||||||
|
dat$logs <- NULL
|
||||||
|
|
||||||
|
dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ]
|
||||||
|
|
||||||
|
# Export data
|
||||||
|
if (save) {
|
||||||
|
cat("Saving data...", "\n\n")
|
||||||
|
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
|
||||||
|
cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n"))
|
||||||
|
} else {
|
||||||
|
return(dat)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#' Left padding file names of raw log files from Multi-Touch-Table at the
|
#' Left padding file names of raw log files from Multi-Touch-Table at the
|
||||||
#' IWM.
|
#' IWM.
|
||||||
#'
|
#'
|
||||||
@ -8,7 +127,7 @@
|
|||||||
#' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible
|
#' @param fnames File name in the form of `yyyy_mm_dd-hh_mm_ss`, possible
|
||||||
#' with missing zero left padding.
|
#' with missing zero left padding.
|
||||||
#' @return Left padded file names.
|
#' @return Left padded file names.
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # folders <- "all"
|
#' # folders <- "all"
|
||||||
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
#' # dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
|
||||||
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
#' # fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
||||||
@ -36,118 +155,3 @@ leftpad_fnames <- function(fnames) {
|
|||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Creating data frame from raw log files.
|
|
||||||
#'
|
|
||||||
#' Creates a data frame or CSV file from raw log files from a
|
|
||||||
#' Multi-Touch-Table at the IWM.
|
|
||||||
#'
|
|
||||||
#' @param folders A character vector of folder names that contain the raw
|
|
||||||
#' log files from the Multi-Touch-Table at the IWM.
|
|
||||||
#' @param path A path to the folder that contains the folders specified in
|
|
||||||
#' first argument. Needs to end in a "/"!
|
|
||||||
# TODO: How to catch this?
|
|
||||||
#' @param file Name of the file where parsed log files should be saved.
|
|
||||||
#' Default is "rawdata_logfiles.csv".
|
|
||||||
#' @param save Logical. If data frame should be returned by the function or
|
|
||||||
#' saved. Default is TRUE.
|
|
||||||
#' @return A data frame or NULL.
|
|
||||||
#' @export
|
|
||||||
#' @examples
|
|
||||||
#' # parse_logfiles("all", path = "../data/haum/haum_logs_2016-2023/")
|
|
||||||
parse_logfiles <- function(folders, path, file = "rawdata_logfiles.csv",
|
|
||||||
save = TRUE) {
|
|
||||||
dirpaths <- paste0(path, folders)
|
|
||||||
# TODO: This is not very intutitive
|
|
||||||
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
|
|
||||||
|
|
||||||
cat(paste0("\n########## Reading ", length(fnames), " log files... ##########"), "\n\n")
|
|
||||||
suppressWarnings(
|
|
||||||
logs <- pbapply::pblapply(fnames, readLines)
|
|
||||||
)
|
|
||||||
nlog <- sapply(logs, length)
|
|
||||||
dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
|
|
||||||
logs = unlist(logs))
|
|
||||||
|
|
||||||
# Remove corrupt lines
|
|
||||||
d1 <- nrow(dat)
|
|
||||||
dat <- subset(dat, dat$logs != "")
|
|
||||||
d2 <- nrow(dat)
|
|
||||||
|
|
||||||
if(d1 > d2) {
|
|
||||||
warning(paste0(d1-d2, " corrupt lines have been found and removed from the data set\n"))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract relevant infos
|
|
||||||
cat("\n########## Parsing individual data parts... ##########", "\n")
|
|
||||||
cat("\n Extract dates...", "\n\n")
|
|
||||||
date <- pbapply::pbsapply(dat$logs, gsub,
|
|
||||||
pattern = "^\\[(.*)\\], \\[.*$",
|
|
||||||
replacement = "\\1",
|
|
||||||
USE.NAMES = FALSE)
|
|
||||||
|
|
||||||
cat("\n Extract timestamps...", "\n\n")
|
|
||||||
timestamp <- pbapply::pbsapply(dat$logs, gsub,
|
|
||||||
pattern = "^\\[.*\\], \\[(.*)\\].*$",
|
|
||||||
replacement = "\\1",
|
|
||||||
USE.NAMES = FALSE)
|
|
||||||
|
|
||||||
cat("\n Extract events...", "\n\n")
|
|
||||||
action <- pbapply::pbsapply(dat$logs, gsub,
|
|
||||||
pattern = "^.*EyeVisit, (.*):*.*$",
|
|
||||||
replacement = "\\1",
|
|
||||||
USE.NAMES = FALSE)
|
|
||||||
|
|
||||||
cat("\n Parse separate events...", "\n\n")
|
|
||||||
events <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[1])
|
|
||||||
|
|
||||||
cat("\n Extract topics...", "\n\n")
|
|
||||||
topics <- pbapply::pbsapply(strsplit(action, ":"), function(x) x[2])
|
|
||||||
|
|
||||||
cat("\n Extract move information...", "\n\n")
|
|
||||||
suppressWarnings(
|
|
||||||
moves <- pbapply::pbapply(do.call(rbind,
|
|
||||||
strsplit(sapply(strsplit(action, ":"),
|
|
||||||
function(x) x[3]), ",")), 2,
|
|
||||||
as.numeric)
|
|
||||||
)
|
|
||||||
# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
|
|
||||||
|
|
||||||
cat("\n Extract popups...", "\n\n")
|
|
||||||
card_action <- trimws(pbapply::pbsapply(strsplit(action, ":"),
|
|
||||||
function(x) x[3])[grep("Artwork", events)])
|
|
||||||
|
|
||||||
card <- as.numeric(pbapply::pbsapply(strsplit(action, ":"),
|
|
||||||
function(x) x[4]))
|
|
||||||
|
|
||||||
events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
|
|
||||||
|
|
||||||
cat("\n Transform timestamps to ms...", "\n\n")
|
|
||||||
ts_elements <- strsplit(timestamp, ":")
|
|
||||||
time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
|
|
||||||
as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
|
|
||||||
as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60
|
|
||||||
|
|
||||||
dat$date <- lubridate::parse_date_time(date, "bdyHMSOp")
|
|
||||||
dat$timeMs <- time_ms
|
|
||||||
dat$event <- events
|
|
||||||
dat$artwork <- trimws(sapply(strsplit(topics, "/"), function(x) x[1]))
|
|
||||||
dat$popup <- sapply(strsplit(topics, "/"), function(x) x[2])
|
|
||||||
dat$topicNumber <- card
|
|
||||||
dat$x <- moves[,1]
|
|
||||||
dat$y <- moves[,2]
|
|
||||||
dat$scale <- moves[,3]
|
|
||||||
dat$rotation <- moves[,4]
|
|
||||||
dat$logs <- NULL
|
|
||||||
|
|
||||||
dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ]
|
|
||||||
|
|
||||||
# Export data
|
|
||||||
if (save) {
|
|
||||||
cat("Saving data...", "\n\n")
|
|
||||||
utils::write.table(dat, file = file, sep = ";", row.names = FALSE)
|
|
||||||
cat(paste0("INFORMATION: Data file ", file, " has been written to ", getwd(), "\n\n"))
|
|
||||||
} else {
|
|
||||||
return(dat)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
@ -4,12 +4,15 @@
|
|||||||
\alias{create_eventlogs}
|
\alias{create_eventlogs}
|
||||||
\title{Creating log events from raw log files.}
|
\title{Creating log events from raw log files.}
|
||||||
\usage{
|
\usage{
|
||||||
create_eventlogs(data, xmlpath)
|
create_eventlogs(data, xmlfiles, xmlpath)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
|
\item{data}{Data frame of raw log files created with \code{parse_logfiles()}.
|
||||||
See \code{?parse_logfiles} for more details.}
|
See \code{?parse_logfiles} for more details.}
|
||||||
|
|
||||||
|
\item{xmlfiles}{Vector of names of index files, often something like
|
||||||
|
\verb{<artwork>.xml}.}
|
||||||
|
|
||||||
\item{xmlpath}{Path to folder where XML definitions of artworks live.}
|
\item{xmlpath}{Path to folder where XML definitions of artworks live.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
\alias{parse_logfiles}
|
\alias{parse_logfiles}
|
||||||
\title{Creating data frame from raw log files.}
|
\title{Creating data frame from raw log files.}
|
||||||
\usage{
|
\usage{
|
||||||
parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = TRUE)
|
parse_logfiles(folders, path, file = "rawdata_logfiles.csv", save = FALSE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{folders}{A character vector of folder names that contain the raw
|
\item{folders}{A character vector of folder names that contain the raw
|
||||||
|
6
tests/tests.R
Normal file
6
tests/tests.R
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Check if traces start with `move` or `flipCard`
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user