Moved most stuff into package folder mtt; updated README so it works with new code

2023-09-21 16:45:06 +02:00
parent 55adcf03d7
commit 3dd13a6c6e
9 changed files with 51 additions and 666 deletions
@@ -1,125 +0,0 @@
-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
-
-###### HELPER ######
-
-# Need to left pad file names. If I do not do this, the sorting of the
-# timestamps will be off and I get negative durations later on since the
-# wrong events get closed.
-
-
-leftpad_fnames <- function(x) {
-
-  z <- gsub(paste0(dirpaths, "/"), "\\1", x)
-  ys <- strsplit(z, "_")
-
-  res <- NULL
-
-  for (y in ys) {
-    y2 <- unlist(strsplit(y[3], "-"))
-    e1 <- y[1]
-    e2 <- sprintf("%02d", as.numeric(y[2]))
-    e3 <- sprintf("%02d", as.numeric(y2[1]))
-    e4 <- sprintf("%02d", as.numeric(y2[2]))
-    e5 <- sprintf("%02d", as.numeric(y[4]))
-    e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
-    e6 <- sprintf("%02d", as.numeric(gsub(".log", "", y[5])))
-
-    res <- c(res,
-             paste0(e1, "_", e2, "_", e3, "-", e4, "_", e5, "_", e6, ".log"))
-  }
-  res
-}
-
-##### CONTENT ######
-
-# Choose which folders with raw log files should be included
-
-folders <- "all"
-#folders <- "_2016b"
-
-dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
-
-fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
-length(fnames)
-head(fnames)
-
-logs <- lapply(fnames, readLines)
-nlog <- sapply(logs, length)
-dat <- data.frame(fileId = rep(leftpad_fnames(fnames), nlog),
-                  logs = unlist(logs))
-head(dat$logs)
-
-# Remove corrupted lines
-
-
-# corrupt lines are "" and need to be removed
-d1 <- dim(dat)[1]
-dat <- subset(dat, dat$logs != "")
-d2 <- dim(dat)[1]
-
-# TODO: Catch this in a function and give back a meaningful warning
-# The files contain `r d1-d2` corrupt lines that were removed from the
-# data.
-
-# Extract relevant infos
-
-date <- sapply(dat$logs, gsub,
-               pattern = "^\\[(.*)\\], \\[.*$",
-               replacement = "\\1",
-               USE.NAMES = FALSE)
-
-timestamp <- sapply(dat$logs, gsub,
-                    pattern = "^\\[.*\\], \\[(.*)\\].*$",
-                    replacement = "\\1",
-                    USE.NAMES = FALSE)
-
-action <- sapply(dat$logs, gsub,
-                 pattern = "^.*EyeVisit, (.*):*.*$",
-                 replacement = "\\1",
-                 USE.NAMES = FALSE)
-
-events <- sapply(strsplit(action, ":"), function(x) x[1])
-
-topics <- sapply(strsplit(action, ":"), function(x) x[2])
-
-moves <- apply(do.call(rbind,
-                 strsplit(sapply(strsplit(action, ":"), function(x) x[3]),
-                          ",")),
-               2, as.numeric)
-# ATTENTION: as.numeric() forces NAs for "OpenCard" and "CloseCard"
-
-card_action <- trimws(sapply(strsplit(action, ":"),
-                             function(x) x[3])[grep("Artwork", events)])
-
-card <- as.numeric(sapply(strsplit(action, ":"), function(x) x[4]))
-
-events[grep("Artwork", events)] <- paste("Artwork", card_action, sep = "/")
-
-ts_elements <- strsplit(timestamp, ":")
-time_ms <- as.numeric(sapply(ts_elements, function(x) x[4])) +
-           as.numeric(sapply(ts_elements, function(x) x[3])) * 1000 +
-           as.numeric(sapply(ts_elements, function(x) x[2])) * 1000 * 60
-
-dat$date        <- lubridate::parse_date_time(date, "bdyHMSOp")
-dat$timeMs      <- time_ms
-dat$event       <- events
-dat$artwork     <- trimws(sapply(strsplit(topics, "/"), function(x) x[1]))
-dat$popup       <- sapply(strsplit(topics, "/"), function(x) x[2])
-dat$topicNumber <- card
-dat$x           <- moves[,1]
-dat$y           <- moves[,2]
-dat$scale       <- moves[,3]
-dat$rotation    <- moves[,4]
-
-dat$logs <- NULL
-# remove original log files from data so file becomes smaller
-
-# sort by fileId, since reading in by file names does not make sense
-# because of missing left zero padding
-dat <- dat[order(dat$fileId, dat$date, dat$timeMs), ]
-
-# Export data
-
-write.table(dat, "../data/rawdata_logfiles.csv",
-            sep = ";", quote = FALSE, row.names = FALSE)
-
@@ -1,37 +0,0 @@
-# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light")
-
-dat0 <- read.table("../../rawdata_logfiles.csv", sep = ";",
-                   header = TRUE)
-# artwork names
-artworks <- unique(na.omit(dat0$artwork))[unique(na.omit(dat0$artwork)) != "glossar"]
-
-dat <- subset(dat0, dat0$artwork == "glossar")
-
-glossar_files <- unique(dat$popup)
-
-x <- NULL
-
-for (glossar_file in glossar_files) {
-  for (artwork in artworks) {
-    fnames <- dir(pattern = paste0(artwork, "_"), path = artwork)
-    for (fname in fnames) {
-      lines <- readLines(paste0(artwork, "/", fname))
-        if (any(grepl(glossar_file, lines))) {
-          x <- rbind(x, data.frame(glossar_file, artwork))
-          break
-      }
-    }
-  }
-}
-
-head(x, 20)
-
-glossar_dict <- as.data.frame(tapply(x$artwork, x$glossar_file, FUN = c))
-names(glossar_dict) <- "artwork"
-glossar_dict$glossar_file <- rownames(glossar_dict)
-rownames(glossar_dict) <- NULL
-glossar_dict <- glossar_dict[, c("glossar_file", "artwork")]
-
-save(glossar_dict, file = "../../glossar_dict.RData")
-# TODO: Save in interoperable format
-
@@ -1,6 +1,9 @@
+# TODO: This script is obsolete and needs to be updated!
+
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")

-source("functions.R")
+#source("functions.R")
+devtools::load_all("../../../../software/mtt")

 small <- TRUE

@@ -10,10 +13,10 @@ now <- Sys.time()
 cat("########## Reading in data... ##########", "\n")

 if (small) {
-  dat0 <- read.table("../data/rawdata_logfiles_small.csv", sep = ";",
+  dat0 <- read.table("../data/haum/rawdata_logfiles_small.csv", sep = ";",
                     header = TRUE)
 } else {
-  dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";",
+  dat0 <- read.table("../data/haum/rawdata_logfiles.csv", sep = ";",
                     header = TRUE)
 }
 dat0$date <- as.POSIXct(dat0$date)
@@ -54,7 +57,6 @@ dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
 # Remove all events that do not have a `date.start`
 dat2 <- dat2[!is.na(dat2$date.start), ]
 rownames(dat2) <- NULL
-# TODO: Throw warning about this

 save(dat2, file = paste("tmp/dat2", ifelse(small, "small_", "full_"),
                       format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
@@ -90,7 +92,7 @@ artworks <- unique(dat4$artwork)
 # remove artworks without XML information
 artworks <- artworks[!artworks %in% c("504", "505")]
 topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
-                         path = "../data/ContentEyevisit/eyevisit_cards_light/")
+                         path = "../data/haum/ContentEyevisit/eyevisit_cards_light/")

 dat5 <- add_topic(dat4, topics = topics)

@@ -101,6 +103,6 @@ save(dat5, file = paste("tmp/dat5", ifelse(small, "small_", "full_"),

 # Export data ############################################################
 cat("########## Exporting data frame with event logs... ##########", "\n")
-write.table(dat5, "../data/event_logfiles.csv", sep = ";",
+write.table(dat5, "../data/haum/event_logfiles.csv", sep = ";",
            row.names = FALSE)

@@ -16,7 +16,7 @@

 #' # Read data

-dat <- read.table("../data/event_logfiles.csv", sep = ";", header = TRUE)
+dat <- read.table("../data/haum/event_logfiles.csv", sep = ";", header = TRUE)
 dat$date.start <- as.POSIXct(dat$date.start)
 dat$date.stop <- as.POSIXct(dat$date.stop)

@@ -1,16 +0,0 @@
-path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light"
-
-setwd(path)
-
-# artwork names
-dat0 <- read.table("../../event_logfiles.csv", sep = ";", header = TRUE)
-dat0$artwork <- sprintf("%03d", dat0$artwork)
-artworks <- sort(unique(dat0$artwork))
-
-# extract topics
-topics <- extract_topics(artworks, paste0(artworks, ".xml"), path)
-
-write.table(topics, file = "../../topics.csv", sep = ";", row.names = FALSE)
-
-# TODO: Keep this file?
-
@@ -1,458 +0,0 @@
-###########################################################################
-
-# Add trace variable
-
-add_trace <- function(data, glossar_dict = "../data/glossar_dict.RData") {
-
-  data$trace <- NA
-  subdata1 <- data[data$event %in% c("Transform start", "Transform stop"), ]
-  subdata2 <- data[!data$event %in% c("Transform start", "Transform stop"), ]
-
-  last_event <- subdata2$event[1]
-  artworks <- unique(subdata2$artwork)[unique(subdata2$artwork) != "glossar"]
-  n <- 1    # count artworks for progress
-
-  pb <- txtProgressBar(min = 0, max = nrow(subdata2), initial = NA,
-                       style = 3)
-
-  for (artwork in artworks) {
-
-    cat("\n\nAdding trace variable for artwork", artwork,
-               paste0("(", n, "/", length(artworks), ")"), "\n")
-
-    for (i in 1:nrow(subdata2)) {
-
-      if (last_event == "Show Info" & subdata2$artwork[i] == artwork) {
-        subdata2$trace[i] <- i
-        j <- i
-
-      } else if (last_event == "Show Front" & subdata2$artwork[i] == artwork) {
-        subdata2$trace[i] <- j
-
-      } else if (!(last_event %in% c("Show Info", "Show Front")) &
-                 subdata2$artwork[i] == artwork) {
-        subdata2$trace[i] <- j
-      }
-
-      if (i <= nrow(subdata2)) {
-        last_event <- subdata2$event[i + 1]
-      }
-      setTxtProgressBar(pb, i)
-    }
-    n <- n + 1
-  }
-
-  # Fix glossar entries (find corresponding artworks and fill in trace)
-  glossar_files <- unique(subdata2[subdata2$artwork == "glossar", "popup"])
-
-  # load lookup table for artworks and glossar files
-  load(glossar_dict)
-  lut <- glossar_dict[glossar_dict$glossar_file %in% glossar_files, ]
-
-  inside  <- glossar_files[glossar_files %in%
-                           lut[sapply(lut$artwork, length) == 1,
-                               "glossar_file"]]
-  single_art  <- unlist(lut[lut$glossar_file %in% inside, "artwork"])
-
-  m <- 1
-
-  for (file in lut$glossar_file) {
-
-    cat("\n\nAdding trace variable for glossar entry", file,
-               paste0("(", m, "/", length(lut$glossar_file), ")"), "\n")
-
-    artwork_list <- unlist(lut[lut$glossar_file == file, "artwork"])
-
-    for (i in seq_len(nrow(subdata2))) {
-
-      if (subdata2$event[i] == "Show Info" |
-          (subdata2$event[i] == "Artwork/OpenCard" &
-           subdata2$artwork[i] %in% single_art)) {
-
-        current_artwork <- subdata2[i, "artwork"]
-        j <- i
-        k <- i
-
-      } else {
-
-        current_artwork <- current_artwork
-
-      }
-
-      if (subdata2$event[i] == "Show Front" & subdata2$artwork[i] == current_artwork) {
-      # make sure artwork has not been closed, yet!
-        k <- i
-      }
-
-      if (subdata2$artwork[i] == "glossar" &
-          (current_artwork %in% artwork_list) &
-          subdata2$popup[i] == file & (j - k == 0)) {
-
-        subdata2[i, "trace"]   <- subdata2[j, "trace"]
-        subdata2[i, "artwork"] <- current_artwork
-
-      }
-      setTxtProgressBar(pb, i)
-    }
-    m <- m + 1
-  }
-
-  # Exclude not matched glossar entries
-  cat("\n\nINFORMATION: glossar entries that are not matched will be removed:",
-      sum(is.na(subdata2[subdata2$glossar == 1, "trace"])), "entries",
-      #proportions(table(is.na(subdata2[subdata2$glossar == 1, "trace"]))),
-      fill = TRUE)
-  subdata2 <- subset(subdata2, !is.na(subdata2$trace))
-  # REMEMBER: It can never be 100% correct, since it is always possible
-  # that several cards are open and that they link to the same glossar
-  # entry
-
-  # dat2[14110:14130, ]
-  # dat2[dat2$glossar == 1, ]
-
-  out <- rbind(subdata1, subdata2)
-  out <- out[order(out$fileId, out$date, out$timeMs), ]
-  out
-}
-
-###########################################################################
-
-close_events <- function(data, event = c("move", "flipCard", "openTopic", "openPopup")) {
-
-  event <- match.arg(event)
-
-  switch(event,
-    "move" = {
-      actions <- c("Transform start", "Transform stop")
-      idvar   <- c("fileId", "eventId", "artwork", "glossar")
-      drop    <- c("popup", "topicNumber", "trace", "event")
-      ncol    <- 16
-
-    },
-    "flipCard" = {
-      actions <- c("Show Info", "Show Front")
-      idvar   <- c("fileId", "trace", "artwork", "glossar")
-      drop    <- c("popup", "topicNumber", "eventId", "event")
-      ncol    <- 16
-
-    },
-    "openTopic" = {
-      actions <- c("Artwork/OpenCard", "Artwork/CloseCard")
-      idvar   <- c("fileId", "eventId", "trace", "glossar", "artwork",
-                   "topicNumber")
-      drop    <- c("popup", "event")
-      ncol    <- 18
-
-    },
-    "openPopup" = {
-      actions <- c("ShowPopup", "HidePopup")
-      idvar   <- c("fileId", "eventId", "trace", "glossar", "artwork", "popup")
-      drop    <- c("topicNumber", "event")
-      ncol    <- 18
-#   TODO: Should topicNumber maybe also be filled in for "openPopup"?
-
-    }
-  )
-
-  subdata <- subset(data, data$event %in% actions)
-  subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date, subdata$timeMs), ]
-  subdata$time <- ifelse(subdata$event == actions[1], "start", "stop")
-  num_start <- diff(c(0, which(subdata$event == actions[2])))
-  if (tail(subdata, 1)$time == "start") {
-    num_start <- c(num_start, 1)
-  }
-  subdata$eventId <- rep(seq_along(num_start), num_start)
-
-  if (event == "move") {
-    subdata    <- subdata[!duplicated(subdata[, c("event", "eventId")]), ]
-    id_stop    <- which(subdata$event == actions[2])
-    id_rm_stop <- id_stop[diff(id_stop) == 1]
-    subdata    <- subdata[-(id_rm_stop + 1), ]
-  }
-
-  subdata_split <- split(subdata, ~ fileId)
-
-  pbapply::pboptions(style = 3, char = "=")
-
-  subdata_split_wide <- pbapply::pblapply(subdata_split, reshape,
-                               direction = "wide",
-                               idvar = idvar,
-                               timevar = "time",
-                               drop = drop)
-#  suppressWarnings(
-#    data_wide <- reshape(subdata, direction = "wide",
-#                         idvar = idvar,
-#                         timevar = "time",
-#                         drop = drop)
-#  )
-
-  # remove entries with only start or stop events since they do not have
-  # all columns
-  ids <- which(sapply(subdata_split_wide, ncol) != ncol)
-  if (length(ids) > 0) subdata_split_wide <- subdata_split_wide[-ids]
-
-  data_wide <- dplyr::bind_rows(subdata_split_wide)
-
-  for (d in drop) data_wide[d] <- NA
-  data_wide$distance        <- NA
-  data_wide$scaleSize       <- NA
-  data_wide$rotationDegree  <- NA
-
-  data_wide$event <- event
-  data_wide$duration <- data_wide$timeMs.stop - data_wide$timeMs.start
-
-  if (event == "move") {
-    data_wide$distance <- apply(
-        data_wide[, c("x.start", "y.start", "x.stop", "y.stop")], 1,
-        function(x) dist(matrix(x, 2, 2, byrow = TRUE)))
-    data_wide$rotationDegree <- data_wide$rotation.stop -
-      data_wide$rotation.start
-    data_wide$scaleSize <- data_wide$scale.stop / data_wide$scale.start
-    # remove moves without any change
-    move_wide <- data_wide[data_wide$distance != 0 &
-                           data_wide$rotationDegree != 0 &
-                           data_wide$scaleSize != 1, ]
-    cat(paste("INFORMATION:", nrow(data_wide) - nrow(move_wide),
-    "lines containing move events were removed since they did",
-    "\nnot contain any change"), fill = TRUE)
-    data_wide <- move_wide
-  }
-
-  out <- data_wide[, c("fileId", "event", "artwork", "trace", "glossar",
-                       "date.start", "date.stop", "timeMs.start",
-                       "timeMs.stop", "duration", "topicNumber", "popup",
-                       "x.start", "y.start", "x.stop", "y.stop",
-                       "distance", "scale.start", "scale.stop",
-                       "scaleSize", "rotation.start", "rotation.stop",
-                       "rotationDegree")]
-  rownames(out) <- NULL
-  out
-}
-
-###########################################################################
-
-# Add case variable
-
-add_case <- function(data, cutoff = 20) {
-# TODO: What is the best choice for the cutoff here?
-
-  data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
-  data$case <- NA
-  j <- 1
-  pb <- txtProgressBar(min = 0, max = nrow(data), initial = NA, style = 3)
-
-  for (i in seq_len(nrow(data))) {
-    if (data$timediff[i] <= cutoff) {
-      data$case[i] <- j
-    } else {
-      j <- j + 1
-      data$case[i] <- j
-    }
-    setTxtProgressBar(pb, i)
-  }
-  data$timediff <- NULL
-  data
-}
-
-###########################################################################
-
-# Add trace for moves
-
-add_trace_moves <- function(data) {
-
-  pbapply::pboptions(style = 3, char = "=")
-
-  trace_max <- max(data$trace, na.rm = TRUE)
-
-  #subdata_art <- split(data, ~ artwork)
-  subdata_case <- split(data, ~ case)
-
-  #subdata_list <- split(data, ~ artwork + case)
-  # --> does not work with complete data set
-  cat("Splitting data...", "\n")
-  subdata_list <- pbapply::pblapply(subdata_case, split, f = ~artwork)
-  subdata_list <- unlist(subdata_list, recursive = FALSE)
-
-  cat("Adding trace...", "\n")
-  subdata_trace <- pbapply::pblapply(subdata_list,
-        function(x) {
-          trace_max <<- trace_max + 1
-          add_trace_subdata(x, max_trace = trace_max)
-        }
-      )
-
-  out <- dplyr::bind_rows(subdata_trace)
-  out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
-  rownames(out) <- NULL
-
-  # Make trace a consecutive number
-  out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace)))
-  out
-}
-
-
-add_trace_subdata <- function(subdata, max_trace) {
-
-  if (nrow(subdata) != 0) {
-
-    if (length(na.omit(unique(subdata$trace))) == 1) {
-      subdata[subdata$event == "move", "trace"] <- na.omit(unique(subdata$trace))
-    } else if (length(na.omit(unique(subdata$trace))) > 1) {
-      for (i in 1:nrow(subdata)) {
-        if (subdata$event[i] == "move") {
-          if (i == 1) {
-            subdata$trace[i] <- na.omit(unique(subdata$trace))[1]
-          } else {
-            subdata$trace[i] <- subdata$trace[i - 1]
-          }
-        }
-      }
-    } else if (all(is.na(subdata$trace))) {
-      for (i in 1:nrow(subdata)) {
-        subdata$trace[i] <- max_trace
-      }
-    }
-
-  } else {
-    warning("`subdata` has nrow = 0")
-  }
-  subdata
-}
-
-
-###########################################################################
-
-# Create data frame with file names and topics for each artwork
-
-extract_topics <- function(artworks, pattern, path) {
-
-  dat <- NULL
-  file_order <- NULL
-  i <- 1
-
-  for (artwork in artworks) {
-
-    if (length(pattern) == 1) {
-      index_file <- pattern
-    } else {
-      index_file <- pattern[i]
-    }
-
-    fnames <- dir(pattern = paste0(artwork, "_"),
-                  path = paste(path, artwork, sep = "/"))
-    topic <- NULL
-    for (fname in fnames) {
-      suppressWarnings(
-      topic <- c(topic, gsub("^<card type=.(.*).>$", "\\1",
-        grep("^<card type=",
-          trimws(readLines(paste(path, artwork, fname, sep = "/"))),
-          value = T)))
-      )
-
-    }
-    index <- paste(path, artwork, index_file, sep = "/")
-    suppressWarnings(
-    file_order <- c(file_order, gsub("^<card src=.*/(.*)./>$", "\\1",
-                         grep("^<card src=", trimws(readLines(index)),
-                              value = TRUE)))
-    )
-    in_index <- fnames %in% file_order
-    dat <- rbind(dat, data.frame(artwork, file_name = fnames, in_index, topic))
-    i <- i + 1
-  }
-
-  # take only the ones that are actually displayed and sort in the same order
-  # as indicated in index.html
-  out <- dat[dat$in_index, -3]
-  out <- out[order(file_order, out$file_name), ]
-  rownames(out) <- NULL
-
-  out$index <- unlist(sapply(table(out$artwork), seq_len))
-  out
-}
-
-###########################################################################
-
-# Add topics: file names and topics
-
-add_topic <- function(data, topics) {
-
-  artworks <- unique(data$artwork)
-  tab_art <- lapply(artworks,
-               function(x) names(table(data$topicNumber[data$artwork == x])))
-  names(tab_art) <- artworks
-
-  tab_index <- lapply(tab_art, seq_along)
-
-  dat_split <- split(data, ~ artwork)
-
-  set_label <- function(x) {
-    artwork <- unique(x$artwork)
-    x$topicIndex <- factor(x$topicNumber, labels = tab_index[[artwork]])
-    x
-  }
-
-  dat_label <- lapply(dat_split, set_label)
-
-  set_topic <- function(x) {
-    artwork <- unique(x$artwork)
-    labels_file <- topics[topics$artwork == artwork,
-                          "file_name"][as.numeric(levels(x$topicIndex))]
-    x$topicFile <- as.character(factor(x$topicIndex, labels = labels_file))
-    labels_topic <- topics[topics$artwork == artwork,
-                          "topic"][as.numeric(levels(x$topicIndex))]
-    x$topic <- as.character(factor(x$topicIndex, labels = labels_topic))
-    x
-  }
-
-  dat_topic <- lapply(dat_label, set_topic)
-
-  #out <- do.call(rbind, dat_topic)
-  out <- dplyr::bind_rows(dat_topic)
-  out <- out[order(out$fileId.start, out$date.start, out$timeMs.start), ]
-  rownames(out) <- NULL
-  out
-}
-
-###########################################################################
-
-# Create data frame with information on artworks
-
-extract_artworks <- function(artworks, files = paste0(artworks, ".xml"),
-                             path = path) {
-  out <- NULL
-  i <- 1
-
-  for (artwork in artworks) {
-
-    if (length(files) == 1) {
-      index_file <- files
-    } else {
-      index_file <- files[i]
-    }
-
-    index <- paste(path, artwork, index_file, sep = "/")
-    varnames <- c("artist", "title", "misc", "description")
-    xmllist <- XML::xmlToList(index)$header[varnames]
-
-    if (any(sapply(xmllist, is.null))) {# necessary for missing entries
-      names(xmllist) <- varnames
-      xmllist[which(sapply(xmllist, is.null))] <- NA
-    }
-    # remove ugly quotes
-    xmllist <- lapply(xmllist, function(x) gsub("„|“", "", x))
-    # remove HTML tags
-    xmllist <- lapply(xmllist, function(x) gsub("<br/>", " ", x))
-    xmldat <- as.data.frame(xmllist)
-    xmldat$artwork <- artwork
-    # trim white space from strings
-    xmldat$artist      <- trimws(xmldat$artist)
-    xmldat$title       <- trimws(xmldat$title)
-    xmldat$misc        <- trimws(xmldat$misc)
-    xmldat$description <- trimws(xmldat$description)
-    out <- rbind(out, xmldat)
-    i <- i + 1
-  }
-  out
-}
-
@@ -2,7 +2,7 @@

 library(lubridate)

-dat <- read.table("../data/rawdata_logfiles.csv", header = TRUE, sep = ";")
+dat <- read.table("../data/haum/rawdata_logfiles.csv", header = TRUE, sep = ";")
 # dat$event <- factor(dat$event, levels = c("Start Application",
 #                                           "Show Application",
 #                                           "Transform start",
@@ -0,0 +1,26 @@
+setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
+
+devtools::load_all("../../../../software/mtt")
+#library(mtt)
+
+dat <- parse_logfiles("2016", path = "../data/haum/LogFiles/",
+                      save = FALSE)
+datlogs <- create_eventlogs(dat, "../data/haum/ContentEyevisit/eyevisit_cards_light/")
+
+dat001 <- datlogs[which(datlogs$artwork == "001"), ]
+
+index <- as.numeric(as.factor(dat001$trace))
+cc <- sample(colors(), length(unique(dat001$trace)))
+
+plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
+     xlim = c(0, 3840), ylim = c(0, 2160))
+with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop,
+                            length = .07, col = cc[index]))
+
+plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
+     xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray")
+points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
+     xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize,
+     col = "blue")
+
+