Adjusted some things after getting complete set of log files

This commit is contained in:
Nora Wickelmaier 2023-07-07 17:18:09 +02:00
parent fdb6c94b25
commit 45bf0d9af9
2 changed files with 26 additions and 21 deletions

View File

@ -14,6 +14,8 @@
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
#+ setup, include = FALSE
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
#' # Preprocessing raw log files into data frame #' # Preprocessing raw log files into data frame
@ -34,17 +36,11 @@
#' Choose which folders with raw log files should be included: #' Choose which folders with raw log files should be included:
folders <- c( folders <- "all"
"_2016"
, "_2017a"
, "_2017b"
, "_2018"
)
dirpaths <- paste0("../data/HAUM Logs/", folders) dirpaths <- paste0("../data/haum_logs_2016-2023/", folders)
fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE)
# TODO: Enter all data
length(fnames) length(fnames)
head(fnames) head(fnames)
@ -59,6 +55,7 @@ head(dat$logs)
# incomplete final line found on '_2016/2016_11_18-11_31_0.log' # incomplete final line found on '_2016/2016_11_18-11_31_0.log'
# incomplete final line found on '_2016/2016_11_18-11_38_30.log' # incomplete final line found on '_2016/2016_11_18-11_38_30.log'
# incomplete final line found on '_2016/2016_11_18-11_40_36.log' # incomplete final line found on '_2016/2016_11_18-11_40_36.log'
# ...
## --> files have a last line that looks like a binary entry?? ## --> files have a last line that looks like a binary entry??
@ -74,6 +71,7 @@ d2 <- dim(dat)[1]
#' The files contain `r d1-d2` corrupt lines that were removed from the data. #' The files contain `r d1-d2` corrupt lines that were removed from the data.
#' ### Extract relevant infos #' ### Extract relevant infos
date <- sapply(dat$logs, gsub, date <- sapply(dat$logs, gsub,
@ -124,9 +122,12 @@ dat$y <- moves[,2]
dat$scale <- moves[,3] dat$scale <- moves[,3]
dat$rotation <- moves[,4] dat$rotation <- moves[,4]
dat$logs <- NULL
# drop the `logs` column (original log lines) from the data so the file becomes smaller
str(dat) str(dat)
head(dat[, 3:ncol(dat)], 20) head(dat[, 2:ncol(dat)], 20)
## TODO: Replace artwork and popup numbers with informative strings ## TODO: Replace artwork and popup numbers with informative strings

View File

@ -14,8 +14,10 @@ dat <- read.table("../data/rawdata_logfiles.csv", header = TRUE, sep = ";")
# "ShowPopup", "HidePopup")) # "ShowPopup", "HidePopup"))
# #
#dat$logs <- NULL # do not need original log files
dat$date <- as.POSIXct(dat$date) # create date object dat$date <- as.POSIXct(dat$date) # create date object
dat <- dat[order(dat$date), ]
rownames(dat) <- NULL
str(dat) str(dat)
head(dat) head(dat)
@ -31,16 +33,16 @@ table(year(dat$date))
#' Number of events total and per year #' Number of events total and per year
table(dat$event) table(dat$event)
# Start Application Show Application # Artwork/CloseCard Artwork/OpenCard
# 1679 1656 # 80026 92588
# Transform start Transform stop # HidePopup Show Application
# 2119815 1944618 # 59932 3179
# Show Info Show Front # Show Front Show Info
# 71955 71043 # 106945 108113
# Artwork/OpenCard Artwork/CloseCard # ShowPopup Start Application
# 64990 56750 # 60350 3210
# ShowPopup HidePopup # Transform start Transform stop
# 44070 43813 # 3219109 2966724
# --> more "open" than "close" events (not surprisingly) # --> more "open" than "close" events (not surprisingly)
table(dat$event, year(dat$date)) table(dat$event, year(dat$date))
@ -58,7 +60,9 @@ length(unique(as.Date(dat$date)))
table(dat$artwork) table(dat$artwork)
lattice::barchart(table(dat$artwork)) lattice::barchart(table(dat$artwork))
#' ## Corona Gaps
table(diff(as.Date(dat$date)))
dat[diff(as.Date(dat$date)) > 50, ]
#' ## Plots from Visualization Tool #' ## Plots from Visualization Tool