From 45bf0d9af96d6f972869341513b2bf7368e2996c Mon Sep 17 00:00:00 2001 From: nwickel Date: Fri, 7 Jul 2023 17:18:09 +0200 Subject: [PATCH] Adjusted some things after getting complete set of log files --- code/01_parse-logfiles.R | 19 ++++++++++--------- code/03_specs.R | 28 ++++++++++++++++------------ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/code/01_parse-logfiles.R b/code/01_parse-logfiles.R index 84dab52..9084333 100644 --- a/code/01_parse-logfiles.R +++ b/code/01_parse-logfiles.R @@ -14,6 +14,8 @@ # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code") +#+ setup, include = FALSE +knitr::opts_chunk$set(warning = FALSE, message = FALSE) #' # Preprocessing raw log files into data frame @@ -34,17 +36,11 @@ #' Choose which folders with raw log files should be included: -folders <- c( - "_2016" - , "_2017a" - , "_2017b" - , "_2018" - ) +folders <- "all" -dirpaths <- paste0("../data/HAUM Logs/", folders) +dirpaths <- paste0("../data/haum_logs_2016-2023/", folders) fnames <- dir(dirpaths, pattern = "*.log", full.names = TRUE) -# TODO: Enter all data length(fnames) head(fnames) @@ -59,6 +55,7 @@ head(dat$logs) # incomplete final line found on '_2016/2016_11_18-11_31_0.log' # incomplete final line found on '_2016/2016_11_18-11_38_30.log' # incomplete final line found on '_2016/2016_11_18-11_40_36.log' +# ... ## --> files have a last line that looks like a binary entry?? @@ -74,6 +71,7 @@ d2 <- dim(dat)[1] #' The files contain `r d1-d2` corrupt lines that were remooved from the data. + #' ### Extract relevant infos date <- sapply(dat$logs, gsub, @@ -124,9 +122,12 @@ dat$y <- moves[,2] dat$scale <- moves[,3] dat$rotation <- moves[,4] +dat$logs <- NULL +# remove original log files from data so file becomes smaller + str(dat) -head(dat[, 3:ncol(dat)], 20) +head(dat[, 2:ncol(dat)], 20) ## TODO: Replace artwork and popup numbers with informative strings diff --git a/code/03_specs.R b/code/03_specs.R index 768e40c..79de837 100644 --- a/code/03_specs.R +++ b/code/03_specs.R @@ -14,8 +14,10 @@ dat <- read.table("../data/rawdata_logfiles.csv", header = TRUE, sep = ";") # "ShowPopup", "HidePopup")) # -#dat$logs <- NULL # do not need original log files dat$date <- as.POSIXct(dat$date) # create date object +dat <- dat[order(dat$date), ] + +rownames(dat) <- NULL str(dat) head(dat) @@ -31,16 +33,16 @@ table(year(dat$date)) #' Number of events total and per year table(dat$event) -# Start Application Show Application -# 1679 1656 -# Transform start Transform stop -# 2119815 1944618 -# Show Info Show Front -# 71955 71043 -# Artwork/OpenCard Artwork/CloseCard -# 64990 56750 -# ShowPopup HidePopup -# 44070 43813 +# Artwork/CloseCard Artwork/OpenCard +# 80026 92588 +# HidePopup Show Application +# 59932 3179 +# Show Front Show Info +# 106945 108113 +# ShowPopup Start Application +# 60350 3210 +# Transform start Transform stop +# 3219109 2966724 # --> more "openups" than "closes" (not surprisingly) table(dat$event, year(dat$date)) @@ -58,7 +60,9 @@ length(unique(as.Date(dat$date))) table(dat$artwork) lattice::barchart(table(dat$artwork)) - +#' ## Corona Gaps +table(diff(as.Date(dat$date))) +dat[diff(as.Date(dat$date)) > 50, ] #' ## Plots from Visualization Tool