mtt_haum/code/03_specs.R

100 lines
2.9 KiB
R
Raw Normal View History

2023-06-26 10:30:07 +02:00
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
library(lubridate)
# Read the semicolon-separated raw log data; the first row holds column names.
dat <- read.table("../data/rawdata_logfiles.csv", header = TRUE, sep = ";")

# Encode the event type as a factor with an explicit level order. Any value
# that is not listed here becomes NA.
event_levels <- c(
  "Start Application", "Show Application",
  "Transform start", "Transform stop",
  "Show Info", "Show Front",
  "Artwork/OpenCard", "Artwork/CloseCard",
  "ShowPopup", "HidePopup"
)
dat$event <- factor(dat$event, levels = event_levels)

# dat$logs <- NULL  # disabled: would drop the original log lines

# Convert the timestamp column to POSIXct. No tz is passed, so the session
# time zone is used for the conversion.
dat$date <- as.POSIXct(dat$date)

# Quick look at the structure and the first rows.
str(dat)
head(dat)
#' Log files between December 2016 and December 2018
range(dat$date)

#' Number of log files per year
# The year of every entry is computed once and reused below;
# deparse.level = 0 keeps the table dimensions unnamed.
log_years <- year(dat$date)
table(log_years, deparse.level = 0)

#' Number of events total and per year
table(dat$event)
# Recorded counts:
#   Start Application       1679    Show Application        1656
#   Transform start      2119815    Transform stop       1944618
#   Show Info              71955    Show Front             71043
#   Artwork/OpenCard       64990    Artwork/CloseCard      56750
#   ShowPopup              44070    HidePopup              43813
# --> more "openups" than "closes" (not surprisingly)
table(dat$event, log_years, deparse.level = 0)

#' Number of log files per week day
table(weekdays(dat$date))

#' Number of distinct calendar days with log entries
length(unique(as.Date(dat$date)))

#' Which artworks are looked at most often
artwork_counts <- table(dat$artwork)
artwork_counts
lattice::barchart(artwork_counts)
#' ## Plots from Visualization Tool

# Keep only the log entries whose timestamp falls in 2016.
in_2016 <- year(dat$date) == 2016
dat16 <- dat[in_2016, ]

# Event counts per calendar day, one column per event type.
# NOTE(review): as.Date() on POSIXct defaults to tz = "UTC", while year()
# works in the time zone of the object -- confirm that the day boundaries
# are the intended ones.
counts <- table(as.Date(dat16$date), dat16$event)
lattice::barchart(counts, auto.key = TRUE)

# Same tabulation, restricted to rows whose event is one of start_events.
start_events <- c("Transform start", "Show Info", "ShowPopup", "Artwork/OpenCard")
is_start_event <- dat16$event %in% start_events
counts <- table(as.Date(dat16$date[is_start_event]),
                dat16$event[is_start_event])
counts
lattice::barchart(counts, auto.key = TRUE)
### Example for log file in order to show structure
# Dump rows 240 to 660 and columns 3 to 12 of dat as unquoted text.
example_rows <- 240:660
example_cols <- 3:12
write.table(dat[example_rows, example_cols], file = "set.txt", quote = FALSE)
# is then edited by hand to have all possible events...
# Hand-picked row indices into dat, written as runs of consecutive rows.
# The isolated double literals (259, 260, ...) make c() return a double
# vector, exactly like a plain list of numeric literals would.
select <- c(
  243:246, 253:257, 259, 260, 262,
  265:271, 287:289, 294:296,
  303:307, 318:326, 357:363,
  408:418, 439:441,
  516, 518, 519,
  587:595, 652:657
)
# Write the "logs" entry of every selected row to a text file, one per line.
writeLines(dat[["logs"]][select], con = "set_logs.txt")

# Tabulate the differences between consecutive timestamps.
# deparse.level = 0 keeps the table dimension unnamed.
time_gaps <- diff(dat$date)
tab <- table(time_gaps, deparse.level = 0) # uninformative on raw log data!

# Plot only those difference values that occur more than 2000 times.
plot(tab[tab > 2000])