mtt_haum/code/specs.R

104 lines
3.0 KiB
R
Raw Normal View History

2023-06-26 10:30:07 +02:00
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
library(lubridate)
# Load the raw log data: a semicolon-separated file with a header row,
# located relative to the current working directory.
dat <- read.table(
  file = "../data/haum/rawdata_logfiles.csv",
  sep = ";",
  header = TRUE
)
2023-06-27 10:06:14 +02:00
# dat$event <- factor(dat$event, levels = c("Start Application",
# "Show Application",
# "Transform start",
# "Transform stop",
# "Show Info",
# "Show Front",
# "Artwork/OpenCard",
# "Artwork/CloseCard",
# "ShowPopup", "HidePopup"))
#
2023-06-26 10:30:07 +02:00
# Turn the date column into POSIXct timestamps. No format or time zone is
# passed, so as.POSIXct() relies on its defaults here.
dat[["date"]] <- as.POSIXct(dat$date)
# Bring the log entries into chronological order ...
dat <- dat[order(dat[["date"]]), ]
# ... and drop the row names left over from the unsorted data.
row.names(dat) <- NULL
2023-06-26 10:30:07 +02:00
# Structure and first rows of the sorted log data
str(dat)
head(dat)

#' Date range covered by the log files
#' (originally noted as December 2016 to December 2018 -- confirm against
#' the printed range)
range(dat[["date"]])

#' Number of log files per year
table(lubridate::year(dat[["date"]]))

#' Number of events total and per year
table(dat$event)
#   Artwork/CloseCard   Artwork/OpenCard
#               80026              92588
#           HidePopup   Show Application
#               59932               3179
#          Show Front          Show Info
#              106945             108113
#           ShowPopup  Start Application
#               60350               3210
#     Transform start     Transform stop
#             3219109            2966724
2023-06-26 10:30:07 +02:00
# --> the "open" type events occur more often than their "close"
#     counterparts (not surprisingly)

# Event counts broken down by year
table(dat$event, lubridate::year(dat[["date"]]))

#' Number of log entries per week day
table(weekdays(dat[["date"]]))

#' Number of distinct days with log entries
length(unique(as.Date(dat[["date"]])))

#' Which artworks are looked at most often
table(dat$artwork)
lattice::barchart(table(dat$artwork))
#' ## Corona Gaps
# Frequency of each day difference between consecutive rows of dat.
table(diff(as.Date(dat$date)))
# Rows that are followed by a gap of more than 50 days. diff() returns one
# element fewer than dat has rows, so using the comparison directly as a
# logical row index would recycle it against the rows (and NA comparisons
# would yield all-NA rows). which() converts it into explicit row positions:
# row i is shown when the gap lies between rows i and i + 1.
dat[which(diff(as.Date(dat$date)) > 50), ]
2023-06-26 10:30:07 +02:00
#' ## Plots from Visualization Tool

# Restrict the data to the log entries from 2016.
in_2016 <- lubridate::year(dat$date) == 2016
dat16 <- dat[in_2016, ]

# Daily counts of every event type, shown as a bar chart with a legend.
counts <- table(as.Date(dat16$date), dat16$event)
lattice::barchart(counts, auto.key = TRUE)

# Same tabulation and chart, but only for the events listed below.
start_events <- c(
  "Transform start",
  "Show Info",
  "ShowPopup",
  "Artwork/OpenCard"
)
start_rows <- dat16[dat16$event %in% start_events, ]
counts <- table(as.Date(start_rows$date), start_rows$event)
counts
lattice::barchart(counts, auto.key = TRUE)
### Example for log file in order to show structure
2023-06-27 10:06:14 +02:00
# Export rows 240 to 660 and columns 3 to 12 as an unquoted text table.
write.table(
  x = dat[240:660, 3:12],
  file = "tmp_set.txt",
  quote = FALSE
)
2023-06-26 10:30:07 +02:00
# The exported file is then edited by hand to have all possible events...
# Hand-picked row numbers; consecutive runs are written as ranges. The
# single values in between keep the resulting vector of type double.
select <- c(
  243:246, 253:257, 259, 260, 262,
  265:271, 287:289, 294:296,
  303:307, 318:326, 357:363,
  408:418, 439:441,
  516, 518, 519,
  587:595, 652:657
)
2023-06-27 10:06:14 +02:00
# Write the "logs" entries of the hand-picked rows, one per line.
writeLines(
  text = dat[select, "logs"],
  con = "tmp_set_logs.txt"
)
2023-06-26 10:30:07 +02:00
# Tabulate the differences between consecutive timestamps in dat; tab holds
# how often each distinct difference occurs.
# NOTE(review): diff() on timestamps yields a time difference whose unit is
# presumably chosen automatically -- confirm the unit before reading the table.
tab <- table(diff(dat$date)) # uninformative on raw log data!
# Plot only those differences that occur more than 2000 times.
plot(tab[tab > 2000])