2023-06-26 10:30:07 +02:00
|
|
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
|
|
|
|
2023-09-12 14:31:36 +02:00
|
|
|
# Load the project's helper functions. The helpers called further down
# (add_trace, close_events, add_case, add_trace_moves, extract_topics,
# add_topic) are not defined in this script -- presumably they come from
# this file; verify against functions.R.
source("functions.R")
|
|
|
|
|
2023-09-19 15:25:30 +02:00
|
|
|
# Switch for the whole run: TRUE reads the reduced sample file
# (rawdata_logfiles_small.csv), FALSE the full raw log file. It also selects
# the "small_" / "full_" tag in the names of the intermediate .RData files.
small <- TRUE
|
|
|
|
|
2023-09-20 16:11:05 +02:00
|
|
|
# Taken once at start-up so that every intermediate file saved by this run
# carries the same timestamp in its name.
now <- Sys.time()
|
|
|
|
|
2023-09-13 14:20:08 +02:00
|
|
|
# Read data ##############################################################

cat("########## Reading in data... ##########", "\n")

# `small` picks the reduced sample file, otherwise the full raw log is read.
# Both files are read as semicolon-separated tables with a header row.
dat0 <- read.table(
  if (small) {
    "../data/rawdata_logfiles_small.csv"
  } else {
    "../data/rawdata_logfiles.csv"
  },
  sep = ";",
  header = TRUE
)
|
2023-09-12 14:31:36 +02:00
|
|
|
# Convert the `date` column to POSIXct date-times.
dat0[["date"]] <- as.POSIXct(dat0[["date"]])

# Numeric indicator: 1 where `artwork` equals "glossar", 0 otherwise.
dat0[["glossar"]] <- ifelse(dat0[["artwork"]] == "glossar", 1, 0)
|
2023-07-20 17:06:28 +02:00
|
|
|
|
2023-09-12 14:31:36 +02:00
|
|
|
# Remove irrelevant events: rows whose `event` is an application start/show
# entry are dropped, everything else is kept.
dat <- dat0[!(dat0$event %in% c("Start Application", "Show Application")), ]

# Keep a snapshot of this stage. The file name is built from the sample tag
# ("small_" or "full_") and the run timestamp `now`.
save(
  dat,
  file = paste0(
    "tmp/dat_",
    if (small) "small_" else "full_",
    format(now, "%Y-%m-%d_%H-%M-%S"),
    ".RData"
  )
)
|
2023-09-15 16:22:21 +02:00
|
|
|
|
2023-09-13 14:20:08 +02:00
|
|
|
# Add trace variable #####################################################

cat("########## Adding trace variable... ##########", "\n")

# add_trace() is not defined in this script (presumably from functions.R).
dat1 <- add_trace(dat)

# Snapshot of this stage. paste0() is used (not paste(), whose default
# separator is a space) so the name comes out as
# tmp/dat1_<small_|full_><timestamp>.RData, matching the `dat` snapshot.
save(dat1, file = paste0("tmp/dat1_", if (small) "small_" else "full_",
                         format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
2023-09-15 16:22:21 +02:00
|
|
|
|
2023-09-12 14:31:36 +02:00
|
|
|
# Close events

cat("########## Closing events... ##########", "\n")

# close_events() (not defined in this script, presumably from functions.R)
# is called once per event type on the same input `dat1`; a progress line is
# printed after each call.
c1 <- close_events(dat1, "move")
cat("## --> move events closed.", "\n")
c2 <- close_events(dat1, "flipCard")
cat("## --> flipCard events closed.", "\n")
c3 <- close_events(dat1, "openTopic")
cat("## --> openTopic events closed.", "\n")
c4 <- close_events(dat1, "openPopup")
cat("## --> openPopup events closed.", "\n")

# Stack the four per-type results into one data frame.
dat2 <- rbind(c1, c2, c3, c4)

# Sort by start time; ties are broken by the file the event started in.
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]

# Remove durations when event spans more than one log file, since they are
# not interpretable (currently disabled: the line below is commented out)
#dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA

# Remove all events that do not have a `date.start`, and say so instead of
# dropping rows silently.
if (anyNA(dat2$date.start)) {
  warning(sum(is.na(dat2$date.start)),
          " event(s) without `date.start` were removed",
          call. = FALSE)
}
dat2 <- dat2[!is.na(dat2$date.start), ]
# Reset row names so they run 1..n again after sorting and filtering.
rownames(dat2) <- NULL

# Snapshot of this stage. paste0() is used (not paste(), whose default
# separator is a space) so the name comes out as
# tmp/dat2_<small_|full_><timestamp>.RData, matching the `dat` snapshot.
save(dat2, file = paste0("tmp/dat2_", if (small) "small_" else "full_",
                         format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
2023-09-15 16:22:21 +02:00
|
|
|
|
2023-09-13 14:20:08 +02:00
|
|
|
# Add case variable ######################################################

cat("########## Adding case and eventId variables... ##########", "\n")

# add_case() is not defined in this script (presumably from functions.R).
dat3 <- add_case(dat2)

# Add event ID ###########################################################

# Running event number in the current row order of `dat3`.
dat3$eventId <- seq_len(nrow(dat3))

# Keep exactly these columns, in this order. Selecting by name fails with an
# error if any of them is missing from `dat3`.
dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
                 "trace", "glossar", "event", "artwork",
                 "date.start", "date.stop", "timeMs.start",
                 "timeMs.stop", "duration", "topicNumber", "popup",
                 "x.start", "y.start", "x.stop", "y.stop",
                 "distance", "scale.start", "scale.stop",
                 "scaleSize", "rotation.start", "rotation.stop",
                 "rotationDegree")]

# Snapshot of this stage. paste0() is used (not paste(), whose default
# separator is a space) so the name comes out as
# tmp/dat3_<small_|full_><timestamp>.RData, matching the `dat` snapshot.
save(dat3, file = paste0("tmp/dat3_", if (small) "small_" else "full_",
                         format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
2023-09-15 16:22:21 +02:00
|
|
|
|
2023-09-13 14:20:08 +02:00
|
|
|
# Add trace for move events ##############################################

cat("\n########## Adding trace variable for move events... ##########", "\n")

# add_trace_moves() is not defined in this script (presumably from
# functions.R).
dat4 <- add_trace_moves(dat3)

# Snapshot of this stage. paste0() is used (not paste(), whose default
# separator is a space) so the name comes out as
# tmp/dat4_<small_|full_><timestamp>.RData, matching the `dat` snapshot.
save(dat4, file = paste0("tmp/dat4_", if (small) "small_" else "full_",
                         format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
2023-09-15 16:22:21 +02:00
|
|
|
|
2023-09-13 14:20:08 +02:00
|
|
|
# Add topics: file names and topics ######################################

cat("########## Adding information about topics... ##########", "\n")

# Distinct artwork identifiers occurring in the event log.
artworks <- unique(dat4$artwork)
# remove artworks without XML information
artworks <- artworks[!artworks %in% c("504", "505")]

# extract_topics() and add_topic() are not defined in this script
# (presumably from functions.R). One "<artwork>.xml" pattern is passed per
# remaining artwork.
topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
                         path = "../data/ContentEyevisit/eyevisit_cards_light/")

dat5 <- add_topic(dat4, topics = topics)

# Snapshot of this stage. paste0() is used (not paste(), whose default
# separator is a space) so the name comes out as
# tmp/dat5_<small_|full_><timestamp>.RData, matching the `dat` snapshot.
save(dat5, file = paste0("tmp/dat5_", if (small) "small_" else "full_",
                         format(now, "%Y-%m-%d_%H-%M-%S"), ".RData"))
|
2023-09-15 16:22:21 +02:00
|
|
|
|
2023-09-13 14:20:08 +02:00
|
|
|
# TODO: Replace artwork with informative strings
|
|
|
|
|
|
|
|
# Export data ############################################################

cat("########## Exporting data frame with event logs... ##########", "\n")

# Write the final event log as a semicolon-separated file without row names.
write.table(
  x = dat5,
  file = "../data/event_logfiles.csv",
  sep = ";",
  row.names = FALSE
)
|
2023-06-26 10:30:07 +02:00
|
|
|
|