# mtt_haum/code/02_preprocessing.R
#
# 84 lines
# 3.1 KiB
# R

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
source("functions.R")
# Read data ##############################################################
cat("########## Reading in data... ##########", "\n")
# Intermediate results are saved under tmp/; create the directory up front
# so that save() does not fail when it is missing (e.g., a fresh checkout).
# An already existing directory is left untouched.
dir.create("tmp", showWarnings = FALSE)
# Raw log files: semicolon-separated with a header row
dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";",
                   header = TRUE)
# NOTE(review): neither `tz` nor `format` is passed, so the conversion
# relies on the session time zone and the default formats -- confirm that
# this matches the raw data
dat0$date <- as.POSIXct(dat0$date)
# Indicator: 1 if the artwork of a row is "glossar", 0 otherwise
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
# Remove irrelevant events (plain logical indexing; `%in%` never yields NA)
dat <- dat0[!(dat0$event %in% c("Start Application",
                                "Show Application")), , drop = FALSE]
save(dat, file = "tmp/dat.RData")
# Add trace variable #####################################################
cat("########## Adding trace variable... ##########", "\n")
# add_trace() is not defined in this script (presumably in functions.R)
dat1 <- add_trace(dat)
save(dat1, file = "tmp/dat1.RData")
# Close events ###########################################################
# Progress message now ends with a newline like all other messages in this
# script, so that subsequent output starts on its own line.
cat("########## Closing events... ##########", "\n")
# close_events() is not defined in this script (presumably in functions.R);
# it is called once per event type and the results are stacked row-wise.
dat2 <- rbind(close_events(dat1, "move"),
              close_events(dat1, "flipCard"),
              close_events(dat1, "openTopic"),
              close_events(dat1, "openPopup"))
# Order by start time, ties broken by the log file the event started in
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
# Remove durations when event spans more than one log file, since they are
# not interpretable; which() skips rows where the comparison is NA
dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA
# Remove all events that do not have a `date.start` and report how many
# rows are dropped instead of discarding them silently
if (anyNA(dat2$date.start)) {
  warning(sum(is.na(dat2$date.start)),
          " event(s) without `date.start` were removed",
          call. = FALSE)
}
dat2 <- dat2[!is.na(dat2$date.start), ]
# Reset row names to 1..n after reordering and filtering
rownames(dat2) <- NULL
save(dat2, file = "tmp/dat2.RData")
# Case variable ##########################################################
cat("########## Adding case and eventId variables... ##########", "\n")
# add_case() is not defined in this script (presumably in functions.R)
dat3 <- add_case(dat2)
# Event ID ###############################################################
# Running number 1..n in the current row order
dat3[["eventId"]] <- seq_len(nrow(dat3))
# Keep only the columns listed below, in exactly this order
dat3 <- dat3[, c(
  # identifiers
  "fileId.start", "fileId.stop", "eventId", "case", "trace", "glossar",
  # what happened and on which artwork
  "event", "artwork",
  # timing
  "date.start", "date.stop", "timeMs.start", "timeMs.stop", "duration",
  # content opened
  "topicNumber", "popup",
  # position
  "x.start", "y.start", "x.stop", "y.stop", "distance",
  # scale
  "scale.start", "scale.stop", "scaleSize",
  # rotation
  "rotation.start", "rotation.stop", "rotationDegree"
)]
save(dat3, file = "tmp/dat3.RData")
# Trace variable for move events #########################################
cat("########## Adding trace variable for move events... ##########", "\n")
# add_trace_moves() is not defined in this script (presumably in
# functions.R)
dat4 <- add_trace_moves(dat3)
save(dat4, file = "tmp/dat4.RData")
# Topics: file names and topics ##########################################
cat("########## Adding information about topics... ##########", "\n")
# Distinct artwork values occurring in the event log
artworks <- unique(dat4$artwork)
# One "<artwork>.xml" pattern per distinct artwork value is handed to
# extract_topics(), which is not defined in this script (presumably in
# functions.R)
topics <- extract_topics(
  artworks,
  pattern = paste0(artworks, ".xml"),
  path = "../data/ContentEyevisit/eyevisit_cards_light/"
)
dat5 <- add_topic(dat4, topics = topics)
save(dat5, file = "tmp/dat5.RData")
# TODO: Replace artwork with informative strings
# Export #################################################################
cat("########## Exporting data frame with event logs... ##########", "\n")
# Semicolon-separated text file without a row-name column
write.table(
  dat5,
  file = "../data/event_logfiles.csv",
  sep = ";",
  row.names = FALSE
)