Working on optimization

This commit is contained in:
Nora Wickelmaier 2023-09-18 09:59:24 +02:00
parent b242eaddf3
commit d28a2497dc
3 changed files with 22 additions and 15 deletions

View File

@ -434,7 +434,7 @@ sudden there were 72 instead of 70 artworks. It seems like these two
artworks appear on October 21, 2022.
```{r}
dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";", header = TRUE)
dat0 <- read.table("data/rawdata_logfiles.csv", sep = ";", header = TRUE)
dat0$date <- as.POSIXct(dat0$date)
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
@ -448,7 +448,7 @@ summary(dat[dat$artwork %in% c("504", "505"), ])
The artworks seem to have been updated in general after October 21, 2022.
```{r}
art_before_oct2022 <- sort(unique(dat[dat$date >= "2022-10-21", "artwork"]))
art_after_oct2022 <- sort(unique(dat[dat$date >= "2022-10-21", "artwork"]))
art_before_oct2022 <- sort(unique(dat[dat$date <= "2022-10-21", "artwork"]))
# Removed artworks
art_before_oct2022[!art_before_oct2022 %in% art_after_oct2022]

View File

@ -13,33 +13,38 @@ dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
dat <- subset(dat0, !(dat0$event %in% c("Start Application",
"Show Application")))
save(dat, file = "tmp/dat.RData")
#save(dat, file = "tmp/dat.RData")
# Add trace variable #####################################################
cat("########## Adding trace variable... ##########", "\n")
dat1 <- add_trace(dat)
save(dat1, file = "tmp/dat1.RData")
#save(dat1, file = "tmp/dat1.RData")
# Close events
cat("########## Closing events...")
dat2 <- rbind(close_events(dat1, "move"),
close_events(dat1, "flipCard"),
close_events(dat1, "openTopic"),
close_events(dat1, "openPopup"))
cat("########## Closing events... ##########", "\n")
c1 <- close_events(dat1, "move")
cat("## --> move events closed.", "\n")
c2 <- close_events(dat1, "flipCard")
cat("## --> flipCard events closed.", "\n")
c3 <- close_events(dat1, "openTopic")
cat("## --> openTopic events closed.", "\n")
c4 <- close_events(dat1, "openPopup")
cat("## --> openPopup events closed.", "\n")
dat2 <- rbind(c1, c2, c3, c4)
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
# Remove durations when event spans more than one log file, since they are
# not interpretable
dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA
#dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA
# Remove all events that do not have a `date.start`
dat2 <- dat2[!is.na(dat2$date.start), ]
rownames(dat2) <- NULL
# TODO: Throw warning about this
save(dat2, file = "tmp/dat2.RData")
#save(dat2, file = "tmp/dat2.RData")
# Add case variable ######################################################
cat("########## Adding case and eventId variables... ##########", "\n")
@ -56,13 +61,13 @@ dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
"scaleSize", "rotation.start", "rotation.stop",
"rotationDegree")]
save(dat3, file = "tmp/dat3.RData")
#save(dat3, file = "tmp/dat3.RData")
# Add trace for move events ##############################################
cat("########## Adding trace variable for move events... ##########", "\n")
dat4 <- add_trace_moves(dat3)
save(dat4, file = "tmp/dat4.RData")
#save(dat4, file = "tmp/dat4.RData")
# Add topics: file names and topics ######################################
cat("########## Adding information about topics... ##########", "\n")
@ -72,7 +77,7 @@ topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
dat5 <- add_topic(dat4, topics = topics)
save(dat5, file = "tmp/dat5.RData")
#save(dat5, file = "tmp/dat5.RData")
# TODO: Replace artwork with informative strings

View File

@ -147,6 +147,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
stop("`event` must be one of 'move', 'flipCard', 'openTopic',
'openPopup'.")
}
# TODO: `fileId` should now maybe go back into `idvar`
subdata <- subset(data, data$event %in% actions)
#subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ]
@ -241,9 +242,9 @@ add_case <- function(data, cutoff = 20) {
# TODO: What is the best choice for the cutoff here?
data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
data$case <- NA
j <- 1
pb <- txtProgressBar(min = 0, max = nrow(data), style = 3)
for (i in seq_len(nrow(data))) {
if (data$timediff[i] <= cutoff) {
@ -252,6 +253,7 @@ add_case <- function(data, cutoff = 20) {
j <- j + 1
data$case[i] <- j
}
setTxtProgressBar(pb, i)
}
data$timediff <- NULL
data