Working on optimization

This commit is contained in:
Nora Wickelmaier 2023-09-18 09:59:24 +02:00
parent b242eaddf3
commit d28a2497dc
3 changed files with 22 additions and 15 deletions

View File

@ -434,7 +434,7 @@ sudden there were 72 instead of 70 artworks. It seems like these two
artworks appear on October 21, 2022. artworks appear on October 21, 2022.
```{r} ```{r}
dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";", header = TRUE) dat0 <- read.table("data/rawdata_logfiles.csv", sep = ";", header = TRUE)
dat0$date <- as.POSIXct(dat0$date) dat0$date <- as.POSIXct(dat0$date)
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
@ -448,7 +448,7 @@ summary(dat[dat$artwork %in% c("504", "505"), ])
The artworks seem to have been updated in general after October 21, 2022. The artworks seem to have been updated in general after October 21, 2022.
```{r} ```{r}
art_before_oct2022 <- sort(unique(dat[dat$date >= "2022-10-21", "artwork"])) art_after_oct2022 <- sort(unique(dat[dat$date >= "2022-10-21", "artwork"]))
art_before_oct2022 <- sort(unique(dat[dat$date <= "2022-10-21", "artwork"])) art_before_oct2022 <- sort(unique(dat[dat$date <= "2022-10-21", "artwork"]))
# Removed artworks # Removed artworks
art_before_oct2022[!art_before_oct2022 %in% art_after_oct2022] art_before_oct2022[!art_before_oct2022 %in% art_after_oct2022]

View File

@ -13,33 +13,38 @@ dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
dat <- subset(dat0, !(dat0$event %in% c("Start Application", dat <- subset(dat0, !(dat0$event %in% c("Start Application",
"Show Application"))) "Show Application")))
save(dat, file = "tmp/dat.RData") #save(dat, file = "tmp/dat.RData")
# Add trace variable ##################################################### # Add trace variable #####################################################
cat("########## Adding trace variable... ##########", "\n") cat("########## Adding trace variable... ##########", "\n")
dat1 <- add_trace(dat) dat1 <- add_trace(dat)
save(dat1, file = "tmp/dat1.RData") #save(dat1, file = "tmp/dat1.RData")
# Close events # Close events
cat("########## Closing events...") cat("########## Closing events... ##########", "\n")
dat2 <- rbind(close_events(dat1, "move"), c1 <- close_events(dat1, "move")
close_events(dat1, "flipCard"), cat("## --> move events closed.", "\n")
close_events(dat1, "openTopic"), c2 <- close_events(dat1, "flipCard")
close_events(dat1, "openPopup")) cat("## --> flipCard events closed.", "\n")
c3 <- close_events(dat1, "openTopic")
cat("## --> openTopic events closed.", "\n")
c4 <- close_events(dat1, "openPopup")
cat("## --> openPopup events closed.", "\n")
dat2 <- rbind(c1, c2, c3, c4)
dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ] dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ]
# Remove durations when event spans more than one log file, since they are # Remove durations when event spans more than one log file, since they are
# not interpretable # not interpretable
dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA #dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA
# Remove all events that do not have a `date.start` # Remove all events that do not have a `date.start`
dat2 <- dat2[!is.na(dat2$date.start), ] dat2 <- dat2[!is.na(dat2$date.start), ]
rownames(dat2) <- NULL rownames(dat2) <- NULL
# TODO: Throw warning about this # TODO: Throw warning about this
save(dat2, file = "tmp/dat2.RData") #save(dat2, file = "tmp/dat2.RData")
# Add case variable ###################################################### # Add case variable ######################################################
cat("########## Adding case and eventId variables... ##########", "\n") cat("########## Adding case and eventId variables... ##########", "\n")
@ -56,13 +61,13 @@ dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case",
"scaleSize", "rotation.start", "rotation.stop", "scaleSize", "rotation.start", "rotation.stop",
"rotationDegree")] "rotationDegree")]
save(dat3, file = "tmp/dat3.RData") #save(dat3, file = "tmp/dat3.RData")
# Add trace for move events ############################################## # Add trace for move events ##############################################
cat("########## Adding trace variable for move events... ##########", "\n") cat("########## Adding trace variable for move events... ##########", "\n")
dat4 <- add_trace_moves(dat3) dat4 <- add_trace_moves(dat3)
save(dat4, file = "tmp/dat4.RData") #save(dat4, file = "tmp/dat4.RData")
# Add topics: file names and topics ###################################### # Add topics: file names and topics ######################################
cat("########## Adding information about topics... ##########", "\n") cat("########## Adding information about topics... ##########", "\n")
@ -72,7 +77,7 @@ topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"),
dat5 <- add_topic(dat4, topics = topics) dat5 <- add_topic(dat4, topics = topics)
save(dat5, file = "tmp/dat5.RData") #save(dat5, file = "tmp/dat5.RData")
# TODO: Replace artwork with informative strings # TODO: Replace artwork with informative strings

View File

@ -147,6 +147,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP
stop("`event` must be one of 'move', 'flipCard', 'openTopic', stop("`event` must be one of 'move', 'flipCard', 'openTopic',
'openPopup'.") 'openPopup'.")
} }
# TODO: `fileId` should now maybe go back into `idvar`
subdata <- subset(data, data$event %in% actions) subdata <- subset(data, data$event %in% actions)
#subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ] #subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ]
@ -241,9 +242,9 @@ add_case <- function(data, cutoff = 20) {
# TODO: What is the best choice for the cutoff here? # TODO: What is the best choice for the cutoff here?
data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start))) data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start)))
data$case <- NA data$case <- NA
j <- 1 j <- 1
pb <- txtProgressBar(min = 0, max = nrow(data), style = 3)
for (i in seq_len(nrow(data))) { for (i in seq_len(nrow(data))) {
if (data$timediff[i] <= cutoff) { if (data$timediff[i] <= cutoff) {
@ -252,6 +253,7 @@ add_case <- function(data, cutoff = 20) {
j <- j + 1 j <- j + 1
data$case[i] <- j data$case[i] <- j
} }
setTxtProgressBar(pb, i)
} }
data$timediff <- NULL data$timediff <- NULL
data data