diff --git a/README.Rmd b/README.Rmd index 31ebdd9..b252ea0 100644 --- a/README.Rmd +++ b/README.Rmd @@ -434,7 +434,7 @@ sudden there were 72 instead of 70 artworks. It seems like these two artworks appear on October 21, 2022. ```{r} -dat0 <- read.table("../data/rawdata_logfiles.csv", sep = ";", header = TRUE) +dat0 <- read.table("data/rawdata_logfiles.csv", sep = ";", header = TRUE) dat0$date <- as.POSIXct(dat0$date) dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) @@ -448,7 +448,7 @@ summary(dat[dat$artwork %in% c("504", "505"), ]) The artworks seem to be have updated in general after October 21, 2022. ```{r} -art_before_oct2022 <- sort(unique(dat[dat$date >= "2022-10-21", "artwork"])) +art_after_oct2022 <- sort(unique(dat[dat$date >= "2022-10-21", "artwork"])) art_before_oct2022 <- sort(unique(dat[dat$date <= "2022-10-21", "artwork"])) # Removed artworks art_before_oct2022[!art_before_oct2022 %in% art_after_oct2022] diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R index 843bd95..ab86df6 100644 --- a/code/02_preprocessing.R +++ b/code/02_preprocessing.R @@ -13,33 +13,38 @@ dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) dat <- subset(dat0, !(dat0$event %in% c("Start Application", "Show Application"))) -save(dat, file = "tmp/dat.RData") +#save(dat, file = "tmp/dat.RData") # Add trace variable ##################################################### cat("########## Adding trace variable... ##########", "\n") dat1 <- add_trace(dat) -save(dat1, file = "tmp/dat1.RData") +#save(dat1, file = "tmp/dat1.RData") # Close events -cat("########## Closing events...") -dat2 <- rbind(close_events(dat1, "move"), - close_events(dat1, "flipCard"), - close_events(dat1, "openTopic"), - close_events(dat1, "openPopup")) +cat("########## Closing events... ##########", "\n") +c1 <- close_events(dat1, "move") +cat("## --> move events closed.", "\n") +c2 <- close_events(dat1, "flipCard") +cat("## --> flipCard events closed.", "\n") +c3 <- close_events(dat1, "openTopic") +cat("## --> openTopic events closed.", "\n") +c4 <- close_events(dat1, "openPopup") +cat("## --> openPopup events closed.", "\n") +dat2 <- rbind(c1, c2, c3, c4) dat2 <- dat2[order(dat2$date.start, dat2$fileId.start), ] # Remove durations when event spans more than one log file, since they are # not interpretable -dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA +#dat2[which(dat2$fileId.start != dat2$fileId.stop), "duration"] <- NA # Remove all events that do not have a `date.start` dat2 <- dat2[!is.na(dat2$date.start), ] rownames(dat2) <- NULL # TODO: Throw warning about this -save(dat2, file = "tmp/dat2.RData") +#save(dat2, file = "tmp/dat2.RData") # Add case variable ###################################################### cat("########## Adding case and eventId variables... ##########", "\n") @@ -56,13 +61,13 @@ dat3 <- dat3[, c("fileId.start", "fileId.stop", "eventId", "case", "scaleSize", "rotation.start", "rotation.stop", "rotationDegree")] -save(dat3, file = "tmp/dat3.RData") +#save(dat3, file = "tmp/dat3.RData") # Add trace for move events ############################################## cat("########## Adding trace variable for move events... ##########", "\n") dat4 <- add_trace_moves(dat3) -save(dat4, file = "tmp/dat4.RData") +#save(dat4, file = "tmp/dat4.RData") # Add topics: file names and topics ###################################### cat("########## Adding information about topics... ##########", "\n") @@ -72,7 +77,7 @@ topics <- extract_topics(artworks, pattern = paste0(artworks, ".xml"), dat5 <- add_topic(dat4, topics = topics) -save(dat5, file = "tmp/dat5.RData") +#save(dat5, file = "tmp/dat5.RData") # TODO: Replace artwork with informative strings diff --git a/code/functions.R b/code/functions.R index df14910..05a32b0 100644 --- a/code/functions.R +++ b/code/functions.R @@ -147,6 +147,7 @@ close_events <- function(data, event = c("move", "flipCard", "openTopic", "openP stop("`event` must be one of 'move', 'flipCard', 'openTopic', 'openPopup'.") } +# TODO: `fileId` should now maybe go back into `idvar` subdata <- subset(data, data$event %in% actions) #subdata <- subdata[order(subdata$artwork, subdata$popup, subdata$date), ] @@ -241,9 +242,9 @@ add_case <- function(data, cutoff = 20) { # TODO: What is the best choice for the cutoff here? data$timediff <- as.numeric(diff(c(data$date.start[1], data$date.start))) - data$case <- NA j <- 1 + pb <- txtProgressBar(min = 0, max = nrow(data), style = 3) for (i in seq_len(nrow(data))) { if (data$timediff[i] <= cutoff) { @@ -252,6 +253,7 @@ add_case <- function(data, cutoff = 20) { j <- j + 1 data$case[i] <- j } + setTxtProgressBar(pb, i) } data$timediff <- NULL data