From bfc5c1d93065edecb9e364796ebabd906499c155 Mon Sep 17 00:00:00 2001 From: nwickel Date: Fri, 1 Sep 2023 15:01:54 +0200 Subject: [PATCH] Worked on extracting topics for cards --- README.md | 4 +++ code/02_preprocessing.R | 57 ++++++++++--------------------- code/03_topic-cards.R | 42 +++++++++++++++++++++++ code/questions_number-of-cards.R | 58 ++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 40 deletions(-) create mode 100644 code/03_topic-cards.R create mode 100644 code/questions_number-of-cards.R diff --git a/README.md b/README.md index 28bdeab..7e78064 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,10 @@ Will probably just get rid of them! Think about if you want give warning messages about these deletions in the functions. +## Card indices go from 0 to 7 (instead of 0 to 5 as expected) + +See `questions_number-of-cards.R` for details. + # Reading list * @Arizmendi2022 [$-$] diff --git a/code/02_preprocessing.R b/code/02_preprocessing.R index 54b37ec..6127d13 100644 --- a/code/02_preprocessing.R +++ b/code/02_preprocessing.R @@ -53,12 +53,12 @@ num_stop <- c(diff(c(0, which(dat1$event == "Transform start")))) table(num_stop) # TODO: Do I still need this? -dat1$eventrep <- rep(num_start, num_start) -dat1$dupl <- duplicated(dat1[, c("event", "eventid")]) # keep first -dat1$dupl <- duplicated(dat1[, c("event", "eventid")], fromLast = TRUE) # keep last -dat1[dat1$eventrep == 10, ] -dat1$dupl <- NULL -dat1$eventrep <- NULL +# dat1$eventrep <- rep(num_start, num_start) +# dat1$dupl <- duplicated(dat1[, c("event", "eventid")]) # keep first +# dat1$dupl <- duplicated(dat1[, c("event", "eventid")], fromLast = TRUE) # keep last +# dat1[dat1$eventrep == 10, ] +# dat1$dupl <- NULL +# dat1$eventrep <- NULL # remove duplicated "Transform start" events @@ -89,7 +89,7 @@ trans_wide <- reshape(dat1, direction = "wide", # check how often an eventid is associated with two fileids nrow(subset(trans_wide, trans_wide$fileid.start != trans_wide$fileid.stop)) -# exclude from data set ?? +# TODO: exclude from data set ?? # trans_wide <- subset(trans_wide, trans_wide$fileid.start != trans_wide$fileid.stop) # which(is.na(trans_wide$date.start)) @@ -167,7 +167,7 @@ tail(dat2[, c("artwork", "event", "trace")], 50) rm(aws, i, j, last_event, art) -#' ## Fix glossar entries (find corresponding artworks) +#' ## Fix glossar entries (find corresponding artworks and fill in trace) glossar_files <- unique(dat2[dat2$artwork == "glossar", "popup"]) @@ -256,9 +256,9 @@ dat2[14110:14130, ] # TODO: Integrate for-loop into for-loop above -# TODO: For now: Exclude not matched glossar entries df <- subset(dat2, !is.na(dat2$trace)) +# TODO: For now: Exclude not matched glossar entries df <- df[order(df$trace), ] rownames(df) <- NULL @@ -279,20 +279,10 @@ flipCard_wide$event <- "flipCard" flipCard_wide$duration <- flipCard_wide$time_ms.stop - flipCard_wide$time_ms.start -# TODO: Check if I still need to enter all of these variables -# --> x, y, scale, rotation? flipCard_wide$card <- NA flipCard_wide$popup <- NA -flipCard_wide$x.start <- NA -flipCard_wide$x.stop <- NA -flipCard_wide$y.start <- NA -flipCard_wide$y.stop <- NA flipCard_wide$distance <- NA -flipCard_wide$scale.start <- NA -flipCard_wide$scale.stop <- NA flipCard_wide$scaleSize <- NA -flipCard_wide$rotation.start <- NA -flipCard_wide$rotation.stop <- NA flipCard_wide$rotationDegree <- NA dat_flipCard <- flipCard_wide[, c("fileid.start", "fileid.stop", "event", @@ -325,18 +315,9 @@ openTopic_wide$event <- "openTopic" openTopic_wide$duration <- openTopic_wide$time_ms.stop - openTopic_wide$time_ms.start - openTopic_wide$popup <- NA -openTopic_wide$x.start <- NA -openTopic_wide$x.stop <- NA -openTopic_wide$y.start <- NA -openTopic_wide$y.stop <- NA openTopic_wide$distance <- NA -openTopic_wide$scale.start <- NA -openTopic_wide$scale.stop <- NA openTopic_wide$scaleSize <- NA -openTopic_wide$rotation.start <- NA -openTopic_wide$rotation.stop <- NA openTopic_wide$rotationDegree <- NA dat_openTopic <- openTopic_wide[, c("fileid.start", "fileid.stop", "event", @@ -375,22 +356,13 @@ openPopup_wide <- reshape(dat5, direction = "wide", # df[df$trace == 4595, ] # --> artwork 046 popup selene.xml gets opened twice - openPopup_wide$event <- "openPopup" openPopup_wide$duration <- openPopup_wide$time_ms.stop - openPopup_wide$time_ms.start openPopup_wide$card <- NA -openPopup_wide$x.start <- NA -openPopup_wide$x.stop <- NA -openPopup_wide$y.start <- NA -openPopup_wide$y.stop <- NA openPopup_wide$distance <- NA -openPopup_wide$scale.start <- NA -openPopup_wide$scale.stop <- NA openPopup_wide$scaleSize <- NA -openPopup_wide$rotation.start <- NA -openPopup_wide$rotation.stop <- NA openPopup_wide$rotationDegree <- NA dat_openPopup <- openPopup_wide[, c("fileid.start", "fileid.stop", "event", @@ -529,12 +501,17 @@ out <- out[order(out$date.start), ] rownames(out) <- NULL # Make `trace` a consecutive number -out$trace2 <- as.numeric(factor(out$trace, levels = unique(out$trace))) +out$trace <- as.numeric(factor(out$trace, levels = unique(out$trace))) + +#' # Fill in topics + +topics <- read.table("../data/topics.csv", sep = ";", header = TRUE) +# TODO: #' # Export data -write.table(out, "../data/event_logfiles.csv", - sep = ";", quote = FALSE, row.names = FALSE) +write.table(out, "../data/event_logfiles.csv", sep = ";", + row.names = FALSE) # TODO: Write function for closing events diff --git a/code/03_topic-cards.R b/code/03_topic-cards.R new file mode 100644 index 0000000..3227967 --- /dev/null +++ b/code/03_topic-cards.R @@ -0,0 +1,42 @@ +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light") +rm(list=ls()) + +dat0 <- read.table("../../event_logfiles.csv", sep = ";", header = TRUE) +dat0$artwork <- sprintf("%03d", dat0$artwork) + +# artwork names +artworks <- sort(unique(dat0$artwork)) + +# create data frame with file names and topics for each artwork + +dat <- NULL +file_order <- NULL + +for (artwork in artworks) { + fnames <- dir(pattern = paste0(artwork, "_"), path = artwork, full.names = TRUE) + topic <- NULL + for (fname in fnames) { + topic <- c(topic, gsub("^$", "\\1", + grep("^$", "\\1", + grep("^= 8) + +#' It might be possible, that the number indicates the index of the file +#' and not the actual card that was displayed. BUT: In many cases, there +#' are only 6 (or less) files, but a higher index is present... + +subset(cards, cards$diff < 0) +