From 25a2fadf151fbed2ea938ca820ed70941117926d Mon Sep 17 00:00:00 2001 From: nwickel Date: Tue, 26 Sep 2023 18:39:02 +0200 Subject: [PATCH] Started working on second presentation; created plots for it --- README.Rmd | 16 +++++-- code/plots_8o8m.R | 112 +++++++++++++++++++++++++++++++++++++++++++ code/visualization.R | 90 ++++++++++++++++++++++++---------- 3 files changed, 187 insertions(+), 31 deletions(-) create mode 100644 code/plots_8o8m.R diff --git a/README.Rmd b/README.Rmd index e730198..0560b2d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -134,9 +134,9 @@ log file and completes in another one. The variable `timeMs` seems to be continuous within one log file but not over several log files. -```{r} +```{r, results = FALSE, fig.show = TRUE} # Read data -dat0 <- read.table("data/haum/rawdata_logfiles_small.csv", sep = ";", +dat0 <- read.table("data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv", sep = ";", header = TRUE) dat0$date <- as.POSIXct(dat0$date) dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) @@ -146,7 +146,12 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application", "Show Application"))) # Add trace variable -dat1 <- add_trace(dat, glossar_dict = "data/haum/glossar_dict.RData") +artworks <- unique(stats::na.omit(dat$artwork)) +artworks <- artworks[artworks != "glossar"] +glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup) +glossar_dict <- create_glossardict(artworks, glossar_files, + xmlpath = "data/haum/ContentEyevisit/eyevisit_cards_light/") +dat1 <- add_trace(dat, glossar_dict) # Close events dat2 <- rbind(close_events(dat1, "move"), @@ -408,7 +413,7 @@ when I get them from `.html`. At first glance, it looks like using ```{r} artworks <- unique(dat2$artwork) path <- "data/haum/ContentEyevisit/eyevisit_cards_light/" -topics <- extract_topics(artworks, "index.xml", path) +topics <- extract_topics(artworks, rep("index.xml", length(artworks)), path) topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path) topics[!topics$file_name %in% topics2$file_name, ] @@ -427,7 +432,8 @@ sudden there were 72 instead of 70 artworks. It seems like these two artworks appear on October 21, 2022. ```{r} -dat0 <- read.table("data/haum/rawdata_logfiles.csv", sep = ";", header = TRUE) +dat0 <- read.table("data/haum/raw_logfiles_2023-09-23_01-31-30.csv", + sep = ";", header = TRUE) dat0$date <- as.POSIXct(dat0$date) dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0) diff --git a/code/plots_8o8m.R b/code/plots_8o8m.R new file mode 100644 index 0000000..05a6e7c --- /dev/null +++ b/code/plots_8o8m.R @@ -0,0 +1,112 @@ +# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/") + +library(lattice) +devtools::load_all("../../../../software/mtt") +library(ggplot2) + +# Read data +datlogs <- read.table("../data/8o8m/event_logfiles_2023-09-22_18-54-49.csv", + sep = ";", header = TRUE) +datlogs$date <- as.Date(datlogs$date.start) +datlogs$date.start <- as.POSIXct(datlogs$date.start) +datlogs$date.stop <- as.POSIXct(datlogs$date.stop) +datlogs$artwork <- sprintf("%02d", datlogs$artwork) + +### Which artwork gets touched most often? + +counts_artwork <- table(datlogs$artwork) + +artworks <- unique(datlogs$artwork) +datart <- extract_artworks(artworks, + paste0(artworks, "_de.xml"), + "../data/8o8m/Content8o8m/") +datart <- datart[order(datart$artwork), ] +names(counts_artwork) <- datart$title + +#pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 10, pointsize = 10) +#par(mai = c(3, .6, .1, .1)) +tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 90000), border = "white") +text(tmp, counts_artwork + 1500, c(datart$artwork)) +#dev.off() + +# more interesting per museum in this case... +counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length) +pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10) +barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1)) +dev.off() + + +### Dwell times/duration +pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10) +bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec") +dev.off() + +### Are there certain areas of the table that are touched most often? + +# heatmap + +dattrim <- datlogs[datlogs$x.start < 3840 & + datlogs$x.start > 0 & + datlogs$y.start < 2160 & + datlogs$y.start > 0 & + datlogs$x.stop < 3840 & + datlogs$x.stop > 0 & + datlogs$y.stop < 2160 & + datlogs$y.stop > 0, ] + +cuts <- 100 # 200, 100, 70, ... + +# start +dattrim$x.start.cat <- cut(dattrim$x.start, cuts) +dattrim$y.start.cat <- cut(dattrim$y.start, cuts) + +tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) +colnames(tab.start) <- NULL +rownames(tab.start) <- NULL + +pdf("../figures/heatmap_start_8o8m.pdf", width = 5, height = 5, pointsize = 10) +heatmap(tab.start, Rowv = NA, Colv = NA) +dev.off() + +# stop +dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) +dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) +tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) +colnames(tab.stop) <- NULL +rownames(tab.stop) <- NULL + +pdf("../figures/heatmap_stop_8o8m.pdf", width = 5, height = 5, pointsize = 10) +heatmap(tab.stop, Rowv = NA, Colv = NA) +dev.off() + +### How many visitors per day + +# Cases per day +datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) + +pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10) +par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2) +dev.off() + +### Other stuff + +## weird behavior of timeMs + +pdf("../figures/timeMs_8o8m.pdf", width = 9, height = 6, pointsize = 10) +bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "", + scales = list(x = list(rot = 90), y = list(rot = 90))) +dev.off() + +## x,y-coordinates out of range + +set.seed(1522) + +pdf("../figures/xycoord_8o8m.pdf", width = 5, height = 5, pointsize = 10) +par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) +abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) +legend("bottomleft", "Random sample of 10,000", bg = "white") +legend("topleft", "4K-Display: 3840 x 2160", bg = "white") +dev.off() + diff --git a/code/visualization.R b/code/visualization.R index 7d46cde..b2cf07e 100644 --- a/code/visualization.R +++ b/code/visualization.R @@ -2,6 +2,10 @@ library(lattice) +# Achims colors (used by lattice) +cc <- palette.colors(palette = "Okabe-Ito") +plot(1:10, col = cc, pch = 16, cex = 2) + # Read data datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv", sep = ";", header = TRUE) @@ -10,7 +14,7 @@ datlogs$date.start <- as.POSIXct(datlogs$date.start) datlogs$date.stop <- as.POSIXct(datlogs$date.stop) datlogs$artwork <- sprintf("%03d", datlogs$artwork) -### Which artwork gets touched most often/first? +### Which artwork gets touched most often? counts_artwork <- table(datlogs$artwork) barchart(counts_artwork) @@ -25,7 +29,7 @@ names(counts_artwork) <- datart$title pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) -mtp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white") +tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white") text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505")) dev.off() @@ -55,8 +59,10 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork) # without aggregation bwplot(duration ~ as.factor(event), datlogs) # in min -bwplot(I(duration/1000/60) ~ as.factor(event), datlogs) +pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10) +bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec") +dev.off() datlogs$daydiff <- c(NA, diff(datlogs$date)) plot(daydiff ~ date, datlogs, type = "b") @@ -93,33 +99,40 @@ dattrim <- datlogs[datlogs$x.start < 3840 & datlogs$y.stop < 2160 & datlogs$y.stop > 0, ] -cuts <- 200 # 100, 70, ... +cuts <- 100 # 200, 100, 70, ... # start dattrim$x.start.cat <- cut(dattrim$x.start, cuts) dattrim$y.start.cat <- cut(dattrim$y.start, cuts) tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) -colnames(tab.start) <- paste0("c", 1:cuts) -rownames(tab.start) <- paste0("c", 1:cuts) +colnames(tab.start) <- NULL +rownames(tab.start) <- NULL +pdf("../figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.start, Rowv = NA, Colv = NA) -my_colors <- colorRampPalette(c("gray40", "orange")) +dev.off() + + +my_colors <- colorRampPalette(c("#009E73", "#E69F00")) heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000)) ggplot(as.data.frame(tab.start)) + geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) + - scale_fill_gradient(low = "gray40", high = "orange") + scale_fill_gradient(low = "#009E73", high = "#E69F00") # stop dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) -colnames(tab.stop) <- paste0("c", 1:cuts) -rownames(tab.stop) <- paste0("c", 1:cuts) +colnames(tab.stop) <- NULL +rownames(tab.stop) <- NULL +pdf("../figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.stop, Rowv = NA, Colv = NA) -heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(1000)) +dev.off() + +heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10)) ### How many visitors per day @@ -135,39 +148,64 @@ plot(datcase, type = "h") dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x))) plot(dattrace, type = "h") +plot(trace ~ date, dattrace, type = "h", col = "#0072B2") +points(case ~ date, datcase, type = "h") -# function dependencies of mtt +pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) +par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +plot(case ~ date, datcase, type = "h", col = "#0072B2") +abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2) +text(datcase$date[1037]-250, 150, paste0("Corona gap from ", + datcase$date[1027], " to ", + datcase$date[1039]), col = "#D55E00") +dev.off() + + +### Other stuff + +## function dependencies of mtt devtools::load_all("../../../../software/mtt") #library(mtt) library(mvbutils) foodweb(where = "package:mtt") +pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) foodweb(where = "package:mtt", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", "extract_topics", "add_topic"), expand.ybox = 1.8, #cex = .6, - boxcolor = "gray", lwd = 2) + border = TRUE, + #boxcolor = "gray", + lwd = 2, mai = c(0, 0, 0, 0)) +dev.off() +## weird behavior of timeMs +pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) +#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") +bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "", + scales = list(x = list(rot = 90), y = list(rot = 90))) +dev.off() +## x,y-coordinates out of range +set.seed(1522) +pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) +par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) +#par(mfrow = c(1, 2)) +plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) +abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) +#plot(y.stop ~ x.stop, datlogs) +#abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) +legend("bottomleft", "Random sample of 10,000", bg = "white") +legend("topleft", "4K-Display: 3840 x 2160", bg = "white") +dev.off() - - - - - - - - - - - - -### Other stuff +## moves dat001 <- datlogs[which(datlogs$artwork == "001"), ]