From 1a5c016018b4d93682d1a8fb8dc91b6a7b3a80ad Mon Sep 17 00:00:00 2001 From: nwickel Date: Tue, 9 Jan 2024 17:45:16 +0100 Subject: [PATCH] Adjusted descriptives to new data set --- ...03_plots_haum.R => 03_haum_descriptives.R} | 149 +++++++++++------- 1 file changed, 88 insertions(+), 61 deletions(-) rename code/{03_plots_haum.R => 03_haum_descriptives.R} (60%) diff --git a/code/03_plots_haum.R b/code/03_haum_descriptives.R similarity index 60% rename from code/03_plots_haum.R rename to code/03_haum_descriptives.R index 58fa3c1..218c090 100644 --- a/code/03_plots_haum.R +++ b/code/03_haum_descriptives.R @@ -1,107 +1,130 @@ # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/") library(lattice) +#library(mtt) +devtools::load_all("../../../../software/mtt") # Achims colors (used by lattice) #cc <- palette.colors(palette = "Okabe-Ito") #plot(1:10, col = cc, pch = 16, cex = 2) # Read data -datlogs <- read.table("results/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv", - sep = ";", header = TRUE) -datlogs$date <- as.Date(datlogs$date) -datlogs$date.start <- as.POSIXct(datlogs$date.start) -datlogs$date.stop <- as.POSIXct(datlogs$date.stop) -datlogs$artwork <- sprintf("%03d", datlogs$artwork) -### Which artwork gets touched most often? +datlogs <- read.table("results/haum/event_logfiles_2024-01-02_19-44-50.csv", + colClasses = c("character", "character", "POSIXct", + "POSIXct", "character", "integer", + "numeric", "character", "character", + rep("numeric", 3), "character", + "character", rep("numeric", 11), + "character", "character"), + sep = ";", header = TRUE) -counts_artwork <- table(datlogs$artwork) -barchart(counts_artwork) +datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard", + "openTopic", + "openPopup")) -artworks <- unique(datlogs$artwork) -artworks <- artworks[!artworks %in% c("504", "505")] -datart <- extract_artworks(artworks, - paste0(artworks, ".xml"), +### Number of log files +length(unique(datlogs$fileId.start)) +length(unique(datlogs$fileId.stop)) + +length(unique(c(datlogs$fileId.start, datlogs$fileId.stop))) +# 22803 + +### Number of activities +nrow(datlogs) +table(datlogs$event) +proportions(table(datlogs$event)) +proportions(table(datlogs$event[datlogs$event != "move"])) + +### Time range +range(as.Date(datlogs$date.start)) + +### Topics per item +print(xtabs( ~ item + topic, datlogs), zero = "-") +lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) + +### Which item gets touched most often? + +counts_item <- table(datlogs$item) +barchart(counts_item) + +items <- unique(datlogs$item) +#items <- items[!items %in% c("504", "505")] +datart <- extract_artworks(items, + paste0(items, ".xml"), "../data/haum/ContentEyevisit/eyevisit_cards_light/") datart <- datart[order(datart$artwork), ] -names(counts_artwork) <- datart$title +names(counts_item) <- datart$title -pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10) +pdf("../figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) -tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), - border = "white", col = "#0072B2") -text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505")) +tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), + border = "white", col = "#3CB4DC") +text(tmp, counts_item + 1000, datart$artwork) dev.off() -### Which artwork gets touched most often first? +### Which item gets touched most often first? datcase <- datlogs[!duplicated(datlogs$case), ] -counts_case <- table(datcase$artwork) +counts_case <- table(datcase$item) names(counts_case) <- datart$title tmp <- barplot(counts_case, las = 2, border = "white") -text(tmp, counts_case + 100, c(datart$artwork, "504", "505")) +text(tmp, counts_case + 100, datart$item) -counts <- rbind(counts_artwork, counts_case) +counts <- rbind(counts_item, counts_case) barplot(counts, las = 2, border = "white", col = c("gray", "darkorange")) ### Which teasers seem to work well? barplot(table(datlogs$topic), las = 2) -barplot(table(datlogs$topicFile), las = 2) - ### Dwell times/duration -datagg <- aggregate(duration ~ event + artwork, datlogs, mean) +datagg <- aggregate(duration ~ event + item, datlogs, mean) datagg$ds <- datagg$duration / 1000 -bwplot(ds ~ as.factor(event), datagg) -bwplot(ds ~ as.factor(event) | artwork, datagg) -xyplot(ds ~ as.factor(event), datagg, groups = artwork) +bwplot(ds ~ event, datagg) +bwplot(ds ~ event | item, datagg) +xyplot(ds ~ event, datagg, groups = item) # without aggregation -bwplot(duration ~ as.factor(event), datlogs) +bwplot(duration ~ event, datlogs) # in min set.seed(1027) pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10) -bwplot(I(duration/1000/60) ~ as.factor(event), datlogs[sample(nrow(datlogs), 100000), ], +bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min") dev.off() -datlogs$daydiff <- c(NA, diff(datlogs$date)) -plot(daydiff ~ date, datlogs, type = "b") - ### Move events -datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, x.start, - y.start, x.stop, y.stop) ~ artwork, datlogs, +datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, + y.start, x.stop, y.stop) ~ item, datlogs, mean) hist(log(datlogs$scaleSize)) # --> better interpretable on logscale plot(y.start ~ x.start, datmove, pch = 16, col = "gray") -points(y.start ~ x.start, datmove, col = "#0072B2", cex = log(datmove$scaleSize)) -points(y.start ~ x.start, datmove, col = "#009E73", cex = datmove$rotationDegree) +points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) -with(datmove, text(x.start, y.start, artwork, col = "gray", cex = 1.5)) +with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) with(datmove, arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) ) -abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) +abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) -datscale <- aggregate(scaleSize ~ artwork, datlogs, max) +datscale <- aggregate(scaleSize ~ item, datlogs, max) plot(y.start ~ x.start, datmove, pch = 16, col = "gray") -points(y.start ~ x.start, datmove, col = "#0072B2", cex = log(datscale$scaleSize)) +points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) -#with(datmove, text(x.stop, y.stop, artwork)) -with(datmove, text(x.start, y.start, artwork)) +#with(datmove, text(x.stop, y.stop, item)) +with(datmove, text(x.start, y.start, item)) ### Are there certain areas of the table that are touched most often? @@ -172,6 +195,8 @@ heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10)) ### How many visitors per day +datlogs$date <- as.Date(datlogs$date.start) + # Interactions per day datint <- aggregate(case ~ date, datlogs, length) plot(datint, type = "h") @@ -180,20 +205,21 @@ plot(datint, type = "h") datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) plot(datcase, type = "h") -# Traces per day -dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x))) -plot(dattrace, type = "h") +# Paths per day +datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) +plot(datpath, type = "h") -plot(trace ~ date, dattrace, type = "h", col = "#0072B2") +plot(path ~ date, datpath, type = "h", col = "#3CB4DC") points(case ~ date, datcase, type = "h") -pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) +pdf("../figures/cases_per_day2.pdf", width = 9, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) -plot(case ~ date, datcase, type = "h", col = "#0072B2") -abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2) -text(datcase$date[1037]-250, 150, paste0("Corona gap from ", - datcase$date[1027], " to ", - datcase$date[1039]), col = "#D55E00") +plot(case ~ date, datcase, type = "h", col = "#3CB4DC") +abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], + col = "#FF6900", lty = 2) +text(datcase$date[datcase$date == "2020-03-13"]+470, 80, + "Corona gap from 2020-03-13 to 2022-10-25", + col = "#D55E00") dev.off() @@ -209,10 +235,11 @@ foodweb(where = "package:mtt") pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) foodweb(where = "package:mtt", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", - "extract_topics", "add_topic"), + "extract_topics"), #expand.ybox = 1.8, #cex = .6, #border = TRUE, #boxcolor = "gray", + color.lines = FALSE, lwd = 2, mai = c(0, 0, 0, 0)) dev.off() @@ -222,7 +249,7 @@ dev.off() pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") -bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "", +bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", scales = list(x = list(rot = 90), y = list(rot = 90))) dev.off() @@ -234,18 +261,18 @@ pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mfrow = c(1, 2)) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) -abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) +abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) #plot(y.stop ~ x.stop, datlogs) -#abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) +#abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) legend("bottomleft", "Random sample of 10,000", bg = "white") legend("topleft", "4K-Display: 3840 x 2160", bg = "white") dev.off() ## moves -dat001 <- datlogs[which(datlogs$artwork == "001"), ] +dat001 <- datlogs[which(datlogs$item == "001"), ] -index <- as.numeric(as.factor(dat001$trace)) +index <- as.numeric(as.factor(dat001$path)) cc <- sample(colors(), 100) plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", @@ -263,8 +290,8 @@ points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", cc <- sample(colors(), 70) -dat1 <- datlogs[!duplicated(datlogs$artwork), ] -dat1 <- dat1[order(dat1$artwork), ] +dat1 <- datlogs[!duplicated(datlogs$item), ] +dat1 <- dat1[order(dat1$item), ] plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")