Adjusted descriptives to new data set

This commit is contained in:
Nora Wickelmaier 2024-01-09 17:45:16 +01:00
parent 64f9add148
commit 1a5c016018

View File

@ -1,107 +1,130 @@
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
library(lattice) library(lattice)
#library(mtt)
devtools::load_all("../../../../software/mtt")
# Achims colors (used by lattice) # Achims colors (used by lattice)
#cc <- palette.colors(palette = "Okabe-Ito") #cc <- palette.colors(palette = "Okabe-Ito")
#plot(1:10, col = cc, pch = 16, cex = 2) #plot(1:10, col = cc, pch = 16, cex = 2)
# Read data # Read data
datlogs <- read.table("results/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
datlogs <- read.table("results/haum/event_logfiles_2024-01-02_19-44-50.csv",
colClasses = c("character", "character", "POSIXct",
"POSIXct", "character", "integer",
"numeric", "character", "character",
rep("numeric", 3), "character",
"character", rep("numeric", 11),
"character", "character"),
sep = ";", header = TRUE) sep = ";", header = TRUE)
datlogs$date <- as.Date(datlogs$date)
datlogs$date.start <- as.POSIXct(datlogs$date.start)
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
### Which artwork gets touched most often? datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard",
"openTopic",
"openPopup"))
counts_artwork <- table(datlogs$artwork) ### Number of log files
barchart(counts_artwork) length(unique(datlogs$fileId.start))
length(unique(datlogs$fileId.stop))
artworks <- unique(datlogs$artwork) length(unique(c(datlogs$fileId.start, datlogs$fileId.stop)))
artworks <- artworks[!artworks %in% c("504", "505")] # 22803
datart <- extract_artworks(artworks,
paste0(artworks, ".xml"), ### Number of activities
nrow(datlogs)
table(datlogs$event)
proportions(table(datlogs$event))
proportions(table(datlogs$event[datlogs$event != "move"]))
### Time range
range(as.Date(datlogs$date.start))
### Topics per item
print(xtabs( ~ item + topic, datlogs), zero = "-")
lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE)
### Which item gets touched most often?
counts_item <- table(datlogs$item)
barchart(counts_item)
items <- unique(datlogs$item)
#items <- items[!items %in% c("504", "505")]
datart <- extract_artworks(items,
paste0(items, ".xml"),
"../data/haum/ContentEyevisit/eyevisit_cards_light/") "../data/haum/ContentEyevisit/eyevisit_cards_light/")
datart <- datart[order(datart$artwork), ] datart <- datart[order(datart$artwork), ]
names(counts_artwork) <- datart$title names(counts_item) <- datart$title
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10) pdf("../figures/counts_item.pdf", width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1)) par(mai = c(5, .6, .1, .1))
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000),
border = "white", col = "#0072B2") border = "white", col = "#3CB4DC")
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505")) text(tmp, counts_item + 1000, datart$artwork)
dev.off() dev.off()
### Which artwork gets touched most often first? ### Which item gets touched most often first?
datcase <- datlogs[!duplicated(datlogs$case), ] datcase <- datlogs[!duplicated(datlogs$case), ]
counts_case <- table(datcase$artwork) counts_case <- table(datcase$item)
names(counts_case) <- datart$title names(counts_case) <- datart$title
tmp <- barplot(counts_case, las = 2, border = "white") tmp <- barplot(counts_case, las = 2, border = "white")
text(tmp, counts_case + 100, c(datart$artwork, "504", "505")) text(tmp, counts_case + 100, datart$item)
counts <- rbind(counts_artwork, counts_case) counts <- rbind(counts_item, counts_case)
barplot(counts, las = 2, border = "white", col = c("gray", "darkorange")) barplot(counts, las = 2, border = "white", col = c("gray", "darkorange"))
### Which teasers seem to work well? ### Which teasers seem to work well?
barplot(table(datlogs$topic), las = 2) barplot(table(datlogs$topic), las = 2)
barplot(table(datlogs$topicFile), las = 2)
### Dwell times/duration ### Dwell times/duration
datagg <- aggregate(duration ~ event + artwork, datlogs, mean) datagg <- aggregate(duration ~ event + item, datlogs, mean)
datagg$ds <- datagg$duration / 1000 datagg$ds <- datagg$duration / 1000
bwplot(ds ~ as.factor(event), datagg) bwplot(ds ~ event, datagg)
bwplot(ds ~ as.factor(event) | artwork, datagg) bwplot(ds ~ event | item, datagg)
xyplot(ds ~ as.factor(event), datagg, groups = artwork) xyplot(ds ~ event, datagg, groups = item)
# without aggregation # without aggregation
bwplot(duration ~ as.factor(event), datlogs) bwplot(duration ~ event, datlogs)
# in min # in min
set.seed(1027) set.seed(1027)
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10) pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs[sample(nrow(datlogs), 100000), ], bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
ylab = "Duration in min") ylab = "Duration in min")
dev.off() dev.off()
datlogs$daydiff <- c(NA, diff(datlogs$date))
plot(daydiff ~ date, datlogs, type = "b")
### Move events ### Move events
datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, x.start, datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start,
y.start, x.stop, y.stop) ~ artwork, datlogs, y.start, x.stop, y.stop) ~ item, datlogs,
mean) mean)
hist(log(datlogs$scaleSize)) hist(log(datlogs$scaleSize))
# --> better interpretable on logscale # --> better interpretable on logscale
plot(y.start ~ x.start, datmove, pch = 16, col = "gray") plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
points(y.start ~ x.start, datmove, col = "#0072B2", cex = log(datmove$scaleSize)) points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize)
points(y.start ~ x.start, datmove, col = "#009E73", cex = datmove$rotationDegree)
plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160)) xlim = c(0, 3840), ylim = c(0, 2160))
with(datmove, text(x.start, y.start, artwork, col = "gray", cex = 1.5)) with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5))
with(datmove, with(datmove,
arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2)
) )
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
datscale <- aggregate(scaleSize ~ artwork, datlogs, max) datscale <- aggregate(scaleSize ~ item, datlogs, max)
plot(y.start ~ x.start, datmove, pch = 16, col = "gray") plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
points(y.start ~ x.start, datmove, col = "#0072B2", cex = log(datscale$scaleSize)) points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize)
plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
xlim = c(0, 3840), ylim = c(0, 2160)) xlim = c(0, 3840), ylim = c(0, 2160))
#with(datmove, text(x.stop, y.stop, artwork)) #with(datmove, text(x.stop, y.stop, item))
with(datmove, text(x.start, y.start, artwork)) with(datmove, text(x.start, y.start, item))
### Are there certain areas of the table that are touched most often? ### Are there certain areas of the table that are touched most often?
@ -172,6 +195,8 @@ heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10))
### How many visitors per day ### How many visitors per day
datlogs$date <- as.Date(datlogs$date.start)
# Interactions per day # Interactions per day
datint <- aggregate(case ~ date, datlogs, length) datint <- aggregate(case ~ date, datlogs, length)
plot(datint, type = "h") plot(datint, type = "h")
@ -180,20 +205,21 @@ plot(datint, type = "h")
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
plot(datcase, type = "h") plot(datcase, type = "h")
# Traces per day # Paths per day
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x))) datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x)))
plot(dattrace, type = "h") plot(datpath, type = "h")
plot(trace ~ date, dattrace, type = "h", col = "#0072B2") plot(path ~ date, datpath, type = "h", col = "#3CB4DC")
points(case ~ date, datcase, type = "h") points(case ~ date, datcase, type = "h")
pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) pdf("../figures/cases_per_day2.pdf", width = 9, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(case ~ date, datcase, type = "h", col = "#0072B2") plot(case ~ date, datcase, type = "h", col = "#3CB4DC")
abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2) abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")],
text(datcase$date[1037]-250, 150, paste0("Corona gap from ", col = "#FF6900", lty = 2)
datcase$date[1027], " to ", text(datcase$date[datcase$date == "2020-03-13"]+470, 80,
datcase$date[1039]), col = "#D55E00") "Corona gap from 2020-03-13 to 2022-10-25",
col = "#D55E00")
dev.off() dev.off()
@ -209,10 +235,11 @@ foodweb(where = "package:mtt")
pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
foodweb(where = "package:mtt", foodweb(where = "package:mtt",
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
"extract_topics", "add_topic"), "extract_topics"),
#expand.ybox = 1.8, #cex = .6, #expand.ybox = 1.8, #cex = .6,
#border = TRUE, #border = TRUE,
#boxcolor = "gray", #boxcolor = "gray",
color.lines = FALSE,
lwd = 2, mai = c(0, 0, 0, 0)) lwd = 2, mai = c(0, 0, 0, 0))
dev.off() dev.off()
@ -222,7 +249,7 @@ dev.off()
pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "", bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
scales = list(x = list(rot = 90), y = list(rot = 90))) scales = list(x = list(rot = 90), y = list(rot = 90)))
dev.off() dev.off()
@ -234,18 +261,18 @@ pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#par(mfrow = c(1, 2)) #par(mfrow = c(1, 2))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
#plot(y.stop ~ x.stop, datlogs) #plot(y.stop ~ x.stop, datlogs)
#abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2) #abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
legend("bottomleft", "Random sample of 10,000", bg = "white") legend("bottomleft", "Random sample of 10,000", bg = "white")
legend("topleft", "4K-Display: 3840 x 2160", bg = "white") legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
dev.off() dev.off()
## moves ## moves
dat001 <- datlogs[which(datlogs$artwork == "001"), ] dat001 <- datlogs[which(datlogs$item == "001"), ]
index <- as.numeric(as.factor(dat001$trace)) index <- as.numeric(as.factor(dat001$path))
cc <- sample(colors(), 100) cc <- sample(colors(), 100)
plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
@ -263,8 +290,8 @@ points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
cc <- sample(colors(), 70) cc <- sample(colors(), 70)
dat1 <- datlogs[!duplicated(datlogs$artwork), ] dat1 <- datlogs[!duplicated(datlogs$item), ]
dat1 <- dat1[order(dat1$artwork), ] dat1 <- dat1[order(dat1$item), ]
plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500))
abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")