Started working on second presentation; created plots for it

This commit is contained in:
Nora Wickelmaier 2023-09-26 18:39:02 +02:00
parent 4c786d4df0
commit 25a2fadf15
3 changed files with 187 additions and 31 deletions

View File

@ -134,9 +134,9 @@ log file and completes in another one. The variable `timeMs` seems to be
continuous within one log file but not over several log files.
```{r}
```{r, results = FALSE, fig.show = TRUE}
# Read data
dat0 <- read.table("data/haum/rawdata_logfiles_small.csv", sep = ";",
dat0 <- read.table("data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv", sep = ";",
header = TRUE)
dat0$date <- as.POSIXct(dat0$date)
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
@ -146,7 +146,12 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application",
"Show Application")))
# Add trace variable
dat1 <- add_trace(dat, glossar_dict = "data/haum/glossar_dict.RData")
artworks <- unique(stats::na.omit(dat$artwork))
artworks <- artworks[artworks != "glossar"]
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
glossar_dict <- create_glossardict(artworks, glossar_files,
xmlpath = "data/haum/ContentEyevisit/eyevisit_cards_light/")
dat1 <- add_trace(dat, glossar_dict)
# Close events
dat2 <- rbind(close_events(dat1, "move"),
@ -408,7 +413,7 @@ when I get them from `<artwork>.html`. At first glance, it looks like using
```{r}
artworks <- unique(dat2$artwork)
path <- "data/haum/ContentEyevisit/eyevisit_cards_light/"
topics <- extract_topics(artworks, "index.xml", path)
topics <- extract_topics(artworks, rep("index.xml", length(artworks)), path)
topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path)
topics[!topics$file_name %in% topics2$file_name, ]
@ -427,7 +432,8 @@ sudden there were 72 instead of 70 artworks. It seems like these two
artworks appear on October 21, 2022.
```{r}
dat0 <- read.table("data/haum/rawdata_logfiles.csv", sep = ";", header = TRUE)
dat0 <- read.table("data/haum/raw_logfiles_2023-09-23_01-31-30.csv",
sep = ";", header = TRUE)
dat0$date <- as.POSIXct(dat0$date)
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)

112
code/plots_8o8m.R Normal file
View File

@ -0,0 +1,112 @@
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
library(lattice)
devtools::load_all("../../../../software/mtt")
library(ggplot2)
# Read data
datlogs <- read.table("../data/8o8m/event_logfiles_2023-09-22_18-54-49.csv",
sep = ";", header = TRUE)
datlogs$date <- as.Date(datlogs$date.start)
datlogs$date.start <- as.POSIXct(datlogs$date.start)
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
datlogs$artwork <- sprintf("%02d", datlogs$artwork)
### Which artwork gets touched most often?
counts_artwork <- table(datlogs$artwork)
artworks <- unique(datlogs$artwork)
datart <- extract_artworks(artworks,
paste0(artworks, "_de.xml"),
"../data/8o8m/Content8o8m/")
datart <- datart[order(datart$artwork), ]
names(counts_artwork) <- datart$title
#pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 10, pointsize = 10)
#par(mai = c(3, .6, .1, .1))
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 90000), border = "white")
text(tmp, counts_artwork + 1500, c(datart$artwork))
#dev.off()
# more interesting per museum in this case...
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1))
dev.off()
### Dwell times/duration
pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
dev.off()
### Are there certain areas of the table that are touched most often?
# heatmap
dattrim <- datlogs[datlogs$x.start < 3840 &
datlogs$x.start > 0 &
datlogs$y.start < 2160 &
datlogs$y.start > 0 &
datlogs$x.stop < 3840 &
datlogs$x.stop > 0 &
datlogs$y.stop < 2160 &
datlogs$y.stop > 0, ]
cuts <- 100 # 200, 100, 70, ...
# start
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- NULL
rownames(tab.start) <- NULL
pdf("../figures/heatmap_start_8o8m.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.start, Rowv = NA, Colv = NA)
dev.off()
# stop
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- NULL
rownames(tab.stop) <- NULL
pdf("../figures/heatmap_stop_8o8m.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.stop, Rowv = NA, Colv = NA)
dev.off()
### How many visitors per day
# Cases per day
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2)
dev.off()
### Other stuff
## weird behavior of timeMs
pdf("../figures/timeMs_8o8m.pdf", width = 9, height = 6, pointsize = 10)
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
scales = list(x = list(rot = 90), y = list(rot = 90)))
dev.off()
## x,y-coordinates out of range
set.seed(1522)
pdf("../figures/xycoord_8o8m.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
legend("bottomleft", "Random sample of 10,000", bg = "white")
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
dev.off()

View File

@ -2,6 +2,10 @@
library(lattice)
# Achims colors (used by lattice)
cc <- palette.colors(palette = "Okabe-Ito")
plot(1:10, col = cc, pch = 16, cex = 2)
# Read data
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
sep = ";", header = TRUE)
@ -10,7 +14,7 @@ datlogs$date.start <- as.POSIXct(datlogs$date.start)
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
### Which artwork gets touched most often/first?
### Which artwork gets touched most often?
counts_artwork <- table(datlogs$artwork)
barchart(counts_artwork)
@ -25,7 +29,7 @@ names(counts_artwork) <- datart$title
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))
mtp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
dev.off()
@ -55,8 +59,10 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork)
# without aggregation
bwplot(duration ~ as.factor(event), datlogs)
# in min
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs)
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
dev.off()
datlogs$daydiff <- c(NA, diff(datlogs$date))
plot(daydiff ~ date, datlogs, type = "b")
@ -93,33 +99,40 @@ dattrim <- datlogs[datlogs$x.start < 3840 &
datlogs$y.stop < 2160 &
datlogs$y.stop > 0, ]
cuts <- 200 # 100, 70, ...
cuts <- 100 # 200, 100, 70, ...
# start
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- paste0("c", 1:cuts)
rownames(tab.start) <- paste0("c", 1:cuts)
colnames(tab.start) <- NULL
rownames(tab.start) <- NULL
pdf("../figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.start, Rowv = NA, Colv = NA)
my_colors <- colorRampPalette(c("gray40", "orange"))
dev.off()
my_colors <- colorRampPalette(c("#009E73", "#E69F00"))
heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000))
ggplot(as.data.frame(tab.start)) +
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
scale_fill_gradient(low = "gray40", high = "orange")
scale_fill_gradient(low = "#009E73", high = "#E69F00")
# stop
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- paste0("c", 1:cuts)
rownames(tab.stop) <- paste0("c", 1:cuts)
colnames(tab.stop) <- NULL
rownames(tab.stop) <- NULL
pdf("../figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.stop, Rowv = NA, Colv = NA)
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(1000))
dev.off()
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10))
### How many visitors per day
@ -135,39 +148,64 @@ plot(datcase, type = "h")
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x)))
plot(dattrace, type = "h")
plot(trace ~ date, dattrace, type = "h", col = "#0072B2")
points(case ~ date, datcase, type = "h")
# function dependencies of mtt
pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(case ~ date, datcase, type = "h", col = "#0072B2")
abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2)
text(datcase$date[1037]-250, 150, paste0("Corona gap from ",
datcase$date[1027], " to ",
datcase$date[1039]), col = "#D55E00")
dev.off()
### Other stuff
## function dependencies of mtt
devtools::load_all("../../../../software/mtt")
#library(mtt)
library(mvbutils)
foodweb(where = "package:mtt")
pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
foodweb(where = "package:mtt",
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
"extract_topics", "add_topic"),
expand.ybox = 1.8, #cex = .6,
boxcolor = "gray", lwd = 2)
border = TRUE,
#boxcolor = "gray",
lwd = 2, mai = c(0, 0, 0, 0))
dev.off()
## weird behavior of timeMs
pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
scales = list(x = list(rot = 90), y = list(rot = 90)))
dev.off()
## x,y-coordinates out of range
set.seed(1522)
pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#par(mfrow = c(1, 2))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
#plot(y.stop ~ x.stop, datlogs)
#abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
legend("bottomleft", "Random sample of 10,000", bg = "white")
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
dev.off()
### Other stuff
## moves
dat001 <- datlogs[which(datlogs$artwork == "001"), ]