Started working on second presentation; created plots for it
This commit is contained in:
parent
4c786d4df0
commit
25a2fadf15
16
README.Rmd
16
README.Rmd
@ -134,9 +134,9 @@ log file and completes in another one. The variable `timeMs` seems to be
|
|||||||
continuous within one log file but not over several log files.
|
continuous within one log file but not over several log files.
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
```{r, results = FALSE, fig.show = TRUE}
|
||||||
# Read data
|
# Read data
|
||||||
dat0 <- read.table("data/haum/rawdata_logfiles_small.csv", sep = ";",
|
dat0 <- read.table("data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv", sep = ";",
|
||||||
header = TRUE)
|
header = TRUE)
|
||||||
dat0$date <- as.POSIXct(dat0$date)
|
dat0$date <- as.POSIXct(dat0$date)
|
||||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||||
@ -146,7 +146,12 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application",
|
|||||||
"Show Application")))
|
"Show Application")))
|
||||||
|
|
||||||
# Add trace variable
|
# Add trace variable
|
||||||
dat1 <- add_trace(dat, glossar_dict = "data/haum/glossar_dict.RData")
|
artworks <- unique(stats::na.omit(dat$artwork))
|
||||||
|
artworks <- artworks[artworks != "glossar"]
|
||||||
|
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
|
||||||
|
glossar_dict <- create_glossardict(artworks, glossar_files,
|
||||||
|
xmlpath = "data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||||
|
dat1 <- add_trace(dat, glossar_dict)
|
||||||
|
|
||||||
# Close events
|
# Close events
|
||||||
dat2 <- rbind(close_events(dat1, "move"),
|
dat2 <- rbind(close_events(dat1, "move"),
|
||||||
@ -408,7 +413,7 @@ when I get them from `<artwork>.html`. At first glance, it looks like using
|
|||||||
```{r}
|
```{r}
|
||||||
artworks <- unique(dat2$artwork)
|
artworks <- unique(dat2$artwork)
|
||||||
path <- "data/haum/ContentEyevisit/eyevisit_cards_light/"
|
path <- "data/haum/ContentEyevisit/eyevisit_cards_light/"
|
||||||
topics <- extract_topics(artworks, "index.xml", path)
|
topics <- extract_topics(artworks, rep("index.xml", length(artworks)), path)
|
||||||
topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path)
|
topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path)
|
||||||
|
|
||||||
topics[!topics$file_name %in% topics2$file_name, ]
|
topics[!topics$file_name %in% topics2$file_name, ]
|
||||||
@ -427,7 +432,8 @@ sudden there were 72 instead of 70 artworks. It seems like these two
|
|||||||
artworks appear on October 21, 2022.
|
artworks appear on October 21, 2022.
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
dat0 <- read.table("data/haum/rawdata_logfiles.csv", sep = ";", header = TRUE)
|
dat0 <- read.table("data/haum/raw_logfiles_2023-09-23_01-31-30.csv",
|
||||||
|
sep = ";", header = TRUE)
|
||||||
dat0$date <- as.POSIXct(dat0$date)
|
dat0$date <- as.POSIXct(dat0$date)
|
||||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||||
|
|
||||||
|
112
code/plots_8o8m.R
Normal file
112
code/plots_8o8m.R
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
|
||||||
|
|
||||||
|
library(lattice)
|
||||||
|
devtools::load_all("../../../../software/mtt")
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
# Read data
|
||||||
|
datlogs <- read.table("../data/8o8m/event_logfiles_2023-09-22_18-54-49.csv",
|
||||||
|
sep = ";", header = TRUE)
|
||||||
|
datlogs$date <- as.Date(datlogs$date.start)
|
||||||
|
datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
||||||
|
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
||||||
|
datlogs$artwork <- sprintf("%02d", datlogs$artwork)
|
||||||
|
|
||||||
|
### Which artwork gets touched most often?
|
||||||
|
|
||||||
|
counts_artwork <- table(datlogs$artwork)
|
||||||
|
|
||||||
|
artworks <- unique(datlogs$artwork)
|
||||||
|
datart <- extract_artworks(artworks,
|
||||||
|
paste0(artworks, "_de.xml"),
|
||||||
|
"../data/8o8m/Content8o8m/")
|
||||||
|
datart <- datart[order(datart$artwork), ]
|
||||||
|
names(counts_artwork) <- datart$title
|
||||||
|
|
||||||
|
#pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 10, pointsize = 10)
|
||||||
|
#par(mai = c(3, .6, .1, .1))
|
||||||
|
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 90000), border = "white")
|
||||||
|
text(tmp, counts_artwork + 1500, c(datart$artwork))
|
||||||
|
#dev.off()
|
||||||
|
|
||||||
|
# more interesting per museum in this case...
|
||||||
|
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
|
||||||
|
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
|
||||||
|
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
### Dwell times/duration
|
||||||
|
pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
|
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
### Are there certain areas of the table that are touched most often?
|
||||||
|
|
||||||
|
# heatmap
|
||||||
|
|
||||||
|
dattrim <- datlogs[datlogs$x.start < 3840 &
|
||||||
|
datlogs$x.start > 0 &
|
||||||
|
datlogs$y.start < 2160 &
|
||||||
|
datlogs$y.start > 0 &
|
||||||
|
datlogs$x.stop < 3840 &
|
||||||
|
datlogs$x.stop > 0 &
|
||||||
|
datlogs$y.stop < 2160 &
|
||||||
|
datlogs$y.stop > 0, ]
|
||||||
|
|
||||||
|
cuts <- 100 # 200, 100, 70, ...
|
||||||
|
|
||||||
|
# start
|
||||||
|
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
||||||
|
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
||||||
|
|
||||||
|
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
||||||
|
colnames(tab.start) <- NULL
|
||||||
|
rownames(tab.start) <- NULL
|
||||||
|
|
||||||
|
pdf("../figures/heatmap_start_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
|
heatmap(tab.start, Rowv = NA, Colv = NA)
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# stop
|
||||||
|
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
||||||
|
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
||||||
|
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
||||||
|
colnames(tab.stop) <- NULL
|
||||||
|
rownames(tab.stop) <- NULL
|
||||||
|
|
||||||
|
pdf("../figures/heatmap_stop_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
|
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
### How many visitors per day
|
||||||
|
|
||||||
|
# Cases per day
|
||||||
|
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
|
||||||
|
|
||||||
|
pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10)
|
||||||
|
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||||
|
plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2)
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
### Other stuff
|
||||||
|
|
||||||
|
## weird behavior of timeMs
|
||||||
|
|
||||||
|
pdf("../figures/timeMs_8o8m.pdf", width = 9, height = 6, pointsize = 10)
|
||||||
|
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
|
||||||
|
scales = list(x = list(rot = 90), y = list(rot = 90)))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
## x,y-coordinates out of range
|
||||||
|
|
||||||
|
set.seed(1522)
|
||||||
|
|
||||||
|
pdf("../figures/xycoord_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
|
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||||
|
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
|
||||||
|
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
|
||||||
|
legend("bottomleft", "Random sample of 10,000", bg = "white")
|
||||||
|
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
|
||||||
|
dev.off()
|
||||||
|
|
@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
library(lattice)
|
library(lattice)
|
||||||
|
|
||||||
|
# Achims colors (used by lattice)
|
||||||
|
cc <- palette.colors(palette = "Okabe-Ito")
|
||||||
|
plot(1:10, col = cc, pch = 16, cex = 2)
|
||||||
|
|
||||||
# Read data
|
# Read data
|
||||||
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||||
sep = ";", header = TRUE)
|
sep = ";", header = TRUE)
|
||||||
@ -10,7 +14,7 @@ datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
|||||||
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
||||||
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
|
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
|
||||||
|
|
||||||
### Which artwork gets touched most often/first?
|
### Which artwork gets touched most often?
|
||||||
|
|
||||||
counts_artwork <- table(datlogs$artwork)
|
counts_artwork <- table(datlogs$artwork)
|
||||||
barchart(counts_artwork)
|
barchart(counts_artwork)
|
||||||
@ -25,7 +29,7 @@ names(counts_artwork) <- datart$title
|
|||||||
|
|
||||||
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
|
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
|
||||||
par(mai = c(5, .6, .1, .1))
|
par(mai = c(5, .6, .1, .1))
|
||||||
mtp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
|
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
|
||||||
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
|
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
@ -55,8 +59,10 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork)
|
|||||||
# without aggregation
|
# without aggregation
|
||||||
bwplot(duration ~ as.factor(event), datlogs)
|
bwplot(duration ~ as.factor(event), datlogs)
|
||||||
# in min
|
# in min
|
||||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs)
|
|
||||||
|
|
||||||
|
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
|
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
datlogs$daydiff <- c(NA, diff(datlogs$date))
|
datlogs$daydiff <- c(NA, diff(datlogs$date))
|
||||||
plot(daydiff ~ date, datlogs, type = "b")
|
plot(daydiff ~ date, datlogs, type = "b")
|
||||||
@ -93,33 +99,40 @@ dattrim <- datlogs[datlogs$x.start < 3840 &
|
|||||||
datlogs$y.stop < 2160 &
|
datlogs$y.stop < 2160 &
|
||||||
datlogs$y.stop > 0, ]
|
datlogs$y.stop > 0, ]
|
||||||
|
|
||||||
cuts <- 200 # 100, 70, ...
|
cuts <- 100 # 200, 100, 70, ...
|
||||||
|
|
||||||
# start
|
# start
|
||||||
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
||||||
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
||||||
|
|
||||||
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
||||||
colnames(tab.start) <- paste0("c", 1:cuts)
|
colnames(tab.start) <- NULL
|
||||||
rownames(tab.start) <- paste0("c", 1:cuts)
|
rownames(tab.start) <- NULL
|
||||||
|
|
||||||
|
pdf("../figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
heatmap(tab.start, Rowv = NA, Colv = NA)
|
heatmap(tab.start, Rowv = NA, Colv = NA)
|
||||||
my_colors <- colorRampPalette(c("gray40", "orange"))
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
my_colors <- colorRampPalette(c("#009E73", "#E69F00"))
|
||||||
heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000))
|
heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000))
|
||||||
|
|
||||||
ggplot(as.data.frame(tab.start)) +
|
ggplot(as.data.frame(tab.start)) +
|
||||||
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
|
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
|
||||||
scale_fill_gradient(low = "gray40", high = "orange")
|
scale_fill_gradient(low = "#009E73", high = "#E69F00")
|
||||||
|
|
||||||
# stop
|
# stop
|
||||||
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
||||||
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
||||||
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
||||||
colnames(tab.stop) <- paste0("c", 1:cuts)
|
colnames(tab.stop) <- NULL
|
||||||
rownames(tab.stop) <- paste0("c", 1:cuts)
|
rownames(tab.stop) <- NULL
|
||||||
|
|
||||||
|
pdf("../figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
||||||
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(1000))
|
dev.off()
|
||||||
|
|
||||||
|
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10))
|
||||||
|
|
||||||
### How many visitors per day
|
### How many visitors per day
|
||||||
|
|
||||||
@ -135,39 +148,64 @@ plot(datcase, type = "h")
|
|||||||
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x)))
|
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x)))
|
||||||
plot(dattrace, type = "h")
|
plot(dattrace, type = "h")
|
||||||
|
|
||||||
|
plot(trace ~ date, dattrace, type = "h", col = "#0072B2")
|
||||||
|
points(case ~ date, datcase, type = "h")
|
||||||
|
|
||||||
# function dependencies of mtt
|
pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10)
|
||||||
|
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||||
|
plot(case ~ date, datcase, type = "h", col = "#0072B2")
|
||||||
|
abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2)
|
||||||
|
text(datcase$date[1037]-250, 150, paste0("Corona gap from ",
|
||||||
|
datcase$date[1027], " to ",
|
||||||
|
datcase$date[1039]), col = "#D55E00")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
### Other stuff
|
||||||
|
|
||||||
|
## function dependencies of mtt
|
||||||
devtools::load_all("../../../../software/mtt")
|
devtools::load_all("../../../../software/mtt")
|
||||||
#library(mtt)
|
#library(mtt)
|
||||||
|
|
||||||
library(mvbutils)
|
library(mvbutils)
|
||||||
foodweb(where = "package:mtt")
|
foodweb(where = "package:mtt")
|
||||||
|
|
||||||
|
pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
|
||||||
foodweb(where = "package:mtt",
|
foodweb(where = "package:mtt",
|
||||||
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
||||||
"extract_topics", "add_topic"),
|
"extract_topics", "add_topic"),
|
||||||
expand.ybox = 1.8, #cex = .6,
|
expand.ybox = 1.8, #cex = .6,
|
||||||
boxcolor = "gray", lwd = 2)
|
border = TRUE,
|
||||||
|
#boxcolor = "gray",
|
||||||
|
lwd = 2, mai = c(0, 0, 0, 0))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
## weird behavior of timeMs
|
||||||
|
|
||||||
|
pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
|
||||||
|
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||||
|
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
|
||||||
|
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
|
||||||
|
scales = list(x = list(rot = 90), y = list(rot = 90)))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
## x,y-coordinates out of range
|
||||||
|
|
||||||
|
set.seed(1522)
|
||||||
|
|
||||||
|
pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
|
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||||
|
#par(mfrow = c(1, 2))
|
||||||
|
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
|
||||||
|
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
|
||||||
|
#plot(y.stop ~ x.stop, datlogs)
|
||||||
|
#abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
|
||||||
|
legend("bottomleft", "Random sample of 10,000", bg = "white")
|
||||||
|
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
## moves
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Other stuff
|
|
||||||
|
|
||||||
dat001 <- datlogs[which(datlogs$artwork == "001"), ]
|
dat001 <- datlogs[which(datlogs$artwork == "001"), ]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user