Started working on second presentation; created plots for it
This commit is contained in:
parent
4c786d4df0
commit
25a2fadf15
16
README.Rmd
16
README.Rmd
@ -134,9 +134,9 @@ log file and completes in another one. The variable `timeMs` seems to be
|
||||
continuous within one log file but not over several log files.
|
||||
|
||||
|
||||
```{r}
|
||||
```{r, results = FALSE, fig.show = TRUE}
|
||||
# Read data
|
||||
dat0 <- read.table("data/haum/rawdata_logfiles_small.csv", sep = ";",
|
||||
dat0 <- read.table("data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv", sep = ";",
|
||||
header = TRUE)
|
||||
dat0$date <- as.POSIXct(dat0$date)
|
||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||
@ -146,7 +146,12 @@ dat <- subset(dat0, !(dat0$event %in% c("Start Application",
|
||||
"Show Application")))
|
||||
|
||||
# Add trace variable
|
||||
dat1 <- add_trace(dat, glossar_dict = "data/haum/glossar_dict.RData")
|
||||
artworks <- unique(stats::na.omit(dat$artwork))
|
||||
artworks <- artworks[artworks != "glossar"]
|
||||
glossar_files <- unique(subset(dat, dat$artwork == "glossar")$popup)
|
||||
glossar_dict <- create_glossardict(artworks, glossar_files,
|
||||
xmlpath = "data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
dat1 <- add_trace(dat, glossar_dict)
|
||||
|
||||
# Close events
|
||||
dat2 <- rbind(close_events(dat1, "move"),
|
||||
@ -408,7 +413,7 @@ when I get them from `<artwork>.html`. At first glance, it looks like using
|
||||
```{r}
|
||||
artworks <- unique(dat2$artwork)
|
||||
path <- "data/haum/ContentEyevisit/eyevisit_cards_light/"
|
||||
topics <- extract_topics(artworks, "index.xml", path)
|
||||
topics <- extract_topics(artworks, rep("index.xml", length(artworks)), path)
|
||||
topics2 <- extract_topics(artworks, paste0(artworks, ".xml"), path)
|
||||
|
||||
topics[!topics$file_name %in% topics2$file_name, ]
|
||||
@ -427,7 +432,8 @@ sudden there were 72 instead of 70 artworks. It seems like these two
|
||||
artworks appear on October 21, 2022.
|
||||
|
||||
```{r}
|
||||
dat0 <- read.table("data/haum/rawdata_logfiles.csv", sep = ";", header = TRUE)
|
||||
dat0 <- read.table("data/haum/raw_logfiles_2023-09-23_01-31-30.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat0$date <- as.POSIXct(dat0$date)
|
||||
dat0$glossar <- ifelse(dat0$artwork == "glossar", 1, 0)
|
||||
|
||||
|
112
code/plots_8o8m.R
Normal file
112
code/plots_8o8m.R
Normal file
@ -0,0 +1,112 @@
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
|
||||
|
||||
library(lattice)
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
library(ggplot2)
|
||||
|
||||
# Read data
|
||||
datlogs <- read.table("../data/8o8m/event_logfiles_2023-09-22_18-54-49.csv",
|
||||
sep = ";", header = TRUE)
|
||||
datlogs$date <- as.Date(datlogs$date.start)
|
||||
datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
||||
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
||||
datlogs$artwork <- sprintf("%02d", datlogs$artwork)
|
||||
|
||||
### Which artwork gets touched most often?
|
||||
|
||||
counts_artwork <- table(datlogs$artwork)
|
||||
|
||||
artworks <- unique(datlogs$artwork)
|
||||
datart <- extract_artworks(artworks,
|
||||
paste0(artworks, "_de.xml"),
|
||||
"../data/8o8m/Content8o8m/")
|
||||
datart <- datart[order(datart$artwork), ]
|
||||
names(counts_artwork) <- datart$title
|
||||
|
||||
#pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 10, pointsize = 10)
|
||||
#par(mai = c(3, .6, .1, .1))
|
||||
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 90000), border = "white")
|
||||
text(tmp, counts_artwork + 1500, c(datart$artwork))
|
||||
#dev.off()
|
||||
|
||||
# more interesting per museum in this case...
|
||||
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
|
||||
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
|
||||
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1))
|
||||
dev.off()
|
||||
|
||||
|
||||
### Dwell times/duration
|
||||
pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
|
||||
dev.off()
|
||||
|
||||
### Are there certain areas of the table that are touched most often?
|
||||
|
||||
# heatmap
|
||||
|
||||
dattrim <- datlogs[datlogs$x.start < 3840 &
|
||||
datlogs$x.start > 0 &
|
||||
datlogs$y.start < 2160 &
|
||||
datlogs$y.start > 0 &
|
||||
datlogs$x.stop < 3840 &
|
||||
datlogs$x.stop > 0 &
|
||||
datlogs$y.stop < 2160 &
|
||||
datlogs$y.stop > 0, ]
|
||||
|
||||
cuts <- 100 # 200, 100, 70, ...
|
||||
|
||||
# start
|
||||
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
||||
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
||||
|
||||
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
||||
colnames(tab.start) <- NULL
|
||||
rownames(tab.start) <- NULL
|
||||
|
||||
pdf("../figures/heatmap_start_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||
heatmap(tab.start, Rowv = NA, Colv = NA)
|
||||
dev.off()
|
||||
|
||||
# stop
|
||||
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
||||
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
||||
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
||||
colnames(tab.stop) <- NULL
|
||||
rownames(tab.stop) <- NULL
|
||||
|
||||
pdf("../figures/heatmap_stop_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
||||
dev.off()
|
||||
|
||||
### How many visitors per day
|
||||
|
||||
# Cases per day
|
||||
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
|
||||
|
||||
pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10)
|
||||
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2)
|
||||
dev.off()
|
||||
|
||||
### Other stuff
|
||||
|
||||
## weird behavior of timeMs
|
||||
|
||||
pdf("../figures/timeMs_8o8m.pdf", width = 9, height = 6, pointsize = 10)
|
||||
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
|
||||
scales = list(x = list(rot = 90), y = list(rot = 90)))
|
||||
dev.off()
|
||||
|
||||
## x,y-coordinates out of range
|
||||
|
||||
set.seed(1522)
|
||||
|
||||
pdf("../figures/xycoord_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
|
||||
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
|
||||
legend("bottomleft", "Random sample of 10,000", bg = "white")
|
||||
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
|
||||
dev.off()
|
||||
|
@ -2,6 +2,10 @@
|
||||
|
||||
library(lattice)
|
||||
|
||||
# Achims colors (used by lattice)
|
||||
cc <- palette.colors(palette = "Okabe-Ito")
|
||||
plot(1:10, col = cc, pch = 16, cex = 2)
|
||||
|
||||
# Read data
|
||||
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
sep = ";", header = TRUE)
|
||||
@ -10,7 +14,7 @@ datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
||||
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
||||
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
|
||||
|
||||
### Which artwork gets touched most often/first?
|
||||
### Which artwork gets touched most often?
|
||||
|
||||
counts_artwork <- table(datlogs$artwork)
|
||||
barchart(counts_artwork)
|
||||
@ -25,7 +29,7 @@ names(counts_artwork) <- datart$title
|
||||
|
||||
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
|
||||
par(mai = c(5, .6, .1, .1))
|
||||
mtp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
|
||||
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
|
||||
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
|
||||
dev.off()
|
||||
|
||||
@ -55,8 +59,10 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork)
|
||||
# without aggregation
|
||||
bwplot(duration ~ as.factor(event), datlogs)
|
||||
# in min
|
||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs)
|
||||
|
||||
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
|
||||
dev.off()
|
||||
|
||||
datlogs$daydiff <- c(NA, diff(datlogs$date))
|
||||
plot(daydiff ~ date, datlogs, type = "b")
|
||||
@ -93,33 +99,40 @@ dattrim <- datlogs[datlogs$x.start < 3840 &
|
||||
datlogs$y.stop < 2160 &
|
||||
datlogs$y.stop > 0, ]
|
||||
|
||||
cuts <- 200 # 100, 70, ...
|
||||
cuts <- 100 # 200, 100, 70, ...
|
||||
|
||||
# start
|
||||
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
||||
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
||||
|
||||
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
||||
colnames(tab.start) <- paste0("c", 1:cuts)
|
||||
rownames(tab.start) <- paste0("c", 1:cuts)
|
||||
colnames(tab.start) <- NULL
|
||||
rownames(tab.start) <- NULL
|
||||
|
||||
pdf("../figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
|
||||
heatmap(tab.start, Rowv = NA, Colv = NA)
|
||||
my_colors <- colorRampPalette(c("gray40", "orange"))
|
||||
dev.off()
|
||||
|
||||
|
||||
my_colors <- colorRampPalette(c("#009E73", "#E69F00"))
|
||||
heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000))
|
||||
|
||||
ggplot(as.data.frame(tab.start)) +
|
||||
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
|
||||
scale_fill_gradient(low = "gray40", high = "orange")
|
||||
scale_fill_gradient(low = "#009E73", high = "#E69F00")
|
||||
|
||||
# stop
|
||||
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
||||
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
||||
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
||||
colnames(tab.stop) <- paste0("c", 1:cuts)
|
||||
rownames(tab.stop) <- paste0("c", 1:cuts)
|
||||
colnames(tab.stop) <- NULL
|
||||
rownames(tab.stop) <- NULL
|
||||
|
||||
pdf("../figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(1000))
|
||||
dev.off()
|
||||
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(10))
|
||||
|
||||
### How many visitors per day
|
||||
|
||||
@ -135,39 +148,64 @@ plot(datcase, type = "h")
|
||||
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x)))
|
||||
plot(dattrace, type = "h")
|
||||
|
||||
plot(trace ~ date, dattrace, type = "h", col = "#0072B2")
|
||||
points(case ~ date, datcase, type = "h")
|
||||
|
||||
# function dependencies of mtt
|
||||
pdf("../figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10)
|
||||
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
plot(case ~ date, datcase, type = "h", col = "#0072B2")
|
||||
abline(v = datcase$date[c(1027, 1039)], col = "#D55E00", lty = 2)
|
||||
text(datcase$date[1037]-250, 150, paste0("Corona gap from ",
|
||||
datcase$date[1027], " to ",
|
||||
datcase$date[1039]), col = "#D55E00")
|
||||
dev.off()
|
||||
|
||||
|
||||
### Other stuff
|
||||
|
||||
## function dependencies of mtt
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
#library(mtt)
|
||||
|
||||
library(mvbutils)
|
||||
foodweb(where = "package:mtt")
|
||||
|
||||
pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
|
||||
foodweb(where = "package:mtt",
|
||||
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
||||
"extract_topics", "add_topic"),
|
||||
expand.ybox = 1.8, #cex = .6,
|
||||
boxcolor = "gray", lwd = 2)
|
||||
border = TRUE,
|
||||
#boxcolor = "gray",
|
||||
lwd = 2, mai = c(0, 0, 0, 0))
|
||||
dev.off()
|
||||
|
||||
|
||||
## weird behavior of timeMs
|
||||
|
||||
pdf("../figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
|
||||
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
|
||||
bwplot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "",
|
||||
scales = list(x = list(rot = 90), y = list(rot = 90)))
|
||||
dev.off()
|
||||
|
||||
## x,y-coordinates out of range
|
||||
|
||||
set.seed(1522)
|
||||
|
||||
pdf("../figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
|
||||
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
||||
#par(mfrow = c(1, 2))
|
||||
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
|
||||
abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
|
||||
#plot(y.stop ~ x.stop, datlogs)
|
||||
#abline(v = c(0, 3840), h = c(0, 2160), col = "#0072B2", lwd = 2)
|
||||
legend("bottomleft", "Random sample of 10,000", bg = "white")
|
||||
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
|
||||
dev.off()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Other stuff
|
||||
## moves
|
||||
|
||||
dat001 <- datlogs[which(datlogs$artwork == "001"), ]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user