Worked on analysis files, cleaned out and restructured folders
This commit is contained in:
parent
a130ee596d
commit
4c786d4df0
@ -3,7 +3,7 @@
|
||||
#library(mtt)
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
|
||||
now <- format(Sys.time(), , "%Y-%m-%d_%H-%M-%S")
|
||||
now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
folders <- dir("../data/8o8m/LogFiles/")
|
||||
#folders <- "Berlin"
|
||||
@ -12,8 +12,15 @@ folders <- dir("../data/8o8m/LogFiles/")
|
||||
datraw <- parse_logfiles(folders, path = "../data/8o8m/LogFiles/")
|
||||
artworks <- unique(na.omit(datraw$artwork))
|
||||
|
||||
# export data
|
||||
write.table(datraw, paste0("../data/8o8m/raw_logfiles_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
|
||||
datraw2 <- datraw[!is.na(datraw$artwork), ]
|
||||
# TODO: Why is this happening?
|
||||
|
||||
# convert to log events
|
||||
datlogs <- create_eventlogs(datraw, xmlfiles = paste0(artworks, "_de.xml"),
|
||||
datlogs <- create_eventlogs(datraw2, xmlfiles = paste0(artworks, "_de.xml"),
|
||||
xmlpath = "../data/8o8m/Content8o8m/")
|
||||
|
||||
# export data
|
||||
|
@ -3,7 +3,8 @@
|
||||
#library(mtt)
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
|
||||
now <- format(Sys.time(), , "%Y-%m-%d_%H-%M-%S")
|
||||
now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
|
||||
#now <- "2023-09-23_01-31-30"
|
||||
|
||||
path <- "../data/haum/LogFiles/"
|
||||
|
||||
@ -11,13 +12,27 @@ folders <- dir(path)
|
||||
|
||||
# parse raw log files
|
||||
datraw <- parse_logfiles(folders, path)
|
||||
|
||||
# export data
|
||||
write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
#save(datraw, file = paste0("../data/haum/datraw_", now, ".RData"))
|
||||
#load("../data/haum/datraw_2023-09-23_01-31-30.RData")
|
||||
artworks <- unique(na.omit(datraw$artwork))
|
||||
|
||||
# convert to log events
|
||||
datlogs <- create_eventlogs(datraw, xmlfiles = paste0(artworks, "_de.xml"),
|
||||
datlogs <- create_eventlogs(datraw,
|
||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
artworks <- unique(na.omit(datlogs$artwork))
|
||||
artworks <- artworks[!artworks %in% c("504", "505")]
|
||||
|
||||
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
|
||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
|
||||
datlogs_topics <- add_topic(datlogs, topics = topics)
|
||||
|
||||
|
||||
# export data
|
||||
write.table(datlogs, paste0("../data/haum/event_logfiles_", now, ".csv"),
|
||||
write.table(datlogs_topics, paste0("../data/haum/event_logfiles_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
|
||||
|
71
code/metadata.R
Normal file
71
code/metadata.R
Normal file
@ -0,0 +1,71 @@
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/")
|
||||
|
||||
## Read data for holiday
|
||||
|
||||
hd0 <- read.table("data/metadata/feiertage.csv", sep = ";", header = TRUE)
|
||||
hd0$X.br. <- NULL
|
||||
|
||||
hd <- hd0[hd0$Abkuerzung == "NI", ]
|
||||
names(hd) <- c("state", "stateCode", "date", "holiday")
|
||||
hd$date <- as.POSIXct(hd$date)
|
||||
|
||||
## Read data for school vacations
|
||||
|
||||
# https://ferien-api.de/#holidaysPerStateAndYear
|
||||
# Data extracted (on Linux) via:
|
||||
# curl https://ferien-api.de/api/v1/holidays/NI > schulferien_NI.json
|
||||
|
||||
# library(jsonlite)
|
||||
#
|
||||
# dat <- read_json("data/metadata/schulferien_NI.json", simplify = TRUE)
|
||||
# dat$slug <- NULL
|
||||
#
|
||||
# dat$name <- paste0(gsub("^(.*).niedersachsen.*", "\\1", dat$name),
|
||||
# gsub("^.*niedersachsen [0-9]{4}(.*)", "\\1",
|
||||
# dat$name))
|
||||
#
|
||||
# write.table(dat, "data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
|
||||
# row.names = FALSE, quote = FALSE)
|
||||
|
||||
sf1 <- read.table("data/metadata/schulferien_2016-2018_NI.csv", sep = ";",
|
||||
header = TRUE)
|
||||
sf2 <- read.table("data/metadata/schulferien_2019-2025_NI.csv", sep = ";",
|
||||
header = TRUE)
|
||||
sf <- rbind(sf1, sf2)
|
||||
sf$start <- as.Date(sf$start)
|
||||
sf$end <- as.Date(sf$end)
|
||||
|
||||
sfdat <- NULL
|
||||
|
||||
for (i in seq_len(nrow(sf))) {
|
||||
date <- seq(sf$start[i], sf$end[i], by = 1)
|
||||
sfdat <- rbind(sfdat, data.frame(date, vacations = sf$name[i],
|
||||
stateCodeVacations = sf$stateCode[i]))
|
||||
}
|
||||
|
||||
# TODO: How to handle stateCode? There will be several for certain types of
|
||||
# data sets... Not important here, since I only do NI.
|
||||
|
||||
# load (small) event log data set
|
||||
dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat$date.start <- as.POSIXct(dat$date.start)
|
||||
dat$date.stop <- as.POSIXct(dat$date.stop)
|
||||
dat$artwork <- sprintf("%03d", dat$artwork)
|
||||
dat$date <- as.Date(dat$date.start)
|
||||
|
||||
## Add metadata
|
||||
|
||||
# holidays
|
||||
dat1 <- merge(dat, hd, by = "date", all.x = TRUE)
|
||||
# school vacations
|
||||
dat2 <- merge(dat1, sfdat, by = "date", all.x = TRUE)
|
||||
|
||||
## Export data
|
||||
|
||||
write.table(dat2,
|
||||
file = "data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
sep = ";", row.names = FALSE)
|
||||
|
||||
# TODO: Maybe add infos about artworks?
|
||||
|
@ -10,7 +10,7 @@ if (file.exists(fname)) file.remove(fname)
|
||||
path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/8o8m/Content8o8m"
|
||||
artworks <- dir(path = path)
|
||||
|
||||
dat <- extract_artworks(artworks, file = paste0(artworks, "_de.xml"), path = path)
|
||||
dat <- extract_artworks(artworks, xmlfiles = paste0(artworks, "_de.xml"), xmlpath = path)
|
||||
|
||||
fout <- file(fname, "a") # open in append mode
|
||||
|
||||
|
@ -1,15 +1,16 @@
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
||||
|
||||
source("functions.R")
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
|
||||
fname <- "../haum/overview_artworks.tex"
|
||||
if (file.exists(fname)) file.remove(fname)
|
||||
|
||||
path <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light"
|
||||
artworks <- dir(path = path)
|
||||
xmlpath <- "C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/ContentEyevisit/eyevisit_cards_light"
|
||||
artworks <- dir(path = xmlpath)
|
||||
artworks <- artworks[artworks != "glossar"]
|
||||
|
||||
dat <- extract_artworks(artworks, files = paste0(artworks, ".xml"), path = path)
|
||||
dat <- extract_artworks(artworks, xmlfiles = paste0(artworks, ".xml"),
|
||||
xmlpath = xmlpath)
|
||||
|
||||
fout <- file(fname, "a") # open in append mode
|
||||
|
||||
@ -35,7 +36,7 @@ for (artwork in dat$artwork) {
|
||||
writeLines("", fout)
|
||||
|
||||
writeLines("\\begin{center}", fout)
|
||||
art_path <- paste(path, artwork, artwork, sep = "/")
|
||||
art_path <- paste(xmlpath, artwork, artwork, sep = "/")
|
||||
writeLines(paste0("\\includegraphics[width = 12cm]{", art_path, "}"), fout)
|
||||
writeLines("\\end{center}", fout)
|
||||
writeLines("", fout)
|
||||
|
@ -1,18 +0,0 @@
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/data/metadata")
|
||||
|
||||
# https://ferien-api.de/#holidaysPerStateAndYear
|
||||
# Data extracted (on Linux) via:
|
||||
# curl https://ferien-api.de/api/v1/holidays/NI > schulferien_NI.json
|
||||
|
||||
library(jsonlite)
|
||||
|
||||
dat <- read_json("schulferien_NI.json", simplify = TRUE)
|
||||
dat$slug <- NULL
|
||||
dat$stateCode <- NULL
|
||||
|
||||
dat$name <- paste0(gsub("^(.*).niedersachsen.*", "\\1", dat$name),
|
||||
gsub("^.*niedersachsen [0-9]{4}(.*)", "\\1",
|
||||
dat$name))
|
||||
|
||||
write.table(dat, "schulferien_2019-2025_NI.csv", sep = ";", row.names = FALSE)
|
||||
|
@ -1,16 +1,178 @@
|
||||
setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code/")
|
||||
|
||||
library(lattice)
|
||||
|
||||
# Read data
|
||||
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
sep = ";", header = TRUE)
|
||||
datlogs$date <- as.Date(datlogs$date.start)
|
||||
datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
||||
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
||||
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
|
||||
|
||||
### Which artwork gets touched most often/first?
|
||||
|
||||
counts_artwork <- table(datlogs$artwork)
|
||||
barchart(counts_artwork)
|
||||
|
||||
artworks <- unique(datlogs$artwork)
|
||||
artworks <- artworks[!artworks %in% c("504", "505")]
|
||||
datart <- extract_artworks(artworks,
|
||||
paste0(artworks, ".xml"),
|
||||
"../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
datart <- datart[order(datart$artwork), ]
|
||||
names(counts_artwork) <- datart$title
|
||||
|
||||
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
|
||||
par(mai = c(5, .6, .1, .1))
|
||||
mtp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
|
||||
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
|
||||
dev.off()
|
||||
|
||||
### Which artwork gets touched most often first?
|
||||
|
||||
datcase <- datlogs[!duplicated(datlogs$case), ]
|
||||
counts_case <- table(datcase$artwork)
|
||||
names(counts_case) <- datart$title
|
||||
tmp <- barplot(counts_case, las = 2, border = "white")
|
||||
text(tmp, counts_case + 100, c(datart$artwork, "504", "505"))
|
||||
|
||||
counts <- rbind(counts_artwork, counts_case)
|
||||
barplot(counts, las = 2, border = "white", col = c("gray", "darkorange"))
|
||||
|
||||
### Which teasers seem to work well?
|
||||
barplot(table(datlogs$topic), las = 2)
|
||||
barplot(table(datlogs$topicFile), las = 2)
|
||||
|
||||
|
||||
### Dwell times/duration
|
||||
datagg <- aggregate(duration ~ event + artwork, datlogs, mean)
|
||||
datagg$ds <- datagg$duration / 1000
|
||||
|
||||
bwplot(ds ~ as.factor(event), datagg)
|
||||
xyplot(ds ~ as.factor(event), datagg, groups = artwork)
|
||||
|
||||
# without aggregation
|
||||
bwplot(duration ~ as.factor(event), datlogs)
|
||||
# in min
|
||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs)
|
||||
|
||||
|
||||
datlogs$daydiff <- c(NA, diff(datlogs$date))
|
||||
plot(daydiff ~ date, datlogs, type = "b")
|
||||
|
||||
|
||||
### Are there certain areas of the table that are touched most often?
|
||||
|
||||
# heatmap
|
||||
cuts <- 100
|
||||
|
||||
datlogs$x.start.cat <- cut(datlogs$x.start, cuts)
|
||||
datlogs$y.start.cat <- cut(datlogs$y.start, cuts)
|
||||
|
||||
tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs)
|
||||
|
||||
colnames(tab) <- paste0("c", 1:cuts)
|
||||
rownames(tab) <- paste0("c", 1:cuts)
|
||||
|
||||
heatmap(tab, Rowv = NA, Colv = NA)
|
||||
|
||||
|
||||
library(ggplot2)
|
||||
|
||||
ggplot(as.data.frame(tab)) +
|
||||
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
|
||||
scale_fill_gradient(low = "gray40", high = "orange")
|
||||
|
||||
dattrim <- datlogs[datlogs$x.start < 3840 &
|
||||
datlogs$x.start > 0 &
|
||||
datlogs$y.start < 2160 &
|
||||
datlogs$y.start > 0 &
|
||||
datlogs$x.stop < 3840 &
|
||||
datlogs$x.stop > 0 &
|
||||
datlogs$y.stop < 2160 &
|
||||
datlogs$y.stop > 0, ]
|
||||
|
||||
cuts <- 200 # 100, 70, ...
|
||||
|
||||
# start
|
||||
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
|
||||
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)
|
||||
|
||||
tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
|
||||
colnames(tab.start) <- paste0("c", 1:cuts)
|
||||
rownames(tab.start) <- paste0("c", 1:cuts)
|
||||
|
||||
heatmap(tab.start, Rowv = NA, Colv = NA)
|
||||
my_colors <- colorRampPalette(c("gray40", "orange"))
|
||||
heatmap(tab.start, Rowv = NA, Colv = NA, col = my_colors(1000))
|
||||
|
||||
ggplot(as.data.frame(tab.start)) +
|
||||
geom_tile(aes(x = x.start.cat, y = y.start.cat, fill = Freq)) +
|
||||
scale_fill_gradient(low = "gray40", high = "orange")
|
||||
|
||||
# stop
|
||||
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
|
||||
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
|
||||
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
|
||||
colnames(tab.stop) <- paste0("c", 1:cuts)
|
||||
rownames(tab.stop) <- paste0("c", 1:cuts)
|
||||
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA)
|
||||
heatmap(tab.stop, Rowv = NA, Colv = NA, col = my_colors(1000))
|
||||
|
||||
### How many visitors per day
|
||||
|
||||
# Interactions per day
|
||||
datint <- aggregate(case ~ date, datlogs, length)
|
||||
plot(datint, type = "h")
|
||||
|
||||
# Cases per day
|
||||
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
|
||||
plot(datcase, type = "h")
|
||||
|
||||
# Traces per day
|
||||
dattrace <- aggregate(trace ~ date, datlogs, function(x) length(unique(x)))
|
||||
plot(dattrace, type = "h")
|
||||
|
||||
|
||||
# function dependencies of mtt
|
||||
devtools::load_all("../../../../software/mtt")
|
||||
#library(mtt)
|
||||
|
||||
dat <- parse_logfiles("2016", path = "../data/haum/LogFiles/",
|
||||
save = FALSE)
|
||||
datlogs <- create_eventlogs(dat, "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
library(mvbutils)
|
||||
foodweb(where = "package:mtt")
|
||||
|
||||
foodweb(where = "package:mtt",
|
||||
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
||||
"extract_topics", "add_topic"),
|
||||
expand.ybox = 1.8, #cex = .6,
|
||||
boxcolor = "gray", lwd = 2)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Other stuff
|
||||
|
||||
dat001 <- datlogs[which(datlogs$artwork == "001"), ]
|
||||
|
||||
index <- as.numeric(as.factor(dat001$trace))
|
||||
cc <- sample(colors(), length(unique(dat001$trace)))
|
||||
cc <- sample(colors(), 100)
|
||||
|
||||
plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
|
||||
xlim = c(0, 3840), ylim = c(0, 2160))
|
||||
@ -24,3 +186,15 @@ points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
|
||||
col = "blue")
|
||||
|
||||
|
||||
|
||||
cc <- sample(colors(), 70)
|
||||
|
||||
dat1 <- datlogs[!duplicated(datlogs$artwork), ]
|
||||
dat1 <- dat1[order(dat1$artwork), ]
|
||||
|
||||
plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500))
|
||||
abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")
|
||||
with(dat1, points(x.start, y.start, col = cc, pch = 16))
|
||||
with(dat1, points(x.stop, y.stop, col = cc, pch = 16))
|
||||
with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user