Cleaned up descriptives a bit; still a mess with a lot of old stuff

This commit is contained in:
Nora Wickelmaier 2024-03-22 11:00:36 +01:00
parent bdeb8fb718
commit c0b24a03aa

View File

@ -11,18 +11,28 @@
# #
# input: results/haum/event_logfiles_2024-02-21_16-07-33.csv # input: results/haum/event_logfiles_2024-02-21_16-07-33.csv
# results/haum/raw_logfiles_2024-02-21_16-07-33.csv # results/haum/raw_logfiles_2024-02-21_16-07-33.csv
# output: # output: results/figures/counts_item.pdf
# results/figures/counts_item_firsttouch.pdf
# results/figures/duration.pdf
# results/figures/heatmap_start.pdf
# results/figures/heatmap_stop.pdf
# results/figures/cases_per_day.pdf
# results/figures/timeMs.pdf
# results/figures/xycoord.pdf
# results/figures/event-dist.pdf
# results/figures/traceexplore_trace-event.pdf
# results/figures/ra_trace-event.pdf
# results/figures/traceexplore_case-event.pdf
# results/figures/bp_tod.pdf
# results/figures/bp_wd.pdf
# results/figures/bp_wds.pdf
# results/figures/bp_corona.pdf
# results/figures/traceexplore_case-artwork_often080.pdf
# #
# last mod: 2024-03-13 # last mod: 2024-03-22
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
library(lattice)
library(bupaverse)
#library(mtt)
devtools::load_all("../../../../../software/mtt")
# Overall Research Question: How do museum visitors interact with the # Overall Research Question: How do museum visitors interact with the
# artworks presented on the MTT? # artworks presented on the MTT?
@ -88,7 +98,7 @@ lattice::barchart(counts_item)
items <- unique(datlogs$item) items <- unique(datlogs$item)
#items <- items[!items %in% c("504", "505")] #items <- items[!items %in% c("504", "505")]
datart <- extract_artworks(items, datart <- mtt::extract_artworks(items,
paste0(items, ".xml"), paste0(items, ".xml"),
"../data/haum/ContentEyevisit/eyevisit_cards_light/") "../data/haum/ContentEyevisit/eyevisit_cards_light/")
datart <- datart[order(datart$artwork), ] datart <- datart[order(datart$artwork), ]
@ -251,24 +261,6 @@ text(datcase$date[datcase$date == "2020-03-13"]+470, 80,
col = "#FF6900") col = "#FF6900")
dev.off() dev.off()
### Other stuff
library(mvbutils)
foodweb(where = "package:mtt")
pdf("results/figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
foodweb(where = "package:mtt",
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
"extract_topics"),
#expand.ybox = 1.8, #cex = .6,
#border = TRUE,
#boxcolor = "gray",
color.lines = FALSE,
lwd = 2, mai = c(0, 0, 0, 0))
dev.off()
## weird behavior of timeMs ## weird behavior of timeMs
pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
@ -345,7 +337,7 @@ barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4
dev.off() dev.off()
#barchart(proportions(tab, margin = "item"), las = 2) #lattice::barchart(proportions(tab, margin = "item"), las = 2)
# Proportion of events # Proportion of events
proportions(xtabs( ~ event, datlogs)) proportions(xtabs( ~ event, datlogs))
@ -371,14 +363,14 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs)
datlogs$start <- datlogs$date.start datlogs$start <- datlogs$date.start
datlogs$complete <- datlogs$date.stop datlogs$complete <- datlogs$date.stop
alog <- activitylog(datlogs, alog <- bupaR::activitylog(datlogs,
case_id = "path", case_id = "path",
activity_id = "event", activity_id = "event",
#resource_id = "case", #resource_id = "case",
resource_id = "item", resource_id = "item",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)
@ -391,7 +383,7 @@ processmapR::process_map(alogf, # alog,
alog_no_move <- alog[alog$event != "move", ] alog_no_move <- alog[alog$event != "move", ]
pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1447) set.seed(1447)
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
sample(unique(alog_no_move$path), 400),], sample(unique(alog_no_move$path), 400),],
@ -399,7 +391,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
abbreviate = T) abbreviate = T)
dev.off() dev.off()
pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
@ -426,29 +418,29 @@ which.max(table(datlogs$artwork))
which.min(table(datlogs$artwork)) which.min(table(datlogs$artwork))
which.min(table(datlogs$artwork)[-c(71,72)]) which.min(table(datlogs$artwork)[-c(71,72)])
alog080 <- activitylog(datlogs[datlogs$artwork == "080",], alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
case_id = "path", case_id = "path",
activity_id = "event", activity_id = "event",
resource_id = "artwork", resource_id = "artwork",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog80, frequency("relative")) processmapR::process_map(alog80, processmapR::frequency("relative"))
alog087 <- activitylog(datlogs[datlogs$artwork == "087",], alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",],
case_id = "path", case_id = "path",
activity_id = "event", activity_id = "event",
resource_id = "artwork", resource_id = "artwork",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog087, frequency("relative")) processmapR::process_map(alog087, processmapR::frequency("relative"))
alog504 <- activitylog(datlogs[datlogs$artwork == "504",], alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",],
case_id = "path", case_id = "path",
activity_id = "event", activity_id = "event",
resource_id = "artwork", resource_id = "artwork",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog504, frequency("relative")) processmapR::process_map(alog504, processmapR::frequency("relative"))
#--------------- (3.3) Patterns of cases --------------- #--------------- (3.3) Patterns of cases ---------------
@ -459,17 +451,17 @@ process_map(alog504, frequency("relative"))
# ... weekdays for "normal" and school vacation days? # ... weekdays for "normal" and school vacation days?
# ... pre and post corona? # ... pre and post corona?
alog <- activitylog(datlogs, alog <- bupaR::activitylog(datlogs,
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
alog_no_move <- alog[alog$event != "move", ] alog_no_move <- alog[alog$event != "move", ]
pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1050) set.seed(1050)
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
sample(unique(alog_no_move$path), 300),], sample(unique(alog_no_move$path), 300),],
@ -477,38 +469,38 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
abbreviate = T) abbreviate = T)
dev.off() dev.off()
process_map(alog080, frequency("relative")) processmapR::process_map(alog080, processmapR::frequency("relative"))
alog087 <- activitylog(datlogs[datlogs$artwork == "087",], alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog087, frequency("relative")) processmapR::process_map(alog087, processmapR::frequency("relative"))
### Mornings and afternoons ### Mornings and afternoons
datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning")
alog <- activitylog(datlogs[datlogs$tod == "morning",], alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
alog <- activitylog(datlogs[datlogs$tod == "afternoon",], alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2],
@ -521,24 +513,24 @@ dev.off()
datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday")
alog <- activitylog(datlogs[datlogs$wd == "weekend",], alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
alog <- activitylog(datlogs[datlogs$wd == "weekday",], alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"),
@ -552,24 +544,24 @@ dev.off()
datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school")
datlogs$wds[datlogs$wd == "weekend"] <- NA datlogs$wds[datlogs$wd == "weekend"] <- NA
alog <- activitylog(datlogs[which(datlogs$wds == "school"),], alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
#barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE,
@ -583,24 +575,24 @@ dev.off()
datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post")
alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
alog <- activitylog(datlogs[which(datlogs$corona == "post"),], alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),],
case_id = "case", case_id = "case",
activity_id = "event", activity_id = "event",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Are the same artworks looked at? # Are the same artworks looked at?
pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))
barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"),
@ -613,13 +605,13 @@ dev.off()
# Order in which artworks are looked at # Order in which artworks are looked at
nart <- 5 # select 5 artworks randomly nart <- 5 # select 5 artworks randomly
alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ],
case_id = "case", case_id = "case",
activity_id = "artwork", activity_id = "artwork",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
#map <- process_map(alog, frequency("relative")) #map <- processmapR::process_map(alog, processmapR::frequency("relative"))
## select cases with Vermeer ## select cases with Vermeer
length(unique(datlogs[datlogs$artwork == "080", "case"])) length(unique(datlogs[datlogs$artwork == "080", "case"]))
@ -636,16 +628,16 @@ which(table(tmp$artwork) > 14000)
often080 <- names(which(table(tmp$artwork) > 14000)) often080 <- names(which(table(tmp$artwork) > 14000))
alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ],
case_id = "case", case_id = "case",
activity_id = "artwork", activity_id = "artwork",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
processmapR::trace_explorer(alog, processmapR::trace_explorer(alog,
n_traces = 30, type = "frequent", n_traces = 30, type = "frequent",
@ -658,31 +650,31 @@ dev.off()
# Are there certain topics that people are interested in more than others? # Are there certain topics that people are interested in more than others?
# Do these topic distributions differ for comparable artworks? # Do these topic distributions differ for comparable artworks?
alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),],
case_id = "case", case_id = "case",
activity_id = "topic", activity_id = "topic",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
# Order of topics for Vermeer # Order of topics for Vermeer
# alog080 <- activitylog(datlogs[datlogs$artwork == "080",], # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
# case_id = "case", # case_id = "case",
# activity_id = "topic", # activity_id = "topic",
# resource_id = "path", # resource_id = "path",
# timestamps = c("start", "complete")) # timestamps = c("start", "complete"))
# #
# alog080 <- activitylog(datlogs[datlogs$artwork == "080",], # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
# case_id = "case", # case_id = "case",
# activity_id = "topicFile", # activity_id = "topicFile",
# resource_id = "path", # resource_id = "path",
# timestamps = c("start", "complete")) # timestamps = c("start", "complete"))
# #
# #process_map(alog080, frequency("relative")) # #processmapR::process_map(alog080, processmapR::frequency("relative"))
# #
# # Comparable artwork # # Comparable artwork
# alog083 <- activitylog(datlogs[datlogs$artwork == "083",], # alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",],
# case_id = "case", # case_id = "case",
# activity_id = "topic", # activity_id = "topic",
# resource_id = "path", # resource_id = "path",
@ -697,63 +689,13 @@ which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in%
for (art in c("037", "046", "062", "080", "083", "109")) { for (art in c("037", "046", "062", "080", "083", "109")) {
alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,],
case_id = "case", case_id = "case",
activity_id = "topic", activity_id = "topic",
resource_id = "path", resource_id = "path",
timestamps = c("start", "complete")) timestamps = c("start", "complete"))
process_map(alog, frequency("relative")) processmapR::process_map(alog, processmapR::frequency("relative"))
} }
# Angewandte Kunst, Graphik, Gemälde, Kultur
c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde",
"Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "",
"Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst",
"Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "",
"Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde",
"Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde",
"Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde",
"Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur",
"Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde",
"", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde",
"Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur",
"Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur")
# BURSTS
which.max(table(datlogs$date))
tmp <- datlogs[datlogs$date == "2017-02-12", ]
# number of traces per case on 2017-02-12
rowSums(xtabs( ~ case + path, tmp) != 0)
range(tmp$start)
hours <- lubridate::hour(tmp$start)
xtabs( ~ case + hours, tmp)
# distribution of cases over the day
colSums(xtabs( ~ case + hours, tmp) != 0)
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))
aggregate(path ~ case + hours, tmp, length)
tmp <- aggregate(path ~ case, datlogs, length)
tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"])
tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"])
tmp[tmp$path > 200, ]
plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
lattice::barchart(path ~ time, tmp, horizontal=F)