diff --git a/code/02_descriptives.R b/code/02_descriptives.R index 0b74a44..4e91155 100644 --- a/code/02_descriptives.R +++ b/code/02_descriptives.R @@ -11,18 +11,28 @@ # # input: results/haum/event_logfiles_2024-02-21_16-07-33.csv # results/haum/raw_logfiles_2024-02-21_16-07-33.csv -# output: +# output: results/figures/counts_item.pdf +# results/figures/counts_item_firsttouch.pdf +# results/figures/duration.pdf +# results/figures/heatmap_start.pdf +# results/figures/heatmap_stop.pdf +# results/figures/cases_per_day.pdf +# results/figures/timeMs.pdf +# results/figures/xycoord.pdf +# results/figures/event-dist.pdf +# results/figures/traceexplore_trace-event.pdf +# results/figures/ra_trace-event.pdf +# results/figures/traceexplore_case-event.pdf +# results/figures/bp_tod.pdf +# results/figures/bp_wd.pdf +# results/figures/bp_wds.pdf +# results/figures/bp_corona.pdf +# results/figures/traceexplore_case-artwork_often080.pdf # -# last mod: 2024-03-13 +# last mod: 2024-03-22 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") -library(lattice) -library(bupaverse) -#library(mtt) -devtools::load_all("../../../../../software/mtt") - - # Overall Research Question: How do museum visitors interact with the # artworks presented on the MTT? @@ -88,7 +98,7 @@ lattice::barchart(counts_item) items <- unique(datlogs$item) #items <- items[!items %in% c("504", "505")] -datart <- extract_artworks(items, +datart <- mtt::extract_artworks(items, paste0(items, ".xml"), "../data/haum/ContentEyevisit/eyevisit_cards_light/") datart <- datart[order(datart$artwork), ] @@ -251,24 +261,6 @@ text(datcase$date[datcase$date == "2020-03-13"]+470, 80, col = "#FF6900") dev.off() - -### Other stuff - -library(mvbutils) -foodweb(where = "package:mtt") - -pdf("results/figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) -foodweb(where = "package:mtt", - prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", - "extract_topics"), - #expand.ybox = 1.8, #cex = .6, - #border = TRUE, - #boxcolor = "gray", - color.lines = FALSE, - lwd = 2, mai = c(0, 0, 0, 0)) -dev.off() - - ## weird behavior of timeMs pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) @@ -345,7 +337,7 @@ barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4 dev.off() -#barchart(proportions(tab, margin = "item"), las = 2) +#lattice::barchart(proportions(tab, margin = "item"), las = 2) # Proportion of events proportions(xtabs( ~ event, datlogs)) @@ -371,14 +363,14 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs) datlogs$start <- datlogs$date.start datlogs$complete <- datlogs$date.stop -alog <- activitylog(datlogs, +alog <- bupaR::activitylog(datlogs, case_id = "path", activity_id = "event", #resource_id = "case", resource_id = "item", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) @@ -391,7 +383,7 @@ processmapR::process_map(alogf, # alog, alog_no_move <- alog[alog$event != "move", ] -pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) +pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1447) processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% sample(unique(alog_no_move$path), 400),], @@ -399,7 +391,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% abbreviate = T) dev.off() -pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) +pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") @@ -426,29 +418,29 @@ which.max(table(datlogs$artwork)) which.min(table(datlogs$artwork)) which.min(table(datlogs$artwork)[-c(71,72)]) -alog080 <- activitylog(datlogs[datlogs$artwork == "080",], +alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], case_id = "path", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) -process_map(alog80, frequency("relative")) +processmapR::process_map(alog80, processmapR::frequency("relative")) -alog087 <- activitylog(datlogs[datlogs$artwork == "087",], +alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], case_id = "path", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) -process_map(alog087, frequency("relative")) +processmapR::process_map(alog087, processmapR::frequency("relative")) -alog504 <- activitylog(datlogs[datlogs$artwork == "504",], +alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",], case_id = "path", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) -process_map(alog504, frequency("relative")) +processmapR::process_map(alog504, processmapR::frequency("relative")) #--------------- (3.3) Patterns of cases --------------- @@ -459,17 +451,17 @@ process_map(alog504, frequency("relative")) # ... weekdays for "normal" and school vacation days? # ... pre and post corona? -alog <- activitylog(datlogs, +alog <- bupaR::activitylog(datlogs, case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) alog_no_move <- alog[alog$event != "move", ] -pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) +pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1050) processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% sample(unique(alog_no_move$path), 300),], @@ -477,38 +469,38 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% abbreviate = T) dev.off() -process_map(alog080, frequency("relative")) +processmapR::process_map(alog080, processmapR::frequency("relative")) -alog087 <- activitylog(datlogs[datlogs$artwork == "087",], +alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog087, frequency("relative")) +processmapR::process_map(alog087, processmapR::frequency("relative")) ### Mornings and afternoons datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") -alog <- activitylog(datlogs[datlogs$tod == "morning",], +alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) -alog <- activitylog(datlogs[datlogs$tod == "afternoon",], +alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], @@ -521,24 +513,24 @@ dev.off() datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") -alog <- activitylog(datlogs[datlogs$wd == "weekend",], +alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) -alog <- activitylog(datlogs[datlogs$wd == "weekday",], +alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), @@ -552,24 +544,24 @@ dev.off() datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") datlogs$wds[datlogs$wd == "weekend"] <- NA -alog <- activitylog(datlogs[which(datlogs$wds == "school"),], +alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) -alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], +alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, @@ -583,24 +575,24 @@ dev.off() datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") -alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], +alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) -alog <- activitylog(datlogs[which(datlogs$corona == "post"),], +alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) # Are the same artworks looked at? -pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) +pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), @@ -613,13 +605,13 @@ dev.off() # Order in which artworks are looked at nart <- 5 # select 5 artworks randomly -alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], +alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], case_id = "case", activity_id = "artwork", resource_id = "path", timestamps = c("start", "complete")) -#map <- process_map(alog, frequency("relative")) +#map <- processmapR::process_map(alog, processmapR::frequency("relative")) ## select cases with Vermeer length(unique(datlogs[datlogs$artwork == "080", "case"])) @@ -636,16 +628,16 @@ which(table(tmp$artwork) > 14000) often080 <- names(which(table(tmp$artwork) > 14000)) -alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], +alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ], case_id = "case", activity_id = "artwork", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) -pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) +pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) processmapR::trace_explorer(alog, n_traces = 30, type = "frequent", @@ -658,31 +650,31 @@ dev.off() # Are there certain topics that people are interested in more than others? # Do these topic distributions differ for comparable artworks? -alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], +alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),], case_id = "case", activity_id = "topic", resource_id = "path", timestamps = c("start", "complete")) -process_map(alog, frequency("relative")) +processmapR::process_map(alog, processmapR::frequency("relative")) # Order of topics for Vermeer -# alog080 <- activitylog(datlogs[datlogs$artwork == "080",], +# alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], # case_id = "case", # activity_id = "topic", # resource_id = "path", # timestamps = c("start", "complete")) # -# alog080 <- activitylog(datlogs[datlogs$artwork == "080",], +# alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], # case_id = "case", # activity_id = "topicFile", # resource_id = "path", # timestamps = c("start", "complete")) # -# #process_map(alog080, frequency("relative")) +# #processmapR::process_map(alog080, processmapR::frequency("relative")) # # # Comparable artwork -# alog083 <- activitylog(datlogs[datlogs$artwork == "083",], +# alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",], # case_id = "case", # activity_id = "topic", # resource_id = "path", @@ -697,63 +689,13 @@ which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% for (art in c("037", "046", "062", "080", "083", "109")) { - alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], + alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], case_id = "case", activity_id = "topic", resource_id = "path", timestamps = c("start", "complete")) - process_map(alog, frequency("relative")) + processmapR::process_map(alog, processmapR::frequency("relative")) } - -# Angewandte Kunst, Graphik, Gemälde, Kultur - - -c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde", - "Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "", - "Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst", - "Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "", - "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde", - "Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde", - "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde", - "Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur", - "Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde", - "", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde", - "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", - "Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur") - - - -# BURSTS -which.max(table(datlogs$date)) -tmp <- datlogs[datlogs$date == "2017-02-12", ] - -# number of traces per case on 2017-02-12 -rowSums(xtabs( ~ case + path, tmp) != 0) - -range(tmp$start) -hours <- lubridate::hour(tmp$start) -xtabs( ~ case + hours, tmp) - -# distribution of cases over the day -colSums(xtabs( ~ case + hours, tmp) != 0) -barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) - -aggregate(path ~ case + hours, tmp, length) - - - - -tmp <- aggregate(path ~ case, datlogs, length) -tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"]) -tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"]) - -tmp[tmp$path > 200, ] - -plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) - -lattice::barchart(path ~ time, tmp, horizontal=F) - -