# 02_descriptives.R # # content: (1) Read data # (2) Descriptives # (3) Process Mining # (3.1) Check data quality # (3.2) Interactions for different artworks # (3.3) Patterns of cases # (3.4) Artwork sequences # (3.5) Topics # # input: results/haum/event_logfiles_2024-02-21_16-07-33.csv # results/haum/raw_logfiles_2024-02-21_16-07-33.csv # output: # # last mod: 2024-03-13 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") library(lattice) library(bupaverse) #library(mtt) devtools::load_all("../../../../../software/mtt") # Overall Research Question: How do museum visitors interact with the # artworks presented on the MTT? # Distribution of bursts # Can this be visualized in a nice way? #--------------- (1) Read data --------------- datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", colClasses = c("character", "character", "POSIXct", "POSIXct", "character", "integer", "numeric", "character", "character", rep("numeric", 3), "character", "character", rep("numeric", 11), "character", "character"), sep = ";", header = TRUE) datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard", "openTopic", "openPopup")) datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", sep = ";", header = TRUE) # Add weekdays to data frame datlogs$weekdays <- factor(weekdays(datlogs$date.start), levels = c("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag", "Samstag", "Sonntag"), labels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")) ### Number of log files length(unique(datraw$fileId)) # 39767 length(unique(c(datlogs$fileId.start, datlogs$fileId.stop))) # 22789 ### Number of activities nrow(datlogs) table(datlogs$event) proportions(table(datlogs$event)) proportions(table(datlogs$event[datlogs$event != "move"])) ### Time range range(as.Date(datlogs$date.start)) ### Topics per item print(xtabs( ~ item + topic, datlogs), zero = "-") lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) mat[mat == 0] <- NA image(mat, axes = F, col = rainbow(100)) #--------------- (2) Descriptives --------------- ### Which item gets touched most often? counts_item <- table(datlogs$item) lattice::barchart(counts_item) items <- unique(datlogs$item) #items <- items[!items %in% c("504", "505")] datart <- extract_artworks(items, paste0(items, ".xml"), "../data/haum/ContentEyevisit/eyevisit_cards_light/") datart <- datart[order(datart$artwork), ] names(counts_item) <- datart$title pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), border = NA, col = "#434F4F") text(tmp, counts_item + 1000, datart$artwork) dev.off() ### Which item gets touched most often first? datcase <- datlogs[!duplicated(datlogs$case), ] counts_case <- table(datcase$item) names(counts_case) <- datart$title tmp <- barplot(counts_case, las = 2, border = "white") text(tmp, counts_case + 100, datart$item) counts <- rbind(counts_item, counts_case) pdf("results/figures/counts_item_firsttouch.pdf", width = 20, height = 10, pointsize = 10) par(mai = c(5, .6, .1, .1)) tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000)) text(tmp, counts_item + counts_case + 1000, datart$artwork) legend("topleft", c("Total interactions", "First interactions"), col = c("#434F4F", "#FF6900"), pch = 15, bty = "n") dev.off() ### Which teasers seem to work well? barplot(table(datlogs$topic), las = 2) ### Dwell times/duration datagg <- aggregate(duration ~ event + item, datlogs, mean) datagg$ds <- datagg$duration / 1000 # in secs lattice::bwplot(ds ~ event, datagg) # without aggregation lattice::bwplot(duration / 1000 / 60 ~ event, datlogs) # in min set.seed(1027) pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], ylab = "Duration in min") dev.off() ### Move events datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, y.start, x.stop, y.stop) ~ item, datlogs, mean) hist(log(datlogs$scaleSize)) # --> better interpretable on logscale plot(y.start ~ x.start, datmove, pch = 16, col = "gray") points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) with(datmove, arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) ) abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) datscale <- aggregate(scaleSize ~ item, datlogs, max) plot(y.start ~ x.start, datmove, pch = 16, col = "gray") points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) #with(datmove, text(x.stop, y.stop, item)) with(datmove, text(x.start, y.start, item)) ### Are there certain areas of the table that are touched most often? # heatmap cuts <- 100 datlogs$x.start.cat <- cut(datlogs$x.start, cuts) datlogs$y.start.cat <- cut(datlogs$y.start, cuts) tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) colnames(tab) <- paste0("c", 1:cuts) rownames(tab) <- paste0("c", 1:cuts) heatmap(tab, Rowv = NA, Colv = NA) dattrim <- datlogs[datlogs$x.start < 3840 & datlogs$x.start > 0 & datlogs$y.start < 2160 & datlogs$y.start > 0 & datlogs$x.stop < 3840 & datlogs$x.stop > 0 & datlogs$y.stop < 2160 & datlogs$y.stop > 0, ] cuts <- 100 # 200, 100, 70, ... # start dattrim$x.start.cat <- cut(dattrim$x.start, cuts) dattrim$y.start.cat <- cut(dattrim$y.start, cuts) tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) colnames(tab.start) <- NULL rownames(tab.start) <- NULL pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.start, Rowv = NA, Colv = NA) dev.off() # stop dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) colnames(tab.stop) <- NULL rownames(tab.stop) <- NULL pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) heatmap(tab.stop, Rowv = NA, Colv = NA) dev.off() ### How many visitors per day datlogs$date <- as.Date(datlogs$date.start) # Interactions per day datint <- aggregate(case ~ date, datlogs, length) plot(datint, type = "h") # Cases per day datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) plot(datcase, type = "h") # Paths per day datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) plot(datpath, type = "h") plot(path ~ date, datpath, type = "h", col = "#3CB4DC") points(case ~ date, datcase, type = "h") pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) plot(case ~ date, datcase, type = "h", col = "#434F4F") abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], col = "#FF6900", lty = 2) text(datcase$date[datcase$date == "2020-03-13"]+470, 80, "Corona gap from 2020-03-13 to 2022-10-25", col = "#FF6900") dev.off() ### Other stuff library(mvbutils) foodweb(where = "package:mtt") pdf("results/figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10) foodweb(where = "package:mtt", prune = c("parse_logfiles", "create_eventlogs", "extract_artworks", "extract_topics"), #expand.ybox = 1.8, #cex = .6, #border = TRUE, #boxcolor = "gray", color.lines = FALSE, lwd = 2, mai = c(0, 0, 0, 0)) dev.off() ## weird behavior of timeMs pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", scales = list(x = list(rot = 90), y = list(rot = 90))) dev.off() ## x,y-coordinates out of range set.seed(1522) pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) #par(mfrow = c(1, 2)) plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) #plot(y.stop ~ x.stop, datlogs) #abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) legend("bottomleft", "Random sample of 10,000", bg = "white") legend("topleft", "4K-Display: 3840 x 2160", bg = "white") dev.off() ## moves dat001 <- datlogs[which(datlogs$item == "001"), ] index <- as.numeric(as.factor(dat001$path)) cc <- sample(colors(), 100) plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160)) with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc[index])) plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, col = "blue") cc <- sample(colors(), 70) dat1 <- datlogs[!duplicated(datlogs$item), ] dat1 <- dat1[order(dat1$item), ] plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") with(dat1, points(x.start, y.start, col = cc, pch = 16)) with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc)) # How many events per topic, per path, ... # How many popups per artwork? # Number of events per artwork tab <- xtabs( ~ item + event, datlogs) addmargins(tab) proportions(tab, margin = "item") proportions(tab, margin = "event") cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) dev.off() #barchart(proportions(tab, margin = "item"), las = 2) # Proportion of events proportions(xtabs( ~ event, datlogs)) # Mean proportion of event per path colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) # Mean proportion of event per item colMeans(proportions(tab, margin = "item")) # Proportion of unclosed events nrow(datlogs[is.na(datlogs$complete), ]) nrow(datlogs[is.na(datlogs$complete), ]) / nrow(datlogs) # Proportion of events spanning more than one log file sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs) #--------------- (3) Process Mining --------------- #--------------- (3.1) Check data quality --------------- datlogs$start <- datlogs$date.start datlogs$complete <- datlogs$date.stop alog <- activitylog(datlogs, case_id = "path", activity_id = "event", #resource_id = "case", resource_id = "item", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) processmapR::process_map(alogf, # alog, type_nodes = processmapR::frequency("absolute"), sec_nodes = processmapR::frequency("relative"), type_edges = processmapR::frequency("absolute"), sec_edges = processmapR::frequency("relative"), rankdir = "TB") alog_no_move <- alog[alog$event != "move", ] pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1447) processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% sample(unique(alog_no_move$path), 400),], coverage = 1, type = "frequent", abbreviate = T) dev.off() pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") plot(ra_no_move) dev.off() ra <- edeaR::resource_frequency(alog, "resource-activity") plot(ra) heatmap(xtabs(relative_activity ~ artwork + event, ra)) heatmap(xtabs(relative_resource ~ artwork + event, ra_no_move)) heatmap(xtabs(relative_activity ~ artwork + event, ra_no_move)) aggregate(relative_activity ~ event, ra, sum) aggregate(relative_resource ~ artwork, ra, sum) #--------------- (3.2) Interactions for different artworks --------------- # Do interaction patterns for events per trace look different for different # artworks? which.max(table(datlogs$artwork)) which.min(table(datlogs$artwork)) which.min(table(datlogs$artwork)[-c(71,72)]) alog080 <- activitylog(datlogs[datlogs$artwork == "080",], case_id = "path", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) process_map(alog80, frequency("relative")) alog087 <- activitylog(datlogs[datlogs$artwork == "087",], case_id = "path", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) process_map(alog087, frequency("relative")) alog504 <- activitylog(datlogs[datlogs$artwork == "504",], case_id = "path", activity_id = "event", resource_id = "artwork", timestamps = c("start", "complete")) process_map(alog504, frequency("relative")) #--------------- (3.3) Patterns of cases --------------- # What kind of patterns do we have? Are their typical sequences for cases? # Do case patterns look different for ... # ... mornings and afternoons? # ... weekdays and weekends? # ... weekdays for "normal" and school vacation days? # ... pre and post corona? alog <- activitylog(datlogs, case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) alog_no_move <- alog[alog$event != "move", ] pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) set.seed(1050) processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% sample(unique(alog_no_move$path), 300),], coverage = 1, type = "frequent", abbreviate = T) dev.off() process_map(alog080, frequency("relative")) alog087 <- activitylog(datlogs[datlogs$artwork == "087",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog087, frequency("relative")) ### Mornings and afternoons datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") alog <- activitylog(datlogs[datlogs$tod == "morning",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) alog <- activitylog(datlogs[datlogs$tod == "afternoon",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], las = 2, beside = TRUE, legend = c("afternoon", "morning"), args.legend = list(x = "topleft")) dev.off() ### Weekdays and weekends datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") alog <- activitylog(datlogs[datlogs$wd == "weekend",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) alog <- activitylog(datlogs[datlogs$wd == "weekday",], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), las = 2, beside = TRUE, legend = c("weekday", "weekend"), args.legend = list(x = "topleft")) dev.off() ### Weekdays vs. school vacation weekdays datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") datlogs$wds[datlogs$wd == "weekend"] <- NA alog <- activitylog(datlogs[which(datlogs$wds == "school"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, barplot(proportions(xtabs( ~ wds + artwork, datlogs), margin = "wds"), las = 2, beside = TRUE, legend = c("school", "vacation"), args.legend = list(x = "topleft")) dev.off() ### Pre and post Corona datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) alog <- activitylog(datlogs[which(datlogs$corona == "post"),], case_id = "case", activity_id = "event", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) # Are the same artworks looked at? pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), las = 2, beside = TRUE, legend = c("post", "pre"), args.legend = list(x = "topleft")) dev.off() #--------------- (3.4) Artwork sequences --------------- # Order in which artworks are looked at nart <- 5 # select 5 artworks randomly alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], case_id = "case", activity_id = "artwork", resource_id = "path", timestamps = c("start", "complete")) #map <- process_map(alog, frequency("relative")) ## select cases with Vermeer length(unique(datlogs[datlogs$artwork == "080", "case"])) # 12615 case080 <- unique(datlogs[datlogs$artwork == "080", "case"]) tmp <- datlogs[datlogs$case %in% case080, ] table(tmp$artwork) # --> all :) # select the ones most often (I am aiming for 10...) barplot(table(tmp$artwork)) abline(h = 14000, col = "red") which(table(tmp$artwork) > 14000) often080 <- names(which(table(tmp$artwork) > 14000)) alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], case_id = "case", activity_id = "artwork", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) processmapR::trace_explorer(alog, n_traces = 30, type = "frequent", abbreviate = TRUE) dev.off() #--------------- (3.5) Topics --------------- # Are there certain topics that people are interested in more than others? # Do these topic distributions differ for comparable artworks? alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], case_id = "case", activity_id = "topic", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) # Order of topics for Vermeer # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], # case_id = "case", # activity_id = "topic", # resource_id = "path", # timestamps = c("start", "complete")) # # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], # case_id = "case", # activity_id = "topicFile", # resource_id = "path", # timestamps = c("start", "complete")) # # #process_map(alog080, frequency("relative")) # # # Comparable artwork # alog083 <- activitylog(datlogs[datlogs$artwork == "083",], # case_id = "case", # activity_id = "topic", # resource_id = "path", # timestamps = c("start", "complete")) # artworks that have the same topics than Vermeer which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% c("artist", "details", "extra info", "komposition", "licht und farbe", "thema"), ]) != 0) == 6) #037 046 062 080 083 109 for (art in c("037", "046", "062", "080", "083", "109")) { alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], case_id = "case", activity_id = "topic", resource_id = "path", timestamps = c("start", "complete")) process_map(alog, frequency("relative")) } # Angewandte Kunst, Graphik, Gemälde, Kultur c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "", "Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst", "Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "", "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde", "Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde", "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde", "Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur", "Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde", "", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur") # BURSTS which.max(table(datlogs$date)) tmp <- datlogs[datlogs$date == "2017-02-12", ] # number of traces per case on 2017-02-12 rowSums(xtabs( ~ case + path, tmp) != 0) range(tmp$start) hours <- lubridate::hour(tmp$start) xtabs( ~ case + hours, tmp) # distribution of cases over the day colSums(xtabs( ~ case + hours, tmp) != 0) barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) aggregate(path ~ case + hours, tmp, length) tmp <- aggregate(path ~ case, datlogs, length) tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"]) tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"]) tmp[tmp$path > 200, ] plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) lattice::barchart(path ~ time, tmp, horizontal=F)