Worked on talk for next meeting with AK

This commit is contained in:
Nora Wickelmaier 2023-11-10 18:30:15 +01:00
parent fb35fcfe4a
commit 3bdb138b69
2 changed files with 94 additions and 18 deletions

View File

@ -5,7 +5,7 @@
# (3) Process Mining
#
# input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv
# output:
# output:
#
# last mod: 2023-11-02, NW
@ -19,9 +19,37 @@ library(bupaverse)
# Overall Research Question: How do museum visitors interact with the
# artworks presented on the MTT?
#---------------------------------------------------------------------------
# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html
# TODO: checkout different specifications for process maps:
# edeaR::filter_trace_frequency(percentage = 0.9)
# processmapR::process_map(type_nodes = processmapR::frequency("absolute"),
# sec_nodes = processmapR::frequency("relative"),
# type_edges = processmapR::frequency("absolute"),
# sec_edges = processmapR::frequency("relative"),
# rankdir = "TB")
# processmapR::trace_explorer(coverage = 1,
# type = "frequent",
# .abbreviate = T)
# edeaR::resource_frequency("resource-activity") %>%
# plot()
# edeaR::resource_frequency(level = "case") %>%
# plot()
# edeaR::resource_specialisation(level = "activity") %>%
# plot()
# Functions for conformance checking:
# https://bupaverse.github.io/processcheckR/
#---------------------------------------------------------------------------
# What about popups?
# Distribution of bursts
@ -93,8 +121,11 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat)
#--------------- (3.1) Check data quality ---------------
dat$trace2 <- dat$trace
dat$trace <- NULL
alog <- activitylog(dat,
case_id = "trace",
case_id = "trace2",
activity_id = "event",
#resource_id = "case",
resource_id = "artwork",
@ -103,6 +134,37 @@ alog <- activitylog(dat,
# process_map(alog, frequency("relative"))
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)
processmapR::process_map(alogf, # alog,
type_nodes = processmapR::frequency("absolute"),
sec_nodes = processmapR::frequency("relative"),
type_edges = processmapR::frequency("absolute"),
sec_edges = processmapR::frequency("relative"),
rankdir = "TB")
alog_no_move <- alog[alog$event != "move", ]
set.seed(1447)
processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in%
sample(unique(alog_no_move$trace2), 400),],
coverage = 1, type = "frequent",
abbreviate = T)
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
plot(ra_no_move)
ra <- edeaR::resource_frequency(alog, "resource-activity")
plot(ra)
heatmap(xtabs(relative_activity ~ artwork + event, ra))
heatmap(xtabs(relative_resource ~ artwork + event, ra_no_move))
heatmap(xtabs(relative_activity ~ artwork + event, ra_no_move))
aggregate(relative_activity ~ event, ra, sum)
aggregate(relative_resource ~ artwork, ra, sum)
#--------------- (3.2) Interactions for different artworks ---------------
# Do interaction patterns for events per trace look different for different
# artworks?
@ -345,25 +407,25 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
# activity_id = "topic",
# resource_id = "trace",
# timestamps = c("start", "complete"))
#
#
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
#
#
#
#
# alog080 <- activitylog(dat[dat$artwork == "080",],
# case_id = "case",
# activity_id = "topicFile",
# resource_id = "trace",
# timestamps = c("start", "complete"))
#
#
# #process_map(alog080, frequency("relative"))
#
#
# # Comparable artwork
# alog083 <- activitylog(dat[dat$artwork == "083",],
# case_id = "case",
# activity_id = "topic",
# resource_id = "trace",
# timestamps = c("start", "complete"))
#
#
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
# artworks that have the same topics than Vermeer
@ -371,7 +433,7 @@ which(rowSums(xtabs( ~ artwork + topic, dat[dat$topic %in%
c("artist", "details", "extra info", "komposition",
"licht und farbe", "thema"), ]) != 0) == 6)
#037 046 062 080 083 109
#037 046 062 080 083 109
for (art in c("037", "046", "062", "080", "083", "109")) {
@ -380,7 +442,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) {
activity_id = "topic",
resource_id = "trace",
timestamps = c("start", "complete"))
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
}
@ -412,14 +474,28 @@ tmp <- dat[dat$date == "2017-02-12", ]
rowSums(xtabs( ~ case + trace, tmp) != 0)
range(tmp$start)
startcut <- cut(tmp$start, 7)
xtabs( ~ case + startcut, tmp)
hours <- lubridate::hour(tmp$start)
xtabs( ~ case + hours, tmp)
# distribution of cases over the day
colSums(xtabs( ~ case + startcut, tmp) != 0)
barplot(colSums(xtabs( ~ case + startcut, tmp) != 0))
colSums(xtabs( ~ case + hours, tmp) != 0)
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))
aggregate(trace ~ case + hours, tmp, length)
tmp <- aggregate(trace2 ~ case, dat, length)
tmp$date <- as.Date(dat[!duplicated(dat$case), "start"])
tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"])
tmp[tmp$trace2 > 200, ]
plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
lattice::barchart(trace2 ~ time, tmp, horizontal=F)
aggregate(trace ~ case + startcut, tmp, length)
###########################################################################

View File

@ -210,8 +210,8 @@ pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
foodweb(where = "package:mtt",
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
"extract_topics", "add_topic"),
expand.ybox = 1.8, #cex = .6,
border = TRUE,
#expand.ybox = 1.8, #cex = .6,
#border = TRUE,
#boxcolor = "gray",
lwd = 2, mai = c(0, 0, 0, 0))
dev.off()