Worked on talk for next meeting with AK
This commit is contained in:
parent
fb35fcfe4a
commit
3bdb138b69
@ -5,7 +5,7 @@
|
||||
# (3) Process Mining
|
||||
#
|
||||
# input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv
|
||||
# output:
|
||||
# output:
|
||||
#
|
||||
# last mod: 2023-11-02, NW
|
||||
|
||||
@ -19,9 +19,37 @@ library(bupaverse)
|
||||
# Overall Research Question: How do museum visitors interact with the
|
||||
# artworks presented on the MTT?
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html
|
||||
# TODO: checkout different specifications for process maps:
|
||||
|
||||
# edeaR::filter_trace_frequency(percentage = 0.9)
|
||||
|
||||
# processmapR::process_map(type_nodes = processmapR::frequency("absolute"),
|
||||
# sec_nodes = processmapR::frequency("relative"),
|
||||
# type_edges = processmapR::frequency("absolute"),
|
||||
# sec_edges = processmapR::frequency("relative"),
|
||||
# rankdir = "TB")
|
||||
|
||||
# processmapR::trace_explorer(coverage = 1,
|
||||
# type = "frequent",
|
||||
# .abbreviate = T)
|
||||
|
||||
# edeaR::resource_frequency("resource-activity") %>%
|
||||
# plot()
|
||||
|
||||
# edeaR::resource_frequency(level = "case") %>%
|
||||
# plot()
|
||||
|
||||
# edeaR::resource_specialisation(level = "activity") %>%
|
||||
# plot()
|
||||
|
||||
# Functions for conformance checking:
|
||||
# https://bupaverse.github.io/processcheckR/
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# What about popups?
|
||||
|
||||
|
||||
# Distribution of bursts
|
||||
@ -93,8 +121,11 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat)
|
||||
|
||||
#--------------- (3.1) Check data quality ---------------
|
||||
|
||||
dat$trace2 <- dat$trace
|
||||
dat$trace <- NULL
|
||||
|
||||
alog <- activitylog(dat,
|
||||
case_id = "trace",
|
||||
case_id = "trace2",
|
||||
activity_id = "event",
|
||||
#resource_id = "case",
|
||||
resource_id = "artwork",
|
||||
@ -103,6 +134,37 @@ alog <- activitylog(dat,
|
||||
# process_map(alog, frequency("relative"))
|
||||
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
|
||||
|
||||
alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)
|
||||
|
||||
processmapR::process_map(alogf, # alog,
|
||||
type_nodes = processmapR::frequency("absolute"),
|
||||
sec_nodes = processmapR::frequency("relative"),
|
||||
type_edges = processmapR::frequency("absolute"),
|
||||
sec_edges = processmapR::frequency("relative"),
|
||||
rankdir = "TB")
|
||||
|
||||
alog_no_move <- alog[alog$event != "move", ]
|
||||
set.seed(1447)
|
||||
processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in%
|
||||
sample(unique(alog_no_move$trace2), 400),],
|
||||
coverage = 1, type = "frequent",
|
||||
abbreviate = T)
|
||||
|
||||
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
|
||||
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
|
||||
plot(ra_no_move)
|
||||
|
||||
ra <- edeaR::resource_frequency(alog, "resource-activity")
|
||||
plot(ra)
|
||||
|
||||
heatmap(xtabs(relative_activity ~ artwork + event, ra))
|
||||
heatmap(xtabs(relative_resource ~ artwork + event, ra_no_move))
|
||||
heatmap(xtabs(relative_activity ~ artwork + event, ra_no_move))
|
||||
|
||||
aggregate(relative_activity ~ event, ra, sum)
|
||||
aggregate(relative_resource ~ artwork, ra, sum)
|
||||
|
||||
|
||||
#--------------- (3.2) Interactions for different artworks ---------------
|
||||
# Do interaction patterns for events per trace look different for different
|
||||
# artworks?
|
||||
@ -345,25 +407,25 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
|
||||
# activity_id = "topic",
|
||||
# resource_id = "trace",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
#
|
||||
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topicFile",
|
||||
# resource_id = "trace",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
#
|
||||
# #process_map(alog080, frequency("relative"))
|
||||
#
|
||||
#
|
||||
# # Comparable artwork
|
||||
# alog083 <- activitylog(dat[dat$artwork == "083",],
|
||||
# case_id = "case",
|
||||
# activity_id = "topic",
|
||||
# resource_id = "trace",
|
||||
# timestamps = c("start", "complete"))
|
||||
#
|
||||
#
|
||||
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
|
||||
|
||||
# artworks that have the same topics than Vermeer
|
||||
@ -371,7 +433,7 @@ which(rowSums(xtabs( ~ artwork + topic, dat[dat$topic %in%
|
||||
c("artist", "details", "extra info", "komposition",
|
||||
"licht und farbe", "thema"), ]) != 0) == 6)
|
||||
|
||||
#037 046 062 080 083 109
|
||||
#037 046 062 080 083 109
|
||||
|
||||
for (art in c("037", "046", "062", "080", "083", "109")) {
|
||||
|
||||
@ -380,7 +442,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) {
|
||||
activity_id = "topic",
|
||||
resource_id = "trace",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
|
||||
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
|
||||
}
|
||||
|
||||
@ -412,14 +474,28 @@ tmp <- dat[dat$date == "2017-02-12", ]
|
||||
rowSums(xtabs( ~ case + trace, tmp) != 0)
|
||||
|
||||
range(tmp$start)
|
||||
startcut <- cut(tmp$start, 7)
|
||||
xtabs( ~ case + startcut, tmp)
|
||||
hours <- lubridate::hour(tmp$start)
|
||||
xtabs( ~ case + hours, tmp)
|
||||
|
||||
# distribution of cases over the day
|
||||
colSums(xtabs( ~ case + startcut, tmp) != 0)
|
||||
barplot(colSums(xtabs( ~ case + startcut, tmp) != 0))
|
||||
colSums(xtabs( ~ case + hours, tmp) != 0)
|
||||
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))
|
||||
|
||||
aggregate(trace ~ case + hours, tmp, length)
|
||||
|
||||
|
||||
|
||||
|
||||
tmp <- aggregate(trace2 ~ case, dat, length)
|
||||
tmp$date <- as.Date(dat[!duplicated(dat$case), "start"])
|
||||
tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"])
|
||||
|
||||
tmp[tmp$trace2 > 200, ]
|
||||
|
||||
plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
|
||||
|
||||
lattice::barchart(trace2 ~ time, tmp, horizontal=F)
|
||||
|
||||
aggregate(trace ~ case + startcut, tmp, length)
|
||||
|
||||
|
||||
###########################################################################
|
||||
|
@ -210,8 +210,8 @@ pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
|
||||
foodweb(where = "package:mtt",
|
||||
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
||||
"extract_topics", "add_topic"),
|
||||
expand.ybox = 1.8, #cex = .6,
|
||||
border = TRUE,
|
||||
#expand.ybox = 1.8, #cex = .6,
|
||||
#border = TRUE,
|
||||
#boxcolor = "gray",
|
||||
lwd = 2, mai = c(0, 0, 0, 0))
|
||||
dev.off()
|
||||
|
Loading…
Reference in New Issue
Block a user