Worked on talk for next meeting with AK
This commit is contained in:
parent
fb35fcfe4a
commit
3bdb138b69
@ -5,7 +5,7 @@
|
|||||||
# (3) Process Mining
|
# (3) Process Mining
|
||||||
#
|
#
|
||||||
# input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv
|
# input: ../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv
|
||||||
# output:
|
# output:
|
||||||
#
|
#
|
||||||
# last mod: 2023-11-02, NW
|
# last mod: 2023-11-02, NW
|
||||||
|
|
||||||
@ -19,9 +19,37 @@ library(bupaverse)
|
|||||||
# Overall Research Question: How do museum visitors interact with the
|
# Overall Research Question: How do museum visitors interact with the
|
||||||
# artworks presented on the MTT?
|
# artworks presented on the MTT?
|
||||||
|
|
||||||
|
#---------------------------------------------------------------------------
|
||||||
|
# https://billster45.github.io/rapid_r_data_vis_book/process-mining.html
|
||||||
|
# TODO: checkout different specifications for process maps:
|
||||||
|
|
||||||
|
# edeaR::filter_trace_frequency(percentage = 0.9)
|
||||||
|
|
||||||
|
# processmapR::process_map(type_nodes = processmapR::frequency("absolute"),
|
||||||
|
# sec_nodes = processmapR::frequency("relative"),
|
||||||
|
# type_edges = processmapR::frequency("absolute"),
|
||||||
|
# sec_edges = processmapR::frequency("relative"),
|
||||||
|
# rankdir = "TB")
|
||||||
|
|
||||||
|
# processmapR::trace_explorer(coverage = 1,
|
||||||
|
# type = "frequent",
|
||||||
|
# .abbreviate = T)
|
||||||
|
|
||||||
|
# edeaR::resource_frequency("resource-activity") %>%
|
||||||
|
# plot()
|
||||||
|
|
||||||
|
# edeaR::resource_frequency(level = "case") %>%
|
||||||
|
# plot()
|
||||||
|
|
||||||
|
# edeaR::resource_specialisation(level = "activity") %>%
|
||||||
|
# plot()
|
||||||
|
|
||||||
|
# Functions for conformance checking:
|
||||||
|
# https://bupaverse.github.io/processcheckR/
|
||||||
|
|
||||||
|
#---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
# What about popups?
|
|
||||||
|
|
||||||
|
|
||||||
# Distribution of bursts
|
# Distribution of bursts
|
||||||
@ -93,8 +121,11 @@ sum(dat$fileId.start != dat$fileId.stop, na.rm = TRUE) / nrow(dat)
|
|||||||
|
|
||||||
#--------------- (3.1) Check data quality ---------------
|
#--------------- (3.1) Check data quality ---------------
|
||||||
|
|
||||||
|
dat$trace2 <- dat$trace
|
||||||
|
dat$trace <- NULL
|
||||||
|
|
||||||
alog <- activitylog(dat,
|
alog <- activitylog(dat,
|
||||||
case_id = "trace",
|
case_id = "trace2",
|
||||||
activity_id = "event",
|
activity_id = "event",
|
||||||
#resource_id = "case",
|
#resource_id = "case",
|
||||||
resource_id = "artwork",
|
resource_id = "artwork",
|
||||||
@ -103,6 +134,37 @@ alog <- activitylog(dat,
|
|||||||
# process_map(alog, frequency("relative"))
|
# process_map(alog, frequency("relative"))
|
||||||
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
|
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
|
||||||
|
|
||||||
|
alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)
|
||||||
|
|
||||||
|
processmapR::process_map(alogf, # alog,
|
||||||
|
type_nodes = processmapR::frequency("absolute"),
|
||||||
|
sec_nodes = processmapR::frequency("relative"),
|
||||||
|
type_edges = processmapR::frequency("absolute"),
|
||||||
|
sec_edges = processmapR::frequency("relative"),
|
||||||
|
rankdir = "TB")
|
||||||
|
|
||||||
|
alog_no_move <- alog[alog$event != "move", ]
|
||||||
|
set.seed(1447)
|
||||||
|
processmapR::trace_explorer(alog_no_move[alog_no_move$trace2 %in%
|
||||||
|
sample(unique(alog_no_move$trace2), 400),],
|
||||||
|
coverage = 1, type = "frequent",
|
||||||
|
abbreviate = T)
|
||||||
|
|
||||||
|
ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
|
||||||
|
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
|
||||||
|
plot(ra_no_move)
|
||||||
|
|
||||||
|
ra <- edeaR::resource_frequency(alog, "resource-activity")
|
||||||
|
plot(ra)
|
||||||
|
|
||||||
|
heatmap(xtabs(relative_activity ~ artwork + event, ra))
|
||||||
|
heatmap(xtabs(relative_resource ~ artwork + event, ra_no_move))
|
||||||
|
heatmap(xtabs(relative_activity ~ artwork + event, ra_no_move))
|
||||||
|
|
||||||
|
aggregate(relative_activity ~ event, ra, sum)
|
||||||
|
aggregate(relative_resource ~ artwork, ra, sum)
|
||||||
|
|
||||||
|
|
||||||
#--------------- (3.2) Interactions for different artworks ---------------
|
#--------------- (3.2) Interactions for different artworks ---------------
|
||||||
# Do interaction patterns for events per trace look different for different
|
# Do interaction patterns for events per trace look different for different
|
||||||
# artworks?
|
# artworks?
|
||||||
@ -345,25 +407,25 @@ map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
|
|||||||
# activity_id = "topic",
|
# activity_id = "topic",
|
||||||
# resource_id = "trace",
|
# resource_id = "trace",
|
||||||
# timestamps = c("start", "complete"))
|
# timestamps = c("start", "complete"))
|
||||||
#
|
#
|
||||||
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
|
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
# alog080 <- activitylog(dat[dat$artwork == "080",],
|
||||||
# case_id = "case",
|
# case_id = "case",
|
||||||
# activity_id = "topicFile",
|
# activity_id = "topicFile",
|
||||||
# resource_id = "trace",
|
# resource_id = "trace",
|
||||||
# timestamps = c("start", "complete"))
|
# timestamps = c("start", "complete"))
|
||||||
#
|
#
|
||||||
# #process_map(alog080, frequency("relative"))
|
# #process_map(alog080, frequency("relative"))
|
||||||
#
|
#
|
||||||
# # Comparable artwork
|
# # Comparable artwork
|
||||||
# alog083 <- activitylog(dat[dat$artwork == "083",],
|
# alog083 <- activitylog(dat[dat$artwork == "083",],
|
||||||
# case_id = "case",
|
# case_id = "case",
|
||||||
# activity_id = "topic",
|
# activity_id = "topic",
|
||||||
# resource_id = "trace",
|
# resource_id = "trace",
|
||||||
# timestamps = c("start", "complete"))
|
# timestamps = c("start", "complete"))
|
||||||
#
|
#
|
||||||
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
|
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")
|
||||||
|
|
||||||
# artworks that have the same topics than Vermeer
|
# artworks that have the same topics than Vermeer
|
||||||
@ -371,7 +433,7 @@ which(rowSums(xtabs( ~ artwork + topic, dat[dat$topic %in%
|
|||||||
c("artist", "details", "extra info", "komposition",
|
c("artist", "details", "extra info", "komposition",
|
||||||
"licht und farbe", "thema"), ]) != 0) == 6)
|
"licht und farbe", "thema"), ]) != 0) == 6)
|
||||||
|
|
||||||
#037 046 062 080 083 109
|
#037 046 062 080 083 109
|
||||||
|
|
||||||
for (art in c("037", "046", "062", "080", "083", "109")) {
|
for (art in c("037", "046", "062", "080", "083", "109")) {
|
||||||
|
|
||||||
@ -380,7 +442,7 @@ for (art in c("037", "046", "062", "080", "083", "109")) {
|
|||||||
activity_id = "topic",
|
activity_id = "topic",
|
||||||
resource_id = "trace",
|
resource_id = "trace",
|
||||||
timestamps = c("start", "complete"))
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
|
map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -412,14 +474,28 @@ tmp <- dat[dat$date == "2017-02-12", ]
|
|||||||
rowSums(xtabs( ~ case + trace, tmp) != 0)
|
rowSums(xtabs( ~ case + trace, tmp) != 0)
|
||||||
|
|
||||||
range(tmp$start)
|
range(tmp$start)
|
||||||
startcut <- cut(tmp$start, 7)
|
hours <- lubridate::hour(tmp$start)
|
||||||
xtabs( ~ case + startcut, tmp)
|
xtabs( ~ case + hours, tmp)
|
||||||
|
|
||||||
# distribution of cases over the day
|
# distribution of cases over the day
|
||||||
colSums(xtabs( ~ case + startcut, tmp) != 0)
|
colSums(xtabs( ~ case + hours, tmp) != 0)
|
||||||
barplot(colSums(xtabs( ~ case + startcut, tmp) != 0))
|
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))
|
||||||
|
|
||||||
|
aggregate(trace ~ case + hours, tmp, length)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
tmp <- aggregate(trace2 ~ case, dat, length)
|
||||||
|
tmp$date <- as.Date(dat[!duplicated(dat$case), "start"])
|
||||||
|
tmp$time <- lubridate::hour(dat[!duplicated(dat$case), "start"])
|
||||||
|
|
||||||
|
tmp[tmp$trace2 > 200, ]
|
||||||
|
|
||||||
|
plot(trace2 ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))
|
||||||
|
|
||||||
|
lattice::barchart(trace2 ~ time, tmp, horizontal=F)
|
||||||
|
|
||||||
aggregate(trace ~ case + startcut, tmp, length)
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################################
|
###########################################################################
|
||||||
|
@ -210,8 +210,8 @@ pdf("../figures/fun_depend_mtt.pdf", width = 8, height = 4, pointsize = 10)
|
|||||||
foodweb(where = "package:mtt",
|
foodweb(where = "package:mtt",
|
||||||
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
prune = c("parse_logfiles", "create_eventlogs", "extract_artworks",
|
||||||
"extract_topics", "add_topic"),
|
"extract_topics", "add_topic"),
|
||||||
expand.ybox = 1.8, #cex = .6,
|
#expand.ybox = 1.8, #cex = .6,
|
||||||
border = TRUE,
|
#border = TRUE,
|
||||||
#boxcolor = "gray",
|
#boxcolor = "gray",
|
||||||
lwd = 2, mai = c(0, 0, 0, 0))
|
lwd = 2, mai = c(0, 0, 0, 0))
|
||||||
dev.off()
|
dev.off()
|
||||||
|
Loading…
Reference in New Issue
Block a user