Worked on second presentation, added problems and plots, figs, etc.

This commit is contained in:
Nora Wickelmaier 2023-09-28 15:04:59 +02:00
parent a3c731c84f
commit 00a6f19cf6
5 changed files with 216 additions and 31 deletions

View File

@ -32,13 +32,17 @@ text(tmp, counts_artwork + 1500, c(datart$artwork))
# more interesting per museum in this case...
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1))
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1),
border = "transparent", col = "#0072B2")
dev.off()
### Dwell times/duration
set.seed(1033)
pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
# Plot a random subsample of the events. The sample size is capped at the
# number of available rows so that sample() cannot fail on smaller log files.
bwplot(I(duration/1000/60) ~ as.factor(event),
       datlogs[sample(nrow(datlogs), min(nrow(datlogs), 100000)), ],
       ylab = "Duration in min")
dev.off()
### Are there certain areas of the table that are touched most often?
@ -82,13 +86,15 @@ dev.off()
### How many visitors per day
# Cases per day
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
datcase <- aggregate(case ~ date + folder, datlogs, function(x) length(unique(x)))
pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2)
pdf("../figures/cases_per_day_8o8m.pdf", width = 20, height = 6, pointsize = 10)
# One panel per folder with vertical bars (cases per date).
# NOTE(review): `at = seq(1, 122, 10)` labels every 10th position up to 122;
# presumably 122 is the number of distinct dates -- confirm when data change.
barchart(case ~ date | folder, datcase, horizontal = FALSE,
         scales = list(x = list(rot = 90, at = seq(1, 122, 10)),
                       y = list(rot = 90)),
         border = "transparent", col = "#0072B2")
dev.off()
### Other stuff
## weird behavior of timeMs

View File

@ -9,7 +9,7 @@ plot(1:10, col = cc, pch = 16, cex = 2)
# Read data
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
sep = ";", header = TRUE)
datlogs$date <- as.Date(datlogs$date.start)
datlogs$date <- as.Date(datlogs$date)
datlogs$date.start <- as.POSIXct(datlogs$date.start)
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
@ -29,7 +29,8 @@ names(counts_artwork) <- datart$title
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000),
border = "white", col = "#0072B2")
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
dev.off()
@ -60,8 +61,11 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork)
bwplot(duration ~ as.factor(event), datlogs)
# in min
set.seed(1027)
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
# Plot a random subsample of the events. The sample size is capped at the
# number of available rows so that sample() cannot fail on smaller log files.
bwplot(I(duration/1000/60) ~ as.factor(event),
       datlogs[sample(nrow(datlogs), min(nrow(datlogs), 100000)), ],
       ylab = "Duration in min")
dev.off()
datlogs$daydiff <- c(NA, diff(datlogs$date))

View File

@ -1,26 +1,42 @@
#' ---
#' title: "Modelling log files with Process Mining"
#' author: "Nora Wickelmaier"
#' date: "`r Sys.Date()`"
#' output:
#' html_document:
#' toc: true
#' toc_float: true
#' pdf_document:
#' toc: true
#' number_sections: true
#' geometry: margin = 2.5cm
#' ---
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
#' # Read data
# Read data
dat <- read.table("../data/haum/event_logfiles_small.csv", sep = ";", header = TRUE)
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop <- as.POSIXct(dat$date.stop)
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
# sep = ";", header = TRUE)
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
sep = ";", header = TRUE)
dat0$date <- as.Date(dat0$date)
dat0$date.start <- as.POSIXct(dat0$date.start)
dat0$date.stop <- as.POSIXct(dat0$date.stop)
dat0$artwork <- sprintf("%03d", dat0$artwork)
#' # Creating event logs
# TODO: Write a function that closes events spanning different log files
# OR: Remove openTopic and openPopup events that do not start with a
# flipCard (AND openPopup events without openTopic event beforehand)
table(dat0[!duplicated(dat0$trace), "event"])
# flipCard move openPopup openTopic
# 45528 247718 981 3457
proportions(table(dat0[!duplicated(dat0$trace), "event"]))
tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
"openPopup"), ]
# Repeatedly drop the first row of every trace whose first event is an
# openTopic or openPopup, until no trace starts with one of these events.
# Progress (pass number and first-event table) is printed after each pass.
dat <- dat0
leading_events <- c("openTopic", "openPopup")
i <- 1
# Number of offending first rows found in the current pass. Deliberately not
# called `stop`, which would mask base::stop().
n_leading <- 1
while (n_leading > 0) {
  # First row of each trace that is an openTopic/openPopup event
  is_leading <- !duplicated(dat$trace) & dat$event %in% leading_events
  n_leading <- sum(is_leading)
  dat <- dat[!is_leading, ]
  print(i)
  i <- i + 1
  print(table(dat[!duplicated(dat$trace), "event"]))
}
# Creating event logs
library(bupaverse)
@ -86,9 +102,6 @@ animate_process(elog[elog$artwork %in% c("080", "054"), ],
range = c("black", "gray"))))
# --> not sure, yet, how to interpret this...
alog080 <- activitylog(dat[dat$artwork %in% "080", ],
#case_id = "case",
case_id = "trace",

101
code/check_traces.R Normal file
View File

@ -0,0 +1,101 @@
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
# Read data
dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
                  sep = ";", header = TRUE)
# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
#                   sep = ";", header = TRUE)

# Convert date columns and zero-pad the artwork id to three characters
dat$date       <- as.Date(dat$date)
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop  <- as.POSIXct(dat$date.stop)
dat$artwork    <- sprintf("%03d", dat$artwork)

library(bupaverse)

# Rename the timestamp columns to the names used for `timestamps` below.
# Renaming by name (not by position) keeps the mapping correct regardless of
# the column order in the input file.
new_names <- c(date.start = "start", date.stop = "complete")
to_rename <- names(dat) %in% names(new_names)
names(dat)[to_rename] <- unname(new_names[names(dat)[to_rename]])
# Export the process map of a single trace as a PDF file.
#
# Arguments:
#   trace   Trace identifier; the rows of `data` whose `trace` column equals
#           this value are used.
#   folder  Output location. The file name is pasted directly onto it
#           (paste0), so it has to end with a path separator.
#   data    Event data with the columns trace, event, artwork, start and
#           complete. Defaults to the global `dat`, so existing calls of the
#           form create_pdf(trace) keep working.
#
# Returns whatever rsvg::rsvg_pdf() returns; called for its side effect of
# writing `<folder><trace>.pdf`.
create_pdf <- function(trace, folder = "../figures/processmaps/", data = dat) {
  # rsvg_pdf() cannot write into a missing directory
  if (!dir.exists(folder)) {
    dir.create(folder, recursive = TRUE)
  }

  alog <- activitylog(data[which(data$trace == trace), ],
                      case_id     = "trace",
                      activity_id = "event",
                      resource_id = "artwork",
                      timestamps  = c("start", "complete"))
  map <- process_map(alog)

  # DOT source -> SVG string -> raw vector, which is what rsvg expects
  svg <- DiagrammeR::grViz(map$x$diagram) |>
    DiagrammeRsvg::export_svg() |>
    charToRaw()

  rsvg::rsvg_pdf(svg, paste0(folder, trace, ".pdf"))
}
# Check whether the process map of a single trace contains a suspicious
# transition and, if so, return the trace identifier.
#
# A trace is flagged when, in the DOT source of its process map,
#   * node "2" has an edge to the openTopic or openPopup node, or
#   * the flipCard node has an edge to the openPopup node.
# NOTE(review): node "2" is assumed to be the artificial start node, as in the
# process maps produced for these data -- confirm if processmapR changes its
# node numbering.
#
# Arguments:
#   trace  Trace identifier; the rows of `data` whose `trace` column equals
#          this value are used.
#   data   Event data with the columns trace, event, artwork, start and
#          complete. Defaults to the global `dat`, so existing calls of the
#          form find_trace(trace) keep working.
#
# Returns `trace` if the trace is flagged and NULL otherwise.
find_trace <- function(trace, data = dat) {
  alog <- activitylog(data[which(data$trace == trace), ],
                      case_id     = "trace",
                      activity_id = "event",
                      resource_id = "artwork",
                      timestamps  = c("start", "complete"))
  dot_lines <- strsplit(process_map(alog)$x$diagram, "\n")[[1]]

  # Node definitions carry `[label` after a six-character prefix; edge
  # definitions start with a quoted single-digit id followed by `->`.
  node_lines <- grep("^.{6}[[]label", dot_lines, value = TRUE)
  edge_lines <- grep("^.{1}[1-6].->", dot_lines, value = TRUE)

  # Id (as character) of the node whose label line mentions `activity`;
  # character(0) if the activity does not occur in this trace.
  node_id <- function(activity) {
    gsub("^.{3}([1-6]).*", "\\1", grep(activity, node_lines, value = TRUE))
  }

  # Ids of all nodes that are reached by an edge leaving node `id`.
  successors <- function(id) {
    outgoing <- grep(paste0("^.{1}[", id, "].->.[1-6]"), edge_lines,
                     value = TRUE)
    gsub(paste0("^.{1}[", id, "].->.([1-6]).*"), "\\1", outgoing)
  }

  id_topic <- node_id("openTopic")
  id_popup <- node_id("openPopup")
  id_flip  <- node_id("flipCard")

  # Trace starts directly with openTopic or openPopup
  starts_open <- any(c(id_topic, id_popup) %in% successors("2"))

  # flipCard is followed directly by openPopup; only checked when the trace
  # contains a flipCard node at all
  flip_to_popup <- length(id_flip) > 0 &&
    any(id_popup %in% successors(id_flip))

  if (starts_open || flip_to_popup) {
    trace
  } else {
    NULL
  }
}
# Flag every trace whose process map contains a suspicious transition.
# find_trace() returns either the trace id or NULL, so a list-returning apply
# keeps the result type stable; unlist() then drops the NULL entries.
ctrace <- pbapply::pblapply(unique(dat$trace), find_trace)
flagged_traces <- unlist(ctrace)
flagged_traces
length(flagged_traces)

# create plots
for (trace in flagged_traces) {
  create_pdf(trace)
}

# Build the process map for the given rows of the event data.
build_map <- function(data) {
  alog <- activitylog(data,
                      case_id     = "trace",
                      activity_id = "event",
                      resource_id = "artwork",
                      timestamps  = c("start", "complete"))
  process_map(alog)
}

# Render a DOT diagram (DOT source string or path to a .gv file) to a PDF
# file; further arguments (e.g. width, height) are passed to rsvg::rsvg_pdf().
render_map_pdf <- function(diagram, file, ...) {
  svg <- DiagrammeR::grViz(diagram) |>
    DiagrammeRsvg::export_svg() |>
    charToRaw()
  rsvg::rsvg_pdf(svg, file, ...)
}

# Process map of all traces
map <- build_map(dat)
render_map_pdf(map$x$diagram, "../figures/processmap_haum.pdf",
               width = 10, height = 5)

# adjusted colors
# The .gv file is meant to be edited after it has been written. It is only
# created when it does not exist yet, so that re-running the script does not
# overwrite the adjustments. Delete the file to regenerate it from `map`.
gv_file <- "process_map_haum.gv"
if (!file.exists(gv_file)) {
  writeLines(map$x$diagram, gv_file)
}
render_map_pdf(gv_file, "../figures/processmap_haum_adjusted.pdf",
               width = 10, height = 5)

# Process map without the flagged traces
map <- build_map(dat[!dat$trace %in% flagged_traces, ])
render_map_pdf(map$x$diagram, "../figures/processmap_haum_cleaned.pdf",
               width = 12, height = 5)

61
code/process_map_haum.gv Normal file
View File

@ -0,0 +1,61 @@
// Graphviz DOT source of the process map for the HAUM event log.
// check_traces.R writes this file via writeLines(map$x$diagram, ...) and
// renders it to ../figures/processmap_haum_adjusted.pdf.
// NOTE(review): the edges Start -> openPopup, Start -> openTopic and
// flipCard -> openPopup use color red and penwidth 3 instead of dodgerblue4;
// presumably these were adjusted by hand to highlight them - confirm.
digraph {
graph [layout = "dot",
outputorder = "edgesfirst",
bgcolor = "white",
rankdir = "LR"]
node [fontname = "Helvetica",
fontsize = "10",
shape = "circle",
fixedsize = "true",
width = "0.5",
style = "filled",
fillcolor = "aliceblue",
color = "gray70",
fontcolor = "gray50"]
edge [fontname = "Helvetica",
fontsize = "8",
weight = "1.5",
color = "gray80",
arrowsize = "0.5"]
"1" [label = "End", shape = "circle", style = "rounded,filled", fontcolor = "brown4", color = "brown4", tooltip = "ARTIFICIAL_END
8681", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFFFFF"]
"2" [label = "Start", shape = "circle", style = "rounded,filled", fontcolor = "chartreuse4", color = "chartreuse4", tooltip = "ARTIFICIAL_START
8681", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFFFFF"]
"3" [label = "flipCard
3457", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "flipCard
3457", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
"4" [label = "move
29954", shape = "rectangle", style = "rounded,filled", fontcolor = "white", color = "grey", tooltip = "move
29954", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#034E7B"]
"5" [label = "openPopup
2581", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "openPopup
2581", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
"6" [label = "openTopic
3370", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "openTopic
3370", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
"2"->"3" [label = "1538", penwidth = "1.32182464950827", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"2"->"4" [label = "7139", penwidth = "2.49382716049383", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"2"->"5" [label = "1", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"2"->"6" [label = "3", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"3"->"1" [label = "586", penwidth = "1.12261979493618", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"3"->"4" [label = "1703", penwidth = "1.35635070098347", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"3"->"5" [label = "2", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"3"->"6" [label = "1166", penwidth = "1.24398409709144", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"4"->"1" [label = "7222", penwidth = "2.51119481062984", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"4"->"3" [label = "1919", penwidth = "1.40154844109646", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"4"->"4" [label = "19116", penwidth = "5", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"4"->"5" [label = "470", penwidth = "1.09834693450513", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"4"->"6" [label = "1227", penwidth = "1.25674827369743", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"5"->"1" [label = "195", penwidth = "1.04080351537979", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"5"->"4" [label = "685", penwidth = "1.1433354258213", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"5"->"5" [label = "1514", penwidth = "1.3168026783846", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"5"->"6" [label = "187", penwidth = "1.03912952500523", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"6"->"1" [label = "678", penwidth = "1.14187068424357", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"6"->"4" [label = "1311", penwidth = "1.27432517263026", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"6"->"5" [label = "594", penwidth = "1.12429378531073", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
"6"->"6" [label = "787", penwidth = "1.16467880309688", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
}