Worked on second presentation, added problems and plots, figs, etc.
This commit is contained in:
parent
a3c731c84f
commit
00a6f19cf6
@ -32,13 +32,17 @@ text(tmp, counts_artwork + 1500, c(datart$artwork))
|
|||||||
# more interesting per museum in this case...
|
# more interesting per museum in this case...
|
||||||
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
|
counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
|
||||||
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
|
pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
|
||||||
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1))
|
barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1),
|
||||||
|
border = "transparent", col = "#0072B2")
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
### Dwell times/duration
|
### Dwell times/duration
|
||||||
|
set.seed(1033)
|
||||||
|
|
||||||
pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
|
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs[sample(nrow(datlogs), 100000), ],
|
||||||
|
ylab = "Duration in min")
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
### Are there certain areas of the table that are touched most often?
|
### Are there certain areas of the table that are touched most often?
|
||||||
@ -82,13 +86,15 @@ dev.off()
|
|||||||
### How many visitors per day
|
### How many visitors per day
|
||||||
|
|
||||||
# Cases per day
|
# Cases per day
|
||||||
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
|
datcase <- aggregate(case ~ date + folder, datlogs, function(x) length(unique(x)))
|
||||||
|
|
||||||
pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10)
|
pdf("../figures/cases_per_day_8o8m.pdf", width = 20, height = 6, pointsize = 10)
|
||||||
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
|
barchart(case ~ date | folder, datcase, horizontal = F,
|
||||||
plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2)
|
scales = list(x = list(rot = 90, at = seq(1, 122, 10)), y = list(rot = 90)),
|
||||||
|
border = "transparent", col = "#0072B2")
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
### Other stuff
|
### Other stuff
|
||||||
|
|
||||||
## weird behavior of timeMs
|
## weird behavior of timeMs
|
||||||
|
@ -9,7 +9,7 @@ plot(1:10, col = cc, pch = 16, cex = 2)
|
|||||||
# Read data
|
# Read data
|
||||||
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||||
sep = ";", header = TRUE)
|
sep = ";", header = TRUE)
|
||||||
datlogs$date <- as.Date(datlogs$date.start)
|
datlogs$date <- as.Date(datlogs$date)
|
||||||
datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
datlogs$date.start <- as.POSIXct(datlogs$date.start)
|
||||||
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
|
||||||
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
|
datlogs$artwork <- sprintf("%03d", datlogs$artwork)
|
||||||
@ -29,7 +29,8 @@ names(counts_artwork) <- datart$title
|
|||||||
|
|
||||||
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
|
pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
|
||||||
par(mai = c(5, .6, .1, .1))
|
par(mai = c(5, .6, .1, .1))
|
||||||
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
|
tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000),
|
||||||
|
border = "white", col = "#0072B2")
|
||||||
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
|
text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
@ -60,8 +61,11 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork)
|
|||||||
bwplot(duration ~ as.factor(event), datlogs)
|
bwplot(duration ~ as.factor(event), datlogs)
|
||||||
# in min
|
# in min
|
||||||
|
|
||||||
|
set.seed(1027)
|
||||||
|
|
||||||
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
|
||||||
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
|
bwplot(I(duration/1000/60) ~ as.factor(event), datlogs[sample(nrow(datlogs), 100000), ],
|
||||||
|
ylab = "Duration in min")
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
datlogs$daydiff <- c(NA, diff(datlogs$date))
|
datlogs$daydiff <- c(NA, diff(datlogs$date))
|
||||||
|
@ -1,26 +1,42 @@
|
|||||||
#' ---
|
|
||||||
#' title: "Modelling log files with Process Mining"
|
|
||||||
#' author: "Nora Wickelmaier"
|
|
||||||
#' date: "`r Sys.Date()`"
|
|
||||||
#' output:
|
|
||||||
#' html_document:
|
|
||||||
#' toc: true
|
|
||||||
#' toc_float: true
|
|
||||||
#' pdf_document:
|
|
||||||
#' toc: true
|
|
||||||
#' number_sections: true
|
|
||||||
#' geometry: margin = 2.5cm
|
|
||||||
#' ---
|
|
||||||
|
|
||||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
||||||
|
|
||||||
#' # Read data
|
# Read data
|
||||||
|
|
||||||
dat <- read.table("../data/haum/event_logfiles_small.csv", sep = ";", header = TRUE)
|
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||||
dat$date.start <- as.POSIXct(dat$date.start)
|
# sep = ";", header = TRUE)
|
||||||
dat$date.stop <- as.POSIXct(dat$date.stop)
|
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
|
||||||
|
sep = ";", header = TRUE)
|
||||||
|
dat0$date <- as.Date(dat0$date)
|
||||||
|
dat0$date.start <- as.POSIXct(dat0$date.start)
|
||||||
|
dat0$date.stop <- as.POSIXct(dat0$date.stop)
|
||||||
|
dat0$artwork <- sprintf("%03d", dat0$artwork)
|
||||||
|
|
||||||
#' # Creating event logs
|
# TODO: Write a function that closes events spanning different log files
|
||||||
|
# OR: Remove openTopic and OpenPopup events that do not start with a
|
||||||
|
# flipCard (AND openPopup events without openTopic event beforehand)
|
||||||
|
|
||||||
|
table(dat0[!duplicated(dat0$trace), "event"])
|
||||||
|
# flipCard move openPopup openTopic
|
||||||
|
# 45528 247718 981 3457
|
||||||
|
proportions(table(dat0[!duplicated(dat0$trace), "event"]))
|
||||||
|
|
||||||
|
tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
|
||||||
|
"openPopup"), ]
|
||||||
|
|
||||||
|
# Repeatedly drop the first event of every trace as long as that event is
# an openTopic or openPopup. After each pass the pass number and the
# distribution of first events per trace are printed. The loop ends after
# the first pass in which nothing had to be removed.
dat <- dat0
i <- 1
stop <- 1

repeat {
  # Number of traces that currently start with openTopic/openPopup
  stop <- sum(!duplicated(dat$trace) & dat$event %in% c("openTopic", "openPopup"))
  # Keep a row if it is not the first row of its trace or if its event is
  # neither openTopic nor openPopup
  dat <- dat[duplicated(dat$trace) |
             !(dat$event %in% c("openTopic", "openPopup")), ]
  print(i)
  i <- i + 1
  print(table(dat[!duplicated(dat$trace), "event"]))
  if (!(stop > 0)) {
    break
  }
}
|
||||||
|
|
||||||
|
# Creating event logs
|
||||||
|
|
||||||
library(bupaverse)
|
library(bupaverse)
|
||||||
|
|
||||||
@ -86,9 +102,6 @@ animate_process(elog[elog$artwork %in% c("080", "054"), ],
|
|||||||
range = c("black", "gray"))))
|
range = c("black", "gray"))))
|
||||||
# --> not sure, yet, how to interpret this...
|
# --> not sure, yet, how to interpret this...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
alog080 <- activitylog(dat[dat$artwork %in% "080", ],
|
alog080 <- activitylog(dat[dat$artwork %in% "080", ],
|
||||||
#case_id = "case",
|
#case_id = "case",
|
||||||
case_id = "trace",
|
case_id = "trace",
|
||||||
|
101
code/check_traces.R
Normal file
101
code/check_traces.R
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
|
||||||
|
|
||||||
|
# Read data
|
||||||
|
|
||||||
|
dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||||
|
sep = ";", header = TRUE)
|
||||||
|
# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
|
||||||
|
# sep = ";", header = TRUE)
|
||||||
|
dat$date <- as.Date(dat$date)
|
||||||
|
dat$date.start <- as.POSIXct(dat$date.start)
|
||||||
|
dat$date.stop <- as.POSIXct(dat$date.stop)
|
||||||
|
dat$artwork <- sprintf("%03d", dat$artwork)
|
||||||
|
|
||||||
|
library(bupaverse)
|
||||||
|
|
||||||
|
# Rename the timestamp columns to the names used in the activitylog() calls
# below. match() pairs each old name with its new name explicitly, so the
# result does not depend on the order of the columns in the CSV file (a
# logical `%in%` index would assign "start"/"complete" by column position).
names(dat)[match(c("date.start", "date.stop"), names(dat))] <- c("start", "complete")
|
||||||
|
|
||||||
|
# Export the process map of one trace as a PDF file.
#
# Arguments:
#   trace   value of dat$trace that selects the rows to plot (reads the
#           global data frame `dat`)
#   folder  output directory; a missing trailing path separator is added
#           and the directory is created when it does not exist yet
#
# Value: the path of the written PDF file, returned invisibly.
create_pdf <- function(trace, folder = "../figures/processmaps/") {

  # The file name is pasted directly behind `folder`, so make sure a
  # non-empty folder ends in a path separator
  if (nzchar(folder) && !grepl("[/\\\\]$", folder)) {
    folder <- paste0(folder, "/")
  }

  # Create the output directory if needed; trailing separators are removed
  # for this call only because some platforms reject them in directory names
  dir_path <- sub("[/\\\\]+$", "", folder)
  if (nzchar(dir_path)) {
    dir.create(dir_path, recursive = TRUE, showWarnings = FALSE)
  }

  out_file <- paste0(folder, trace, ".pdf")

  alog <- activitylog(dat[which(dat$trace == trace), ],
                      case_id     = "trace",
                      activity_id = "event",
                      resource_id = "artwork",
                      timestamps  = c("start", "complete"))

  # Render the DOT source of the process map to SVG and convert it to PDF
  map <- process_map(alog)
  g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
  rsvg::rsvg_pdf(g, out_file)

  invisible(out_file)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Check the process map of one trace for unexpected transitions.
#
# The rows of the global data frame `dat` that belong to `trace` are turned
# into an activity log, and the DOT source of its process map is searched
# line by line. The trace id is returned when
#   * an edge leaving node 2 points at the openTopic or openPopup node, or
#   * an edge leaving the flipCard node points at the openPopup node.
# Otherwise the function returns NULL (invisibly).
#
# NOTE(review): node 2 is assumed to be the artificial start node, as in
# process_map_haum.gv, and node ids are assumed to be single digits 1-6 --
# confirm that this also holds for maps built from a single trace.
find_trace <- function(trace) {

  trace_rows <- dat[which(dat$trace == trace), ]
  alog <- activitylog(trace_rows,
                      case_id     = "trace",
                      activity_id = "event",
                      resource_id = "artwork",
                      timestamps  = c("start", "complete"))

  dot_lines <- strsplit(process_map(alog)$x$diagram, "\n")[[1]]

  # Lines that open a node definition and lines that define an edge
  node_lines <- grep("^.{6}[[]label", dot_lines, value = TRUE)
  edge_lines <- grep("^.{1}[1-6].->", dot_lines, value = TRUE)

  # Node id of an activity; character(0) if the activity is not in the map
  node_id <- function(activity) {
    gsub("^.{3}([1-6]).*", "\\1", grep(activity, node_lines, value = TRUE))
  }
  id_topic <- node_id("openTopic")
  id_popup <- node_id("openPopup")
  id_flip  <- node_id("flipCard")

  # Targets of all edges that leave node 2
  from_start  <- grep("^.{1}[2].->.[1-6]", edge_lines, value = TRUE)
  after_start <- gsub("^.{1}[2].->.([1-6]).*", "\\1", from_start)

  # Targets of all edges that leave the flipCard node (if there is one)
  popup_after_flip <- FALSE
  if (length(id_flip) > 0) {
    from_flip  <- grep(paste0("^.{1}[", id_flip, "].->.[1-6]"), edge_lines,
                       value = TRUE)
    after_flip <- gsub(paste0("^.{1}[", id_flip, "].->.([1-6]).*"), "\\1",
                       from_flip)
    popup_after_flip <- any(id_popup == after_flip)
  }

  if (any(c(id_topic, id_popup) %in% after_start) || popup_after_flip) {
    trace
  }
}
|
||||||
|
|
||||||
|
# Apply find_trace() to every distinct trace id. find_trace() yields the
# trace id for flagged traces and NULL otherwise, so unlist() below keeps
# only the flagged ids.
ctrace <- pbapply::pbsapply(unique(dat$trace), find_trace)
|
||||||
|
|
||||||
|
# Flagged trace ids and how many there are (printed for inspection)
unlist(ctrace)
|
||||||
|
length(unlist(ctrace))
|
||||||
|
|
||||||
|
|
||||||
|
# Create one process map PDF per flagged trace
for (trace in unlist(ctrace)) {
|
||||||
|
create_pdf(trace)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Build an activity log from all rows of dat (one case per trace) and
# export its process map as PDF
alog <- activitylog(dat,
|
||||||
|
case_id = "trace",
|
||||||
|
activity_id = "event",
|
||||||
|
resource_id = "artwork",
|
||||||
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
|
map <- process_map(alog)
|
||||||
|
# DOT source -> SVG -> raw vector, which rsvg converts to PDF
g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
|
||||||
|
rsvg::rsvg_pdf(g, "../figures/processmap_haum.pdf", width = 10, height = 5)
|
||||||
|
|
||||||
|
# Second version of the map, rendered from a DOT file on disk
# NOTE(review): writeLines() overwrites process_map_haum.gv on every run.
# If the colors in that file are adjusted by hand between the next two
# statements, re-running this script discards those edits -- confirm the
# intended workflow.
|
||||||
|
writeLines(map$x$diagram, "process_map_haum.gv")
|
||||||
|
g <- DiagrammeR::grViz("process_map_haum.gv") |> DiagrammeRsvg::export_svg() |> charToRaw()
|
||||||
|
rsvg::rsvg_pdf(g, "../figures/processmap_haum_adjusted.pdf", width = 10, height = 5)
|
||||||
|
|
||||||
|
|
||||||
|
# Process map of the cleaned data: all traces flagged in ctrace are
# excluded before the activity log is built
alog <- activitylog(dat[!dat$trace %in% unlist(ctrace), ],
|
||||||
|
case_id = "trace",
|
||||||
|
activity_id = "event",
|
||||||
|
resource_id = "artwork",
|
||||||
|
timestamps = c("start", "complete"))
|
||||||
|
|
||||||
|
map <- process_map(alog)
|
||||||
|
# DOT source -> SVG -> raw vector, which rsvg converts to PDF
g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
|
||||||
|
rsvg::rsvg_pdf(g, "../figures/processmap_haum_cleaned.pdf", width = 12, height = 5)
|
||||||
|
|
||||||
|
|
61
code/process_map_haum.gv
Normal file
61
code/process_map_haum.gv
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
digraph {
|
||||||
|
|
||||||
|
graph [layout = "dot",
|
||||||
|
outputorder = "edgesfirst",
|
||||||
|
bgcolor = "white",
|
||||||
|
rankdir = "LR"]
|
||||||
|
|
||||||
|
node [fontname = "Helvetica",
|
||||||
|
fontsize = "10",
|
||||||
|
shape = "circle",
|
||||||
|
fixedsize = "true",
|
||||||
|
width = "0.5",
|
||||||
|
style = "filled",
|
||||||
|
fillcolor = "aliceblue",
|
||||||
|
color = "gray70",
|
||||||
|
fontcolor = "gray50"]
|
||||||
|
|
||||||
|
edge [fontname = "Helvetica",
|
||||||
|
fontsize = "8",
|
||||||
|
weight = "1.5",
|
||||||
|
color = "gray80",
|
||||||
|
arrowsize = "0.5"]
|
||||||
|
|
||||||
|
"1" [label = "End", shape = "circle", style = "rounded,filled", fontcolor = "brown4", color = "brown4", tooltip = "ARTIFICIAL_END
|
||||||
|
8681", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFFFFF"]
|
||||||
|
"2" [label = "Start", shape = "circle", style = "rounded,filled", fontcolor = "chartreuse4", color = "chartreuse4", tooltip = "ARTIFICIAL_START
|
||||||
|
8681", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFFFFF"]
|
||||||
|
"3" [label = "flipCard
|
||||||
|
3457", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "flipCard
|
||||||
|
3457", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
|
||||||
|
"4" [label = "move
|
||||||
|
29954", shape = "rectangle", style = "rounded,filled", fontcolor = "white", color = "grey", tooltip = "move
|
||||||
|
29954", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#034E7B"]
|
||||||
|
"5" [label = "openPopup
|
||||||
|
2581", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "openPopup
|
||||||
|
2581", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
|
||||||
|
"6" [label = "openTopic
|
||||||
|
3370", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "openTopic
|
||||||
|
3370", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
|
||||||
|
"2"->"3" [label = "1538", penwidth = "1.32182464950827", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"2"->"4" [label = "7139", penwidth = "2.49382716049383", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"2"->"5" [label = "1", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"2"->"6" [label = "3", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"3"->"1" [label = "586", penwidth = "1.12261979493618", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"3"->"4" [label = "1703", penwidth = "1.35635070098347", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"3"->"5" [label = "2", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"3"->"6" [label = "1166", penwidth = "1.24398409709144", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"4"->"1" [label = "7222", penwidth = "2.51119481062984", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"4"->"3" [label = "1919", penwidth = "1.40154844109646", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"4"->"4" [label = "19116", penwidth = "5", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"4"->"5" [label = "470", penwidth = "1.09834693450513", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"4"->"6" [label = "1227", penwidth = "1.25674827369743", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"5"->"1" [label = "195", penwidth = "1.04080351537979", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"5"->"4" [label = "685", penwidth = "1.1433354258213", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"5"->"5" [label = "1514", penwidth = "1.3168026783846", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"5"->"6" [label = "187", penwidth = "1.03912952500523", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"6"->"1" [label = "678", penwidth = "1.14187068424357", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"6"->"4" [label = "1311", penwidth = "1.27432517263026", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"6"->"5" [label = "594", penwidth = "1.12429378531073", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
"6"->"6" [label = "787", penwidth = "1.16467880309688", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user