From 00a6f19cf6ee5766f6f67172fe0ec5d9bca95e09 Mon Sep 17 00:00:00 2001
From: nwickel
Date: Thu, 28 Sep 2023 15:04:59 +0200
Subject: [PATCH] Worked on second presentation, added problems and plots, figs, etc.

---
 code/03_plots_8o8m.R     |  18 ++++---
 code/03_plots_haum.R     |  10 ++--
 code/04_modeling_haum.R  |  57 +++++++++++++---------
 code/check_traces.R      | 101 +++++++++++++++++++++++++++++++++++++++
 code/process_map_haum.gv |  61 +++++++++++++++++++++++
 5 files changed, 216 insertions(+), 31 deletions(-)
 create mode 100644 code/check_traces.R
 create mode 100644 code/process_map_haum.gv

diff --git a/code/03_plots_8o8m.R b/code/03_plots_8o8m.R
index 05a6e7c..feeee2a 100644
--- a/code/03_plots_8o8m.R
+++ b/code/03_plots_8o8m.R
@@ -32,13 +32,17 @@ text(tmp, counts_artwork + 1500, c(datart$artwork))
 # more interesting per museum in this case...
 counts_artwork <- aggregate(trace ~ artwork + folder, datlogs, length)
 pdf("../figures/counts_artwork_8o8m.pdf", width = 20, height = 6, pointsize = 10)
-barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1))
+barchart(trace ~ artwork | folder, counts_artwork, ylab = "", layout = c(5, 1),
+         border = "transparent", col = "#0072B2")
 dev.off()
 
 ### Dwell times/duration
 
+set.seed(1033)  # reproducible subsample (at most 100000 rows) for the boxplot
+
 pdf("../figures/duration_8o8m.pdf", width = 5, height = 5, pointsize = 10)
-bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
+bwplot(I(duration/1000/60) ~ as.factor(event), ylab = "Duration in min",
+       data = datlogs[sample(nrow(datlogs), min(nrow(datlogs), 100000)), ])
 dev.off()
 
 ### Are there certain areas of the table that are touched most often?
@@ -82,13 +86,15 @@ dev.off()
 ### How many visitors per day
 
 # Cases per day
-datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
+datcase <- aggregate(case ~ date + folder, datlogs, function(x) length(unique(x)))
 
-pdf("../figures/cases_per_day_8o8m.pdf", width = 9, height = 5, pointsize = 10)
-par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
-plot(case ~ date, datcase, type = "h", col = "#0072B2", lwd = 2)
+pdf("../figures/cases_per_day_8o8m.pdf", width = 20, height = 6, pointsize = 10)
+barchart(case ~ date | folder, datcase, horizontal = FALSE,
+         scales = list(x = list(rot = 90, at = seq(1, 122, 10)), y = list(rot = 90)),
+         border = "transparent", col = "#0072B2")
 dev.off()
 
+
 ### Other stuff
 
 ## weird behavior of timeMs
diff --git a/code/03_plots_haum.R b/code/03_plots_haum.R
index b2cf07e..b2553fe 100644
--- a/code/03_plots_haum.R
+++ b/code/03_plots_haum.R
@@ -9,7 +9,7 @@ plot(1:10, col = cc, pch = 16, cex = 2)
 # Read data
 datlogs <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
                       sep = ";", header = TRUE)
-datlogs$date <- as.Date(datlogs$date.start)
+datlogs$date <- as.Date(datlogs$date)
 datlogs$date.start <- as.POSIXct(datlogs$date.start)
 datlogs$date.stop <- as.POSIXct(datlogs$date.stop)
 datlogs$artwork <- sprintf("%03d", datlogs$artwork)
@@ -29,7 +29,8 @@ names(counts_artwork) <- datart$title
 
 pdf("../figures/counts_artwork.pdf", width = 20, height = 10, pointsize = 10)
 par(mai = c(5, .6, .1, .1))
-tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000), border = "white")
+tmp <- barplot(counts_artwork, las = 2, ylim = c(0, 60000),
+               border = "white", col = "#0072B2")
 text(tmp, counts_artwork + 1000, c(datart$artwork, "504", "505"))
 dev.off()
 
@@ -60,8 +61,11 @@ xyplot(ds ~ as.factor(event), datagg, groups = artwork)
 bwplot(duration ~ as.factor(event), datlogs)
 
 # in min
+set.seed(1027)  # reproducible subsample (at most 100000 rows) for the boxplot
+
 pdf("../figures/duration.pdf", width = 5, height = 5, pointsize = 10)
-bwplot(I(duration/1000/60) ~ as.factor(event), datlogs, ylab = "Duration in sec")
+bwplot(I(duration/1000/60) ~ as.factor(event), ylab = "Duration in min",
+       data = datlogs[sample(nrow(datlogs), min(nrow(datlogs), 100000)), ])
 dev.off()
 
 datlogs$daydiff <- c(NA, diff(datlogs$date))
diff --git a/code/04_modeling_haum.R b/code/04_modeling_haum.R
index 1a4e0f0..51c92d2 100644
--- a/code/04_modeling_haum.R
+++ b/code/04_modeling_haum.R
@@ -1,26 +1,42 @@
-#' ---
-#' title: "Modelling log files with Process Mining"
-#' author: "Nora Wickelmaier"
-#' date: "`r Sys.Date()`"
-#' output:
-#'   html_document:
-#'     toc: true
-#'     toc_float: true
-#'   pdf_document:
-#'     toc: true
-#'     number_sections: true
-#' geometry: margin = 2.5cm
-#' ---
-
 # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
 
-#' # Read data
+# Read data
 
-dat <- read.table("../data/haum/event_logfiles_small.csv", sep = ";", header = TRUE)
-dat$date.start <- as.POSIXct(dat$date.start)
-dat$date.stop <- as.POSIXct(dat$date.stop)
+# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
+#                    sep = ";", header = TRUE)
+dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
+                   sep = ";", header = TRUE)
+dat0$date <- as.Date(dat0$date)
+dat0$date.start <- as.POSIXct(dat0$date.start)
+dat0$date.stop <- as.POSIXct(dat0$date.stop)
+dat0$artwork <- sprintf("%03d", dat0$artwork)
 
-#' # Creating event logs
+# TODO: Write a function that closes events spanning different log files
+# OR: Remove openTopic and openPopup events that do not start with a
+# flipCard (AND openPopup events without openTopic event beforehand)
+
+table(dat0[!duplicated(dat0$trace), "event"])
+# flipCard      move openPopup openTopic
+#    45528    247718       981      3457
+proportions(table(dat0[!duplicated(dat0$trace), "event"]))
+
+tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
+            "openPopup"), ]
+
+dat <- dat0
+i <- 1
+n_drop <- 1
+# Repeatedly drop the first row of a trace while it is openTopic/openPopup
+while (n_drop > 0) {
+  drop <- !duplicated(dat$trace) & dat$event %in% c("openTopic", "openPopup")
+  n_drop <- sum(drop)
+  dat <- dat[!drop, ]
+  print(i)
+  i <- i + 1
+  print(table(dat[!duplicated(dat$trace), "event"]))
+}
+
+# Creating event logs
 
 library(bupaverse)
 
@@ -86,9 +102,6 @@ animate_process(elog[elog$artwork %in% c("080", "054"), ],
                                      range = c("black", "gray"))))
 # --> not sure, yet, how to interpret this...
 
-
-
-
 alog080 <- activitylog(dat[dat$artwork %in% "080", ],
                        #case_id = "case",
                        case_id = "trace",
diff --git a/code/check_traces.R b/code/check_traces.R
new file mode 100644
index 0000000..cd7be08
--- /dev/null
+++ b/code/check_traces.R
@@ -0,0 +1,101 @@
+# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
+
+# Read data
+
+dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
+                  sep = ";", header = TRUE)
+# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
+#                   sep = ";", header = TRUE)
+dat$date <- as.Date(dat$date)
+dat$date.start <- as.POSIXct(dat$date.start)
+dat$date.stop <- as.POSIXct(dat$date.stop)
+dat$artwork <- sprintf("%03d", dat$artwork)
+
+library(bupaverse)
+
+# Rename via match() so the result does not depend on the column order
+names(dat)[match(c("date.start", "date.stop"), names(dat))] <- c("start", "complete")
+
+# Build an activity log (case = trace, activity = event) from event-log rows
+make_alog <- function(d) {
+  activitylog(d,
+              case_id = "trace",
+              activity_id = "event",
+              resource_id = "artwork",
+              timestamps = c("start", "complete"))
+}
+
+# Render a Graphviz diagram (DOT text or path of a .gv file) to the PDF `file`;
+# further arguments (e.g. width, height) are passed on to rsvg::rsvg_pdf()
+render_pdf <- function(diagram, file, ...) {
+  g <- DiagrammeR::grViz(diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
+  rsvg::rsvg_pdf(g, file, ...)
+}
+
+# Write the process map of one trace to <folder><trace>.pdf
+create_pdf <- function(trace, folder = "../figures/processmaps/") {
+  map <- process_map(make_alog(dat[which(dat$trace == trace), ]))
+  render_pdf(map$x$diagram, paste0(folder, trace, ".pdf"))
+}
+
+# Return `trace` if its process map has an edge Start -> openTopic/openPopup
+# or flipCard -> openPopup; otherwise return NULL (invisibly).
+# Parses the DOT text of process_map(); assumes single-digit node ids 1-6 and
+# that node "2" is the artificial start node (as in process_map_haum.gv).
+find_trace <- function(trace) {
+  map <- process_map(make_alog(dat[which(dat$trace == trace), ]))
+  d <- strsplit(map$x$diagram, "\n")[[1]]
+  o <- grep("^.{6}[[]label", d, value = TRUE)  # node definitions
+  p <- grep("^.{1}[1-6].->", d, value = TRUE)  # edges
+  num_ot <- gsub("^.{3}([1-6]).*", "\\1", grep("openTopic", o, value = TRUE))
+  num_op <- gsub("^.{3}([1-6]).*", "\\1", grep("openPopup", o, value = TRUE))
+  num_fc <- gsub("^.{3}([1-6]).*", "\\1", grep("flipCard", o, value = TRUE))
+  # targets of the edges that leave the start node
+  rel_path <- grep("^.{1}[2].->.[1-6]", p, value = TRUE)
+  rel_num <- gsub("^.{1}[2].->.([1-6]).*", "\\1", rel_path)
+  # targets of the edges that leave flipCard (none if there is no flipCard)
+  rel_num_fc <- character(0)
+  if (length(num_fc) > 0) {
+    fc <- paste0("^.{1}[", paste(num_fc, collapse = ""), "].->.")
+    rel_path_fc <- grep(paste0(fc, "[1-6]"), p, value = TRUE)
+    rel_num_fc <- gsub(paste0(fc, "([1-6]).*"), "\\1", rel_path_fc)
+  }
+  # any() yields a scalar, so use the short-circuiting || inside if ()
+  if (any(c(num_ot, num_op) %in% rel_num) || any(num_op %in% rel_num_fc)) {
+    trace
+  }
+}
+
+# pblapply() always returns a list (NULL for unflagged traces); unlist() drops
+# the NULLs and leaves the flagged trace ids
+ctrace <- unlist(pbapply::pblapply(unique(dat$trace), find_trace))
+
+ctrace
+length(ctrace)
+
+# create plots
+for (trace in ctrace) {
+  create_pdf(trace)
+}
+
+# Process map of all traces
+map <- process_map(make_alog(dat))
+render_pdf(map$x$diagram, "../figures/processmap_haum.pdf",
+           width = 10, height = 5)
+
+# adjusted colors
+# NOTE(review): process_map_haum.gv appears to be edited by hand after the
+# export (it is committed with red edges); write it only if it is missing so
+# that re-running this script does not overwrite those adjustments
+if (!file.exists("process_map_haum.gv")) {
+  writeLines(map$x$diagram, "process_map_haum.gv")
+}
+render_pdf("process_map_haum.gv", "../figures/processmap_haum_adjusted.pdf",
+           width = 10, height = 5)
+
+# Process map without the flagged traces
+map <- process_map(make_alog(dat[!dat$trace %in% ctrace, ]))
+render_pdf(map$x$diagram, "../figures/processmap_haum_cleaned.pdf",
+           width = 12, height = 5)
+
+
diff --git a/code/process_map_haum.gv b/code/process_map_haum.gv
new file mode 100644
index 0000000..fcfafd5
--- /dev/null
+++ b/code/process_map_haum.gv
@@ -0,0 +1,61 @@
+digraph {
+
+graph [layout = "dot",
+       outputorder = "edgesfirst",
+       bgcolor = "white",
+       rankdir = "LR"]
+
+node [fontname = "Helvetica",
+      fontsize = "10",
+      shape = "circle",
+      fixedsize = "true",
+      width = "0.5",
+      style = "filled",
+      fillcolor = "aliceblue",
+      color = "gray70",
+      fontcolor = "gray50"]
+
+edge [fontname = "Helvetica",
+     fontsize = "8",
+     weight = "1.5",
+     color = "gray80",
+     arrowsize = "0.5"]
+
+  "1" [label = "End", shape = "circle", style = "rounded,filled", fontcolor = "brown4", color = "brown4", tooltip = "ARTIFICIAL_END
+8681", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFFFFF"]
+  "2" [label = "Start", shape = "circle", style = "rounded,filled", fontcolor = "chartreuse4", color = "chartreuse4", tooltip = "ARTIFICIAL_START
+8681", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFFFFF"]
+  "3" [label = "flipCard
+3457", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "flipCard
+3457", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
+  "4" [label = "move
+29954", shape = "rectangle", style = "rounded,filled", fontcolor = "white", color = "grey", tooltip = "move
+29954", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#034E7B"]
+  "5" [label = "openPopup
+2581", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "openPopup
+2581", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
+  "6" [label = "openTopic
+3370", shape = "rectangle", style = "rounded,filled", fontcolor = "black", color = "grey", tooltip = "openTopic
+3370", penwidth = "1.5", fixedsize = "FALSE", fontname = "Arial", fontsize = "10", fillcolor = "#FFF7FB"]
+"2"->"3" [label = "1538", penwidth = "1.32182464950827", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"2"->"4" [label = "7139", penwidth = "2.49382716049383", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"2"->"5" [label = "1", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"2"->"6" [label = "3", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"3"->"1" [label = "586", penwidth = "1.12261979493618", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"3"->"4" [label = "1703", penwidth = "1.35635070098347", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"3"->"5" [label = "2", penwidth = "3", color = "red", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"3"->"6" [label = "1166", penwidth = "1.24398409709144", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"4"->"1" [label = "7222", penwidth = "2.51119481062984", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"4"->"3" [label = "1919", penwidth = "1.40154844109646", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"4"->"4" [label = "19116", penwidth = "5", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"4"->"5" [label = "470", penwidth = "1.09834693450513", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"4"->"6" [label = "1227", penwidth = "1.25674827369743", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"5"->"1" [label = "195", penwidth = "1.04080351537979", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"5"->"4" [label = "685", penwidth = "1.1433354258213", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"5"->"5" [label = "1514", penwidth = "1.3168026783846", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"5"->"6" [label = "187", penwidth = "1.03912952500523", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"6"->"1" [label = "678", penwidth = "1.14187068424357", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"6"->"4" [label = "1311", penwidth = "1.27432517263026", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"6"->"5" [label = "594", penwidth = "1.12429378531073", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+"6"->"6" [label = "787", penwidth = "1.16467880309688", color = "dodgerblue4", fontname = "Arial", fontsize = "10", weight = "1", constraint = "TRUE"]
+}