Compare commits
10 commits: 26f90a7fec ... cba441f08b

cba441f08b
24c7967246
9ad5123747
4857f08577
43c7f34645
b762968774
c0b24a03aa
bdeb8fb718
6feea5a251
b50f52dc6c
@@ -8,8 +8,8 @@
#         ../data/metadata/feiertage.csv
#         ../data/metadata/schulferien_2016-2018_NI.csv
#         ../data/metadata/schulferien_2019-2025_NI.csv
# output: raw_logfiles_<timestamp>.csv
#         event_logfiles_<timestamp>.csv
# output: results/raw_logfiles_<timestamp>.csv
#         results/event_logfiles_<timestamp>.csv
#
# last mod: 2024-02-23, NW

@@ -29,12 +29,12 @@ folders <- dir(path)

datraw <- parse_logfiles(folders, path)
# 91 corrupt lines have been found and removed from the data set

# datraw <- read.table("results/haum/raw_logfiles_2023-10-25_16-20-45.csv",
# datraw <- read.table("results/raw_logfiles_2023-10-25_16-20-45.csv",
#                      sep = ";", header = TRUE)

## Export data

write.table(datraw, paste0("results/haum/raw_logfiles_", now, ".csv"),
write.table(datraw, paste0("results/raw_logfiles_", now, ".csv"),
            sep = ";", row.names = FALSE)

#--------------- (2) Create event logs ---------------

@@ -131,6 +131,6 @@ dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ]

## Export data

write.table(dat2, paste0("results/haum/event_logfiles_", now, ".csv"),
write.table(dat2, paste0("results/event_logfiles_", now, ".csv"),
            sep = ";", row.names = FALSE)

@@ -9,16 +9,30 @@
#          (3.4) Artwork sequences
#          (3.5) Topics
#
# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
# output:
# input:  results/event_logfiles_2024-02-21_16-07-33.csv
#         results/raw_logfiles_2024-02-21_16-07-33.csv
# output: results/figures/counts_item.pdf
#         results/figures/counts_item_firsttouch.pdf
#         results/figures/duration.pdf
#         results/figures/heatmap_start.pdf
#         results/figures/heatmap_stop.pdf
#         results/figures/cases_per_day.pdf
#         results/figures/timeMs.pdf
#         results/figures/xycoord.pdf
#         results/figures/event-dist.pdf
#         results/figures/traceexplore_trace-event.pdf
#         results/figures/ra_trace-event.pdf
#         results/figures/traceexplore_case-event.pdf
#         results/figures/bp_tod.pdf
#         results/figures/bp_wd.pdf
#         results/figures/bp_wds.pdf
#         results/figures/bp_corona.pdf
#         results/figures/traceexplore_case-artwork_often080.pdf
#
# last mod: 2024-03-13
# last mod: 2024-03-22

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

library(lattice)
library(bupaverse)

# Overall Research Question: How do museum visitors interact with the
# artworks presented on the MTT?

@@ -27,7 +41,7 @@ library(bupaverse)

#--------------- (1) Read data ---------------

datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv",
datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv",
                      colClasses = c("character", "character", "POSIXct",
                                     "POSIXct", "character", "integer",
                                     "numeric", "character", "character",
@@ -40,7 +54,7 @@ datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard",
                                                  "openTopic",
                                                  "openPopup"))

datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv",
datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv",
                     sep = ";", header = TRUE)

# Add weekdays to data frame
@@ -74,43 +88,263 @@ lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE)
mat <- t(as.matrix(xtabs( ~ item + topic, datlogs)))
mat[mat == 0] <- NA
image(mat, axes = F, col = rainbow(100))
heatmap(t(mat))


datlogs$start <- datlogs$date.start
datlogs$complete <- datlogs$date.stop


#--------------- (2) Descriptives ---------------

### Which item gets touched most often?

counts_item <- table(datlogs$item)
lattice::barchart(counts_item)

items <- unique(datlogs$item)
#items <- items[!items %in% c("504", "505")]
datart <- mtt::extract_artworks(items,
                                paste0(items, ".xml"),
                                "../data/haum/ContentEyevisit/eyevisit_cards_light/")
datart <- datart[order(datart$artwork), ]
names(counts_item) <- datart$title

pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))
tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000),
               border = NA, col = "#434F4F")
text(tmp, counts_item + 1000, datart$artwork)
dev.off()

### Which item gets touched most often first?

datcase <- datlogs[!duplicated(datlogs$case), ]
counts_case <- table(datcase$item)
names(counts_case) <- datart$title
tmp <- barplot(counts_case, las = 2, border = "white")
text(tmp, counts_case + 100, datart$item)

counts <- rbind(counts_item, counts_case)

pdf("results/figures/counts_item_firsttouch.pdf",
    width = 20, height = 10, pointsize = 10)
par(mai = c(5, .6, .1, .1))

tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000))
text(tmp, counts_item + counts_case + 1000, datart$artwork)
legend("topleft", c("Total interactions", "First interactions"),
       col = c("#434F4F", "#FF6900"), pch = 15, bty = "n")
dev.off()

### Which teasers seem to work well?
barplot(table(datlogs$topic), las = 2)

### Dwell times/duration
datagg <- aggregate(duration ~ event + item, datlogs, mean)
datagg$ds <- datagg$duration / 1000 # in secs

lattice::bwplot(ds ~ event, datagg)

# without aggregation
lattice::bwplot(duration / 1000 / 60 ~ event, datlogs)
# in min

set.seed(1027)

pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10)
lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ],
                ylab = "Duration in min")
dev.off()

### Move events

datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start,
                           y.start, x.stop, y.stop) ~ item, datlogs,
                     mean)

hist(log(datlogs$scaleSize))
# --> easier to interpret on a log scale

plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize)


plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
     xlim = c(0, 3840), ylim = c(0, 2160))
with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5))
with(datmove,
     arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2)
)
abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)

datscale <- aggregate(scaleSize ~ item, datlogs, max)
plot(y.start ~ x.start, datmove, pch = 16, col = "gray")
points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize)

plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y",
     xlim = c(0, 3840), ylim = c(0, 2160))
#with(datmove, text(x.stop, y.stop, item))
with(datmove, text(x.start, y.start, item))


### Are there certain areas of the table that are touched most often?

# heatmap
cuts <- 100

datlogs$x.start.cat <- cut(datlogs$x.start, cuts)
datlogs$y.start.cat <- cut(datlogs$y.start, cuts)

tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs)

colnames(tab) <- paste0("c", 1:cuts)
rownames(tab) <- paste0("c", 1:cuts)

heatmap(tab, Rowv = NA, Colv = NA)
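# Illustrative aside (not from the commits): cut() + xtabs() turn the
# continuous x/y coordinates into a 2D frequency grid, which is what
# heatmap() draws above. Minimal self-contained check with simulated
# touch points on an assumed 3840 x 2160 display:
xs <- runif(1000, 0, 3840)                   # simulated x coordinates
ys <- runif(1000, 0, 2160)                   # simulated y coordinates
grid <- xtabs( ~ cut(xs, 10) + cut(ys, 10))  # 10 x 10 table of bin counts
heatmap(grid, Rowv = NA, Colv = NA)          # NA: keep bins in given order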


dattrim <- datlogs[datlogs$x.start < 3840 &
                   datlogs$x.start > 0 &
                   datlogs$y.start < 2160 &
                   datlogs$y.start > 0 &
                   datlogs$x.stop < 3840 &
                   datlogs$x.stop > 0 &
                   datlogs$y.stop < 2160 &
                   datlogs$y.stop > 0, ]

cuts <- 100 # 200, 100, 70, ...

# start
dattrim$x.start.cat <- cut(dattrim$x.start, cuts)
dattrim$y.start.cat <- cut(dattrim$y.start, cuts)

tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim)
colnames(tab.start) <- NULL
rownames(tab.start) <- NULL

pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.start, Rowv = NA, Colv = NA)
dev.off()

# stop
dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts)
dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts)
tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim)
colnames(tab.stop) <- NULL
rownames(tab.stop) <- NULL

pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10)
heatmap(tab.stop, Rowv = NA, Colv = NA)
dev.off()

### How many visitors per day

datlogs$date <- as.Date(datlogs$date.start)

# Interactions per day
datint <- aggregate(case ~ date, datlogs, length)
plot(datint, type = "h")

# Cases per day
datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x)))
plot(datcase, type = "h")

# Paths per day
datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x)))
plot(datpath, type = "h")

plot(path ~ date, datpath, type = "h", col = "#3CB4DC")
points(case ~ date, datcase, type = "h")

pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
plot(case ~ date, datcase, type = "h", col = "#434F4F")
abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")],
       col = "#FF6900", lty = 2)
text(datcase$date[datcase$date == "2020-03-13"] + 470, 80,
     "Corona gap from 2020-03-13 to 2022-10-25",
     col = "#FF6900")
dev.off()

## weird behavior of timeMs

pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10)
#par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId")
lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "",
                scales = list(x = list(rot = 90), y = list(rot = 90)))
dev.off()

## x,y-coordinates out of range

set.seed(1522)

pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))
#par(mfrow = c(1, 2))
plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ])
abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
#plot(y.stop ~ x.stop, datlogs)
#abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2)
legend("bottomleft", "Random sample of 10,000", bg = "white")
legend("topleft", "4K-Display: 3840 x 2160", bg = "white")
dev.off()

## moves

dat001 <- datlogs[which(datlogs$item == "001"), ]

index <- as.numeric(as.factor(dat001$path))
cc <- sample(colors(), 100)

plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y",
     xlim = c(0, 3840), ylim = c(0, 2160))
with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop,
                            length = .07, col = cc[index]))

plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
     xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray")
points(y.start ~ x.start, dat001, xlab = "x", ylab = "y",
       xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize,
       col = "blue")



cc <- sample(colors(), 70)

dat1 <- datlogs[!duplicated(datlogs$item), ]
dat1 <- dat1[order(dat1$item), ]

plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500))
abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray")
with(dat1, points(x.start, y.start, col = cc, pch = 16))
with(dat1, points(x.stop, y.stop, col = cc, pch = 16))
with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc))


# How many events per topic, per path, ...
# How many popups per artwork?

# Number of events per artwork
tab <- xtabs( ~ artwork + event, datlogs)
tab <- xtabs( ~ item + event, datlogs)
addmargins(tab)

proportions(tab, margin = "artwork")
proportions(tab, margin = "item")
proportions(tab, margin = "event")

cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)]

pdf("../figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0))

barplot(t(proportions(tab, margin = "artwork")), las = 2, col = cc,
barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"),
        legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white"))

dev.off()

#barchart(proportions(tab, margin = "artwork"), las = 2)
#lattice::barchart(proportions(tab, margin = "item"), las = 2)

# Proportion of events
proportions(xtabs( ~ event, datlogs))
# Mean proportion of event per path
colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path"))
# Mean proportion of event per artwork
colMeans(proportions(tab, margin = "artwork"))
# Mean proportion of event per item
colMeans(proportions(tab, margin = "item"))
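# Illustrative aside (not from the commits): proportions() normalizes over
# the named margin, so margin = "item" makes each item row sum to 1 and
# margin = "event" makes each event column sum to 1. Toy check:
toy <- matrix(c(2, 2, 1, 3), nrow = 2,
              dimnames = list(item = c("a", "b"),
                              event = c("move", "flipCard")))
rowSums(proportions(toy, margin = "item"))   # a = 1, b = 1
colSums(proportions(toy, margin = "event"))  # move = 1, flipCard = 1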

# Proportion of unclosed events

@@ -126,15 +360,17 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs)

#--------------- (3.1) Check data quality ---------------

alog <- activitylog(datlogs,
datlogs$start <- datlogs$date.start
datlogs$complete <- datlogs$date.stop

alog <- bupaR::activitylog(datlogs,
                           case_id = "path",
                           activity_id = "event",
                           #resource_id = "case",
                           resource_id = "artwork",
                           resource_id = "item",
                           timestamps = c("start", "complete"))

# process_map(alog, frequency("relative"))
map_as_pdf(alog, file = "../figures/pm_trace-event.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))
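# Note added for clarity: in this activity log each path is one case
# (trace), the events (move, flipCard, openTopic, openPopup) are the
# activities, the item serves as the resource, and start/complete are
# the two timestamp columns created above.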

alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9)

@@ -147,7 +383,7 @@ processmapR::process_map(alogf, # alog,

alog_no_move <- alog[alog$event != "move", ]

pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1447)
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
                                         sample(unique(alog_no_move$path), 400),],
@@ -155,7 +391,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
                            abbreviate = T)
dev.off()

pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)
pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10)

ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity")
levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup")
@@ -182,29 +418,29 @@ which.max(table(datlogs$artwork))
which.min(table(datlogs$artwork))
which.min(table(datlogs$artwork)[-c(71,72)])

alog080 <- activitylog(datlogs[datlogs$artwork == "080",],
alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
                              case_id = "path",
                              activity_id = "event",
                              resource_id = "artwork",
                              timestamps = c("start", "complete"))

map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf")
processmapR::process_map(alog080, processmapR::frequency("relative"))

alog087 <- activitylog(datlogs[datlogs$artwork == "087",],
alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",],
                              case_id = "path",
                              activity_id = "event",
                              resource_id = "artwork",
                              timestamps = c("start", "complete"))

map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf")
processmapR::process_map(alog087, processmapR::frequency("relative"))

alog504 <- activitylog(datlogs[datlogs$artwork == "504",],
alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",],
                              case_id = "path",
                              activity_id = "event",
                              resource_id = "artwork",
                              timestamps = c("start", "complete"))

map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
processmapR::process_map(alog504, processmapR::frequency("relative"))

#--------------- (3.3) Patterns of cases ---------------

@@ -215,17 +451,17 @@ map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf")
# ... weekdays for "normal" and school vacation days?
# ... pre and post corona?

alog <- activitylog(datlogs,
alog <- bupaR::activitylog(datlogs,
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

alog_no_move <- alog[alog$event != "move", ]

pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10)
set.seed(1050)
processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
                                         sample(unique(alog_no_move$path), 300),],
@@ -233,38 +469,38 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in%
                            abbreviate = T)
dev.off()

map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf")
processmapR::process_map(alog080, processmapR::frequency("relative"))

alog087 <- activitylog(datlogs[datlogs$artwork == "087",],
alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",],
                              case_id = "case",
                              activity_id = "event",
                              resource_id = "path",
                              timestamps = c("start", "complete"))

map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf")
processmapR::process_map(alog087, processmapR::frequency("relative"))

### Mornings and afternoons

datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning")
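# Note added for clarity: hour(start) > 13 puts 14:00 and later into
# "afternoon", so the 13:00 hour still counts as "morning" here.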

alog <- activitylog(datlogs[datlogs$tod == "morning",],
alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

alog <- activitylog(datlogs[datlogs$tod == "afternoon",],
alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

# Are the same artworks looked at?
pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2],
@@ -277,24 +513,24 @@ dev.off()

datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday")

alog <- activitylog(datlogs[datlogs$wd == "weekend",],
alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

alog <- activitylog(datlogs[datlogs$wd == "weekday",],
alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

# Are the same artworks looked at?
pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"),
@@ -308,24 +544,24 @@ dev.off()
datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school")
datlogs$wds[datlogs$wd == "weekend"] <- NA

alog <- activitylog(datlogs[which(datlogs$wds == "school"),],
alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),],
alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

# Are the same artworks looked at?
pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

#barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE,
@@ -339,24 +575,24 @@ dev.off()

datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post")

alog <- activitylog(datlogs[which(datlogs$corona == "pre"),],
alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

alog <- activitylog(datlogs[which(datlogs$corona == "post"),],
alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),],
                           case_id = "case",
                           activity_id = "event",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

# Are the same artworks looked at?
pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10)
par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0))

barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"),
@@ -369,13 +605,13 @@ dev.off()
# Order in which artworks are looked at

nart <- 5 # select 5 artworks randomly
alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ],
alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ],
                           case_id = "case",
                           activity_id = "artwork",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

#map <- process_map(alog, frequency("relative"))
#map <- processmapR::process_map(alog, processmapR::frequency("relative"))

## select cases with Vermeer
length(unique(datlogs[datlogs$artwork == "080", "case"]))
@@ -392,16 +628,16 @@ which(table(tmp$artwork) > 14000)

often080 <- names(which(table(tmp$artwork) > 14000))

alog <- activitylog(datlogs[datlogs$artwork %in% often080, ],
alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ],
                           case_id = "case",
                           activity_id = "artwork",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))


pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)
pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10)

processmapR::trace_explorer(alog,
                            n_traces = 30, type = "frequent",
@@ -414,40 +650,35 @@ dev.off()
# Are there certain topics that people are interested in more than others?
# Do these topic distributions differ for comparable artworks?

alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),],
alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),],
                           case_id = "case",
                           activity_id = "topic",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

map_as_pdf(alog, file = "../figures/pm_case-topic.pdf")
processmapR::process_map(alog, processmapR::frequency("relative"))

# Order of topics for Vermeer
# alog080 <- activitylog(datlogs[datlogs$artwork == "080",],
# alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
#                               case_id = "case",
#                               activity_id = "topic",
#                               resource_id = "path",
#                               timestamps = c("start", "complete"))
#
# map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf")
#
#
# alog080 <- activitylog(datlogs[datlogs$artwork == "080",],
# alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",],
#                               case_id = "case",
#                               activity_id = "topicFile",
#                               resource_id = "path",
#                               timestamps = c("start", "complete"))
#
# #process_map(alog080, frequency("relative"))
# #processmapR::process_map(alog080, processmapR::frequency("relative"))
#
# # Comparable artwork
# alog083 <- activitylog(datlogs[datlogs$artwork == "083",],
# alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",],
#                               case_id = "case",
#                               activity_id = "topic",
#                               resource_id = "path",
#                               timestamps = c("start", "complete"))
#
# map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf")

# artworks that have the same topics as Vermeer
which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in%
@@ -458,73 +689,13 @@ which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in%

for (art in c("037", "046", "062", "080", "083", "109")) {

  alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,],
  alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,],
                             case_id = "case",
                             activity_id = "topic",
                             resource_id = "path",
                             timestamps = c("start", "complete"))

  map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf"))
  processmapR::process_map(alog, processmapR::frequency("relative"))
}



# Angewandte Kunst, Graphik, Gemälde, Kultur

c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde",
  "Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "",
  "Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst",
  "Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "",
  "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde",
  "Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde",
  "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde",
  "Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur",
  "Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde",
  "", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde",
  "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur",
  "Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur")



# BURSTS
which.max(table(datlogs$date))
tmp <- datlogs[datlogs$date == "2017-02-12", ]

# number of traces per case on 2017-02-12
rowSums(xtabs( ~ case + path, tmp) != 0)

range(tmp$start)
hours <- lubridate::hour(tmp$start)
xtabs( ~ case + hours, tmp)

# distribution of cases over the day
colSums(xtabs( ~ case + hours, tmp) != 0)
barplot(colSums(xtabs( ~ case + hours, tmp) != 0))

aggregate(path ~ case + hours, tmp, length)




tmp <- aggregate(path ~ case, datlogs, length)
tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"])
tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"])

tmp[tmp$path > 200, ]

plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3))

lattice::barchart(path ~ time, tmp, horizontal = F)



###########################################################################
# HELPER

map_as_pdf <- function(alog, file, type = frequency("relative")) {
  map <- process_map(alog, type = type)
  g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw()
  rsvg::rsvg_pdf(g, file)
}
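# Usage sketch for the helper above (file name assumed for illustration):
#   map_as_pdf(alog, file = "../figures/pm_example.pdf")
# It renders the process map via DiagrammeR, exports it as SVG with
# DiagrammeRsvg, and writes the PDF with rsvg; process_map() and
# frequency() resolve through the attached bupaverse packages.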


@@ -1,18 +1,14 @@
# 03_create-petrinet.py
#
# content: (1) Create places and transitions
#          (2) Sequential net
#          (3) Concurrent net
#          (2) Normative net
#
# input: --
# output: results/haum/conformative_petrinet_con.pnml
#         results/processmaps/conformative_petrinet_con.png
#         results/processmaps/conformative_bpmn_con.png
#         results/haum/conformative_petrinet_seq.pnml
#         results/processmaps/conformative_petrinet_seq.png
#         results/processmaps/conformative_bpmn_seq.png
# output: results/normative_petrinet.pnml
#         results/processmaps/normative_petrinet.png
#         results/processmaps/normative_bpmn.png
#
# last mod: 2024-03-06
# last mod: 2024-03-22

import pm4py
from pm4py.objects.petri_net.obj import PetriNet, Marking
@@ -62,93 +58,90 @@ t_16 = PetriNet.Transition("t_16")
t_17 = PetriNet.Transition("t_17")
t_18 = PetriNet.Transition("t_18")

#--------------- (2) Sequential net ---------------
#--------------- (2) Normative net ---------------

net_seq = PetriNet("new_petri_net")
net = PetriNet("new_petri_net")

# Add places
net_seq.places.add(source)
net_seq.places.add(sink)
net_seq.places.add(p_1)
net_seq.places.add(p_2)
net_seq.places.add(p_3)
net_seq.places.add(p_4)
net_seq.places.add(p_5)
net_seq.places.add(p_6)
net_seq.places.add(p_7)
net_seq.places.add(p_8)
net.places.add(source)
net.places.add(sink)
net.places.add(p_1)
net.places.add(p_2)
net.places.add(p_3)
net.places.add(p_4)
net.places.add(p_5)
net.places.add(p_6)
net.places.add(p_7)
net.places.add(p_8)
net.places.add(p_9)
net.places.add(p_10)
net.places.add(p_11)
net.places.add(p_12)

# Add transitions
net_seq.transitions.add(mv)
net_seq.transitions.add(fc)
net_seq.transitions.add(ot)
net_seq.transitions.add(op)
net.transitions.add(mv)
net.transitions.add(fc)
net.transitions.add(ot)
net.transitions.add(op)

# Add hidden transitions
net_seq.transitions.add(t_1)
net_seq.transitions.add(t_2)
net_seq.transitions.add(t_3)
net_seq.transitions.add(t_4)
net_seq.transitions.add(t_5)
net_seq.transitions.add(t_6)
net_seq.transitions.add(t_7)
net_seq.transitions.add(t_8)
net_seq.transitions.add(t_9)
net_seq.transitions.add(t_10)
net_seq.transitions.add(t_11)
net_seq.transitions.add(t_12)
net_seq.transitions.add(t_13)
net_seq.transitions.add(t_14)
net_seq.transitions.add(t_15)
net_seq.transitions.add(t_16)
net_seq.transitions.add(t_17)
net_seq.transitions.add(t_18)
net.transitions.add(t_1)
net.transitions.add(t_2)
net.transitions.add(t_3)
net.transitions.add(t_4)
net.transitions.add(t_5)
net.transitions.add(t_6)
net.transitions.add(t_7)
net.transitions.add(t_8)
net.transitions.add(t_9)
net.transitions.add(t_10)
net.transitions.add(t_11)
net.transitions.add(t_12)
net.transitions.add(t_13)
net.transitions.add(t_14)
net.transitions.add(t_15)

# Add arcs
petri_utils.add_arc_from_to(source, t_1, net_seq)
petri_utils.add_arc_from_to(source, t_2, net_seq)
petri_utils.add_arc_from_to(t_1, p_1, net_seq)
petri_utils.add_arc_from_to(t_2, p_2, net_seq)
petri_utils.add_arc_from_to(p_1, mv, net_seq)
petri_utils.add_arc_from_to(p_2, fc, net_seq)
petri_utils.add_arc_from_to(mv, p_3, net_seq)
petri_utils.add_arc_from_to(p_3, t_3, net_seq)
petri_utils.add_arc_from_to(p_3, t_4, net_seq)
petri_utils.add_arc_from_to(p_3, t_5, net_seq)
petri_utils.add_arc_from_to(p_3, t_6, net_seq)
petri_utils.add_arc_from_to(p_3, t_7, net_seq)
petri_utils.add_arc_from_to(t_7, p_1, net_seq)
petri_utils.add_arc_from_to(fc, p_4, net_seq)
petri_utils.add_arc_from_to(p_4, t_8, net_seq)
petri_utils.add_arc_from_to(p_4, t_9, net_seq)
petri_utils.add_arc_from_to(p_4, t_10, net_seq)
petri_utils.add_arc_from_to(t_9, p_1, net_seq)
petri_utils.add_arc_from_to(t_16, p_5, net_seq)
petri_utils.add_arc_from_to(t_3, p_2, net_seq)
petri_utils.add_arc_from_to(t_5, p_6, net_seq)
petri_utils.add_arc_from_to(t_6, p_5, net_seq)
petri_utils.add_arc_from_to(p_6, ot, net_seq)
petri_utils.add_arc_from_to(p_5, op, net_seq)
petri_utils.add_arc_from_to(ot, p_8, net_seq)
petri_utils.add_arc_from_to(op, p_7, net_seq)
petri_utils.add_arc_from_to(p_8, t_11, net_seq)
petri_utils.add_arc_from_to(p_8, t_12, net_seq)
petri_utils.add_arc_from_to(p_8, t_13, net_seq)
petri_utils.add_arc_from_to(p_8, t_17, net_seq)
petri_utils.add_arc_from_to(t_10, p_6, net_seq)
petri_utils.add_arc_from_to(t_17, p_6, net_seq)
petri_utils.add_arc_from_to(p_7, t_14, net_seq)
petri_utils.add_arc_from_to(p_7, t_15, net_seq)
petri_utils.add_arc_from_to(p_7, t_16, net_seq)
petri_utils.add_arc_from_to(p_7, t_18, net_seq)
petri_utils.add_arc_from_to(t_18, p_6, net_seq)
petri_utils.add_arc_from_to(t_13, p_5, net_seq)
petri_utils.add_arc_from_to(t_15, p_1, net_seq)
petri_utils.add_arc_from_to(t_11, p_1, net_seq)
petri_utils.add_arc_from_to(t_4, sink, net_seq)
petri_utils.add_arc_from_to(t_8, sink, net_seq)
petri_utils.add_arc_from_to(t_12, sink, net_seq)
petri_utils.add_arc_from_to(t_14, sink, net_seq)
petri_utils.add_arc_from_to(source, t_1, net)
petri_utils.add_arc_from_to(t_1, p_1, net)
petri_utils.add_arc_from_to(t_1, p_2, net)
petri_utils.add_arc_from_to(p_1, t_2, net)
petri_utils.add_arc_from_to(p_1, t_3, net)
petri_utils.add_arc_from_to(t_3, p_5, net)
petri_utils.add_arc_from_to(t_2, p_3, net)
petri_utils.add_arc_from_to(p_3, mv, net)
petri_utils.add_arc_from_to(mv, p_4, net)
petri_utils.add_arc_from_to(p_4, t_5, net)
petri_utils.add_arc_from_to(p_4, t_6, net)
petri_utils.add_arc_from_to(t_6, p_3, net)
petri_utils.add_arc_from_to(t_5, p_5, net)
petri_utils.add_arc_from_to(p_5, t_15, net)
petri_utils.add_arc_from_to(t_15, sink, net)
petri_utils.add_arc_from_to(p_2, fc, net)
petri_utils.add_arc_from_to(p_2, t_8, net)
petri_utils.add_arc_from_to(t_8, p_12, net)
petri_utils.add_arc_from_to(p_12, t_15, net)
petri_utils.add_arc_from_to(fc, p_6, net)
petri_utils.add_arc_from_to(p_6, t_9, net)
petri_utils.add_arc_from_to(t_9, p_12, net)
petri_utils.add_arc_from_to(p_6, t_4, net)
petri_utils.add_arc_from_to(t_4, p_7, net)
petri_utils.add_arc_from_to(p_7, ot, net)
petri_utils.add_arc_from_to(ot, p_8, net)
petri_utils.add_arc_from_to(p_8, t_10, net)
petri_utils.add_arc_from_to(t_10, p_11, net)
petri_utils.add_arc_from_to(p_11, t_13, net)
petri_utils.add_arc_from_to(t_13, p_12, net)
petri_utils.add_arc_from_to(p_8, t_7, net)
petri_utils.add_arc_from_to(t_7, p_9, net)
petri_utils.add_arc_from_to(p_9, op, net)
petri_utils.add_arc_from_to(op, p_10, net)
petri_utils.add_arc_from_to(p_10, t_11, net)
petri_utils.add_arc_from_to(p_10, t_12, net)
petri_utils.add_arc_from_to(t_12, p_9, net)
petri_utils.add_arc_from_to(t_11, p_11, net)
petri_utils.add_arc_from_to(p_11, t_14, net)
petri_utils.add_arc_from_to(t_14, p_7, net)

# Add tokens
initial_marking = Marking()
@@ -156,116 +149,14 @@ initial_marking[source] = 1
final_marking = Marking()
final_marking[sink] = 1

pm4py.view_petri_net(net_seq, initial_marking, final_marking)
pm4py.write_pnml(net_seq, initial_marking, final_marking, "results/haum/conformative_petrinet_seq.pnml")
pm4py.view_petri_net(net, initial_marking, final_marking)
pm4py.write_pnml(net, initial_marking, final_marking,
                 "results/normative_petrinet.pnml")

pm4py.vis.save_vis_petri_net(net_seq, initial_marking, final_marking,
                             "results/processmaps/conformative_petrinet_seq.png")
pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking,
                             "results/processmaps/normative_petrinet.png")

bpmn = pm4py.convert.convert_to_bpmn(net_seq, initial_marking, final_marking)
bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking)
pm4py.view_bpmn(bpmn)

pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_seq.png")


#--------------- (3) Concurrent net ---------------

net_con = PetriNet("new_petri_net")

# Add places
net_con.places.add(source)
net_con.places.add(sink)
net_con.places.add(p_1)
net_con.places.add(p_2)
net_con.places.add(p_3)
net_con.places.add(p_4)
net_con.places.add(p_5)
net_con.places.add(p_6)
net_con.places.add(p_7)
net_con.places.add(p_8)
net_con.places.add(p_9)
net_con.places.add(p_10)
net_con.places.add(p_11)
net_con.places.add(p_12)

# Add transitions
net_con.transitions.add(mv)
net_con.transitions.add(fc)
net_con.transitions.add(ot)
net_con.transitions.add(op)

# Add hidden transitions
net_con.transitions.add(t_1)
net_con.transitions.add(t_2)
net_con.transitions.add(t_3)
net_con.transitions.add(t_4)
net_con.transitions.add(t_5)
net_con.transitions.add(t_6)
net_con.transitions.add(t_7)
net_con.transitions.add(t_8)
net_con.transitions.add(t_9)
net_con.transitions.add(t_10)
net_con.transitions.add(t_11)
net_con.transitions.add(t_12)
net_con.transitions.add(t_13)
net_con.transitions.add(t_14)
net_con.transitions.add(t_15)

# Add arcs
petri_utils.add_arc_from_to(source, t_1, net_con)
petri_utils.add_arc_from_to(t_1, p_1, net_con)
petri_utils.add_arc_from_to(t_1, p_2, net_con)
petri_utils.add_arc_from_to(p_1, t_2, net_con)
petri_utils.add_arc_from_to(p_1, t_3, net_con)
petri_utils.add_arc_from_to(t_3, p_5, net_con)
petri_utils.add_arc_from_to(t_2, p_3, net_con)
petri_utils.add_arc_from_to(p_3, mv, net_con)
petri_utils.add_arc_from_to(mv, p_4, net_con)
petri_utils.add_arc_from_to(p_4, t_5, net_con)
petri_utils.add_arc_from_to(p_4, t_6, net_con)
petri_utils.add_arc_from_to(t_6, p_3, net_con)
petri_utils.add_arc_from_to(t_5, p_5, net_con)
petri_utils.add_arc_from_to(p_5, t_15, net_con)
petri_utils.add_arc_from_to(t_15, sink, net_con)
petri_utils.add_arc_from_to(p_2, fc, net_con)
petri_utils.add_arc_from_to(p_2, t_8, net_con)
petri_utils.add_arc_from_to(t_8, p_12, net_con)
petri_utils.add_arc_from_to(p_12, t_15, net_con)
petri_utils.add_arc_from_to(fc, p_6, net_con)
petri_utils.add_arc_from_to(p_6, t_9, net_con)
petri_utils.add_arc_from_to(t_9, p_12, net_con)
petri_utils.add_arc_from_to(p_6, t_4, net_con)
petri_utils.add_arc_from_to(t_4, p_7, net_con)
petri_utils.add_arc_from_to(p_7, ot, net_con)
petri_utils.add_arc_from_to(ot, p_8, net_con)
petri_utils.add_arc_from_to(p_8, t_10, net_con)
petri_utils.add_arc_from_to(t_10, p_11, net_con)
petri_utils.add_arc_from_to(p_11, t_13, net_con)
petri_utils.add_arc_from_to(t_13, p_12, net_con)
petri_utils.add_arc_from_to(p_8, t_7, net_con)
petri_utils.add_arc_from_to(t_7, p_9, net_con)
petri_utils.add_arc_from_to(p_9, op, net_con)
petri_utils.add_arc_from_to(op, p_10, net_con)
petri_utils.add_arc_from_to(p_10, t_11, net_con)
petri_utils.add_arc_from_to(p_10, t_12, net_con)
petri_utils.add_arc_from_to(t_12, p_9, net_con)
petri_utils.add_arc_from_to(t_11, p_11, net_con)
petri_utils.add_arc_from_to(p_11, t_14, net_con)
petri_utils.add_arc_from_to(t_14, p_7, net_con)

# Add tokens
initial_marking = Marking()
initial_marking[source] = 1
final_marking = Marking()
final_marking[sink] = 1

pm4py.view_petri_net(net_con, initial_marking, final_marking)
pm4py.write_pnml(net_con, initial_marking, final_marking, "results/haum/conformative_petrinet_con.pnml")

pm4py.vis.save_vis_petri_net(net_con, initial_marking, final_marking,
                             "results/processmaps/conformative_petrinet_con.png")

bpmn = pm4py.convert.convert_to_bpmn(net_con, initial_marking, final_marking)
pm4py.view_bpmn(bpmn)

pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_con.png")
pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png")
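# Illustrative aside (not from the commits): a hand-built workflow net can
# be sanity-checked for soundness with pm4py's WOFLAN implementation
# before it is used for conformance checking.
from pm4py.algo.analysis.woflan import algorithm as woflan

# returns True if the net is a sound workflow net
is_sound = woflan.apply(net, initial_marking, final_marking,
                        parameters={woflan.Parameters.RETURN_ASAP_WHEN_NOT_SOUND: True})
print(is_sound)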

@@ -1,25 +1,24 @@
# 04_conformance-checking.py
#
# content: (1) Load data and create event log
#          (2) Infos for items
#          (2) Check against normative Petri Net
#
# input: results/haum/event_logfiles_2024-02-21_16-07-33.csv
#        results/haum/conformative_petrinet_con.pnml
# output: results/processmaps/dfg_complete_python.png
#         results/eval_all-miners_complete.csv
# input: results/event_logfiles_2024-02-21_16-07-33.csv
#        results/normative_petrinet.pnml
# output: results/eval_all-miners_complete.csv
#         results/eval_all-miners_clean.csv
#         results/processmaps/petrinet_conformative.png
#         results/processmaps/petrinet_heuristics_clean.png
#         results/processmaps/petrinet_alpha_clean.png
#         results/processmaps/petrinet_inductive_clean.png
#         results/processmaps/petrinet_ilp_clean.png
#         results/processmaps/bpmn_conformative.png
#         results/processmaps/bpmn_inductive_clean.png
#         results/processmaps/bpmn_ilp_clean.png
#         results/processmaps/bpmn_alpha_clean.png
#         results/processmaps/bpmn_heuristics_clean.png
#         ../../thesis/figures/petrinet_normative.png
#         ../../thesis/figures/petrinet_heuristics_clean.png
#         ../../thesis/figures/petrinet_alpha_clean.png
#         ../../thesis/figures/petrinet_inductive_clean.png
#         ../../thesis/figures/petrinet_ilp_clean.png
#         ../../thesis/figures/bpmn_normative.png
#         ../../thesis/figures/bpmn_inductive_clean.png
#         ../../thesis/figures/bpmn_ilp_clean.png
#         ../../thesis/figures/bpmn_alpha_clean.png
#         ../../thesis/figures/bpmn_heuristics_clean.png
#
# last mod: 2024-03-06
# last mod: 2024-03-22

import pm4py
import pandas as pd
@@ -29,13 +28,13 @@ from python_helpers import eval_pm, pn_infos_miner

#--------------- (1) Load data and create event logs ---------------

dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")
dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";")

event_log = pm4py.format_dataframe(dat, case_id = "path",
                                   activity_key = "event",
                                   timestamp_key = "date.start")

###### Descriptives of log data ######
## Descriptives of log data

# Distribution of events
event_log.event.value_counts()
@@ -57,9 +56,9 @@ len(variants_no_move)
sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True))
{k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]}

###### Check against "conformative" Petri Net ######
#--------------- (2) Check against normative Petri Net ---------------

basenet, initial_marking, final_marking = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml")
basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml")

# TBR
replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking)
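# Illustrative aside (not from the commits): each TBR diagnostic is a dict
# with keys like "trace_is_fit" and "trace_fitness", so the share of
# perfectly fitting traces can be summarized directly:
n_fit = sum(trace["trace_is_fit"] for trace in replayed_traces)
print(n_fit / len(replayed_traces))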
@@ -93,23 +92,13 @@ event_log[event_log["@@case_index"] == index_broken[0]].item.unique().tolist()
event_log[event_log["@@case_index"] == index_broken[0]]["fileId.start"].unique().tolist()
# --> logging error in raw file

## Footprints
from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
from pm4py.visualization.footprints import visualizer as fp_visualizer
fp_log = footprints_discovery.apply(event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG)
fp_net = footprints_discovery.apply(basenet, initial_marking, final_marking)
gviz = fp_visualizer.apply(fp_net, parameters={fp_visualizer.Variants.SINGLE.value.Parameters.FORMAT: "svg"})
fp_visualizer.view(gviz)

efg_graph = pm4py.discover_eventually_follows_graph(event_log)

## Fitting different miners

eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
                               "simplicity", "sound", "narcs", "ntrans",
                               "nplaces", "nvariants", "mostfreq"])

for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]:
for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
    eval = pd.concat([eval, pn_infos_miner(event_log, miner)])

eval.to_csv("results/eval_all-miners_complete.csv", sep = ";")
@@ -121,7 +110,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
                                     "simplicity", "sound", "narcs", "ntrans",
                                     "nplaces", "nvariants", "mostfreq"])

for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]:
for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]:
    eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)])

eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";")
@@ -129,28 +118,27 @@ eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";")
## Directly-follows graph
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean)
pm4py.view_dfg(dfg, start_activities, end_activities)
pm4py.save_vis_dfg(dfg, start_activities, end_activities, "results/processmaps/dfg_complete_python.png")

## Export petri nets
pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, "results/processmaps/petrinet_conformative.png")
pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking,
                             "../../thesis/figures/petrinet_normative.png")
h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean)
pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "results/processmaps/petrinet_heuristics_clean.png")
pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "../../thesis/figures/petrinet_heuristics_clean.png")
a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean)
pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "results/processmaps/petrinet_alpha_clean.png")
pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "../../thesis/figures/petrinet_alpha_clean.png")
i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean)
pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "results/processmaps/petrinet_inductive_clean.png")
pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "../../thesis/figures/petrinet_inductive_clean.png")
ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean)
pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "results/processmaps/petrinet_ilp_clean.png")
pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "../../thesis/figures/petrinet_ilp_clean.png")

# convert to BPMN
base_bpmn = pm4py.convert.convert_to_bpmn(basenet, initial_marking, final_marking)
pm4py.vis.save_vis_bpmn(base_bpmn, "results/processmaps/bpmn_conformative.png")
pm4py.vis.save_vis_bpmn(base_bpmn, "../../thesis/figures/bpmn_normative.png")
i_bpmn = pm4py.convert.convert_to_bpmn(i_net, i_im, i_fm)
pm4py.vis.save_vis_bpmn(i_bpmn, "results/processmaps/bpmn_inductive_clean.png")
pm4py.vis.save_vis_bpmn(i_bpmn, "../../thesis/figures/bpmn_inductive_clean.png")
ilp_bpmn = pm4py.convert.convert_to_bpmn(ilp_net, ilp_im, ilp_fm)
pm4py.vis.save_vis_bpmn(ilp_bpmn, "results/processmaps/bpmn_ilp_clean.png")
pm4py.vis.save_vis_bpmn(ilp_bpmn, "../../thesis/figures/bpmn_ilp_clean.png")
a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm)
pm4py.vis.save_vis_bpmn(a_bpmn, "results/processmaps/bpmn_alpha_clean.png")
pm4py.vis.save_vis_bpmn(a_bpmn, "../../thesis/figures/bpmn_alpha_clean.png")
h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm)
pm4py.vis.save_vis_bpmn(h_bpmn, "results/processmaps/bpmn_heuristics_clean.png")

pm4py.vis.save_vis_bpmn(h_bpmn, "../../thesis/figures/bpmn_heuristics_clean.png")

@@ -5,22 +5,23 @@
#          (3) DFG for complete data
#          (4) Export data frame for analyses
#
# input: results/haum/event_logfiles_2024-02-21_16-07-33.csv
#        results/haum/raw_logfiles_2024-02-21_16-07-33.csv
# output: results/haum/eventlogs_pre-corona_cleaned.RData
#         results/haum/eventlogs_pre-corona_cleaned.csv
# input: results/event_logfiles_2024-02-21_16-07-33.csv
#        results/raw_logfiles_2024-02-21_16-07-33.csv
# output: results/eventlogs_pre-corona_cleaned.RData
#         results/eventlogs_pre-corona_cleaned.csv
#         ../../thesis/figures/dfg_complete_WFnet_R.pdf
#
# last mod: 2024-03-06
# last mod: 2024-03-23

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

#--------------- (1) Look at broken trace ---------------

datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv",
                     header = TRUE, sep = ";")
datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv",
                     header = TRUE, sep = ";")


datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv",
datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv",
                      colClasses = c("character", "character", "POSIXct",
                                     "POSIXct", "character", "integer",
                                     "numeric", "character", "character",
@@ -84,7 +85,7 @@ dfg <- processmapR::process_map(alog,
                                render = FALSE)

processmapR::export_map(dfg,
                        file_name = paste0("results/processmaps/dfg_complete_R.pdf"),
                        file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"),
                        file_type = "pdf")

rm(tmp)
@@ -109,10 +110,10 @@ dat <- datlogs[as.Date(datlogs$date.start) < "2020-03-13", ]
# Remove corrupt trace
dat <- dat[dat$path != 106098, ]

save(dat, file = "results/haum/eventlogs_pre-corona_cleaned.RData")
save(dat, file = "results/eventlogs_pre-corona_cleaned.RData")

write.table(dat,
            file = "results/haum/eventlogs_pre-corona_cleaned.csv",
            file = "results/eventlogs_pre-corona_cleaned.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

@@ -3,10 +3,10 @@
# content: (1) Load data and create event log
#          (2) Infos for items
#
# input: results/haum/eventlogs_pre-corona_cleaned.csv
# output: results/haum/pn_infos_items.csv
# input: results/eventlogs_pre-corona_cleaned.csv
# output: results/pn_infos_items.csv
#
# last mod: 2024-03-06
# last mod: 2024-03-22

import pm4py
import pandas as pd
@@ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos

#--------------- (1) Load data and create event logs ---------------

dat = pd.read_csv("results/haum/eventlogs_pre-corona_cleaned.csv", sep = ";")
dat = pd.read_csv("results/eventlogs_pre-corona_cleaned.csv", sep = ";")

log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
                                  timestamp_key = "date.start")
@@ -33,5 +33,5 @@ for item in log_path.item.unique().tolist():
eval = eval.sort_index()

# Export
eval.to_csv("results/haum/pn_infos_items.csv", sep = ";")
eval.to_csv("results/pn_infos_items.csv", sep = ";")


@@ -7,15 +7,12 @@
#          (2) Clustering
#          (3) Visualization with pictures
#
# input: results/haum/eventlogs_pre-corona_cleaned.RData
#        results/haum/pn_infos_items.csv
# output: results/haum/eventlogs_pre-corona_item-clusters.csv
#         results/figures/dendrogram_items.pdf
#         results/figures/clustering_items.pdf
#         results/figures/clustering_artworks.pdf
#         results/figures/clustering_artworks.png
# input: results/eventlogs_pre-corona_cleaned.RData
#        results/pn_infos_items.csv
# output: results/eventlogs_pre-corona_item-clusters.csv
|
||||
# ../../thesis/figures/data/clustering_items.RData"
|
||||
#
|
||||
# last mod: 2024-03-08
|
||||
# last mod: 2024-03-22
|
||||
|
||||
# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")
|
||||
|
||||
@ -25,11 +22,11 @@ source("R_helpers.R")
|
||||
|
||||
#--------------- (1.1) Read log event data ---------------
|
||||
|
||||
load("results/haum/eventlogs_pre-corona_cleaned.RData")
|
||||
load("results/eventlogs_pre-corona_cleaned.RData")
|
||||
|
||||
#--------------- (1.2) Read infos for PM for items ---------------
|
||||
|
||||
datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE,
|
||||
datitem <- read.table("results/pn_infos_items.csv", header = TRUE,
|
||||
sep = ";", row.names = 1)
|
||||
|
||||
#--------------- (1.3) Extract additional infos for clustering ---------------
|
||||
@ -96,9 +93,6 @@ mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E", "Black")
|
||||
|
||||
cluster <- cutree(hc, k = k)
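## Condensed view of the clustering pipeline (illustration only; the
## distance matrix and the tree are computed in the lines elided above):
# dist_mat <- dist(scale(df))                 # distances on scaled item features
# hc <- hclust(dist_mat, method = "ward.D2")  # linkage method is an assumption
# cluster <- cutree(hc, k = k)                # as above: cut into k groups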

pdf("results/figures/dendrogram_items.pdf", width = 6.5, height = 5.5, pointsize = 10)
# TODO: Move code for plots to /thesis/

factoextra::fviz_dend(hc, k = k,
                      cex = 0.5,
                      k_colors = mycols,
@ -109,10 +103,6 @@ factoextra::fviz_dend(hc, k = k,
                      #ggtheme = ggplot2::theme_bw()
)

dev.off()

pdf("results/figures/clustering_items.pdf", width = 6.5, height = 5.5, pointsize = 10)

factoextra::fviz_cluster(list(data = df, cluster = cluster),
                         palette = mycols,
                         ellipse.type = "convex",
@ -121,8 +111,6 @@ factoextra::fviz_cluster(list(data = df, cluster = cluster),
                         main = "",
                         ggtheme = ggplot2::theme_bw())

dev.off()

aggregate(cbind(precision, generalizability, nvariants, duration, distance,
                scaleSize, rotationDegree, npaths, ncases, nmoves,
                nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem,
@ -138,18 +126,6 @@ item <- sprintf("%03d", as.numeric(gsub("item_([0-9]{3})", "\\1",
res <- merge(dat, data.frame(item, cluster), by = "item", all.x = TRUE)
res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ]

# Look at clusters
par(mfrow = c(2,2))
vioplot::vioplot(duration ~ cluster, res)
vioplot::vioplot(distance ~ cluster, res)
vioplot::vioplot(scaleSize ~ cluster, res)
vioplot::vioplot(rotationDegree ~ cluster, res)

write.table(res,
            file = "results/haum/eventlogs_pre-corona_item-clusters.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

# DFGs for clusters
res$start <- res$date.start
@ -162,64 +138,31 @@ for (clst in sort(unique(res$cluster))) {
                             activity_id = "event",
                             resource_id = "item",
                             timestamps = c("start", "complete"))

  dfg <- processmapR::process_map(alog,

  processmapR::process_map(alog,
      type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
      sec_nodes = processmapR::frequency("absolute"),
      type_edges = processmapR::frequency("relative", color_edges = "#FF6900"),
      sec_edges = processmapR::frequency("absolute"),
      rankdir = "LR",
      render = FALSE)

  processmapR::export_map(dfg,
                          file_name = paste0("results/processmaps/dfg_cluster", clst, "_R.pdf"),
                          file_type = "pdf",
                          title = paste("DFG Cluster", clst))
      rankdir = "LR")
}

#--------------- (3) Visualization with pictures ---------------

coor_2d <- cmdscale(dist_mat, k = 2)
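## cmdscale() performs classical MDS on the item distances; with
## eig = TRUE the eigenvalues indicate how much of the distance
## structure survives in two dimensions (aside, not in the original):
# mds <- cmdscale(dist_mat, k = 2, eig = TRUE)
# sum(mds$eig[1:2]) / sum(abs(mds$eig))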

# Look at clusters
par(mfrow = c(2,2))
vioplot::vioplot(duration ~ cluster, res)
vioplot::vioplot(distance ~ cluster, res)
vioplot::vioplot(scaleSize ~ cluster, res)
vioplot::vioplot(rotationDegree ~ cluster, res)

items <- sprintf("%03d", as.numeric(rownames(datitem)))
write.table(res,
            file = "results/eventlogs_pre-corona_item-clusters.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 16)
#png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 16, res = 300)
# Save data for plots and tables

par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

plot(coor_2d, type = "n", ylim = c(-3.7, 2.6), xlim = c(-5, 10.5),
     xlab = "", ylab = "")

for (item in items) {

  if (item == "125") {

    pic <- jpeg::readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
                                 item, "/", item, ".jpg"))
  } else {
    pic <- png::readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
                               item, "/", item, ".png"))
  }

  img <- as.raster(pic[,,1:3])

  x <- coor_2d[items == item, 1]
  y <- coor_2d[items == item, 2]

  points(x, y,
         col = mycols[cluster[items == item]],
         cex = 6,
         pch = 15)

  rasterImage(img,
              xleft = x - .45,
              xright = x + .45,
              ybottom = y - .2,
              ytop = y + .2)

}
legend("topright", paste("Cluster", 1:k), col = mycols, pch = 15, bty = "n")

dev.off()
save(hc, k, res, dist_mat, datitem, df,
     file = "../../thesis/figures/data/clustering_items.RData")

@ -5,10 +5,11 @@
#          (3) Select features for navigation behavior
#          (4) Export data frames
#
# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
# output: results/haum/eventlogs_pre-corona_case-clusters.csv
# input:  results/event_logfiles_2024-02-21_16-07-33.csv
# output: results/dataframes_case_2019.RData
#         results/centrality_cases.RData
#
# last mod: 2024-03-08
# last mod: 2024-03-22

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

@ -16,7 +17,7 @@ source("R_helpers.R")

#--------------- (1) Read data ---------------

load("results/haum/eventlogs_pre-corona_cleaned.RData")
load("results/eventlogs_pre-corona_cleaned.RData")

# Select one year to handle number of cases
dat <- dat[as.Date(dat$date.start) > "2018-12-31" &
@ -135,8 +136,8 @@ dattree <- data.frame(case = datcase$case,
)

# centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat)
# save(centrality, file = "results/haum/tmp_centrality.RData")
load("results/haum/tmp_centrality.RData")
# save(centrality, file = "results/centrality_cases.RData")
load("results/centrality_cases.RData")

dattree$BetweenCentrality <- centrality
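## get_centrality() is defined in R_helpers.R, which this diff does not
## show. A plausible sketch only; the real helper may differ: build a
## graph of item transitions per case and average the betweenness.
# get_centrality <- function(case, data) {
#   d <- data[data$case == case, ]
#   edges <- data.frame(from = head(d$item, -1), to = tail(d$item, -1))
#   g <- igraph::graph_from_data_frame(edges)
#   mean(igraph::betweenness(g))
# }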

@ -171,17 +172,5 @@ dattree$AvDurItemNorm <- normalize(dattree$AvDurItem)

#--------------- (4) Export data frames ---------------

save(dat, datcase, dattree, file = "results/haum/dataframes_case_2019.RData")

write.table(datcase,
            file = "results/haum/datcase.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

write.table(datcase,
            file = "results/haum/dattree.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)
save(dat, datcase, dattree, file = "results/dataframes_case_2019.RData")

@ -1,45 +0,0 @@
# 08_infos-clusters.py
#
# content: (1) Load data and create event log
#          (2) Infos for clusters
#          (3) Process maps for clusters
#
# input:  results/haum/eventlogs_pre-corona_item-clusters.csv
# output: results/haum/pn_infos_clusters.csv
#
# last mod: 2024-03-06

import pm4py
import pandas as pd

from python_helpers import eval_pm, pn_infos

#--------------- (1) Load data and create event logs ---------------

dat = pd.read_csv("results/haum/eventlogs_pre-corona_item-clusters.csv", sep = ";")

log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event",
                                  timestamp_key = "date.start")

#--------------- (2) Infos for clusters ---------------

# Merge clusters into data frame
eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
                               "simplicity", "sound", "narcs", "ntrans",
                               "nplaces", "nvariants", "mostfreq"])
for cluster in log_path.grp.unique().tolist():
    eval = pd.concat([eval, pn_infos(log_path, "grp", cluster)])
eval = eval.sort_index()

eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";")

#--------------- (3) Process maps for clusters ---------------

for cluster in log_path.grp.unique().tolist():
    subdata = log_path[log_path.grp == cluster]
    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=0.5)
    pm4py.save_vis_petri_net(subnet, subim, subfm,
                             "results/processmaps/petrinet_cluster" + str(cluster).zfill(3) + ".png")
    bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm)
    pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster_" +
                            str(cluster).zfill(3) + ".png")
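# noise_threshold = 0.5 selects the infrequent variant of the inductive
# miner (IMf), which filters behavior below the threshold before
# building the Petri net.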

@ -4,18 +4,18 @@
#          (2) Clustering
#          (3) Fit tree
#
# input:  results/haum/dataframes_case_2019.RData
# output: results/haum/eventlogs_2019_case-clusters.csv
#         results/haum/tmp_user-navigation.RData
# input:  results/dataframes_case_2019.RData
# output: results/eventlogs_2019_case-clusters.csv
#         results/user-navigation.RData
#         ../../thesis/figures/data/clustering_cases.RData
#
# last mod: 2024-03-15
# last mod: 2024-03-22

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

#--------------- (1) Load data ---------------

load("results/haum/dataframes_case_2019.RData")
load("results/dataframes_case_2019.RData")

#--------------- (2) Clustering ---------------

@ -119,13 +119,13 @@ aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, m
aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median)

write.table(res,
            file = "results/haum/eventlogs_2019_case-clusters.csv",
            file = "results/eventlogs_2019_case-clusters.csv",
            sep = ";",
            quote = FALSE,
            row.names = FALSE)

save(res, dist_mat, hcs, acs, coor_2d, coor_3d,
     file = "results/haum/tmp_user-navigation.RData")
     file = "results/user-navigation.RData")

save(coor_2d, coor_3d, cluster, dattree,
     file = "../../thesis/figures/data/clustering_cases.RData")

@ -6,10 +6,10 @@
#          (4) Clustering
#          (5) Fit tree
#
# input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv
# output: results/haum/eventlogs_pre-corona_case-clusters.csv
# input:  results/event_logfiles_2024-02-21_16-07-33.csv
# output: --
#
# last mod: 2024-03-15
# last mod: 2024-03-22

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

@ -17,7 +17,7 @@ source("R_helpers.R")

#--------------- (1) Read data ---------------

load("results/haum/eventlogs_pre-corona_cleaned.RData")
load("results/eventlogs_pre-corona_cleaned.RData")

# Select one year to handle number of cases
dat <- dat[as.Date(dat$date.start) > "2017-12-31" &
@ -77,8 +77,8 @@ dattree18 <- data.frame(case = datcase18$case,
)

# centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat)
# save(centrality, file = "results/haum/tmp_centrality_2018.RData")
load("results/haum/tmp_centrality_2018.RData")
# save(centrality, file = "results/centrality_2018.RData")
load("results/centrality_2018.RData")

dattree18$BetweenCentrality <- centrality

@ -3,10 +3,11 @@
# content: (1) Read data
#          (2) Investigate variants
#
# input:  results/haum/eventlogs_pre-corona_case-clusters.RData
# output:
# input:  results/eventlogs_pre-corona_cleaned.RData
# output: ../../thesis/figures/freq-traces.pdf
#         ../../thesis/figures/freq-traces_powerlaw.pdf
#
# last mod: 2024-03-12
# last mod: 2024-03-22

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

@ -14,7 +15,7 @@ library(bupaverse)

#--------------- (1) Read data ---------------

load("results/haum/eventlogs_pre-corona_cleaned.RData")
load("results/eventlogs_pre-corona_cleaned.RData")

#--------------- (2) Investigate variants ---------------

@ -27,7 +28,7 @@ alog <- activitylog(dat,
                    resource_id = "path",
                    timestamps = c("start", "complete"))

pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10)
trace_explorer(alog, n_traces = 25)
# --> sequences of artworks are just too rare
dev.off()
@ -54,7 +55,7 @@ y <- as.numeric(tab)
p1 <- lm(log(y) ~ log(x))
pre <- exp(coef(p1)[1]) * x^coef(p1)[2]
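## lm(log(y) ~ log(x)) fits the power law y = a * x^b on the log scale,
## since log(y) = log(a) + b * log(x); `pre` back-transforms the fitted
## values with a = exp(intercept) and b = slope.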

pdf("results/figures/freq-traces_powerlaw.pdf", height = 3.375,
pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375,
    width = 3.375, pointsize = 10)
par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0))

93 code/12_dfgs-case-clusters.R Normal file
@ -0,0 +1,93 @@
# 12_dfgs-case-clusters.R
#
# content: (1) Read data
#          (2) Export DFGs for clusters
#
# input:  results/user-navigation.RData
# output: ../../thesis/figures/dfg_cases_cluster1_R.pdf
#         ../../thesis/figures/dfg_cases_cluster2_R.pdf
#         ../../thesis/figures/dfg_cases_cluster3_R.pdf
#         ../../thesis/figures/dfg_cases_cluster4_R.pdf
#         ../../thesis/figures/dfg_cases_cluster5_R.pdf
#
# last mod: 2024-03-22

# setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code")

#--------------- (1) Read data ---------------

load("results/user-navigation.RData")

dat <- res

dat$start <- as.POSIXct(dat$date.start)
dat$complete <- as.POSIXct(dat$date.stop)

cluster <- 1  # cluster to inspect in the exploratory block below
alog <- bupaR::activitylog(dat[dat$cluster == cluster, ],
                           case_id = "case",
                           activity_id = "item",
                           resource_id = "path",
                           timestamps = c("start", "complete"))

processmapR::trace_explorer(alog, n_traces = 25)

tr <- bupaR::traces(alog)
tab <- table(tr$absolute_frequency)

tab[1] / nrow(tr)
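# Proportion of trace variants in the lowest frequency class (typically
# variants that occur exactly once) among all variants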

alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map()
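# filter_infrequent_flows() keeps only cases whose activity flows occur
# at least min_n times, so the rendered map shows frequent transitions only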

#--------------- (2) Export DFGs for clusters ---------------

mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F")
cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info")

ns <- c(30, 20, 10, 5, 30)

for (i in 1:5) {

  alog <- bupaR::activitylog(dat[dat$cluster == i, ],
                             case_id = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps = c("start", "complete"))

  dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]),
                                  type_nodes = processmapR::frequency("relative", color_scale = "Greys"),
                                  sec_nodes = processmapR::frequency("absolute"),
                                  type_edges = processmapR::frequency("relative", color_edges = mycols[i]),
                                  sec_edges = processmapR::frequency("absolute"),
                                  rankdir = "LR",
                                  render = FALSE)

  processmapR::export_map(dfg,
                          file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"),
                          file_type = "pdf",
                          title = cl_names[i])
}

# cluster 1: 50
# cluster 2: 30 or 20
# cluster 3: 20 - 30
# cluster 4: 5
# cluster 5: 20

get_percent_variants <- function(log, cluster, min_n) {

  alog <- bupaR::activitylog(log[log$cluster == cluster, ],
                             case_id = "case",
                             activity_id = "item",
                             resource_id = "path",
                             timestamps = c("start", "complete"))

  nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) /
    nrow(alog)
}

perc <- numeric(5)

for (i in 1:5) {
  perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i])
}
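
## `perc` now holds, per cluster, the share of cases that survive
## pruning infrequent flows with the thresholds in `ns`
# round(perc, 2)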

@ -1,46 +0,0 @@
# 12_pm-case-clusters.py
#
# content: (1) Load data and create event log
#          (2) Infos for clusters
#          (3) Process maps for clusters
#
# input:  results/haum/eventlogs_pre-corona_item-clusters.csv
# output: results/haum/pn_infos_clusters.csv
#
# last mod: 2024-03-10

import pm4py
import pandas as pd

from python_helpers import eval_pm, pn_infos

#--------------- (1) Load data and create event logs ---------------

dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";")

event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "event_new",
                                   timestamp_key = "date.start")

#--------------- (2) Infos for clusters ---------------

# Merge clusters into data frame
eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability",
                               "simplicity", "sound", "narcs", "ntrans",
                               "nplaces", "nvariants", "mostfreq"])
for cluster in event_log.cluster.unique().tolist():
    eval = pd.concat([eval, pn_infos(event_log, "cluster", cluster)])
eval = eval.sort_index()

eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";")

#--------------- (3) Process maps for clusters ---------------

for cluster in event_log.cluster.unique().tolist():
    subdata = event_log[event_log.cluster == cluster]
    subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = .7)
    pm4py.save_vis_petri_net(subnet, subim, subfm,
                             "results/processmaps/petrinet_cluster" + str(cluster) + "_cases.png")
    bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm)
    pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" +
                            str(cluster) + "_cases.png")
@ -36,8 +36,8 @@ def pn_infos_miner(log, miner):
        net, im, fm = pm4py.discover_petri_net_ilp(log)
    elif miner == "inductive":
        net, im, fm = pm4py.discover_petri_net_inductive(log)
    elif miner == "conformative":
        net, im, fm = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml")
    elif miner == "normative":
        net, im, fm = pm4py.read_pnml("results/normative_petrinet.pnml")

    eval = eval_append(log, net, im, fm)
    eval.index = [miner]