Compare commits
	
		
			10 Commits
		
	
	
		
			26f90a7fec
			...
			cba441f08b
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| cba441f08b | |||
| 24c7967246 | |||
| 9ad5123747 | |||
| 4857f08577 | |||
| 43c7f34645 | |||
| b762968774 | |||
| c0b24a03aa | |||
| bdeb8fb718 | |||
| 6feea5a251 | |||
| b50f52dc6c | 
| @ -8,8 +8,8 @@ | ||||
| #         ../data/metadata/feiertage.csv | ||||
| #         ../data/metadata/schulferien_2016-2018_NI.csv | ||||
| #         ../data/metadata/schulferien_2019-2025_NI.csv | ||||
| # output: raw_logfiles_<timestamp>.csv | ||||
| #         event_logfiles_<timestamp>.csv | ||||
| # output: results/raw_logfiles_<timestamp>.csv | ||||
| #         results/event_logfiles_<timestamp>.csv | ||||
| # | ||||
| # last mod: 2024-02-23, NW | ||||
| 
 | ||||
| @ -29,12 +29,12 @@ folders <- dir(path) | ||||
| datraw <- parse_logfiles(folders, path) | ||||
| # 91 corrupt lines have been found and removed from the data set | ||||
| 
 | ||||
| # datraw <- read.table("results/haum/raw_logfiles_2023-10-25_16-20-45.csv", | ||||
| # datraw <- read.table("results/raw_logfiles_2023-10-25_16-20-45.csv", | ||||
| #                      sep = ";", header = TRUE) | ||||
| 
 | ||||
| ## Export data | ||||
| 
 | ||||
| write.table(datraw, paste0("results/haum/raw_logfiles_", now, ".csv"), | ||||
| write.table(datraw, paste0("results/raw_logfiles_", now, ".csv"), | ||||
|             sep = ";", row.names = FALSE) | ||||
| 
 | ||||
| #--------------- (2) Create event logs --------------- | ||||
| @ -131,6 +131,6 @@ dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] | ||||
| 
 | ||||
| ## Export data | ||||
| 
 | ||||
| write.table(dat2, paste0("results/haum/event_logfiles_", now, ".csv"), | ||||
| write.table(dat2, paste0("results/event_logfiles_", now, ".csv"), | ||||
|             sep = ";", row.names = FALSE) | ||||
| 
 | ||||
|  | ||||
| @ -9,16 +9,30 @@ | ||||
| #           (3.4) Artwork sequences | ||||
| #           (3.5) Topics | ||||
| # | ||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: | ||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||
| #         results/raw_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: results/figures/counts_item.pdf | ||||
| #         results/figures/counts_item_firsttouch.pdf | ||||
| #         results/figures/duration.pdf | ||||
| #         results/figures/heatmap_start.pdf | ||||
| #         results/figures/heatmap_stop.pdf | ||||
| #         results/figures/cases_per_day.pdf | ||||
| #         results/figures/timeMs.pdf | ||||
| #         results/figures/xycoord.pdf | ||||
| #         results/figures/event-dist.pdf | ||||
| #         results/figures/traceexplore_trace-event.pdf | ||||
| #         results/figures/ra_trace-event.pdf | ||||
| #         results/figures/traceexplore_case-event.pdf | ||||
| #         results/figures/bp_tod.pdf | ||||
| #         results/figures/bp_wd.pdf | ||||
| #         results/figures/bp_wds.pdf | ||||
| #         results/figures/bp_corona.pdf | ||||
| #         results/figures/traceexplore_case-artwork_often080.pdf | ||||
| # | ||||
| # last mod: 2024-03-13 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| library(lattice) | ||||
| library(bupaverse) | ||||
| 
 | ||||
| # Overall Research Question: How do museum visitors interact with the | ||||
| # artworks presented on the MTT? | ||||
| 
 | ||||
| @ -27,7 +41,7 @@ library(bupaverse) | ||||
| 
 | ||||
| #--------------- (1) Read data --------------- | ||||
| 
 | ||||
| datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", | ||||
| datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv", | ||||
|                       colClasses = c("character", "character", "POSIXct", | ||||
|                                      "POSIXct", "character", "integer", | ||||
|                                      "numeric", "character", "character", | ||||
| @ -40,7 +54,7 @@ datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard", | ||||
|                                                   "openTopic", | ||||
|                                                   "openPopup")) | ||||
| 
 | ||||
| datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", | ||||
| datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv", | ||||
|                      sep = ";", header = TRUE) | ||||
| 
 | ||||
| # Add weekdays to data frame | ||||
| @ -74,43 +88,263 @@ lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) | ||||
| mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) | ||||
| mat[mat == 0] <- NA | ||||
| image(mat, axes = F, col = rainbow(100)) | ||||
| heatmap(t(mat)) | ||||
| 
 | ||||
| 
 | ||||
| datlogs$start <- datlogs$date.start | ||||
| datlogs$complete <- datlogs$date.stop | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| #--------------- (2) Descriptives --------------- | ||||
| 
 | ||||
| ### Which item gets touched most often? | ||||
| 
 | ||||
| counts_item <- table(datlogs$item) | ||||
| lattice::barchart(counts_item) | ||||
| 
 | ||||
| items <- unique(datlogs$item) | ||||
| #items <- items[!items %in% c("504", "505")] | ||||
| datart <- mtt::extract_artworks(items, | ||||
|                            paste0(items, ".xml"), | ||||
|                            "../data/haum/ContentEyevisit/eyevisit_cards_light/") | ||||
| datart <- datart[order(datart$artwork), ] | ||||
| names(counts_item) <- datart$title | ||||
| 
 | ||||
| pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) | ||||
| par(mai = c(5, .6, .1, .1)) | ||||
| tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), | ||||
|                border = NA, col = "#434F4F") | ||||
| text(tmp, counts_item + 1000, datart$artwork) | ||||
| dev.off() | ||||
| 
 | ||||
| ### Which item gets touched most often first? | ||||
| 
 | ||||
| datcase <- datlogs[!duplicated(datlogs$case), ] | ||||
| counts_case <- table(datcase$item) | ||||
| names(counts_case) <- datart$title | ||||
| tmp <- barplot(counts_case, las = 2, border = "white") | ||||
| text(tmp, counts_case + 100, datart$item) | ||||
| 
 | ||||
| counts <- rbind(counts_item, counts_case) | ||||
| 
 | ||||
| pdf("results/figures/counts_item_firsttouch.pdf", | ||||
|     width = 20, height = 10, pointsize = 10) | ||||
| par(mai = c(5, .6, .1, .1)) | ||||
| 
 | ||||
| tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000)) | ||||
| text(tmp, counts_item + counts_case + 1000, datart$artwork) | ||||
| legend("topleft", c("Total interactions", "First interactions"), | ||||
|        col = c("#434F4F", "#FF6900"), pch = 15, bty = "n") | ||||
| dev.off() | ||||
| 
 | ||||
| ### Which teasers seem to work well? | ||||
| barplot(table(datlogs$topic), las = 2) | ||||
| 
 | ||||
| ### Dwell times/duration | ||||
| datagg <- aggregate(duration ~ event + item, datlogs, mean) | ||||
| datagg$ds <- datagg$duration / 1000   # in secs | ||||
| 
 | ||||
| lattice::bwplot(ds ~ event, datagg) | ||||
| 
 | ||||
| # without aggregation | ||||
| lattice::bwplot(duration / 1000 / 60 ~ event, datlogs) | ||||
| # in min | ||||
| 
 | ||||
| set.seed(1027) | ||||
| 
 | ||||
| pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) | ||||
| lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], | ||||
|        ylab = "Duration in min") | ||||
| dev.off() | ||||
| 
 | ||||
| ### Move events | ||||
| 
 | ||||
| datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, | ||||
|                            y.start, x.stop, y.stop) ~ item, datlogs, | ||||
|                      mean) | ||||
| 
 | ||||
| hist(log(datlogs$scaleSize)) | ||||
| # --> better interpretable on logscale | ||||
| 
 | ||||
| plot(y.start ~ x.start, datmove, pch = 16, col = "gray") | ||||
| points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) | ||||
| 
 | ||||
| 
 | ||||
| plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", | ||||
|      xlim = c(0, 3840), ylim = c(0, 2160)) | ||||
| with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) | ||||
| with(datmove, | ||||
|      arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) | ||||
|      ) | ||||
| abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) | ||||
| 
 | ||||
| datscale <- aggregate(scaleSize ~ item, datlogs, max) | ||||
| plot(y.start ~ x.start, datmove, pch = 16, col = "gray") | ||||
| points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) | ||||
| 
 | ||||
| plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", | ||||
|      xlim = c(0, 3840), ylim = c(0, 2160)) | ||||
| #with(datmove, text(x.stop, y.stop, item)) | ||||
| with(datmove, text(x.start, y.start, item)) | ||||
| 
 | ||||
| 
 | ||||
| ### Are there certain areas of the table that are touched most often? | ||||
| 
 | ||||
| # heatmap | ||||
| cuts <- 100 | ||||
| 
 | ||||
| datlogs$x.start.cat <- cut(datlogs$x.start, cuts) | ||||
| datlogs$y.start.cat <- cut(datlogs$y.start, cuts) | ||||
| 
 | ||||
| tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) | ||||
| 
 | ||||
| colnames(tab) <- paste0("c", 1:cuts) | ||||
| rownames(tab) <- paste0("c", 1:cuts) | ||||
| 
 | ||||
| heatmap(tab, Rowv = NA, Colv = NA) | ||||
| 
 | ||||
| 
 | ||||
| dattrim <- datlogs[datlogs$x.start < 3840 & | ||||
|                    datlogs$x.start > 0 & | ||||
|                    datlogs$y.start < 2160 & | ||||
|                    datlogs$y.start > 0 & | ||||
|                    datlogs$x.stop < 3840 & | ||||
|                    datlogs$x.stop > 0 & | ||||
|                    datlogs$y.stop < 2160 & | ||||
|                    datlogs$y.stop > 0, ] | ||||
| 
 | ||||
| cuts <- 100 # 200, 100, 70, ... | ||||
| 
 | ||||
| # start | ||||
| dattrim$x.start.cat <- cut(dattrim$x.start, cuts) | ||||
| dattrim$y.start.cat <- cut(dattrim$y.start, cuts) | ||||
| 
 | ||||
| tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) | ||||
| colnames(tab.start) <- NULL | ||||
| rownames(tab.start) <- NULL | ||||
| 
 | ||||
| pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) | ||||
| heatmap(tab.start, Rowv = NA, Colv = NA) | ||||
| dev.off() | ||||
| 
 | ||||
| # stop | ||||
| dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) | ||||
| dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) | ||||
| tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) | ||||
| colnames(tab.stop) <- NULL | ||||
| rownames(tab.stop) <- NULL | ||||
| 
 | ||||
| pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) | ||||
| heatmap(tab.stop, Rowv = NA, Colv = NA) | ||||
| dev.off() | ||||
| 
 | ||||
| ### How many visitors per day | ||||
| 
 | ||||
| datlogs$date <- as.Date(datlogs$date.start) | ||||
| 
 | ||||
| # Interactions per day | ||||
| datint <- aggregate(case ~ date, datlogs, length) | ||||
| plot(datint, type = "h") | ||||
| 
 | ||||
| # Cases per day | ||||
| datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) | ||||
| plot(datcase, type = "h") | ||||
| 
 | ||||
| # Paths per day | ||||
| datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) | ||||
| plot(datpath, type = "h") | ||||
| 
 | ||||
| plot(path ~ date, datpath, type = "h", col = "#3CB4DC") | ||||
| points(case ~ date, datcase, type = "h") | ||||
| 
 | ||||
| pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) | ||||
| par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) | ||||
| plot(case ~ date, datcase, type = "h", col = "#434F4F") | ||||
| abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], | ||||
|        col = "#FF6900", lty = 2) | ||||
| text(datcase$date[datcase$date == "2020-03-13"]+470, 80, | ||||
|      "Corona gap from 2020-03-13 to 2022-10-25", | ||||
|      col = "#FF6900") | ||||
| dev.off() | ||||
| 
 | ||||
| ## weird behavior of timeMs | ||||
| 
 | ||||
| pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) | ||||
| #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) | ||||
| #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") | ||||
| lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", | ||||
|        scales = list(x = list(rot = 90), y = list(rot = 90))) | ||||
| dev.off() | ||||
| 
 | ||||
| ## x,y-coordinates out of range | ||||
| 
 | ||||
| set.seed(1522) | ||||
| 
 | ||||
| pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) | ||||
| par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) | ||||
| #par(mfrow = c(1, 2)) | ||||
| plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) | ||||
| abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) | ||||
| #plot(y.stop ~ x.stop, datlogs) | ||||
| #abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) | ||||
| legend("bottomleft", "Random sample of 10,000", bg = "white") | ||||
| legend("topleft", "4K-Display: 3840 x 2160", bg = "white") | ||||
| dev.off() | ||||
| 
 | ||||
| ## moves | ||||
| 
 | ||||
| dat001 <- datlogs[which(datlogs$item == "001"), ] | ||||
| 
 | ||||
| index <- as.numeric(as.factor(dat001$path)) | ||||
| cc <- sample(colors(), 100) | ||||
| 
 | ||||
| plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", | ||||
|      xlim = c(0, 3840), ylim = c(0, 2160)) | ||||
| with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, | ||||
|                             length = .07, col = cc[index])) | ||||
| 
 | ||||
| plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", | ||||
|      xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") | ||||
| points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", | ||||
|      xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, | ||||
|      col = "blue") | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| cc <- sample(colors(), 70) | ||||
| 
 | ||||
| dat1 <- datlogs[!duplicated(datlogs$item), ] | ||||
| dat1 <- dat1[order(dat1$item), ] | ||||
| 
 | ||||
| plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) | ||||
| abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") | ||||
| with(dat1, points(x.start, y.start, col = cc, pch = 16)) | ||||
| with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) | ||||
| with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc)) | ||||
| 
 | ||||
| 
 | ||||
| # How many events per topic, per path, ... | ||||
| # How many popups per artwork? | ||||
| 
 | ||||
| # Number of events per artwork | ||||
| tab <- xtabs( ~ artwork + event, datlogs) | ||||
| tab <- xtabs( ~ item + event, datlogs) | ||||
| addmargins(tab) | ||||
| 
 | ||||
| proportions(tab, margin = "artwork") | ||||
| proportions(tab, margin = "item") | ||||
| proportions(tab, margin = "event") | ||||
| 
 | ||||
| cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] | ||||
| 
 | ||||
| pdf("../figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
| barplot(t(proportions(tab, margin = "artwork")), las = 2, col = cc, | ||||
| barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), | ||||
|         legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) | ||||
| 
 | ||||
| dev.off() | ||||
| 
 | ||||
| #barchart(proportions(tab, margin = "artwork"), las = 2) | ||||
| #lattice::barchart(proportions(tab, margin = "item"), las = 2) | ||||
| 
 | ||||
| # Proportion of events | ||||
| proportions(xtabs( ~ event, datlogs)) | ||||
| # Mean proportion of event per path | ||||
| colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) | ||||
| # Mean proportion of event per artwork | ||||
| colMeans(proportions(tab, margin = "artwork")) | ||||
| # Mean proportion of event per item | ||||
| colMeans(proportions(tab, margin = "item")) | ||||
| 
 | ||||
| # Proportion of unclosed events | ||||
| 
 | ||||
| @ -126,15 +360,17 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs) | ||||
| 
 | ||||
| #--------------- (3.1) Check data quality --------------- | ||||
| 
 | ||||
| alog <- activitylog(datlogs, | ||||
| datlogs$start <- datlogs$date.start | ||||
| datlogs$complete <- datlogs$date.stop | ||||
| 
 | ||||
| alog <- bupaR::activitylog(datlogs, | ||||
|                     case_id = "path", | ||||
|                     activity_id = "event", | ||||
|                     #resource_id = "case", | ||||
|                     resource_id = "artwork", | ||||
|                     resource_id = "item", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| # process_map(alog, frequency("relative")) | ||||
| map_as_pdf(alog, file = "../figures/pm_trace-event.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) | ||||
| 
 | ||||
| @ -147,7 +383,7 @@ processmapR::process_map(alogf,   # alog, | ||||
| 
 | ||||
| alog_no_move <- alog[alog$event != "move", ] | ||||
| 
 | ||||
| pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||
| pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||
| set.seed(1447) | ||||
| processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||
|                             sample(unique(alog_no_move$path), 400),], | ||||
| @ -155,7 +391,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||
|                             abbreviate = T) | ||||
| dev.off() | ||||
| 
 | ||||
| pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||
| pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||
| 
 | ||||
| ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") | ||||
| levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") | ||||
| @ -182,29 +418,29 @@ which.max(table(datlogs$artwork)) | ||||
| which.min(table(datlogs$artwork)) | ||||
| which.min(table(datlogs$artwork)[-c(71,72)]) | ||||
| 
 | ||||
| alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | ||||
| alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | ||||
|                     case_id = "path", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "artwork", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf") | ||||
| processmapR::process_map(alog080, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog087 <- activitylog(datlogs[datlogs$artwork == "087",], | ||||
| alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], | ||||
|                     case_id = "path", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "artwork", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf") | ||||
| processmapR::process_map(alog087, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog504 <- activitylog(datlogs[datlogs$artwork == "504",], | ||||
| alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",], | ||||
|                     case_id = "path", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "artwork", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") | ||||
| processmapR::process_map(alog504, processmapR::frequency("relative")) | ||||
| 
 | ||||
| #--------------- (3.3) Patterns of cases --------------- | ||||
| 
 | ||||
| @ -215,17 +451,17 @@ map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") | ||||
| # ... weekdays for "normal" and school vacation days? | ||||
| # ... pre and post corona? | ||||
| 
 | ||||
| alog <- activitylog(datlogs, | ||||
| alog <- bupaR::activitylog(datlogs, | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog_no_move <- alog[alog$event != "move", ] | ||||
| 
 | ||||
| pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) | ||||
| pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) | ||||
| set.seed(1050) | ||||
| processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||
|                             sample(unique(alog_no_move$path), 300),], | ||||
| @ -233,38 +469,38 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||
|                             abbreviate = T) | ||||
| dev.off() | ||||
| 
 | ||||
| map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf") | ||||
| processmapR::process_map(alog080, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog087 <- activitylog(datlogs[datlogs$artwork == "087",], | ||||
| alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], | ||||
|                        case_id = "case", | ||||
|                        activity_id = "event", | ||||
|                        resource_id = "path", | ||||
|                        timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf") | ||||
| processmapR::process_map(alog087, processmapR::frequency("relative")) | ||||
| 
 | ||||
| ### Mornings and afternoons | ||||
| 
 | ||||
| datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") | ||||
| 
 | ||||
| alog <- activitylog(datlogs[datlogs$tod == "morning",], | ||||
| alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog <- activitylog(datlogs[datlogs$tod == "afternoon",], | ||||
| alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| # Are the same artworks looked at? | ||||
| pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
| barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], | ||||
| @ -277,24 +513,24 @@ dev.off() | ||||
| 
 | ||||
| datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") | ||||
| 
 | ||||
| alog <- activitylog(datlogs[datlogs$wd == "weekend",], | ||||
| alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog <- activitylog(datlogs[datlogs$wd == "weekday",], | ||||
| alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| # Are the same artworks looked at? | ||||
| pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
| barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), | ||||
| @ -308,24 +544,24 @@ dev.off() | ||||
| datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") | ||||
| datlogs$wds[datlogs$wd == "weekend"] <- NA | ||||
| 
 | ||||
| alog <- activitylog(datlogs[which(datlogs$wds == "school"),], | ||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], | ||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| # Are the same artworks looked at? | ||||
| pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
| #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, | ||||
| @ -339,24 +575,24 @@ dev.off() | ||||
| 
 | ||||
| datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") | ||||
| 
 | ||||
| alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], | ||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| alog <- activitylog(datlogs[which(datlogs$corona == "post"),], | ||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "event", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| # Are the same artworks looked at? | ||||
| pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) | ||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
| barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), | ||||
| @ -369,13 +605,13 @@ dev.off() | ||||
| # Order in which artworks are looked at | ||||
| 
 | ||||
| nart <- 5     # select 5 artworks randomly | ||||
| alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], | ||||
| alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "artwork", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| #map <- process_map(alog, frequency("relative")) | ||||
| #map <- processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| ## select cases with Vermeer | ||||
| length(unique(datlogs[datlogs$artwork == "080", "case"])) | ||||
| @ -392,16 +628,16 @@ which(table(tmp$artwork) > 14000) | ||||
| 
 | ||||
| often080 <- names(which(table(tmp$artwork) > 14000)) | ||||
| 
 | ||||
| alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], | ||||
| alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "artwork", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| 
 | ||||
| pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) | ||||
| pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) | ||||
| 
 | ||||
| processmapR::trace_explorer(alog, | ||||
|                             n_traces = 30, type = "frequent", | ||||
| @ -414,40 +650,35 @@ dev.off() | ||||
| # Are there certain topics that people are interested in more than others? | ||||
| # Do these topic distributions differ for comparable artworks? | ||||
| 
 | ||||
| alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], | ||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),], | ||||
|                     case_id = "case", | ||||
|                     activity_id = "topic", | ||||
|                     resource_id = "path", | ||||
|                     timestamps = c("start", "complete")) | ||||
| 
 | ||||
| map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") | ||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| 
 | ||||
| # Order of topics for Vermeer | ||||
| # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | ||||
| # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | ||||
| #                     case_id = "case", | ||||
| #                     activity_id = "topic", | ||||
| #                     resource_id = "path", | ||||
| #                     timestamps = c("start", "complete")) | ||||
| # | ||||
| # map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf") | ||||
| # | ||||
| # | ||||
| # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | ||||
| # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | ||||
| #                        case_id = "case", | ||||
| #                        activity_id = "topicFile", | ||||
| #                        resource_id = "path", | ||||
| #                        timestamps = c("start", "complete")) | ||||
| # | ||||
| # #process_map(alog080, frequency("relative")) | ||||
| # #processmapR::process_map(alog080, processmapR::frequency("relative")) | ||||
| # | ||||
| # # Comparable artwork | ||||
| # alog083 <- activitylog(datlogs[datlogs$artwork == "083",], | ||||
| # alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",], | ||||
| #                        case_id = "case", | ||||
| #                        activity_id = "topic", | ||||
| #                        resource_id = "path", | ||||
| #                        timestamps = c("start", "complete")) | ||||
| # | ||||
| # map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf") | ||||
| 
 | ||||
| # artworks that have the same topics than Vermeer | ||||
| which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | ||||
| @ -458,73 +689,13 @@ which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | ||||
| 
 | ||||
| for (art in c("037", "046", "062", "080", "083", "109")) { | ||||
| 
 | ||||
|   alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], | ||||
|   alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], | ||||
|                       case_id = "case", | ||||
|                       activity_id = "topic", | ||||
|                       resource_id = "path", | ||||
|                       timestamps = c("start", "complete")) | ||||
| 
 | ||||
|   map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf")) | ||||
|   processmapR::process_map(alog, processmapR::frequency("relative")) | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # Angewandte Kunst, Graphik, Gemälde, Kultur | ||||
| 
 | ||||
| 
 | ||||
| c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde", | ||||
|   "Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "", | ||||
|   "Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst", | ||||
|   "Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "", | ||||
|   "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde", | ||||
|   "Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde", | ||||
|   "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde", | ||||
|   "Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur", | ||||
|   "Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde", | ||||
|   "", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde", | ||||
|   "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", | ||||
|   "Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur") | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # BURSTS | ||||
| which.max(table(datlogs$date)) | ||||
| tmp <- datlogs[datlogs$date == "2017-02-12", ] | ||||
| 
 | ||||
| # number of traces per case on 2017-02-12 | ||||
| rowSums(xtabs( ~ case + path, tmp) != 0) | ||||
| 
 | ||||
| range(tmp$start) | ||||
| hours <- lubridate::hour(tmp$start) | ||||
| xtabs( ~ case + hours, tmp) | ||||
| 
 | ||||
| # distribution of cases over the day | ||||
| colSums(xtabs( ~ case + hours, tmp) != 0) | ||||
| barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) | ||||
| 
 | ||||
| aggregate(path ~ case + hours, tmp, length) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| tmp <- aggregate(path ~ case, datlogs, length) | ||||
| tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"]) | ||||
| tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"]) | ||||
| 
 | ||||
| tmp[tmp$path > 200, ] | ||||
| 
 | ||||
| plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) | ||||
| 
 | ||||
| lattice::barchart(path ~ time, tmp, horizontal=F) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ########################################################################### | ||||
| # HELPER | ||||
| 
 | ||||
| map_as_pdf <- function(alog, file, type = frequency("relative")) { | ||||
|   map <- process_map(alog, type = type) | ||||
|   g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw() | ||||
|   rsvg::rsvg_pdf(g, file) | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -1,18 +1,14 @@ | ||||
| # 03_create-petrinet.py | ||||
| # | ||||
| # content: (1) Create places and transitions | ||||
| #          (2) Sequential net | ||||
| #          (3) Concurrent net | ||||
| #          (2) Normative net | ||||
| # | ||||
| # input:  -- | ||||
| # output: results/haum/conformative_petrinet_con.pnml | ||||
| #         results/processmaps/conformative_petrinet_con.png | ||||
| #         results/processmaps/conformative_bpmn_con.png | ||||
| #         results/haum/conformative_petrinet_seq.pnml | ||||
| #         results/processmaps/conformative_petrinet_seq.png | ||||
| #         results/processmaps/conformative_bpmn_seq.png | ||||
| # output: results/normative_petrinet.pnml | ||||
| #         results/processmaps/normative_petrinet.png | ||||
| #         results/processmaps/normative_bpmn.png | ||||
| # | ||||
| # last mod: 2024-03-06 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| import pm4py | ||||
| from pm4py.objects.petri_net.obj import PetriNet, Marking | ||||
| @ -62,93 +58,90 @@ t_16 = PetriNet.Transition("t_16") | ||||
| t_17 = PetriNet.Transition("t_17") | ||||
| t_18 = PetriNet.Transition("t_18") | ||||
| 
 | ||||
| #--------------- (2) Sequential net --------------- | ||||
| #--------------- (2) Normative net --------------- | ||||
| 
 | ||||
| net_seq = PetriNet("new_petri_net") | ||||
| net = PetriNet("new_petri_net") | ||||
| 
 | ||||
| # Add places | ||||
| net_seq.places.add(source) | ||||
| net_seq.places.add(sink) | ||||
| net_seq.places.add(p_1) | ||||
| net_seq.places.add(p_2) | ||||
| net_seq.places.add(p_3) | ||||
| net_seq.places.add(p_4) | ||||
| net_seq.places.add(p_5) | ||||
| net_seq.places.add(p_6) | ||||
| net_seq.places.add(p_7) | ||||
| net_seq.places.add(p_8) | ||||
| net.places.add(source) | ||||
| net.places.add(sink) | ||||
| net.places.add(p_1) | ||||
| net.places.add(p_2) | ||||
| net.places.add(p_3) | ||||
| net.places.add(p_4) | ||||
| net.places.add(p_5) | ||||
| net.places.add(p_6) | ||||
| net.places.add(p_7) | ||||
| net.places.add(p_8) | ||||
| net.places.add(p_9) | ||||
| net.places.add(p_10) | ||||
| net.places.add(p_11) | ||||
| net.places.add(p_12) | ||||
| 
 | ||||
| # Add transitions | ||||
| net_seq.transitions.add(mv) | ||||
| net_seq.transitions.add(fc) | ||||
| net_seq.transitions.add(ot) | ||||
| net_seq.transitions.add(op) | ||||
| net.transitions.add(mv) | ||||
| net.transitions.add(fc) | ||||
| net.transitions.add(ot) | ||||
| net.transitions.add(op) | ||||
| 
 | ||||
| # Add hidden transitions | ||||
| net_seq.transitions.add(t_1) | ||||
| net_seq.transitions.add(t_2) | ||||
| net_seq.transitions.add(t_3) | ||||
| net_seq.transitions.add(t_4) | ||||
| net_seq.transitions.add(t_5) | ||||
| net_seq.transitions.add(t_6) | ||||
| net_seq.transitions.add(t_7) | ||||
| net_seq.transitions.add(t_8) | ||||
| net_seq.transitions.add(t_9) | ||||
| net_seq.transitions.add(t_10) | ||||
| net_seq.transitions.add(t_11) | ||||
| net_seq.transitions.add(t_12) | ||||
| net_seq.transitions.add(t_13) | ||||
| net_seq.transitions.add(t_14) | ||||
| net_seq.transitions.add(t_15) | ||||
| net_seq.transitions.add(t_16) | ||||
| net_seq.transitions.add(t_17) | ||||
| net_seq.transitions.add(t_18) | ||||
| net.transitions.add(t_1) | ||||
| net.transitions.add(t_2) | ||||
| net.transitions.add(t_3) | ||||
| net.transitions.add(t_4) | ||||
| net.transitions.add(t_5) | ||||
| net.transitions.add(t_6) | ||||
| net.transitions.add(t_7) | ||||
| net.transitions.add(t_8) | ||||
| net.transitions.add(t_9) | ||||
| net.transitions.add(t_10) | ||||
| net.transitions.add(t_11) | ||||
| net.transitions.add(t_12) | ||||
| net.transitions.add(t_13) | ||||
| net.transitions.add(t_14) | ||||
| net.transitions.add(t_15) | ||||
| 
 | ||||
| # Add arcs | ||||
| petri_utils.add_arc_from_to(source, t_1, net_seq) | ||||
| petri_utils.add_arc_from_to(source, t_2, net_seq) | ||||
| petri_utils.add_arc_from_to(t_1, p_1, net_seq) | ||||
| petri_utils.add_arc_from_to(t_2, p_2, net_seq) | ||||
| petri_utils.add_arc_from_to(p_1, mv, net_seq) | ||||
| petri_utils.add_arc_from_to(p_2, fc, net_seq) | ||||
| petri_utils.add_arc_from_to(mv, p_3, net_seq) | ||||
| petri_utils.add_arc_from_to(p_3, t_3, net_seq) | ||||
| petri_utils.add_arc_from_to(p_3, t_4, net_seq) | ||||
| petri_utils.add_arc_from_to(p_3, t_5, net_seq) | ||||
| petri_utils.add_arc_from_to(p_3, t_6, net_seq) | ||||
| petri_utils.add_arc_from_to(p_3, t_7, net_seq) | ||||
| petri_utils.add_arc_from_to(t_7, p_1, net_seq) | ||||
| petri_utils.add_arc_from_to(fc, p_4, net_seq) | ||||
| petri_utils.add_arc_from_to(p_4, t_8, net_seq) | ||||
| petri_utils.add_arc_from_to(p_4, t_9, net_seq) | ||||
| petri_utils.add_arc_from_to(p_4, t_10, net_seq) | ||||
| petri_utils.add_arc_from_to(t_9, p_1, net_seq) | ||||
| petri_utils.add_arc_from_to(t_16, p_5, net_seq) | ||||
| petri_utils.add_arc_from_to(t_3, p_2, net_seq) | ||||
| petri_utils.add_arc_from_to(t_5, p_6, net_seq) | ||||
| petri_utils.add_arc_from_to(t_6, p_5, net_seq) | ||||
| petri_utils.add_arc_from_to(p_6, ot, net_seq) | ||||
| petri_utils.add_arc_from_to(p_5, op, net_seq) | ||||
| petri_utils.add_arc_from_to(ot, p_8, net_seq) | ||||
| petri_utils.add_arc_from_to(op, p_7, net_seq) | ||||
| petri_utils.add_arc_from_to(p_8, t_11, net_seq) | ||||
| petri_utils.add_arc_from_to(p_8, t_12, net_seq) | ||||
| petri_utils.add_arc_from_to(p_8, t_13, net_seq) | ||||
| petri_utils.add_arc_from_to(p_8, t_17, net_seq) | ||||
| petri_utils.add_arc_from_to(t_10, p_6, net_seq) | ||||
| petri_utils.add_arc_from_to(t_17, p_6, net_seq) | ||||
| petri_utils.add_arc_from_to(p_7, t_14, net_seq) | ||||
| petri_utils.add_arc_from_to(p_7, t_15, net_seq) | ||||
| petri_utils.add_arc_from_to(p_7, t_16, net_seq) | ||||
| petri_utils.add_arc_from_to(p_7, t_18, net_seq) | ||||
| petri_utils.add_arc_from_to(t_18, p_6, net_seq) | ||||
| petri_utils.add_arc_from_to(t_13, p_5, net_seq) | ||||
| petri_utils.add_arc_from_to(t_15, p_1, net_seq) | ||||
| petri_utils.add_arc_from_to(t_11, p_1, net_seq) | ||||
| petri_utils.add_arc_from_to(t_4, sink, net_seq) | ||||
| petri_utils.add_arc_from_to(t_8, sink, net_seq) | ||||
| petri_utils.add_arc_from_to(t_12, sink, net_seq) | ||||
| petri_utils.add_arc_from_to(t_14, sink, net_seq) | ||||
| petri_utils.add_arc_from_to(source, t_1, net) | ||||
| petri_utils.add_arc_from_to(t_1, p_1, net) | ||||
| petri_utils.add_arc_from_to(t_1, p_2, net) | ||||
| petri_utils.add_arc_from_to(p_1, t_2, net) | ||||
| petri_utils.add_arc_from_to(p_1, t_3, net) | ||||
| petri_utils.add_arc_from_to(t_3, p_5, net) | ||||
| petri_utils.add_arc_from_to(t_2, p_3, net) | ||||
| petri_utils.add_arc_from_to(p_3, mv, net) | ||||
| petri_utils.add_arc_from_to(mv, p_4, net) | ||||
| petri_utils.add_arc_from_to(p_4, t_5, net) | ||||
| petri_utils.add_arc_from_to(p_4, t_6, net) | ||||
| petri_utils.add_arc_from_to(t_6, p_3, net) | ||||
| petri_utils.add_arc_from_to(t_5, p_5, net) | ||||
| petri_utils.add_arc_from_to(p_5, t_15, net) | ||||
| petri_utils.add_arc_from_to(t_15, sink, net) | ||||
| petri_utils.add_arc_from_to(p_2, fc, net) | ||||
| petri_utils.add_arc_from_to(p_2, t_8, net) | ||||
| petri_utils.add_arc_from_to(t_8, p_12, net) | ||||
| petri_utils.add_arc_from_to(p_12, t_15, net) | ||||
| petri_utils.add_arc_from_to(fc, p_6, net) | ||||
| petri_utils.add_arc_from_to(p_6, t_9, net) | ||||
| petri_utils.add_arc_from_to(t_9, p_12, net) | ||||
| petri_utils.add_arc_from_to(p_6, t_4, net) | ||||
| petri_utils.add_arc_from_to(t_4, p_7, net) | ||||
| petri_utils.add_arc_from_to(p_7, ot, net) | ||||
| petri_utils.add_arc_from_to(ot, p_8, net) | ||||
| petri_utils.add_arc_from_to(p_8, t_10, net) | ||||
| petri_utils.add_arc_from_to(t_10, p_11, net) | ||||
| petri_utils.add_arc_from_to(p_11, t_13, net) | ||||
| petri_utils.add_arc_from_to(t_13, p_12, net) | ||||
| petri_utils.add_arc_from_to(p_8, t_7, net) | ||||
| petri_utils.add_arc_from_to(t_7, p_9, net) | ||||
| petri_utils.add_arc_from_to(p_9, op, net) | ||||
| petri_utils.add_arc_from_to(op, p_10, net) | ||||
| petri_utils.add_arc_from_to(p_10, t_11, net) | ||||
| petri_utils.add_arc_from_to(p_10, t_12, net) | ||||
| petri_utils.add_arc_from_to(t_12, p_9, net) | ||||
| petri_utils.add_arc_from_to(t_11, p_11, net) | ||||
| petri_utils.add_arc_from_to(p_11, t_14, net) | ||||
| petri_utils.add_arc_from_to(t_14, p_7, net) | ||||
| 
 | ||||
| # Add tokens | ||||
| initial_marking = Marking() | ||||
| @ -156,116 +149,14 @@ initial_marking[source] = 1 | ||||
| final_marking = Marking() | ||||
| final_marking[sink] = 1 | ||||
| 
 | ||||
| pm4py.view_petri_net(net_seq, initial_marking, final_marking) | ||||
| pm4py.write_pnml(net_seq, initial_marking, final_marking, "results/haum/conformative_petrinet_seq.pnml") | ||||
| pm4py.view_petri_net(net, initial_marking, final_marking) | ||||
| pm4py.write_pnml(net, initial_marking, final_marking, | ||||
|         "results/normative_petrinet.pnml") | ||||
| 
 | ||||
| pm4py.vis.save_vis_petri_net(net_seq, initial_marking, final_marking, | ||||
|         "results/processmaps/conformative_petrinet_seq.png") | ||||
| pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking, | ||||
|         "results/processmaps/normative_petrinet.png") | ||||
| 
 | ||||
| bpmn = pm4py.convert.convert_to_bpmn(net_seq, initial_marking, final_marking) | ||||
| bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking) | ||||
| pm4py.view_bpmn(bpmn) | ||||
| 
 | ||||
| pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_seq.png") | ||||
| 
 | ||||
| 
 | ||||
| #--------------- (3) Concurrent net --------------- | ||||
| 
 | ||||
| net_con = PetriNet("new_petri_net") | ||||
| 
 | ||||
| # Add places | ||||
| net_con.places.add(source) | ||||
| net_con.places.add(sink) | ||||
| net_con.places.add(p_1) | ||||
| net_con.places.add(p_2) | ||||
| net_con.places.add(p_3) | ||||
| net_con.places.add(p_4) | ||||
| net_con.places.add(p_5) | ||||
| net_con.places.add(p_6) | ||||
| net_con.places.add(p_7) | ||||
| net_con.places.add(p_8) | ||||
| net_con.places.add(p_9) | ||||
| net_con.places.add(p_10) | ||||
| net_con.places.add(p_11) | ||||
| net_con.places.add(p_12) | ||||
| 
 | ||||
| # Add transitions | ||||
| net_con.transitions.add(mv) | ||||
| net_con.transitions.add(fc) | ||||
| net_con.transitions.add(ot) | ||||
| net_con.transitions.add(op) | ||||
| 
 | ||||
| # Add hidden transitions | ||||
| net_con.transitions.add(t_1) | ||||
| net_con.transitions.add(t_2) | ||||
| net_con.transitions.add(t_3) | ||||
| net_con.transitions.add(t_4) | ||||
| net_con.transitions.add(t_5) | ||||
| net_con.transitions.add(t_6) | ||||
| net_con.transitions.add(t_7) | ||||
| net_con.transitions.add(t_8) | ||||
| net_con.transitions.add(t_9) | ||||
| net_con.transitions.add(t_10) | ||||
| net_con.transitions.add(t_11) | ||||
| net_con.transitions.add(t_12) | ||||
| net_con.transitions.add(t_13) | ||||
| net_con.transitions.add(t_14) | ||||
| net_con.transitions.add(t_15) | ||||
| 
 | ||||
| # Add arcs | ||||
| petri_utils.add_arc_from_to(source, t_1, net_con) | ||||
| petri_utils.add_arc_from_to(t_1, p_1, net_con) | ||||
| petri_utils.add_arc_from_to(t_1, p_2, net_con) | ||||
| petri_utils.add_arc_from_to(p_1, t_2, net_con) | ||||
| petri_utils.add_arc_from_to(p_1, t_3, net_con) | ||||
| petri_utils.add_arc_from_to(t_3, p_5, net_con) | ||||
| petri_utils.add_arc_from_to(t_2, p_3, net_con) | ||||
| petri_utils.add_arc_from_to(p_3, mv, net_con) | ||||
| petri_utils.add_arc_from_to(mv, p_4, net_con) | ||||
| petri_utils.add_arc_from_to(p_4, t_5, net_con) | ||||
| petri_utils.add_arc_from_to(p_4, t_6, net_con) | ||||
| petri_utils.add_arc_from_to(t_6, p_3, net_con) | ||||
| petri_utils.add_arc_from_to(t_5, p_5, net_con) | ||||
| petri_utils.add_arc_from_to(p_5, t_15, net_con) | ||||
| petri_utils.add_arc_from_to(t_15, sink, net_con) | ||||
| petri_utils.add_arc_from_to(p_2, fc, net_con) | ||||
| petri_utils.add_arc_from_to(p_2, t_8, net_con) | ||||
| petri_utils.add_arc_from_to(t_8, p_12, net_con) | ||||
| petri_utils.add_arc_from_to(p_12, t_15, net_con) | ||||
| petri_utils.add_arc_from_to(fc, p_6, net_con) | ||||
| petri_utils.add_arc_from_to(p_6, t_9, net_con) | ||||
| petri_utils.add_arc_from_to(t_9, p_12, net_con) | ||||
| petri_utils.add_arc_from_to(p_6, t_4, net_con) | ||||
| petri_utils.add_arc_from_to(t_4, p_7, net_con) | ||||
| petri_utils.add_arc_from_to(p_7, ot, net_con) | ||||
| petri_utils.add_arc_from_to(ot, p_8, net_con) | ||||
| petri_utils.add_arc_from_to(p_8, t_10, net_con) | ||||
| petri_utils.add_arc_from_to(t_10, p_11, net_con) | ||||
| petri_utils.add_arc_from_to(p_11, t_13, net_con) | ||||
| petri_utils.add_arc_from_to(t_13, p_12, net_con) | ||||
| petri_utils.add_arc_from_to(p_8, t_7, net_con) | ||||
| petri_utils.add_arc_from_to(t_7, p_9, net_con) | ||||
| petri_utils.add_arc_from_to(p_9, op, net_con) | ||||
| petri_utils.add_arc_from_to(op, p_10, net_con) | ||||
| petri_utils.add_arc_from_to(p_10, t_11, net_con) | ||||
| petri_utils.add_arc_from_to(p_10, t_12, net_con) | ||||
| petri_utils.add_arc_from_to(t_12, p_9, net_con) | ||||
| petri_utils.add_arc_from_to(t_11, p_11, net_con) | ||||
| petri_utils.add_arc_from_to(p_11, t_14, net_con) | ||||
| petri_utils.add_arc_from_to(t_14, p_7, net_con) | ||||
| 
 | ||||
| # Add tokens | ||||
| initial_marking = Marking() | ||||
| initial_marking[source] = 1 | ||||
| final_marking = Marking() | ||||
| final_marking[sink] = 1 | ||||
| 
 | ||||
| pm4py.view_petri_net(net_con, initial_marking, final_marking) | ||||
| pm4py.write_pnml(net_con, initial_marking, final_marking, "results/haum/conformative_petrinet_con.pnml") | ||||
| 
 | ||||
| pm4py.vis.save_vis_petri_net(net_con, initial_marking, final_marking, | ||||
|         "results/processmaps/conformative_petrinet_con.png") | ||||
| 
 | ||||
| bpmn = pm4py.convert.convert_to_bpmn(net_con, initial_marking, final_marking) | ||||
| pm4py.view_bpmn(bpmn) | ||||
| 
 | ||||
| pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_con.png") | ||||
| pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png") | ||||
|  | ||||
| @ -1,25 +1,24 @@ | ||||
| # 04_conformance-checking.py | ||||
| # | ||||
| # content: (1) Load data and create event log | ||||
| #          (2) Infos for items | ||||
| #          (2) Check against normative Petri Net | ||||
| # | ||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||
| #         results/haum/conformative_petrinet_con.pnml | ||||
| # output: results/processmaps/dfg_complete_python.png | ||||
| #         results/eval_all-miners_complete.csv | ||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||
| #         results/normative_petrinet.pnml | ||||
| # output: results/eval_all-miners_complete.csv | ||||
| #         results/eval_all-miners_clean.csv | ||||
| #         results/processmaps/petrinet_conformative.png | ||||
| #         results/processmaps/petrinet_heuristics_clean.png | ||||
| #         results/processmaps/petrinet_alpha_clean.png | ||||
| #         results/processmaps/petrinet_inductive_clean.png | ||||
| #         results/processmaps/petrinet_ilp_clean.png | ||||
| #         results/processmaps/bpmn_conformative.png | ||||
| #         results/processmaps/bpmn_inductive_clean.png | ||||
| #         results/processmaps/bpmn_ilp_clean.png | ||||
| #         results/processmaps/bpmn_alpha_clean.png | ||||
| #         results/processmaps/bpmn_heuristics_clean.png | ||||
| #         ../../thesis/figures/petrinet_normative.png | ||||
| #         ../../thesis/figures/petrinet_heuristics_clean.png | ||||
| #         ../../thesis/figures/petrinet_alpha_clean.png | ||||
| #         ../../thesis/figures/petrinet_inductive_clean.png | ||||
| #         ../../thesis/figures/petrinet_ilp_clean.png | ||||
| #         ../../thesis/figures/bpmn_normative.png | ||||
| #         ../../thesis/figures/bpmn_inductive_clean.png | ||||
| #         ../../thesis/figures/bpmn_ilp_clean.png | ||||
| #         ../../thesis/figures/bpmn_alpha_clean.png | ||||
| #         ../../thesis/figures/bpmn_heuristics_clean.png | ||||
| # | ||||
| # last mod: 2024-03-06 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| import pm4py | ||||
| import pandas as pd | ||||
| @ -29,13 +28,13 @@ from python_helpers import eval_pm, pn_infos_miner | ||||
| 
 | ||||
| #--------------- (1) Load data and create event logs --------------- | ||||
| 
 | ||||
| dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") | ||||
| dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") | ||||
| 
 | ||||
| event_log = pm4py.format_dataframe(dat, case_id = "path", | ||||
|                                    activity_key = "event", | ||||
|                                    timestamp_key = "date.start") | ||||
| 
 | ||||
| ###### Descriptives of log data ###### | ||||
| ## Descriptives of log data | ||||
| 
 | ||||
| # Distribution of events | ||||
| event_log.event.value_counts() | ||||
| @ -57,9 +56,9 @@ len(variants_no_move) | ||||
| sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True)) | ||||
| {k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]} | ||||
| 
 | ||||
| ###### Check against "conformative" Petri Net ###### | ||||
| #--------------- (2) Check against normative Petri Net --------------- | ||||
| 
 | ||||
| basenet, initial_marking, final_marking = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml") | ||||
| basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml") | ||||
| 
 | ||||
| # TBR | ||||
| replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) | ||||
| @ -93,23 +92,13 @@ event_log[event_log["@@case_index"] == index_broken[0]].item.unique().tolist() | ||||
| event_log[event_log["@@case_index"] == index_broken[0]]["fileId.start"].unique().tolist() | ||||
| # --> logging error in raw file | ||||
| 
 | ||||
| ## Footprints                       | ||||
| from pm4py.algo.discovery.footprints import algorithm as footprints_discovery | ||||
| from pm4py.visualization.footprints import visualizer as fp_visualizer | ||||
| fp_log = footprints_discovery.apply(event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG) | ||||
| fp_net = footprints_discovery.apply(basenet, initial_marking, final_marking) | ||||
| gviz = fp_visualizer.apply(fp_net, parameters={fp_visualizer.Variants.SINGLE.value.Parameters.FORMAT: "svg"}) | ||||
| fp_visualizer.view(gviz) | ||||
| 
 | ||||
| efg_graph = pm4py.discover_eventually_follows_graph(event_log) | ||||
| 
 | ||||
| ## Fitting different miners | ||||
| 
 | ||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||
|                                "simplicity", "sound", "narcs", "ntrans", | ||||
|                                "nplaces", "nvariants", "mostfreq"]) | ||||
| 
 | ||||
| for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]: | ||||
| for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: | ||||
|     eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) | ||||
| 
 | ||||
| eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") | ||||
| @ -121,7 +110,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||
|                                      "simplicity", "sound", "narcs", "ntrans", | ||||
|                                      "nplaces", "nvariants", "mostfreq"]) | ||||
| 
 | ||||
| for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]: | ||||
| for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: | ||||
|     eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) | ||||
| 
 | ||||
| eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | ||||
| @ -129,28 +118,27 @@ eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | ||||
| ## Directly-follows graph | ||||
| dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) | ||||
| pm4py.view_dfg(dfg, start_activities, end_activities) | ||||
| pm4py.save_vis_dfg(dfg, start_activities, end_activities, "results/processmaps/dfg_complete_python.png") | ||||
| 
 | ||||
| ## Export petri nets | ||||
| pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, "results/processmaps/petrinet_conformative.png") | ||||
| pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, | ||||
|         "../../thesis/figures/petrinet_normative.png") | ||||
| h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean) | ||||
| pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "results/processmaps/petrinet_heuristics_clean.png") | ||||
| pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "../../thesis/figures/petrinet_heuristics_clean.png") | ||||
| a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean) | ||||
| pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "results/processmaps/petrinet_alpha_clean.png") | ||||
| pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "../../thesis/figures/petrinet_alpha_clean.png") | ||||
| i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean) | ||||
| pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "results/processmaps/petrinet_inductive_clean.png") | ||||
| pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "../../thesis/figures/petrinet_inductive_clean.png") | ||||
| ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean) | ||||
| pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "results/processmaps/petrinet_ilp_clean.png") | ||||
| pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "../../thesis/figures/petrinet_ilp_clean.png") | ||||
| 
 | ||||
| # convert to BPMN | ||||
| base_bpmn = pm4py.convert.convert_to_bpmn(basenet, initial_marking, final_marking) | ||||
| pm4py.vis.save_vis_bpmn(base_bpmn, "results/processmaps/bpmn_conformative.png") | ||||
| pm4py.vis.save_vis_bpmn(base_bpmn, "../../thesis/figures/bpmn_normative.png") | ||||
| i_bpmn = pm4py.convert.convert_to_bpmn(i_net, i_im, i_fm) | ||||
| pm4py.vis.save_vis_bpmn(i_bpmn, "results/processmaps/bpmn_inductive_clean.png") | ||||
| pm4py.vis.save_vis_bpmn(i_bpmn, "../../thesis/figures/bpmn_inductive_clean.png") | ||||
| ilp_bpmn = pm4py.convert.convert_to_bpmn(ilp_net, ilp_im, ilp_fm) | ||||
| pm4py.vis.save_vis_bpmn(ilp_bpmn, "results/processmaps/bpmn_ilp_clean.png") | ||||
| pm4py.vis.save_vis_bpmn(ilp_bpmn, "../../thesis/figures/bpmn_ilp_clean.png") | ||||
| a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm) | ||||
| pm4py.vis.save_vis_bpmn(a_bpmn, "results/processmaps/bpmn_alpha_clean.png") | ||||
| pm4py.vis.save_vis_bpmn(a_bpmn, "../../thesis/figures/bpmn_alpha_clean.png") | ||||
| h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm) | ||||
| pm4py.vis.save_vis_bpmn(h_bpmn, "results/processmaps/bpmn_heuristics_clean.png") | ||||
| 
 | ||||
| pm4py.vis.save_vis_bpmn(h_bpmn, "../../thesis/figures/bpmn_heuristics_clean.png") | ||||
|  | ||||
| @ -5,22 +5,23 @@ | ||||
| #          (3) DFG for complete data | ||||
| #          (4) Export data frame for analyses | ||||
| # | ||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||
| #         results/haum/raw_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: results/haum/eventlogs_pre-corona_cleaned.RData | ||||
| #         results/haum/eventlogs_pre-corona_cleaned.csv | ||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||
| #         results/raw_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: results/eventlogs_pre-corona_cleaned.RData | ||||
| #         results/eventlogs_pre-corona_cleaned.csv | ||||
| #         ../../thesis/figures/dfg_complete_WFnet_R.pdf | ||||
| # | ||||
| # last mod: 2024-03-06 | ||||
| # last mod: 2024-03-23 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| #--------------- (1) Look at broken trace --------------- | ||||
| 
 | ||||
| datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", | ||||
| datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv", | ||||
|                      header = TRUE, sep = ";") | ||||
| 
 | ||||
| 
 | ||||
| datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", | ||||
| datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv", | ||||
|                       colClasses = c("character", "character", "POSIXct", | ||||
|                                      "POSIXct", "character", "integer", | ||||
|                                      "numeric", "character", "character", | ||||
| @ -84,7 +85,7 @@ dfg <- processmapR::process_map(alog, | ||||
|   render     = FALSE) | ||||
| 
 | ||||
| processmapR::export_map(dfg, | ||||
|   file_name = paste0("results/processmaps/dfg_complete_R.pdf"), | ||||
|   file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"), | ||||
|   file_type = "pdf") | ||||
| 
 | ||||
| rm(tmp) | ||||
| @ -109,10 +110,10 @@ dat <- datlogs[as.Date(datlogs$date.start) < "2020-03-13", ] | ||||
| # Remove corrupt trace | ||||
| dat <- dat[dat$path != 106098, ] | ||||
| 
 | ||||
| save(dat, file = "results/haum/eventlogs_pre-corona_cleaned.RData") | ||||
| save(dat, file = "results/eventlogs_pre-corona_cleaned.RData") | ||||
| 
 | ||||
| write.table(dat, | ||||
|             file = "results/haum/eventlogs_pre-corona_cleaned.csv", | ||||
|             file = "results/eventlogs_pre-corona_cleaned.csv", | ||||
|             sep = ";", | ||||
|             quote = FALSE, | ||||
|             row.names = FALSE) | ||||
|  | ||||
| @ -3,10 +3,10 @@ | ||||
| # content: (1) Load data and create event log | ||||
| #          (2) Infos for items | ||||
| # | ||||
| # input:  results/haum/eventlogs_pre-corona_cleaned.csv | ||||
| # output: results/haum/pn_infos_items.csv | ||||
| # input:  results/eventlogs_pre-corona_cleaned.csv | ||||
| # output: results/pn_infos_items.csv | ||||
| # | ||||
| # last mod: 2024-03-06 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| import pm4py | ||||
| import pandas as pd | ||||
| @ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos | ||||
| 
 | ||||
| #--------------- (1) Load data and create event logs --------------- | ||||
| 
 | ||||
| dat = pd.read_csv("results/haum/eventlogs_pre-corona_cleaned", sep = ";") | ||||
| dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";") | ||||
| 
 | ||||
| log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | ||||
|                                   timestamp_key = "date.start") | ||||
| @ -33,5 +33,5 @@ for item in log_path.item.unique().tolist(): | ||||
| eval = eval.sort_index() | ||||
| 
 | ||||
| # Export | ||||
| eval.to_csv("results/haum/pn_infos_items.csv", sep = ";") | ||||
| eval.to_csv("results/pn_infos_items.csv", sep = ";") | ||||
| 
 | ||||
|  | ||||
| @ -7,15 +7,12 @@ | ||||
| #          (2) Clustering | ||||
| #          (3) Visualization with pictures | ||||
| # | ||||
| # input:  results/haum/eventlogs_pre-corona_cleaned.RData | ||||
| #         results/haum/pn_infos_items.csv | ||||
| # output: results/haum/eventlogs_pre-corona_item-clusters.csv | ||||
| #         results/figures/dendrogram_items.pdf | ||||
| #         results/figures/clustering_items.pdf | ||||
| #         results/figures/clustering_artworks.pdf | ||||
| #         results/figures/clustering_artworks.png | ||||
| # input:  results/eventlogs_pre-corona_cleaned.RData | ||||
| #         results/pn_infos_items.csv | ||||
| # output: results/eventlogs_pre-corona_item-clusters.csv | ||||
| #         ../../thesis/figures/data/clustering_items.RData" | ||||
| # | ||||
| # last mod: 2024-03-08 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| @ -25,11 +22,11 @@ source("R_helpers.R") | ||||
| 
 | ||||
| #--------------- (1.1) Read log event data --------------- | ||||
| 
 | ||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||
| load("results/eventlogs_pre-corona_cleaned.RData") | ||||
| 
 | ||||
| #--------------- (1.2) Read infos for PM for items --------------- | ||||
| 
 | ||||
| datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE, | ||||
| datitem <- read.table("results/pn_infos_items.csv", header = TRUE, | ||||
|                       sep = ";", row.names = 1) | ||||
| 
 | ||||
| #--------------- (1.3) Extract additional infos for clustering --------------- | ||||
| @ -96,9 +93,6 @@ mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E", "Black") | ||||
| 
 | ||||
| cluster <- cutree(hc, k = k) | ||||
| 
 | ||||
| pdf("results/figures/dendrogram_items.pdf", width = 6.5, height = 5.5, pointsize = 10) | ||||
| # TODO: Move code for plots to /thesis/ | ||||
| 
 | ||||
| factoextra::fviz_dend(hc, k = k, | ||||
|                       cex = 0.5, | ||||
|                       k_colors = mycols, | ||||
| @ -109,10 +103,6 @@ factoextra::fviz_dend(hc, k = k, | ||||
|                       #ggtheme = ggplot2::theme_bw() | ||||
| ) | ||||
| 
 | ||||
| dev.off() | ||||
| 
 | ||||
| pdf("results/figures/clustering_items.pdf", width = 6.5, height = 5.5, pointsize = 10) | ||||
| 
 | ||||
| factoextra::fviz_cluster(list(data = df, cluster = cluster), | ||||
|                          palette = mycols, | ||||
|                          ellipse.type = "convex", | ||||
| @ -121,8 +111,6 @@ factoextra::fviz_cluster(list(data = df, cluster = cluster), | ||||
|                          main = "", | ||||
|                          ggtheme = ggplot2::theme_bw()) | ||||
| 
 | ||||
| dev.off() | ||||
| 
 | ||||
| aggregate(cbind(precision, generalizability, nvariants, duration, distance, | ||||
|                 scaleSize , rotationDegree, npaths, ncases, nmoves, | ||||
|                 nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem, | ||||
| @ -138,18 +126,6 @@ item <- sprintf("%03d", as.numeric(gsub("item_([0-9]{3})", "\\1", | ||||
| res <- merge(dat, data.frame(item, cluster), by = "item", all.x = TRUE) | ||||
| res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ] | ||||
| 
 | ||||
| # Look at clusters | ||||
| par(mfrow = c(2,2)) | ||||
| vioplot::vioplot(duration ~ cluster, res) | ||||
| vioplot::vioplot(distance ~ cluster, res) | ||||
| vioplot::vioplot(scaleSize ~ cluster, res) | ||||
| vioplot::vioplot(rotationDegree ~ cluster, res) | ||||
| 
 | ||||
| write.table(res, | ||||
|             file = "results/haum/eventlogs_pre-corona_item-clusters.csv", | ||||
|             sep = ";", | ||||
|             quote = FALSE, | ||||
|             row.names = FALSE) | ||||
| 
 | ||||
| # DFGs for clusters | ||||
| res$start <- res$date.start | ||||
| @ -163,63 +139,30 @@ for (clst in sort(unique(res$cluster))) { | ||||
|     resource_id = "item", | ||||
|     timestamps  = c("start", "complete")) | ||||
| 
 | ||||
|   dfg <- processmapR::process_map(alog, | ||||
|   processmapR::process_map(alog, | ||||
|     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), | ||||
|     sec_nodes  = processmapR::frequency("absolute"), | ||||
|     type_edges = processmapR::frequency("relative", color_edges = "#FF6900"), | ||||
|     sec_edges  = processmapR::frequency("absolute"), | ||||
|     rankdir    = "LR", | ||||
|     render     = FALSE) | ||||
| 
 | ||||
|   processmapR::export_map(dfg, | ||||
|     file_name = paste0("results/processmaps/dfg_cluster", clst, "_R.pdf"), | ||||
|     file_type = "pdf", | ||||
|     title     = paste("DFG Cluster", clst)) | ||||
|     rankdir    = "LR") | ||||
| } | ||||
| 
 | ||||
| #--------------- (3) Visualization with pictures --------------- | ||||
| 
 | ||||
| coor_2d <- cmdscale(dist_mat, k = 2) | ||||
| # Look at clusters | ||||
| par(mfrow = c(2,2)) | ||||
| vioplot::vioplot(duration ~ cluster, res) | ||||
| vioplot::vioplot(distance ~ cluster, res) | ||||
| vioplot::vioplot(scaleSize ~ cluster, res) | ||||
| vioplot::vioplot(rotationDegree ~ cluster, res) | ||||
| 
 | ||||
| items <- sprintf("%03d", as.numeric(rownames(datitem))) | ||||
| write.table(res, | ||||
|             file = "results/eventlogs_pre-corona_item-clusters.csv", | ||||
|             sep = ";", | ||||
|             quote = FALSE, | ||||
|             row.names = FALSE) | ||||
| 
 | ||||
| pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 16) | ||||
| #png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 16, res = 300) | ||||
| # Save data for plots and tables | ||||
| 
 | ||||
| par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
| plot(coor_2d, type = "n", ylim = c(-3.7, 2.6), xlim = c(-5, 10.5), | ||||
|      xlab = "", ylab = "") | ||||
| 
 | ||||
| for (item in items) { | ||||
| 
 | ||||
|   if (item == "125") { | ||||
| 
 | ||||
|     pic <- jpeg::readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", | ||||
|                           item, "/", item, ".jpg")) | ||||
|   } else { | ||||
|     pic <- png::readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", | ||||
|                         item, "/", item, ".png")) | ||||
|   } | ||||
| 
 | ||||
|   img <- as.raster(pic[,,1:3]) | ||||
| 
 | ||||
|   x <- coor_2d[items == item, 1] | ||||
|   y <- coor_2d[items == item, 2] | ||||
| 
 | ||||
|   points(x, y, | ||||
|          col = mycols[cluster[items == item]], | ||||
|          cex = 6, | ||||
|          pch = 15) | ||||
| 
 | ||||
|   rasterImage(img, | ||||
|               xleft = x - .45, | ||||
|               xright = x + .45, | ||||
|               ybottom = y - .2, | ||||
|               ytop = y + .2) | ||||
| 
 | ||||
| } | ||||
| legend("topright", paste("Cluster", 1:k), col = mycols, pch = 15, bty = "n") | ||||
| 
 | ||||
| dev.off() | ||||
| save(hc, k, res, dist_mat, datitem, df, | ||||
|      file = "../../thesis/figures/data/clustering_items.RData") | ||||
| 
 | ||||
|  | ||||
| @ -5,10 +5,11 @@ | ||||
| #          (3) Select features for navigation behavior | ||||
| #          (4) Export data frames | ||||
| # | ||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: results/haum/eventlogs_pre-corona_case-clusters.csv | ||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: results/dataframes_case_2019.RData | ||||
| #         results/centrality_cases.RData | ||||
| # | ||||
| # last mod: 2024-03-08 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| @ -16,7 +17,7 @@ source("R_helpers.R") | ||||
| 
 | ||||
| #--------------- (1) Read data --------------- | ||||
| 
 | ||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||
| load("results/eventlogs_pre-corona_cleaned.RData") | ||||
| 
 | ||||
| # Select one year to handle number of cases | ||||
| dat <- dat[as.Date(dat$date.start) > "2018-12-31" & | ||||
| @ -135,8 +136,8 @@ dattree <- data.frame(case = datcase$case, | ||||
| ) | ||||
| 
 | ||||
| # centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat) | ||||
| # save(centrality, file = "results/haum/tmp_centrality.RData") | ||||
| load("results/haum/tmp_centrality.RData") | ||||
| # save(centrality, file = "results/centrality_cases.RData") | ||||
| load("results/centrality_cases.RData") | ||||
| 
 | ||||
| dattree$BetweenCentrality <- centrality | ||||
| 
 | ||||
| @ -171,17 +172,5 @@ dattree$AvDurItemNorm <- normalize(dattree$AvDurItem) | ||||
| 
 | ||||
| #--------------- (4) Export data frames --------------- | ||||
| 
 | ||||
| save(dat, datcase, dattree, file = "results/haum/dataframes_case_2019.RData") | ||||
| 
 | ||||
| write.table(datcase, | ||||
|             file = "results/haum/datcase.csv", | ||||
|             sep = ";", | ||||
|             quote = FALSE, | ||||
|             row.names = FALSE) | ||||
| 
 | ||||
| write.table(datcase, | ||||
|             file = "results/haum/dattree.csv", | ||||
|             sep = ";", | ||||
|             quote = FALSE, | ||||
|             row.names = FALSE) | ||||
| save(dat, datcase, dattree, file = "results/dataframes_case_2019.RData") | ||||
| 
 | ||||
| @ -1,45 +0,0 @@ | ||||
| # 08_infos-clusters.py | ||||
| # | ||||
| # content: (1) Load data and create event log | ||||
| #          (2) Infos for clusters | ||||
| #          (3) Process maps for clusters | ||||
| # | ||||
| # input:  results/haum/eventlogs_pre-corona_item-clusters.csv | ||||
| # output: results/haum/pn_infos_clusters.csv | ||||
| # | ||||
| # last mod: 2024-03-06 | ||||
| 
 | ||||
| import pm4py | ||||
| import pandas as pd | ||||
| 
 | ||||
| from python_helpers import eval_pm, pn_infos | ||||
| 
 | ||||
| #--------------- (1) Load data and create event logs --------------- | ||||
| 
 | ||||
| dat = pd.read_csv("results/haum/eventlogs_pre-corona_item-clusters.csv", sep = ";") | ||||
| 
 | ||||
| log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | ||||
|                                   timestamp_key = "date.start") | ||||
| 
 | ||||
| #--------------- (2) Infos for clusters --------------- | ||||
| 
 | ||||
| # Merge clusters into data frame | ||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||
|                                "simplicity", "sound", "narcs", "ntrans", | ||||
|                                "nplaces", "nvariants", "mostfreq"]) | ||||
| for cluster in log_path.grp.unique().tolist(): | ||||
|     eval = pd.concat([eval, pn_infos(log_path, "grp", cluster)]) | ||||
| eval = eval.sort_index() | ||||
| 
 | ||||
| eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";") | ||||
| 
 | ||||
| #--------------- (3) Process maps for clusters --------------- | ||||
| 
 | ||||
| for cluster in log_path.grp.unique().tolist(): | ||||
|     subdata = log_path[log_path.grp == cluster] | ||||
|     subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=0.5) | ||||
|     pm4py.save_vis_petri_net(subnet, subim, subfm, | ||||
|        "results/processmaps/petrinet_cluster" + str(cluster).zfill(3) + ".png") | ||||
|     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm) | ||||
|     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster_" + | ||||
|             str(cluster).zfill(3) + ".png") | ||||
| @ -4,18 +4,18 @@ | ||||
| #          (2) Clustering | ||||
| #          (3) Fit tree | ||||
| # | ||||
| # input:  results/haum/dataframes_case_2019.RData | ||||
| # output: results/haum/eventlogs_2019_case-clusters.csv | ||||
| #         results/haum/tmp_user-navigation.RData | ||||
| # input:  results/dataframes_case_2019.RData | ||||
| # output: results/eventlogs_2019_case-clusters.csv | ||||
| #         results/user-navigation.RData | ||||
| #         ../../thesis/figures/data/clustering_cases.RData | ||||
| # | ||||
| # last mod: 2024-03-15 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| #--------------- (1) Load data --------------- | ||||
| 
 | ||||
| load("results/haum/dataframes_case_2019.RData") | ||||
| load("results/dataframes_case_2019.RData") | ||||
| 
 | ||||
| #--------------- (2) Clustering --------------- | ||||
| 
 | ||||
| @ -119,13 +119,13 @@ aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, m | ||||
| aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median) | ||||
| 
 | ||||
| write.table(res, | ||||
|             file = "results/haum/eventlogs_2019_case-clusters.csv", | ||||
|             file = "results/eventlogs_2019_case-clusters.csv", | ||||
|             sep = ";", | ||||
|             quote = FALSE, | ||||
|             row.names = FALSE) | ||||
| 
 | ||||
| save(res, dist_mat, hcs, acs, coor_2d, coor_3d, | ||||
|      file = "results/haum/tmp_user-navigation.RData") | ||||
|      file = "results/user-navigation.RData") | ||||
| 
 | ||||
| save(coor_2d, coor_3d, cluster, dattree, | ||||
|      file = "../../thesis/figures/data/clustering_cases.RData") | ||||
| @ -6,10 +6,10 @@ | ||||
| #          (4) Clustering | ||||
| #          (5) Fit tree | ||||
| # | ||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: results/haum/eventlogs_pre-corona_case-clusters.csv | ||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||
| # output: -- | ||||
| # | ||||
| # last mod: 2024-03-15 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| @ -17,7 +17,7 @@ source("R_helpers.R") | ||||
| 
 | ||||
| #--------------- (1) Read data --------------- | ||||
| 
 | ||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||
| load("results/eventlogs_pre-corona_cleaned.RData") | ||||
| 
 | ||||
| # Select one year to handle number of cases | ||||
| dat <- dat[as.Date(dat$date.start) > "2017-12-31" & | ||||
| @ -77,8 +77,8 @@ dattree18 <- data.frame(case = datcase18$case, | ||||
| ) | ||||
| 
 | ||||
| # centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat) | ||||
| # save(centrality, file = "results/haum/tmp_centrality_2018.RData") | ||||
| load("results/haum/tmp_centrality_2018.RData") | ||||
| # save(centrality, file = "results/centrality_2018.RData") | ||||
| load("results/centrality_2018.RData") | ||||
| 
 | ||||
| dattree18$BetweenCentrality <- centrality | ||||
| 
 | ||||
| @ -3,10 +3,11 @@ | ||||
| # content: (1) Read data | ||||
| #          (2) Investigate variants | ||||
| # | ||||
| # input:  results/haum/eventlogs_pre-corona_case-clusters.RData | ||||
| # output:  | ||||
| # input:  results/eventlogs_pre-corona_cleaned.RData | ||||
| # output: ../../thesis/figures/freq-traces.pdf | ||||
| #         ../../thesis/figures/freq-traces_powerlaw.pdf | ||||
| # | ||||
| # last mod: 2024-03-12 | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| @ -14,7 +15,7 @@ library(bupaverse) | ||||
| 
 | ||||
| #--------------- (1) Read data --------------- | ||||
| 
 | ||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||
| load("results/eventlogs_pre-corona_cleaned.RData") | ||||
| 
 | ||||
| #--------------- (2) Investigate variants --------------- | ||||
| 
 | ||||
| @ -27,7 +28,7 @@ alog <- activitylog(dat, | ||||
|                     resource_id = "path", | ||||
|                     timestamps  = c("start", "complete")) | ||||
| 
 | ||||
| pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) | ||||
| pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) | ||||
| trace_explorer(alog, n_traces = 25) | ||||
| # --> sequences of artworks are just too rare | ||||
| dev.off() | ||||
| @ -54,7 +55,7 @@ y <- as.numeric(tab) | ||||
| p1 <- lm(log(y) ~ log(x)) | ||||
| pre <- exp(coef(p1)[1]) * x^coef(p1)[2] | ||||
| 
 | ||||
| pdf("results/figures/freq-traces_powerlaw.pdf", height = 3.375, | ||||
| pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375, | ||||
|     width = 3.375, pointsize = 10) | ||||
| par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||
| 
 | ||||
							
								
								
									
										93
									
								
								code/12_dfgs-case-clusters.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								code/12_dfgs-case-clusters.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,93 @@ | ||||
| # 13_dfgs-case-clusters.R | ||||
| # | ||||
| # content: (1) Read data | ||||
| #          (2) Export DFGs for clusters | ||||
| # | ||||
| # input:  results/user-navigation.RData | ||||
| # output: ../../thesis/figures/dfg_cases_cluster1_R.pdf | ||||
| #         ../../thesis/figures/dfg_cases_cluster2_R.pdf | ||||
| #         ../../thesis/figures/dfg_cases_cluster3_R.pdf | ||||
| #         ../../thesis/figures/dfg_cases_cluster4_R.pdf | ||||
| #         ../../thesis/figures/dfg_cases_cluster5_R.pdf | ||||
| # | ||||
| # last mod: 2024-03-22 | ||||
| 
 | ||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||
| 
 | ||||
| #--------------- (1) Read data --------------- | ||||
| 
 | ||||
| load("results/user-navigation.RData") | ||||
| 
 | ||||
| dat <- res | ||||
| 
 | ||||
| dat$start <- as.POSIXct(dat$date.start) | ||||
| dat$complete <- as.POSIXct(dat$date.stop) | ||||
| 
 | ||||
| alog <- bupaR::activitylog(dat[dat$cluster == cluster, ], | ||||
|                             case_id     = "case", | ||||
|                             activity_id = "item", | ||||
|                             resource_id = "path", | ||||
|                             timestamps  = c("start", "complete")) | ||||
| 
 | ||||
| processmapR::trace_explorer(alog, n_traces = 25) | ||||
| 
 | ||||
| tr <- bupaR::traces(alog) | ||||
| tab <- table(tr$absolute_frequency) | ||||
| 
 | ||||
| tab[1] / nrow(tr) | ||||
| 
 | ||||
| alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map() | ||||
| 
 | ||||
| #--------------- (2) Export DFGs for clusters --------------- | ||||
| 
 | ||||
| mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F") | ||||
| cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info") | ||||
| 
 | ||||
| ns <- c(30, 20, 10, 5, 30) | ||||
| 
 | ||||
| for (i in 1:5) { | ||||
| 
 | ||||
|   alog <- bupaR::activitylog(dat[dat$cluster == i, ], | ||||
|                              case_id     = "case", | ||||
|                              activity_id = "item", | ||||
|                              resource_id = "path", | ||||
|                              timestamps  = c("start", "complete")) | ||||
|    | ||||
|   dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), | ||||
|     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), | ||||
|     sec_nodes  = processmapR::frequency("absolute"), | ||||
|     type_edges = processmapR::frequency("relative", color_edges = mycols[i]), | ||||
|     sec_edges  = processmapR::frequency("absolute"), | ||||
|     rankdir    = "LR", | ||||
|     render     = FALSE) | ||||
| 
 | ||||
|   processmapR::export_map(dfg, | ||||
|     file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"), | ||||
|     file_type = "pdf", | ||||
|     title     = cl_names[i]) | ||||
| } | ||||
| 
 | ||||
| # cluster 1: 50 | ||||
| # cluster 2: 30 o. 20 | ||||
| # cluster 3: 20 - 30 | ||||
| # cluster 4: 5 | ||||
| # cluster 5: 20 | ||||
| 
 | ||||
| get_percent_variants <- function(log, cluster, min_n) { | ||||
| 
 | ||||
|   alog <- bupaR::activitylog(log[log$cluster == cluster, ], | ||||
|                              case_id     = "case", | ||||
|                              activity_id = "item", | ||||
|                              resource_id = "path", | ||||
|                              timestamps  = c("start", "complete")) | ||||
| 
 | ||||
|   nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) / | ||||
|     nrow(alog) | ||||
| } | ||||
| 
 | ||||
| perc <- numeric(5) | ||||
| 
 | ||||
| for (i in 1:5) { | ||||
|   perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i]) | ||||
| } | ||||
| 
 | ||||
| @ -1,46 +0,0 @@ | ||||
| # 12_pm-case-clusters.py | ||||
| # | ||||
| # content: (1) Load data and create event log | ||||
| #          (2) Infos for clusters | ||||
| #          (3) Process maps for clusters | ||||
| # | ||||
| # input:  results/haum/eventlogs_pre-corona_item-clusters.csv | ||||
| # output: results/haum/pn_infos_clusters.csv | ||||
| # | ||||
| # last mod: 2024-03-10 | ||||
| 
 | ||||
| import pm4py | ||||
| import pandas as pd | ||||
| 
 | ||||
| from python_helpers import eval_pm, pn_infos | ||||
| 
 | ||||
| #--------------- (1) Load data and create event logs --------------- | ||||
| 
 | ||||
| dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";") | ||||
| 
 | ||||
| event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "event_new", | ||||
|                                   timestamp_key = "date.start") | ||||
| 
 | ||||
| #--------------- (2) Infos for clusters --------------- | ||||
| 
 | ||||
| # Merge clusters into data frame | ||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||
|                                "simplicity", "sound", "narcs", "ntrans", | ||||
|                                "nplaces", "nvariants", "mostfreq"]) | ||||
| for cluster in event_log.cluster.unique().tolist(): | ||||
|     eval = pd.concat([eval, pn_infos(event_log, "cluster", cluster)]) | ||||
| eval = eval.sort_index() | ||||
| 
 | ||||
| eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";") | ||||
| 
 | ||||
| #--------------- (3) Process maps for clusters --------------- | ||||
| 
 | ||||
| for cluster in event_log.cluster.unique().tolist(): | ||||
|     subdata = event_log[event_log.cluster == cluster] | ||||
|     subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = .7) | ||||
|     pm4py.save_vis_petri_net(subnet, subim, subfm, | ||||
|        "results/processmaps/petrinet_cluster" + str(cluster) + "_cases.png") | ||||
|     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm) | ||||
|     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + | ||||
|             str(cluster) + "_cases.png") | ||||
| 
 | ||||
| @ -36,8 +36,8 @@ def pn_infos_miner(log, miner): | ||||
|         net, im, fm = pm4py.discover_petri_net_ilp(log) | ||||
|     elif miner == "inductive": | ||||
|         net, im, fm = pm4py.discover_petri_net_inductive(log) | ||||
|     elif miner == "conformative": | ||||
|         net, im, fm = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml") | ||||
|     elif miner == "normative": | ||||
|         net, im, fm = pm4py.read_pnml("results/normative_petrinet.pnml") | ||||
| 
 | ||||
|     eval = eval_append(log, net, im, fm) | ||||
|     eval.index = [miner] | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user