Compare commits
	
		
			10 Commits
		
	
	
		
			26f90a7fec
			...
			cba441f08b
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| cba441f08b | |||
| 24c7967246 | |||
| 9ad5123747 | |||
| 4857f08577 | |||
| 43c7f34645 | |||
| b762968774 | |||
| c0b24a03aa | |||
| bdeb8fb718 | |||
| 6feea5a251 | |||
| b50f52dc6c | 
| @ -8,8 +8,8 @@ | |||||||
| #         ../data/metadata/feiertage.csv | #         ../data/metadata/feiertage.csv | ||||||
| #         ../data/metadata/schulferien_2016-2018_NI.csv | #         ../data/metadata/schulferien_2016-2018_NI.csv | ||||||
| #         ../data/metadata/schulferien_2019-2025_NI.csv | #         ../data/metadata/schulferien_2019-2025_NI.csv | ||||||
| # output: raw_logfiles_<timestamp>.csv | # output: results/raw_logfiles_<timestamp>.csv | ||||||
| #         event_logfiles_<timestamp>.csv | #         results/event_logfiles_<timestamp>.csv | ||||||
| # | # | ||||||
| # last mod: 2024-02-23, NW | # last mod: 2024-02-23, NW | ||||||
| 
 | 
 | ||||||
| @ -29,12 +29,12 @@ folders <- dir(path) | |||||||
| datraw <- parse_logfiles(folders, path) | datraw <- parse_logfiles(folders, path) | ||||||
| # 91 corrupt lines have been found and removed from the data set | # 91 corrupt lines have been found and removed from the data set | ||||||
| 
 | 
 | ||||||
| # datraw <- read.table("results/haum/raw_logfiles_2023-10-25_16-20-45.csv", | # datraw <- read.table("results/raw_logfiles_2023-10-25_16-20-45.csv", | ||||||
| #                      sep = ";", header = TRUE) | #                      sep = ";", header = TRUE) | ||||||
| 
 | 
 | ||||||
| ## Export data | ## Export data | ||||||
| 
 | 
 | ||||||
| write.table(datraw, paste0("results/haum/raw_logfiles_", now, ".csv"), | write.table(datraw, paste0("results/raw_logfiles_", now, ".csv"), | ||||||
|             sep = ";", row.names = FALSE) |             sep = ";", row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Create event logs --------------- | #--------------- (2) Create event logs --------------- | ||||||
| @ -131,6 +131,6 @@ dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] | |||||||
| 
 | 
 | ||||||
| ## Export data | ## Export data | ||||||
| 
 | 
 | ||||||
| write.table(dat2, paste0("results/haum/event_logfiles_", now, ".csv"), | write.table(dat2, paste0("results/event_logfiles_", now, ".csv"), | ||||||
|             sep = ";", row.names = FALSE) |             sep = ";", row.names = FALSE) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -9,16 +9,30 @@ | |||||||
| #           (3.4) Artwork sequences | #           (3.4) Artwork sequences | ||||||
| #           (3.5) Topics | #           (3.5) Topics | ||||||
| # | # | ||||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: | #         results/raw_logfiles_2024-02-21_16-07-33.csv | ||||||
|  | # output: results/figures/counts_item.pdf | ||||||
|  | #         results/figures/counts_item_firsttouch.pdf | ||||||
|  | #         results/figures/duration.pdf | ||||||
|  | #         results/figures/heatmap_start.pdf | ||||||
|  | #         results/figures/heatmap_stop.pdf | ||||||
|  | #         results/figures/cases_per_day.pdf | ||||||
|  | #         results/figures/timeMs.pdf | ||||||
|  | #         results/figures/xycoord.pdf | ||||||
|  | #         results/figures/event-dist.pdf | ||||||
|  | #         results/figures/traceexplore_trace-event.pdf | ||||||
|  | #         results/figures/ra_trace-event.pdf | ||||||
|  | #         results/figures/traceexplore_case-event.pdf | ||||||
|  | #         results/figures/bp_tod.pdf | ||||||
|  | #         results/figures/bp_wd.pdf | ||||||
|  | #         results/figures/bp_wds.pdf | ||||||
|  | #         results/figures/bp_corona.pdf | ||||||
|  | #         results/figures/traceexplore_case-artwork_often080.pdf | ||||||
| # | # | ||||||
| # last mod: 2024-03-13 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| library(lattice) |  | ||||||
| library(bupaverse) |  | ||||||
| 
 |  | ||||||
| # Overall Research Question: How do museum visitors interact with the | # Overall Research Question: How do museum visitors interact with the | ||||||
| # artworks presented on the MTT? | # artworks presented on the MTT? | ||||||
| 
 | 
 | ||||||
| @ -27,7 +41,7 @@ library(bupaverse) | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", | datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                       colClasses = c("character", "character", "POSIXct", |                       colClasses = c("character", "character", "POSIXct", | ||||||
|                                      "POSIXct", "character", "integer", |                                      "POSIXct", "character", "integer", | ||||||
|                                      "numeric", "character", "character", |                                      "numeric", "character", "character", | ||||||
| @ -40,7 +54,7 @@ datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard", | |||||||
|                                                   "openTopic", |                                                   "openTopic", | ||||||
|                                                   "openPopup")) |                                                   "openPopup")) | ||||||
| 
 | 
 | ||||||
| datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", | datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                      sep = ";", header = TRUE) |                      sep = ";", header = TRUE) | ||||||
| 
 | 
 | ||||||
| # Add weekdays to data frame | # Add weekdays to data frame | ||||||
| @ -74,43 +88,263 @@ lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) | |||||||
| mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) | mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) | ||||||
| mat[mat == 0] <- NA | mat[mat == 0] <- NA | ||||||
| image(mat, axes = F, col = rainbow(100)) | image(mat, axes = F, col = rainbow(100)) | ||||||
| heatmap(t(mat)) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| datlogs$start <- datlogs$date.start |  | ||||||
| datlogs$complete <- datlogs$date.stop |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Descriptives --------------- | #--------------- (2) Descriptives --------------- | ||||||
|  | 
 | ||||||
|  | ### Which item gets touched most often? | ||||||
|  | 
 | ||||||
|  | counts_item <- table(datlogs$item) | ||||||
|  | lattice::barchart(counts_item) | ||||||
|  | 
 | ||||||
|  | items <- unique(datlogs$item) | ||||||
|  | #items <- items[!items %in% c("504", "505")] | ||||||
|  | datart <- mtt::extract_artworks(items, | ||||||
|  |                            paste0(items, ".xml"), | ||||||
|  |                            "../data/haum/ContentEyevisit/eyevisit_cards_light/") | ||||||
|  | datart <- datart[order(datart$artwork), ] | ||||||
|  | names(counts_item) <- datart$title | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) | ||||||
|  | par(mai = c(5, .6, .1, .1)) | ||||||
|  | tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), | ||||||
|  |                border = NA, col = "#434F4F") | ||||||
|  | text(tmp, counts_item + 1000, datart$artwork) | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ### Which item gets touched most often first? | ||||||
|  | 
 | ||||||
|  | datcase <- datlogs[!duplicated(datlogs$case), ] | ||||||
|  | counts_case <- table(datcase$item) | ||||||
|  | names(counts_case) <- datart$title | ||||||
|  | tmp <- barplot(counts_case, las = 2, border = "white") | ||||||
|  | text(tmp, counts_case + 100, datart$item) | ||||||
|  | 
 | ||||||
|  | counts <- rbind(counts_item, counts_case) | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/counts_item_firsttouch.pdf", | ||||||
|  |     width = 20, height = 10, pointsize = 10) | ||||||
|  | par(mai = c(5, .6, .1, .1)) | ||||||
|  | 
 | ||||||
|  | tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000)) | ||||||
|  | text(tmp, counts_item + counts_case + 1000, datart$artwork) | ||||||
|  | legend("topleft", c("Total interactions", "First interactions"), | ||||||
|  |        col = c("#434F4F", "#FF6900"), pch = 15, bty = "n") | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ### Which teasers seem to work well? | ||||||
|  | barplot(table(datlogs$topic), las = 2) | ||||||
|  | 
 | ||||||
|  | ### Dwell times/duration | ||||||
|  | datagg <- aggregate(duration ~ event + item, datlogs, mean) | ||||||
|  | datagg$ds <- datagg$duration / 1000   # in secs | ||||||
|  | 
 | ||||||
|  | lattice::bwplot(ds ~ event, datagg) | ||||||
|  | 
 | ||||||
|  | # without aggregation | ||||||
|  | lattice::bwplot(duration / 1000 / 60 ~ event, datlogs) | ||||||
|  | # in min | ||||||
|  | 
 | ||||||
|  | set.seed(1027) | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) | ||||||
|  | lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], | ||||||
|  |        ylab = "Duration in min") | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ### Move events | ||||||
|  | 
 | ||||||
|  | datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, | ||||||
|  |                            y.start, x.stop, y.stop) ~ item, datlogs, | ||||||
|  |                      mean) | ||||||
|  | 
 | ||||||
|  | hist(log(datlogs$scaleSize)) | ||||||
|  | # --> better interpretable on logscale | ||||||
|  | 
 | ||||||
|  | plot(y.start ~ x.start, datmove, pch = 16, col = "gray") | ||||||
|  | points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", | ||||||
|  |      xlim = c(0, 3840), ylim = c(0, 2160)) | ||||||
|  | with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) | ||||||
|  | with(datmove, | ||||||
|  |      arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) | ||||||
|  |      ) | ||||||
|  | abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) | ||||||
|  | 
 | ||||||
|  | datscale <- aggregate(scaleSize ~ item, datlogs, max) | ||||||
|  | plot(y.start ~ x.start, datmove, pch = 16, col = "gray") | ||||||
|  | points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) | ||||||
|  | 
 | ||||||
|  | plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", | ||||||
|  |      xlim = c(0, 3840), ylim = c(0, 2160)) | ||||||
|  | #with(datmove, text(x.stop, y.stop, item)) | ||||||
|  | with(datmove, text(x.start, y.start, item)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | ### Are there certain areas of the table that are touched most often? | ||||||
|  | 
 | ||||||
|  | # heatmap | ||||||
|  | cuts <- 100 | ||||||
|  | 
 | ||||||
|  | datlogs$x.start.cat <- cut(datlogs$x.start, cuts) | ||||||
|  | datlogs$y.start.cat <- cut(datlogs$y.start, cuts) | ||||||
|  | 
 | ||||||
|  | tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) | ||||||
|  | 
 | ||||||
|  | colnames(tab) <- paste0("c", 1:cuts) | ||||||
|  | rownames(tab) <- paste0("c", 1:cuts) | ||||||
|  | 
 | ||||||
|  | heatmap(tab, Rowv = NA, Colv = NA) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | dattrim <- datlogs[datlogs$x.start < 3840 & | ||||||
|  |                    datlogs$x.start > 0 & | ||||||
|  |                    datlogs$y.start < 2160 & | ||||||
|  |                    datlogs$y.start > 0 & | ||||||
|  |                    datlogs$x.stop < 3840 & | ||||||
|  |                    datlogs$x.stop > 0 & | ||||||
|  |                    datlogs$y.stop < 2160 & | ||||||
|  |                    datlogs$y.stop > 0, ] | ||||||
|  | 
 | ||||||
|  | cuts <- 100 # 200, 100, 70, ... | ||||||
|  | 
 | ||||||
|  | # start | ||||||
|  | dattrim$x.start.cat <- cut(dattrim$x.start, cuts) | ||||||
|  | dattrim$y.start.cat <- cut(dattrim$y.start, cuts) | ||||||
|  | 
 | ||||||
|  | tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) | ||||||
|  | colnames(tab.start) <- NULL | ||||||
|  | rownames(tab.start) <- NULL | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) | ||||||
|  | heatmap(tab.start, Rowv = NA, Colv = NA) | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | # stop | ||||||
|  | dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) | ||||||
|  | dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) | ||||||
|  | tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) | ||||||
|  | colnames(tab.stop) <- NULL | ||||||
|  | rownames(tab.stop) <- NULL | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) | ||||||
|  | heatmap(tab.stop, Rowv = NA, Colv = NA) | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ### How many visitors per day | ||||||
|  | 
 | ||||||
|  | datlogs$date <- as.Date(datlogs$date.start) | ||||||
|  | 
 | ||||||
|  | # Interactions per day | ||||||
|  | datint <- aggregate(case ~ date, datlogs, length) | ||||||
|  | plot(datint, type = "h") | ||||||
|  | 
 | ||||||
|  | # Cases per day | ||||||
|  | datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) | ||||||
|  | plot(datcase, type = "h") | ||||||
|  | 
 | ||||||
|  | # Paths per day | ||||||
|  | datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) | ||||||
|  | plot(datpath, type = "h") | ||||||
|  | 
 | ||||||
|  | plot(path ~ date, datpath, type = "h", col = "#3CB4DC") | ||||||
|  | points(case ~ date, datcase, type = "h") | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) | ||||||
|  | par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) | ||||||
|  | plot(case ~ date, datcase, type = "h", col = "#434F4F") | ||||||
|  | abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], | ||||||
|  |        col = "#FF6900", lty = 2) | ||||||
|  | text(datcase$date[datcase$date == "2020-03-13"]+470, 80, | ||||||
|  |      "Corona gap from 2020-03-13 to 2022-10-25", | ||||||
|  |      col = "#FF6900") | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ## weird behavior of timeMs | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) | ||||||
|  | #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) | ||||||
|  | #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") | ||||||
|  | lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", | ||||||
|  |        scales = list(x = list(rot = 90), y = list(rot = 90))) | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ## x,y-coordinates out of range | ||||||
|  | 
 | ||||||
|  | set.seed(1522) | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) | ||||||
|  | par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) | ||||||
|  | #par(mfrow = c(1, 2)) | ||||||
|  | plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) | ||||||
|  | abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) | ||||||
|  | #plot(y.stop ~ x.stop, datlogs) | ||||||
|  | #abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) | ||||||
|  | legend("bottomleft", "Random sample of 10,000", bg = "white") | ||||||
|  | legend("topleft", "4K-Display: 3840 x 2160", bg = "white") | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | ## moves | ||||||
|  | 
 | ||||||
|  | dat001 <- datlogs[which(datlogs$item == "001"), ] | ||||||
|  | 
 | ||||||
|  | index <- as.numeric(as.factor(dat001$path)) | ||||||
|  | cc <- sample(colors(), 100) | ||||||
|  | 
 | ||||||
|  | plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", | ||||||
|  |      xlim = c(0, 3840), ylim = c(0, 2160)) | ||||||
|  | with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, | ||||||
|  |                             length = .07, col = cc[index])) | ||||||
|  | 
 | ||||||
|  | plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", | ||||||
|  |      xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") | ||||||
|  | points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", | ||||||
|  |      xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, | ||||||
|  |      col = "blue") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | cc <- sample(colors(), 70) | ||||||
|  | 
 | ||||||
|  | dat1 <- datlogs[!duplicated(datlogs$item), ] | ||||||
|  | dat1 <- dat1[order(dat1$item), ] | ||||||
|  | 
 | ||||||
|  | plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) | ||||||
|  | abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") | ||||||
|  | with(dat1, points(x.start, y.start, col = cc, pch = 16)) | ||||||
|  | with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) | ||||||
|  | with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| # How many events per topic, per path, ... | # How many events per topic, per path, ... | ||||||
| # How many popups per artwork? | # How many popups per artwork? | ||||||
| 
 | 
 | ||||||
| # Number of events per artwork | # Number of events per artwork | ||||||
| tab <- xtabs( ~ artwork + event, datlogs) | tab <- xtabs( ~ item + event, datlogs) | ||||||
| addmargins(tab) | addmargins(tab) | ||||||
| 
 | 
 | ||||||
| proportions(tab, margin = "artwork") | proportions(tab, margin = "item") | ||||||
| proportions(tab, margin = "event") | proportions(tab, margin = "event") | ||||||
| 
 | 
 | ||||||
| cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] | cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] | ||||||
| 
 | 
 | ||||||
| pdf("../figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(t(proportions(tab, margin = "artwork")), las = 2, col = cc, | barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), | ||||||
|         legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) |         legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) | ||||||
| 
 | 
 | ||||||
| dev.off() | dev.off() | ||||||
| 
 | 
 | ||||||
| #barchart(proportions(tab, margin = "artwork"), las = 2) | #lattice::barchart(proportions(tab, margin = "item"), las = 2) | ||||||
| 
 | 
 | ||||||
| # Proportion of events | # Proportion of events | ||||||
| proportions(xtabs( ~ event, datlogs)) | proportions(xtabs( ~ event, datlogs)) | ||||||
| # Mean proportion of event per path | # Mean proportion of event per path | ||||||
| colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) | colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) | ||||||
| # Mean proportion of event per artwork | # Mean proportion of event per item | ||||||
| colMeans(proportions(tab, margin = "artwork")) | colMeans(proportions(tab, margin = "item")) | ||||||
| 
 | 
 | ||||||
| # Proportion of unclosed events | # Proportion of unclosed events | ||||||
| 
 | 
 | ||||||
| @ -126,15 +360,17 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs) | |||||||
| 
 | 
 | ||||||
| #--------------- (3.1) Check data quality --------------- | #--------------- (3.1) Check data quality --------------- | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs, | datlogs$start <- datlogs$date.start | ||||||
|  | datlogs$complete <- datlogs$date.stop | ||||||
|  | 
 | ||||||
|  | alog <- bupaR::activitylog(datlogs, | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     #resource_id = "case", |                     #resource_id = "case", | ||||||
|                     resource_id = "artwork", |                     resource_id = "item", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| # process_map(alog, frequency("relative")) | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| map_as_pdf(alog, file = "../figures/pm_trace-event.pdf") |  | ||||||
| 
 | 
 | ||||||
| alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) | alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) | ||||||
| 
 | 
 | ||||||
| @ -147,7 +383,7 @@ processmapR::process_map(alogf,   # alog, | |||||||
| 
 | 
 | ||||||
| alog_no_move <- alog[alog$event != "move", ] | alog_no_move <- alog[alog$event != "move", ] | ||||||
| 
 | 
 | ||||||
| pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) | pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| set.seed(1447) | set.seed(1447) | ||||||
| processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||||
|                             sample(unique(alog_no_move$path), 400),], |                             sample(unique(alog_no_move$path), 400),], | ||||||
| @ -155,7 +391,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | |||||||
|                             abbreviate = T) |                             abbreviate = T) | ||||||
| dev.off() | dev.off() | ||||||
| 
 | 
 | ||||||
| pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) | pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| 
 | 
 | ||||||
| ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") | ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") | ||||||
| levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") | levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") | ||||||
| @ -182,29 +418,29 @@ which.max(table(datlogs$artwork)) | |||||||
| which.min(table(datlogs$artwork)) | which.min(table(datlogs$artwork)) | ||||||
| which.min(table(datlogs$artwork)[-c(71,72)]) | which.min(table(datlogs$artwork)[-c(71,72)]) | ||||||
| 
 | 
 | ||||||
| alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "artwork", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf") | processmapR::process_map(alog080, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog087 <- activitylog(datlogs[datlogs$artwork == "087",], | alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "artwork", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf") | processmapR::process_map(alog087, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog504 <- activitylog(datlogs[datlogs$artwork == "504",], | alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",], | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "artwork", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") | processmapR::process_map(alog504, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| #--------------- (3.3) Patterns of cases --------------- | #--------------- (3.3) Patterns of cases --------------- | ||||||
| 
 | 
 | ||||||
| @ -215,17 +451,17 @@ map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") | |||||||
| # ... weekdays for "normal" and school vacation days? | # ... weekdays for "normal" and school vacation days? | ||||||
| # ... pre and post corona? | # ... pre and post corona? | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs, | alog <- bupaR::activitylog(datlogs, | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog_no_move <- alog[alog$event != "move", ] | alog_no_move <- alog[alog$event != "move", ] | ||||||
| 
 | 
 | ||||||
| pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) | pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| set.seed(1050) | set.seed(1050) | ||||||
| processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||||
|                             sample(unique(alog_no_move$path), 300),], |                             sample(unique(alog_no_move$path), 300),], | ||||||
| @ -233,38 +469,38 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | |||||||
|                             abbreviate = T) |                             abbreviate = T) | ||||||
| dev.off() | dev.off() | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf") | processmapR::process_map(alog080, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog087 <- activitylog(datlogs[datlogs$artwork == "087",], | alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], | ||||||
|                        case_id = "case", |                        case_id = "case", | ||||||
|                        activity_id = "event", |                        activity_id = "event", | ||||||
|                        resource_id = "path", |                        resource_id = "path", | ||||||
|                        timestamps = c("start", "complete")) |                        timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf") | processmapR::process_map(alog087, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| ### Mornings and afternoons | ### Mornings and afternoons | ||||||
| 
 | 
 | ||||||
| datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") | datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[datlogs$tod == "morning",], | alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[datlogs$tod == "afternoon",], | alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], | barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], | ||||||
| @ -277,24 +513,24 @@ dev.off() | |||||||
| 
 | 
 | ||||||
| datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") | datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[datlogs$wd == "weekend",], | alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[datlogs$wd == "weekday",], | alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), | barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), | ||||||
| @ -308,24 +544,24 @@ dev.off() | |||||||
| datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") | datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") | ||||||
| datlogs$wds[datlogs$wd == "weekend"] <- NA | datlogs$wds[datlogs$wd == "weekend"] <- NA | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[which(datlogs$wds == "school"),], | alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], | alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, | #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, | ||||||
| @ -339,24 +575,24 @@ dev.off() | |||||||
| 
 | 
 | ||||||
| datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") | datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], | alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[which(datlogs$corona == "post"),], | alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), | barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), | ||||||
| @ -369,13 +605,13 @@ dev.off() | |||||||
| # Order in which artworks are looked at | # Order in which artworks are looked at | ||||||
| 
 | 
 | ||||||
| nart <- 5     # select 5 artworks randomly | nart <- 5     # select 5 artworks randomly | ||||||
| alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], | alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "artwork", |                     activity_id = "artwork", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| #map <- process_map(alog, frequency("relative")) | #map <- processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| ## select cases with Vermeer | ## select cases with Vermeer | ||||||
| length(unique(datlogs[datlogs$artwork == "080", "case"])) | length(unique(datlogs[datlogs$artwork == "080", "case"])) | ||||||
| @ -392,16 +628,16 @@ which(table(tmp$artwork) > 14000) | |||||||
| 
 | 
 | ||||||
| often080 <- names(which(table(tmp$artwork) > 14000)) | often080 <- names(which(table(tmp$artwork) > 14000)) | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], | alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "artwork", |                     activity_id = "artwork", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) | pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| 
 | 
 | ||||||
| processmapR::trace_explorer(alog, | processmapR::trace_explorer(alog, | ||||||
|                             n_traces = 30, type = "frequent", |                             n_traces = 30, type = "frequent", | ||||||
| @ -414,40 +650,35 @@ dev.off() | |||||||
| # Are there certain topics that people are interested in more than others? | # Are there certain topics that people are interested in more than others? | ||||||
| # Do these topic distributions differ for comparable artworks? | # Do these topic distributions differ for comparable artworks? | ||||||
| 
 | 
 | ||||||
| alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], | alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "topic", |                     activity_id = "topic", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") | processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| 
 | 
 | ||||||
| # Order of topics for Vermeer | # Order of topics for Vermeer | ||||||
| # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | ||||||
| #                     case_id = "case", | #                     case_id = "case", | ||||||
| #                     activity_id = "topic", | #                     activity_id = "topic", | ||||||
| #                     resource_id = "path", | #                     resource_id = "path", | ||||||
| #                     timestamps = c("start", "complete")) | #                     timestamps = c("start", "complete")) | ||||||
| # | # | ||||||
| # map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf") | # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | ||||||
| # |  | ||||||
| # |  | ||||||
| # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], |  | ||||||
| #                        case_id = "case", | #                        case_id = "case", | ||||||
| #                        activity_id = "topicFile", | #                        activity_id = "topicFile", | ||||||
| #                        resource_id = "path", | #                        resource_id = "path", | ||||||
| #                        timestamps = c("start", "complete")) | #                        timestamps = c("start", "complete")) | ||||||
| # | # | ||||||
| # #process_map(alog080, frequency("relative")) | # #processmapR::process_map(alog080, processmapR::frequency("relative")) | ||||||
| # | # | ||||||
| # # Comparable artwork | # # Comparable artwork | ||||||
| # alog083 <- activitylog(datlogs[datlogs$artwork == "083",], | # alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",], | ||||||
| #                        case_id = "case", | #                        case_id = "case", | ||||||
| #                        activity_id = "topic", | #                        activity_id = "topic", | ||||||
| #                        resource_id = "path", | #                        resource_id = "path", | ||||||
| #                        timestamps = c("start", "complete")) | #                        timestamps = c("start", "complete")) | ||||||
| # |  | ||||||
| # map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf") |  | ||||||
| 
 | 
 | ||||||
| # artworks that have the same topics than Vermeer | # artworks that have the same topics than Vermeer | ||||||
| which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | ||||||
| @ -458,73 +689,13 @@ which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | |||||||
| 
 | 
 | ||||||
| for (art in c("037", "046", "062", "080", "083", "109")) { | for (art in c("037", "046", "062", "080", "083", "109")) { | ||||||
| 
 | 
 | ||||||
|   alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], |   alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], | ||||||
|                       case_id = "case", |                       case_id = "case", | ||||||
|                       activity_id = "topic", |                       activity_id = "topic", | ||||||
|                       resource_id = "path", |                       resource_id = "path", | ||||||
|                       timestamps = c("start", "complete")) |                       timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
|   map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf")) |   processmapR::process_map(alog, processmapR::frequency("relative")) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| # Angewandte Kunst, Graphik, Gemälde, Kultur |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde", |  | ||||||
|   "Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "", |  | ||||||
|   "Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst", |  | ||||||
|   "Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "", |  | ||||||
|   "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde", |  | ||||||
|   "Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde", |  | ||||||
|   "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde", |  | ||||||
|   "Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur", |  | ||||||
|   "Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde", |  | ||||||
|   "", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde", |  | ||||||
|   "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", |  | ||||||
|   "Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur") |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # BURSTS |  | ||||||
| which.max(table(datlogs$date)) |  | ||||||
| tmp <- datlogs[datlogs$date == "2017-02-12", ] |  | ||||||
| 
 |  | ||||||
| # number of traces per case on 2017-02-12 |  | ||||||
| rowSums(xtabs( ~ case + path, tmp) != 0) |  | ||||||
| 
 |  | ||||||
| range(tmp$start) |  | ||||||
| hours <- lubridate::hour(tmp$start) |  | ||||||
| xtabs( ~ case + hours, tmp) |  | ||||||
| 
 |  | ||||||
| # distribution of cases over the day |  | ||||||
| colSums(xtabs( ~ case + hours, tmp) != 0) |  | ||||||
| barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) |  | ||||||
| 
 |  | ||||||
| aggregate(path ~ case + hours, tmp, length) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| tmp <- aggregate(path ~ case, datlogs, length) |  | ||||||
| tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"]) |  | ||||||
| tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"]) |  | ||||||
| 
 |  | ||||||
| tmp[tmp$path > 200, ] |  | ||||||
| 
 |  | ||||||
| plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) |  | ||||||
| 
 |  | ||||||
| lattice::barchart(path ~ time, tmp, horizontal=F) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ########################################################################### |  | ||||||
| # HELPER |  | ||||||
| 
 |  | ||||||
| map_as_pdf <- function(alog, file, type = frequency("relative")) { |  | ||||||
|   map <- process_map(alog, type = type) |  | ||||||
|   g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw() |  | ||||||
|   rsvg::rsvg_pdf(g, file) |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
|  | |||||||
| @ -1,18 +1,14 @@ | |||||||
| # 03_create-petrinet.py | # 03_create-petrinet.py | ||||||
| # | # | ||||||
| # content: (1) Create places and transitions | # content: (1) Create places and transitions | ||||||
| #          (2) Sequential net | #          (2) Normative net | ||||||
| #          (3) Concurrent net |  | ||||||
| # | # | ||||||
| # input:  -- | # input:  -- | ||||||
| # output: results/haum/conformative_petrinet_con.pnml | # output: results/normative_petrinet.pnml | ||||||
| #         results/processmaps/conformative_petrinet_con.png | #         results/processmaps/normative_petrinet.png | ||||||
| #         results/processmaps/conformative_bpmn_con.png | #         results/processmaps/normative_bpmn.png | ||||||
| #         results/haum/conformative_petrinet_seq.pnml |  | ||||||
| #         results/processmaps/conformative_petrinet_seq.png |  | ||||||
| #         results/processmaps/conformative_bpmn_seq.png |  | ||||||
| # | # | ||||||
| # last mod: 2024-03-06 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| import pm4py | import pm4py | ||||||
| from pm4py.objects.petri_net.obj import PetriNet, Marking | from pm4py.objects.petri_net.obj import PetriNet, Marking | ||||||
| @ -62,93 +58,90 @@ t_16 = PetriNet.Transition("t_16") | |||||||
| t_17 = PetriNet.Transition("t_17") | t_17 = PetriNet.Transition("t_17") | ||||||
| t_18 = PetriNet.Transition("t_18") | t_18 = PetriNet.Transition("t_18") | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Sequential net --------------- | #--------------- (2) Normative net --------------- | ||||||
| 
 | 
 | ||||||
| net_seq = PetriNet("new_petri_net") | net = PetriNet("new_petri_net") | ||||||
| 
 | 
 | ||||||
| # Add places | # Add places | ||||||
| net_seq.places.add(source) | net.places.add(source) | ||||||
| net_seq.places.add(sink) | net.places.add(sink) | ||||||
| net_seq.places.add(p_1) | net.places.add(p_1) | ||||||
| net_seq.places.add(p_2) | net.places.add(p_2) | ||||||
| net_seq.places.add(p_3) | net.places.add(p_3) | ||||||
| net_seq.places.add(p_4) | net.places.add(p_4) | ||||||
| net_seq.places.add(p_5) | net.places.add(p_5) | ||||||
| net_seq.places.add(p_6) | net.places.add(p_6) | ||||||
| net_seq.places.add(p_7) | net.places.add(p_7) | ||||||
| net_seq.places.add(p_8) | net.places.add(p_8) | ||||||
|  | net.places.add(p_9) | ||||||
|  | net.places.add(p_10) | ||||||
|  | net.places.add(p_11) | ||||||
|  | net.places.add(p_12) | ||||||
| 
 | 
 | ||||||
| # Add transitions | # Add transitions | ||||||
| net_seq.transitions.add(mv) | net.transitions.add(mv) | ||||||
| net_seq.transitions.add(fc) | net.transitions.add(fc) | ||||||
| net_seq.transitions.add(ot) | net.transitions.add(ot) | ||||||
| net_seq.transitions.add(op) | net.transitions.add(op) | ||||||
| 
 | 
 | ||||||
| # Add hidden transitions | # Add hidden transitions | ||||||
| net_seq.transitions.add(t_1) | net.transitions.add(t_1) | ||||||
| net_seq.transitions.add(t_2) | net.transitions.add(t_2) | ||||||
| net_seq.transitions.add(t_3) | net.transitions.add(t_3) | ||||||
| net_seq.transitions.add(t_4) | net.transitions.add(t_4) | ||||||
| net_seq.transitions.add(t_5) | net.transitions.add(t_5) | ||||||
| net_seq.transitions.add(t_6) | net.transitions.add(t_6) | ||||||
| net_seq.transitions.add(t_7) | net.transitions.add(t_7) | ||||||
| net_seq.transitions.add(t_8) | net.transitions.add(t_8) | ||||||
| net_seq.transitions.add(t_9) | net.transitions.add(t_9) | ||||||
| net_seq.transitions.add(t_10) | net.transitions.add(t_10) | ||||||
| net_seq.transitions.add(t_11) | net.transitions.add(t_11) | ||||||
| net_seq.transitions.add(t_12) | net.transitions.add(t_12) | ||||||
| net_seq.transitions.add(t_13) | net.transitions.add(t_13) | ||||||
| net_seq.transitions.add(t_14) | net.transitions.add(t_14) | ||||||
| net_seq.transitions.add(t_15) | net.transitions.add(t_15) | ||||||
| net_seq.transitions.add(t_16) |  | ||||||
| net_seq.transitions.add(t_17) |  | ||||||
| net_seq.transitions.add(t_18) |  | ||||||
| 
 | 
 | ||||||
| # Add arcs | # Add arcs | ||||||
| petri_utils.add_arc_from_to(source, t_1, net_seq) | petri_utils.add_arc_from_to(source, t_1, net) | ||||||
| petri_utils.add_arc_from_to(source, t_2, net_seq) | petri_utils.add_arc_from_to(t_1, p_1, net) | ||||||
| petri_utils.add_arc_from_to(t_1, p_1, net_seq) | petri_utils.add_arc_from_to(t_1, p_2, net) | ||||||
| petri_utils.add_arc_from_to(t_2, p_2, net_seq) | petri_utils.add_arc_from_to(p_1, t_2, net) | ||||||
| petri_utils.add_arc_from_to(p_1, mv, net_seq) | petri_utils.add_arc_from_to(p_1, t_3, net) | ||||||
| petri_utils.add_arc_from_to(p_2, fc, net_seq) | petri_utils.add_arc_from_to(t_3, p_5, net) | ||||||
| petri_utils.add_arc_from_to(mv, p_3, net_seq) | petri_utils.add_arc_from_to(t_2, p_3, net) | ||||||
| petri_utils.add_arc_from_to(p_3, t_3, net_seq) | petri_utils.add_arc_from_to(p_3, mv, net) | ||||||
| petri_utils.add_arc_from_to(p_3, t_4, net_seq) | petri_utils.add_arc_from_to(mv, p_4, net) | ||||||
| petri_utils.add_arc_from_to(p_3, t_5, net_seq) | petri_utils.add_arc_from_to(p_4, t_5, net) | ||||||
| petri_utils.add_arc_from_to(p_3, t_6, net_seq) | petri_utils.add_arc_from_to(p_4, t_6, net) | ||||||
| petri_utils.add_arc_from_to(p_3, t_7, net_seq) | petri_utils.add_arc_from_to(t_6, p_3, net) | ||||||
| petri_utils.add_arc_from_to(t_7, p_1, net_seq) | petri_utils.add_arc_from_to(t_5, p_5, net) | ||||||
| petri_utils.add_arc_from_to(fc, p_4, net_seq) | petri_utils.add_arc_from_to(p_5, t_15, net) | ||||||
| petri_utils.add_arc_from_to(p_4, t_8, net_seq) | petri_utils.add_arc_from_to(t_15, sink, net) | ||||||
| petri_utils.add_arc_from_to(p_4, t_9, net_seq) | petri_utils.add_arc_from_to(p_2, fc, net) | ||||||
| petri_utils.add_arc_from_to(p_4, t_10, net_seq) | petri_utils.add_arc_from_to(p_2, t_8, net) | ||||||
| petri_utils.add_arc_from_to(t_9, p_1, net_seq) | petri_utils.add_arc_from_to(t_8, p_12, net) | ||||||
| petri_utils.add_arc_from_to(t_16, p_5, net_seq) | petri_utils.add_arc_from_to(p_12, t_15, net) | ||||||
| petri_utils.add_arc_from_to(t_3, p_2, net_seq) | petri_utils.add_arc_from_to(fc, p_6, net) | ||||||
| petri_utils.add_arc_from_to(t_5, p_6, net_seq) | petri_utils.add_arc_from_to(p_6, t_9, net) | ||||||
| petri_utils.add_arc_from_to(t_6, p_5, net_seq) | petri_utils.add_arc_from_to(t_9, p_12, net) | ||||||
| petri_utils.add_arc_from_to(p_6, ot, net_seq) | petri_utils.add_arc_from_to(p_6, t_4, net) | ||||||
| petri_utils.add_arc_from_to(p_5, op, net_seq) | petri_utils.add_arc_from_to(t_4, p_7, net) | ||||||
| petri_utils.add_arc_from_to(ot, p_8, net_seq) | petri_utils.add_arc_from_to(p_7, ot, net) | ||||||
| petri_utils.add_arc_from_to(op, p_7, net_seq) | petri_utils.add_arc_from_to(ot, p_8, net) | ||||||
| petri_utils.add_arc_from_to(p_8, t_11, net_seq) | petri_utils.add_arc_from_to(p_8, t_10, net) | ||||||
| petri_utils.add_arc_from_to(p_8, t_12, net_seq) | petri_utils.add_arc_from_to(t_10, p_11, net) | ||||||
| petri_utils.add_arc_from_to(p_8, t_13, net_seq) | petri_utils.add_arc_from_to(p_11, t_13, net) | ||||||
| petri_utils.add_arc_from_to(p_8, t_17, net_seq) | petri_utils.add_arc_from_to(t_13, p_12, net) | ||||||
| petri_utils.add_arc_from_to(t_10, p_6, net_seq) | petri_utils.add_arc_from_to(p_8, t_7, net) | ||||||
| petri_utils.add_arc_from_to(t_17, p_6, net_seq) | petri_utils.add_arc_from_to(t_7, p_9, net) | ||||||
| petri_utils.add_arc_from_to(p_7, t_14, net_seq) | petri_utils.add_arc_from_to(p_9, op, net) | ||||||
| petri_utils.add_arc_from_to(p_7, t_15, net_seq) | petri_utils.add_arc_from_to(op, p_10, net) | ||||||
| petri_utils.add_arc_from_to(p_7, t_16, net_seq) | petri_utils.add_arc_from_to(p_10, t_11, net) | ||||||
| petri_utils.add_arc_from_to(p_7, t_18, net_seq) | petri_utils.add_arc_from_to(p_10, t_12, net) | ||||||
| petri_utils.add_arc_from_to(t_18, p_6, net_seq) | petri_utils.add_arc_from_to(t_12, p_9, net) | ||||||
| petri_utils.add_arc_from_to(t_13, p_5, net_seq) | petri_utils.add_arc_from_to(t_11, p_11, net) | ||||||
| petri_utils.add_arc_from_to(t_15, p_1, net_seq) | petri_utils.add_arc_from_to(p_11, t_14, net) | ||||||
| petri_utils.add_arc_from_to(t_11, p_1, net_seq) | petri_utils.add_arc_from_to(t_14, p_7, net) | ||||||
| petri_utils.add_arc_from_to(t_4, sink, net_seq) |  | ||||||
| petri_utils.add_arc_from_to(t_8, sink, net_seq) |  | ||||||
| petri_utils.add_arc_from_to(t_12, sink, net_seq) |  | ||||||
| petri_utils.add_arc_from_to(t_14, sink, net_seq) |  | ||||||
| 
 | 
 | ||||||
| # Add tokens | # Add tokens | ||||||
| initial_marking = Marking() | initial_marking = Marking() | ||||||
| @ -156,116 +149,14 @@ initial_marking[source] = 1 | |||||||
| final_marking = Marking() | final_marking = Marking() | ||||||
| final_marking[sink] = 1 | final_marking[sink] = 1 | ||||||
| 
 | 
 | ||||||
| pm4py.view_petri_net(net_seq, initial_marking, final_marking) | pm4py.view_petri_net(net, initial_marking, final_marking) | ||||||
| pm4py.write_pnml(net_seq, initial_marking, final_marking, "results/haum/conformative_petrinet_seq.pnml") | pm4py.write_pnml(net, initial_marking, final_marking, | ||||||
|  |         "results/normative_petrinet.pnml") | ||||||
| 
 | 
 | ||||||
| pm4py.vis.save_vis_petri_net(net_seq, initial_marking, final_marking, | pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking, | ||||||
|         "results/processmaps/conformative_petrinet_seq.png") |         "results/processmaps/normative_petrinet.png") | ||||||
| 
 | 
 | ||||||
| bpmn = pm4py.convert.convert_to_bpmn(net_seq, initial_marking, final_marking) | bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking) | ||||||
| pm4py.view_bpmn(bpmn) | pm4py.view_bpmn(bpmn) | ||||||
| 
 | 
 | ||||||
| pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_seq.png") | pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png") | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| #--------------- (3) Concurrent net --------------- |  | ||||||
| 
 |  | ||||||
| net_con = PetriNet("new_petri_net") |  | ||||||
| 
 |  | ||||||
| # Add places |  | ||||||
| net_con.places.add(source) |  | ||||||
| net_con.places.add(sink) |  | ||||||
| net_con.places.add(p_1) |  | ||||||
| net_con.places.add(p_2) |  | ||||||
| net_con.places.add(p_3) |  | ||||||
| net_con.places.add(p_4) |  | ||||||
| net_con.places.add(p_5) |  | ||||||
| net_con.places.add(p_6) |  | ||||||
| net_con.places.add(p_7) |  | ||||||
| net_con.places.add(p_8) |  | ||||||
| net_con.places.add(p_9) |  | ||||||
| net_con.places.add(p_10) |  | ||||||
| net_con.places.add(p_11) |  | ||||||
| net_con.places.add(p_12) |  | ||||||
| 
 |  | ||||||
| # Add transitions |  | ||||||
| net_con.transitions.add(mv) |  | ||||||
| net_con.transitions.add(fc) |  | ||||||
| net_con.transitions.add(ot) |  | ||||||
| net_con.transitions.add(op) |  | ||||||
| 
 |  | ||||||
| # Add hidden transitions |  | ||||||
| net_con.transitions.add(t_1) |  | ||||||
| net_con.transitions.add(t_2) |  | ||||||
| net_con.transitions.add(t_3) |  | ||||||
| net_con.transitions.add(t_4) |  | ||||||
| net_con.transitions.add(t_5) |  | ||||||
| net_con.transitions.add(t_6) |  | ||||||
| net_con.transitions.add(t_7) |  | ||||||
| net_con.transitions.add(t_8) |  | ||||||
| net_con.transitions.add(t_9) |  | ||||||
| net_con.transitions.add(t_10) |  | ||||||
| net_con.transitions.add(t_11) |  | ||||||
| net_con.transitions.add(t_12) |  | ||||||
| net_con.transitions.add(t_13) |  | ||||||
| net_con.transitions.add(t_14) |  | ||||||
| net_con.transitions.add(t_15) |  | ||||||
| 
 |  | ||||||
| # Add arcs |  | ||||||
| petri_utils.add_arc_from_to(source, t_1, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_1, p_1, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_1, p_2, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_1, t_2, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_1, t_3, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_3, p_5, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_2, p_3, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_3, mv, net_con) |  | ||||||
| petri_utils.add_arc_from_to(mv, p_4, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_4, t_5, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_4, t_6, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_6, p_3, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_5, p_5, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_5, t_15, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_15, sink, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_2, fc, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_2, t_8, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_8, p_12, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_12, t_15, net_con) |  | ||||||
| petri_utils.add_arc_from_to(fc, p_6, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_6, t_9, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_9, p_12, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_6, t_4, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_4, p_7, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_7, ot, net_con) |  | ||||||
| petri_utils.add_arc_from_to(ot, p_8, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_8, t_10, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_10, p_11, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_11, t_13, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_13, p_12, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_8, t_7, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_7, p_9, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_9, op, net_con) |  | ||||||
| petri_utils.add_arc_from_to(op, p_10, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_10, t_11, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_10, t_12, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_12, p_9, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_11, p_11, net_con) |  | ||||||
| petri_utils.add_arc_from_to(p_11, t_14, net_con) |  | ||||||
| petri_utils.add_arc_from_to(t_14, p_7, net_con) |  | ||||||
| 
 |  | ||||||
| # Add tokens |  | ||||||
| initial_marking = Marking() |  | ||||||
| initial_marking[source] = 1 |  | ||||||
| final_marking = Marking() |  | ||||||
| final_marking[sink] = 1 |  | ||||||
| 
 |  | ||||||
| pm4py.view_petri_net(net_con, initial_marking, final_marking) |  | ||||||
| pm4py.write_pnml(net_con, initial_marking, final_marking, "results/haum/conformative_petrinet_con.pnml") |  | ||||||
| 
 |  | ||||||
| pm4py.vis.save_vis_petri_net(net_con, initial_marking, final_marking, |  | ||||||
|         "results/processmaps/conformative_petrinet_con.png") |  | ||||||
| 
 |  | ||||||
| bpmn = pm4py.convert.convert_to_bpmn(net_con, initial_marking, final_marking) |  | ||||||
| pm4py.view_bpmn(bpmn) |  | ||||||
| 
 |  | ||||||
| pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_con.png") |  | ||||||
|  | |||||||
| @ -1,25 +1,24 @@ | |||||||
| # 04_conformance-checking.py | # 04_conformance-checking.py | ||||||
| # | # | ||||||
| # content: (1) Load data and create event log | # content: (1) Load data and create event log | ||||||
| #          (2) Infos for items | #          (2) Check against normative Petri Net | ||||||
| # | # | ||||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| #         results/haum/conformative_petrinet_con.pnml | #         results/normative_petrinet.pnml | ||||||
| # output: results/processmaps/dfg_complete_python.png | # output: results/eval_all-miners_complete.csv | ||||||
| #         results/eval_all-miners_complete.csv |  | ||||||
| #         results/eval_all-miners_clean.csv | #         results/eval_all-miners_clean.csv | ||||||
| #         results/processmaps/petrinet_conformative.png | #         ../../thesis/figures/petrinet_normative.png | ||||||
| #         results/processmaps/petrinet_heuristics_clean.png | #         ../../thesis/figures/petrinet_heuristics_clean.png | ||||||
| #         results/processmaps/petrinet_alpha_clean.png | #         ../../thesis/figures/petrinet_alpha_clean.png | ||||||
| #         results/processmaps/petrinet_inductive_clean.png | #         ../../thesis/figures/petrinet_inductive_clean.png | ||||||
| #         results/processmaps/petrinet_ilp_clean.png | #         ../../thesis/figures/petrinet_ilp_clean.png | ||||||
| #         results/processmaps/bpmn_conformative.png | #         ../../thesis/figures/bpmn_normative.png | ||||||
| #         results/processmaps/bpmn_inductive_clean.png | #         ../../thesis/figures/bpmn_inductive_clean.png | ||||||
| #         results/processmaps/bpmn_ilp_clean.png | #         ../../thesis/figures/bpmn_ilp_clean.png | ||||||
| #         results/processmaps/bpmn_alpha_clean.png | #         ../../thesis/figures/bpmn_alpha_clean.png | ||||||
| #         results/processmaps/bpmn_heuristics_clean.png | #         ../../thesis/figures/bpmn_heuristics_clean.png | ||||||
| # | # | ||||||
| # last mod: 2024-03-06 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| import pm4py | import pm4py | ||||||
| import pandas as pd | import pandas as pd | ||||||
| @ -29,13 +28,13 @@ from python_helpers import eval_pm, pn_infos_miner | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Load data and create event logs --------------- | #--------------- (1) Load data and create event logs --------------- | ||||||
| 
 | 
 | ||||||
| dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") | dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") | ||||||
| 
 | 
 | ||||||
| event_log = pm4py.format_dataframe(dat, case_id = "path", | event_log = pm4py.format_dataframe(dat, case_id = "path", | ||||||
|                                    activity_key = "event", |                                    activity_key = "event", | ||||||
|                                    timestamp_key = "date.start") |                                    timestamp_key = "date.start") | ||||||
| 
 | 
 | ||||||
| ###### Descriptives of log data ###### | ## Descriptives of log data | ||||||
| 
 | 
 | ||||||
| # Distribution of events | # Distribution of events | ||||||
| event_log.event.value_counts() | event_log.event.value_counts() | ||||||
| @ -57,9 +56,9 @@ len(variants_no_move) | |||||||
| sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True)) | sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True)) | ||||||
| {k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]} | {k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]} | ||||||
| 
 | 
 | ||||||
| ###### Check against "conformative" Petri Net ###### | #--------------- (2) Check against normative Petri Net --------------- | ||||||
| 
 | 
 | ||||||
| basenet, initial_marking, final_marking = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml") | basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml") | ||||||
| 
 | 
 | ||||||
| # TBR | # TBR | ||||||
| replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) | replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) | ||||||
| @ -93,23 +92,13 @@ event_log[event_log["@@case_index"] == index_broken[0]].item.unique().tolist() | |||||||
| event_log[event_log["@@case_index"] == index_broken[0]]["fileId.start"].unique().tolist() | event_log[event_log["@@case_index"] == index_broken[0]]["fileId.start"].unique().tolist() | ||||||
| # --> logging error in raw file | # --> logging error in raw file | ||||||
| 
 | 
 | ||||||
| ## Footprints                       |  | ||||||
| from pm4py.algo.discovery.footprints import algorithm as footprints_discovery |  | ||||||
| from pm4py.visualization.footprints import visualizer as fp_visualizer |  | ||||||
| fp_log = footprints_discovery.apply(event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG) |  | ||||||
| fp_net = footprints_discovery.apply(basenet, initial_marking, final_marking) |  | ||||||
| gviz = fp_visualizer.apply(fp_net, parameters={fp_visualizer.Variants.SINGLE.value.Parameters.FORMAT: "svg"}) |  | ||||||
| fp_visualizer.view(gviz) |  | ||||||
| 
 |  | ||||||
| efg_graph = pm4py.discover_eventually_follows_graph(event_log) |  | ||||||
| 
 |  | ||||||
| ## Fitting different miners | ## Fitting different miners | ||||||
| 
 | 
 | ||||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||||
|                                "simplicity", "sound", "narcs", "ntrans", |                                "simplicity", "sound", "narcs", "ntrans", | ||||||
|                                "nplaces", "nvariants", "mostfreq"]) |                                "nplaces", "nvariants", "mostfreq"]) | ||||||
| 
 | 
 | ||||||
| for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]: | for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: | ||||||
|     eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) |     eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) | ||||||
| 
 | 
 | ||||||
| eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") | eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") | ||||||
| @ -121,7 +110,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | |||||||
|                                      "simplicity", "sound", "narcs", "ntrans", |                                      "simplicity", "sound", "narcs", "ntrans", | ||||||
|                                      "nplaces", "nvariants", "mostfreq"]) |                                      "nplaces", "nvariants", "mostfreq"]) | ||||||
| 
 | 
 | ||||||
| for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]: | for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: | ||||||
|     eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) |     eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) | ||||||
| 
 | 
 | ||||||
| eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | ||||||
| @ -129,28 +118,27 @@ eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | |||||||
| ## Directly-follows graph | ## Directly-follows graph | ||||||
| dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) | dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) | ||||||
| pm4py.view_dfg(dfg, start_activities, end_activities) | pm4py.view_dfg(dfg, start_activities, end_activities) | ||||||
| pm4py.save_vis_dfg(dfg, start_activities, end_activities, "results/processmaps/dfg_complete_python.png") |  | ||||||
| 
 | 
 | ||||||
| ## Export petri nets | ## Export petri nets | ||||||
| pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, "results/processmaps/petrinet_conformative.png") | pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, | ||||||
|  |         "../../thesis/figures/petrinet_normative.png") | ||||||
| h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean) | h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "results/processmaps/petrinet_heuristics_clean.png") | pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "../../thesis/figures/petrinet_heuristics_clean.png") | ||||||
| a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean) | a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "results/processmaps/petrinet_alpha_clean.png") | pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "../../thesis/figures/petrinet_alpha_clean.png") | ||||||
| i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean) | i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "results/processmaps/petrinet_inductive_clean.png") | pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "../../thesis/figures/petrinet_inductive_clean.png") | ||||||
| ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean) | ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "results/processmaps/petrinet_ilp_clean.png") | pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "../../thesis/figures/petrinet_ilp_clean.png") | ||||||
| 
 | 
 | ||||||
| # convert to BPMN | # convert to BPMN | ||||||
| base_bpmn = pm4py.convert.convert_to_bpmn(basenet, initial_marking, final_marking) | base_bpmn = pm4py.convert.convert_to_bpmn(basenet, initial_marking, final_marking) | ||||||
| pm4py.vis.save_vis_bpmn(base_bpmn, "results/processmaps/bpmn_conformative.png") | pm4py.vis.save_vis_bpmn(base_bpmn, "../../thesis/figures/bpmn_normative.png") | ||||||
| i_bpmn = pm4py.convert.convert_to_bpmn(i_net, i_im, i_fm) | i_bpmn = pm4py.convert.convert_to_bpmn(i_net, i_im, i_fm) | ||||||
| pm4py.vis.save_vis_bpmn(i_bpmn, "results/processmaps/bpmn_inductive_clean.png") | pm4py.vis.save_vis_bpmn(i_bpmn, "../../thesis/figures/bpmn_inductive_clean.png") | ||||||
| ilp_bpmn = pm4py.convert.convert_to_bpmn(ilp_net, ilp_im, ilp_fm) | ilp_bpmn = pm4py.convert.convert_to_bpmn(ilp_net, ilp_im, ilp_fm) | ||||||
| pm4py.vis.save_vis_bpmn(ilp_bpmn, "results/processmaps/bpmn_ilp_clean.png") | pm4py.vis.save_vis_bpmn(ilp_bpmn, "../../thesis/figures/bpmn_ilp_clean.png") | ||||||
| a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm) | a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm) | ||||||
| pm4py.vis.save_vis_bpmn(a_bpmn, "results/processmaps/bpmn_alpha_clean.png") | pm4py.vis.save_vis_bpmn(a_bpmn, "../../thesis/figures/bpmn_alpha_clean.png") | ||||||
| h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm) | h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm) | ||||||
| pm4py.vis.save_vis_bpmn(h_bpmn, "results/processmaps/bpmn_heuristics_clean.png") | pm4py.vis.save_vis_bpmn(h_bpmn, "../../thesis/figures/bpmn_heuristics_clean.png") | ||||||
| 
 |  | ||||||
|  | |||||||
| @ -5,22 +5,23 @@ | |||||||
| #          (3) DFG for complete data | #          (3) DFG for complete data | ||||||
| #          (4) Export data frame for analyses | #          (4) Export data frame for analyses | ||||||
| # | # | ||||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| #         results/haum/raw_logfiles_2024-02-21_16-07-33.csv | #         results/raw_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: results/haum/eventlogs_pre-corona_cleaned.RData | # output: results/eventlogs_pre-corona_cleaned.RData | ||||||
| #         results/haum/eventlogs_pre-corona_cleaned.csv | #         results/eventlogs_pre-corona_cleaned.csv | ||||||
|  | #         ../../thesis/figures/dfg_complete_WFnet_R.pdf | ||||||
| # | # | ||||||
| # last mod: 2024-03-06 | # last mod: 2024-03-23 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| #--------------- (1) Look at broken trace --------------- | #--------------- (1) Look at broken trace --------------- | ||||||
| 
 | 
 | ||||||
| datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", | datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                    header = TRUE, sep = ";") |                      header = TRUE, sep = ";") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", | datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                       colClasses = c("character", "character", "POSIXct", |                       colClasses = c("character", "character", "POSIXct", | ||||||
|                                      "POSIXct", "character", "integer", |                                      "POSIXct", "character", "integer", | ||||||
|                                      "numeric", "character", "character", |                                      "numeric", "character", "character", | ||||||
| @ -84,7 +85,7 @@ dfg <- processmapR::process_map(alog, | |||||||
|   render     = FALSE) |   render     = FALSE) | ||||||
| 
 | 
 | ||||||
| processmapR::export_map(dfg, | processmapR::export_map(dfg, | ||||||
|   file_name = paste0("results/processmaps/dfg_complete_R.pdf"), |   file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"), | ||||||
|   file_type = "pdf") |   file_type = "pdf") | ||||||
| 
 | 
 | ||||||
| rm(tmp) | rm(tmp) | ||||||
| @ -109,10 +110,10 @@ dat <- datlogs[as.Date(datlogs$date.start) < "2020-03-13", ] | |||||||
| # Remove corrupt trace | # Remove corrupt trace | ||||||
| dat <- dat[dat$path != 106098, ] | dat <- dat[dat$path != 106098, ] | ||||||
| 
 | 
 | ||||||
| save(dat, file = "results/haum/eventlogs_pre-corona_cleaned.RData") | save(dat, file = "results/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| write.table(dat, | write.table(dat, | ||||||
|             file = "results/haum/eventlogs_pre-corona_cleaned.csv", |             file = "results/eventlogs_pre-corona_cleaned.csv", | ||||||
|             sep = ";", |             sep = ";", | ||||||
|             quote = FALSE, |             quote = FALSE, | ||||||
|             row.names = FALSE) |             row.names = FALSE) | ||||||
|  | |||||||
| @ -3,10 +3,10 @@ | |||||||
| # content: (1) Load data and create event log | # content: (1) Load data and create event log | ||||||
| #          (2) Infos for items | #          (2) Infos for items | ||||||
| # | # | ||||||
| # input:  results/haum/eventlogs_pre-corona_cleaned.csv | # input:  results/eventlogs_pre-corona_cleaned.csv | ||||||
| # output: results/haum/pn_infos_items.csv | # output: results/pn_infos_items.csv | ||||||
| # | # | ||||||
| # last mod: 2024-03-06 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| import pm4py | import pm4py | ||||||
| import pandas as pd | import pandas as pd | ||||||
| @ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Load data and create event logs --------------- | #--------------- (1) Load data and create event logs --------------- | ||||||
| 
 | 
 | ||||||
| dat = pd.read_csv("results/haum/eventlogs_pre-corona_cleaned", sep = ";") | dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";") | ||||||
| 
 | 
 | ||||||
| log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | ||||||
|                                   timestamp_key = "date.start") |                                   timestamp_key = "date.start") | ||||||
| @ -33,5 +33,5 @@ for item in log_path.item.unique().tolist(): | |||||||
| eval = eval.sort_index() | eval = eval.sort_index() | ||||||
| 
 | 
 | ||||||
| # Export | # Export | ||||||
| eval.to_csv("results/haum/pn_infos_items.csv", sep = ";") | eval.to_csv("results/pn_infos_items.csv", sep = ";") | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -7,15 +7,12 @@ | |||||||
| #          (2) Clustering | #          (2) Clustering | ||||||
| #          (3) Visualization with pictures | #          (3) Visualization with pictures | ||||||
| # | # | ||||||
| # input:  results/haum/eventlogs_pre-corona_cleaned.RData | # input:  results/eventlogs_pre-corona_cleaned.RData | ||||||
| #         results/haum/pn_infos_items.csv | #         results/pn_infos_items.csv | ||||||
| # output: results/haum/eventlogs_pre-corona_item-clusters.csv | # output: results/eventlogs_pre-corona_item-clusters.csv | ||||||
| #         results/figures/dendrogram_items.pdf | #         ../../thesis/figures/data/clustering_items.RData" | ||||||
| #         results/figures/clustering_items.pdf |  | ||||||
| #         results/figures/clustering_artworks.pdf |  | ||||||
| #         results/figures/clustering_artworks.png |  | ||||||
| # | # | ||||||
| # last mod: 2024-03-08 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -25,11 +22,11 @@ source("R_helpers.R") | |||||||
| 
 | 
 | ||||||
| #--------------- (1.1) Read log event data --------------- | #--------------- (1.1) Read log event data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | load("results/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| #--------------- (1.2) Read infos for PM for items --------------- | #--------------- (1.2) Read infos for PM for items --------------- | ||||||
| 
 | 
 | ||||||
| datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE, | datitem <- read.table("results/pn_infos_items.csv", header = TRUE, | ||||||
|                       sep = ";", row.names = 1) |                       sep = ";", row.names = 1) | ||||||
| 
 | 
 | ||||||
| #--------------- (1.3) Extract additional infos for clustering --------------- | #--------------- (1.3) Extract additional infos for clustering --------------- | ||||||
| @ -96,9 +93,6 @@ mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E", "Black") | |||||||
| 
 | 
 | ||||||
| cluster <- cutree(hc, k = k) | cluster <- cutree(hc, k = k) | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/dendrogram_items.pdf", width = 6.5, height = 5.5, pointsize = 10) |  | ||||||
| # TODO: Move code for plots to /thesis/ |  | ||||||
| 
 |  | ||||||
| factoextra::fviz_dend(hc, k = k, | factoextra::fviz_dend(hc, k = k, | ||||||
|                       cex = 0.5, |                       cex = 0.5, | ||||||
|                       k_colors = mycols, |                       k_colors = mycols, | ||||||
| @ -109,10 +103,6 @@ factoextra::fviz_dend(hc, k = k, | |||||||
|                       #ggtheme = ggplot2::theme_bw() |                       #ggtheme = ggplot2::theme_bw() | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/clustering_items.pdf", width = 6.5, height = 5.5, pointsize = 10) |  | ||||||
| 
 |  | ||||||
| factoextra::fviz_cluster(list(data = df, cluster = cluster), | factoextra::fviz_cluster(list(data = df, cluster = cluster), | ||||||
|                          palette = mycols, |                          palette = mycols, | ||||||
|                          ellipse.type = "convex", |                          ellipse.type = "convex", | ||||||
| @ -121,8 +111,6 @@ factoextra::fviz_cluster(list(data = df, cluster = cluster), | |||||||
|                          main = "", |                          main = "", | ||||||
|                          ggtheme = ggplot2::theme_bw()) |                          ggtheme = ggplot2::theme_bw()) | ||||||
| 
 | 
 | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| aggregate(cbind(precision, generalizability, nvariants, duration, distance, | aggregate(cbind(precision, generalizability, nvariants, duration, distance, | ||||||
|                 scaleSize , rotationDegree, npaths, ncases, nmoves, |                 scaleSize , rotationDegree, npaths, ncases, nmoves, | ||||||
|                 nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem, |                 nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem, | ||||||
| @ -138,18 +126,6 @@ item <- sprintf("%03d", as.numeric(gsub("item_([0-9]{3})", "\\1", | |||||||
| res <- merge(dat, data.frame(item, cluster), by = "item", all.x = TRUE) | res <- merge(dat, data.frame(item, cluster), by = "item", all.x = TRUE) | ||||||
| res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ] | res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ] | ||||||
| 
 | 
 | ||||||
| # Look at clusters |  | ||||||
| par(mfrow = c(2,2)) |  | ||||||
| vioplot::vioplot(duration ~ cluster, res) |  | ||||||
| vioplot::vioplot(distance ~ cluster, res) |  | ||||||
| vioplot::vioplot(scaleSize ~ cluster, res) |  | ||||||
| vioplot::vioplot(rotationDegree ~ cluster, res) |  | ||||||
| 
 |  | ||||||
| write.table(res, |  | ||||||
|             file = "results/haum/eventlogs_pre-corona_item-clusters.csv", |  | ||||||
|             sep = ";", |  | ||||||
|             quote = FALSE, |  | ||||||
|             row.names = FALSE) |  | ||||||
| 
 | 
 | ||||||
| # DFGs for clusters | # DFGs for clusters | ||||||
| res$start <- res$date.start | res$start <- res$date.start | ||||||
| @ -163,63 +139,30 @@ for (clst in sort(unique(res$cluster))) { | |||||||
|     resource_id = "item", |     resource_id = "item", | ||||||
|     timestamps  = c("start", "complete")) |     timestamps  = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
|   dfg <- processmapR::process_map(alog, |   processmapR::process_map(alog, | ||||||
|     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), |     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), | ||||||
|     sec_nodes  = processmapR::frequency("absolute"), |     sec_nodes  = processmapR::frequency("absolute"), | ||||||
|     type_edges = processmapR::frequency("relative", color_edges = "#FF6900"), |     type_edges = processmapR::frequency("relative", color_edges = "#FF6900"), | ||||||
|     sec_edges  = processmapR::frequency("absolute"), |     sec_edges  = processmapR::frequency("absolute"), | ||||||
|     rankdir    = "LR", |     rankdir    = "LR") | ||||||
|     render     = FALSE) |  | ||||||
| 
 |  | ||||||
|   processmapR::export_map(dfg, |  | ||||||
|     file_name = paste0("results/processmaps/dfg_cluster", clst, "_R.pdf"), |  | ||||||
|     file_type = "pdf", |  | ||||||
|     title     = paste("DFG Cluster", clst)) |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #--------------- (3) Visualization with pictures --------------- |  | ||||||
| 
 | 
 | ||||||
| coor_2d <- cmdscale(dist_mat, k = 2) | # Look at clusters | ||||||
|  | par(mfrow = c(2,2)) | ||||||
|  | vioplot::vioplot(duration ~ cluster, res) | ||||||
|  | vioplot::vioplot(distance ~ cluster, res) | ||||||
|  | vioplot::vioplot(scaleSize ~ cluster, res) | ||||||
|  | vioplot::vioplot(rotationDegree ~ cluster, res) | ||||||
| 
 | 
 | ||||||
| items <- sprintf("%03d", as.numeric(rownames(datitem))) | write.table(res, | ||||||
|  |             file = "results/eventlogs_pre-corona_item-clusters.csv", | ||||||
|  |             sep = ";", | ||||||
|  |             quote = FALSE, | ||||||
|  |             row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 16) | # Save data for plots and tables | ||||||
| #png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 16, res = 300) |  | ||||||
| 
 | 
 | ||||||
| par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | save(hc, k, res, dist_mat, datitem, df, | ||||||
| 
 |      file = "../../thesis/figures/data/clustering_items.RData") | ||||||
| plot(coor_2d, type = "n", ylim = c(-3.7, 2.6), xlim = c(-5, 10.5), |  | ||||||
|      xlab = "", ylab = "") |  | ||||||
| 
 |  | ||||||
| for (item in items) { |  | ||||||
| 
 |  | ||||||
|   if (item == "125") { |  | ||||||
| 
 |  | ||||||
|     pic <- jpeg::readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", |  | ||||||
|                           item, "/", item, ".jpg")) |  | ||||||
|   } else { |  | ||||||
|     pic <- png::readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", |  | ||||||
|                         item, "/", item, ".png")) |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   img <- as.raster(pic[,,1:3]) |  | ||||||
| 
 |  | ||||||
|   x <- coor_2d[items == item, 1] |  | ||||||
|   y <- coor_2d[items == item, 2] |  | ||||||
| 
 |  | ||||||
|   points(x, y, |  | ||||||
|          col = mycols[cluster[items == item]], |  | ||||||
|          cex = 6, |  | ||||||
|          pch = 15) |  | ||||||
| 
 |  | ||||||
|   rasterImage(img, |  | ||||||
|               xleft = x - .45, |  | ||||||
|               xright = x + .45, |  | ||||||
|               ybottom = y - .2, |  | ||||||
|               ytop = y + .2) |  | ||||||
| 
 |  | ||||||
| } |  | ||||||
| legend("topright", paste("Cluster", 1:k), col = mycols, pch = 15, bty = "n") |  | ||||||
| 
 |  | ||||||
| dev.off() |  | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -5,10 +5,11 @@ | |||||||
| #          (3) Select features for navigation behavior | #          (3) Select features for navigation behavior | ||||||
| #          (4) Export data frames | #          (4) Export data frames | ||||||
| # | # | ||||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: results/haum/eventlogs_pre-corona_case-clusters.csv | # output: results/dataframes_case_2019.RData | ||||||
|  | #         results/centrality_cases.RData | ||||||
| # | # | ||||||
| # last mod: 2024-03-08 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -16,7 +17,7 @@ source("R_helpers.R") | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | load("results/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| # Select one year to handle number of cases | # Select one year to handle number of cases | ||||||
| dat <- dat[as.Date(dat$date.start) > "2018-12-31" & | dat <- dat[as.Date(dat$date.start) > "2018-12-31" & | ||||||
| @ -135,8 +136,8 @@ dattree <- data.frame(case = datcase$case, | |||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| # centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat) | # centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat) | ||||||
| # save(centrality, file = "results/haum/tmp_centrality.RData") | # save(centrality, file = "results/centrality_cases.RData") | ||||||
| load("results/haum/tmp_centrality.RData") | load("results/centrality_cases.RData") | ||||||
| 
 | 
 | ||||||
| dattree$BetweenCentrality <- centrality | dattree$BetweenCentrality <- centrality | ||||||
| 
 | 
 | ||||||
| @ -171,17 +172,5 @@ dattree$AvDurItemNorm <- normalize(dattree$AvDurItem) | |||||||
| 
 | 
 | ||||||
| #--------------- (4) Export data frames --------------- | #--------------- (4) Export data frames --------------- | ||||||
| 
 | 
 | ||||||
| save(dat, datcase, dattree, file = "results/haum/dataframes_case_2019.RData") | save(dat, datcase, dattree, file = "results/dataframes_case_2019.RData") | ||||||
| 
 |  | ||||||
| write.table(datcase, |  | ||||||
|             file = "results/haum/datcase.csv", |  | ||||||
|             sep = ";", |  | ||||||
|             quote = FALSE, |  | ||||||
|             row.names = FALSE) |  | ||||||
| 
 |  | ||||||
| write.table(datcase, |  | ||||||
|             file = "results/haum/dattree.csv", |  | ||||||
|             sep = ";", |  | ||||||
|             quote = FALSE, |  | ||||||
|             row.names = FALSE) |  | ||||||
| 
 | 
 | ||||||
| @ -1,45 +0,0 @@ | |||||||
| # 08_infos-clusters.py |  | ||||||
| # |  | ||||||
| # content: (1) Load data and create event log |  | ||||||
| #          (2) Infos for clusters |  | ||||||
| #          (3) Process maps for clusters |  | ||||||
| # |  | ||||||
| # input:  results/haum/eventlogs_pre-corona_item-clusters.csv |  | ||||||
| # output: results/haum/pn_infos_clusters.csv |  | ||||||
| # |  | ||||||
| # last mod: 2024-03-06 |  | ||||||
| 
 |  | ||||||
| import pm4py |  | ||||||
| import pandas as pd |  | ||||||
| 
 |  | ||||||
| from python_helpers import eval_pm, pn_infos |  | ||||||
| 
 |  | ||||||
| #--------------- (1) Load data and create event logs --------------- |  | ||||||
| 
 |  | ||||||
| dat = pd.read_csv("results/haum/eventlogs_pre-corona_item-clusters.csv", sep = ";") |  | ||||||
| 
 |  | ||||||
| log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", |  | ||||||
|                                   timestamp_key = "date.start") |  | ||||||
| 
 |  | ||||||
| #--------------- (2) Infos for clusters --------------- |  | ||||||
| 
 |  | ||||||
| # Merge clusters into data frame |  | ||||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", |  | ||||||
|                                "simplicity", "sound", "narcs", "ntrans", |  | ||||||
|                                "nplaces", "nvariants", "mostfreq"]) |  | ||||||
| for cluster in log_path.grp.unique().tolist(): |  | ||||||
|     eval = pd.concat([eval, pn_infos(log_path, "grp", cluster)]) |  | ||||||
| eval = eval.sort_index() |  | ||||||
| 
 |  | ||||||
| eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";") |  | ||||||
| 
 |  | ||||||
| #--------------- (3) Process maps for clusters --------------- |  | ||||||
| 
 |  | ||||||
| for cluster in log_path.grp.unique().tolist(): |  | ||||||
|     subdata = log_path[log_path.grp == cluster] |  | ||||||
|     subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=0.5) |  | ||||||
|     pm4py.save_vis_petri_net(subnet, subim, subfm, |  | ||||||
|        "results/processmaps/petrinet_cluster" + str(cluster).zfill(3) + ".png") |  | ||||||
|     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm) |  | ||||||
|     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster_" + |  | ||||||
|             str(cluster).zfill(3) + ".png") |  | ||||||
| @ -4,18 +4,18 @@ | |||||||
| #          (2) Clustering | #          (2) Clustering | ||||||
| #          (3) Fit tree | #          (3) Fit tree | ||||||
| # | # | ||||||
| # input:  results/haum/dataframes_case_2019.RData | # input:  results/dataframes_case_2019.RData | ||||||
| # output: results/haum/eventlogs_2019_case-clusters.csv | # output: results/eventlogs_2019_case-clusters.csv | ||||||
| #         results/haum/tmp_user-navigation.RData | #         results/user-navigation.RData | ||||||
| #         ../../thesis/figures/data/clustering_cases.RData | #         ../../thesis/figures/data/clustering_cases.RData | ||||||
| # | # | ||||||
| # last mod: 2024-03-15 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| #--------------- (1) Load data --------------- | #--------------- (1) Load data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/haum/dataframes_case_2019.RData") | load("results/dataframes_case_2019.RData") | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Clustering --------------- | #--------------- (2) Clustering --------------- | ||||||
| 
 | 
 | ||||||
| @ -119,13 +119,13 @@ aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, m | |||||||
| aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median) | aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median) | ||||||
| 
 | 
 | ||||||
| write.table(res, | write.table(res, | ||||||
|             file = "results/haum/eventlogs_2019_case-clusters.csv", |             file = "results/eventlogs_2019_case-clusters.csv", | ||||||
|             sep = ";", |             sep = ";", | ||||||
|             quote = FALSE, |             quote = FALSE, | ||||||
|             row.names = FALSE) |             row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| save(res, dist_mat, hcs, acs, coor_2d, coor_3d, | save(res, dist_mat, hcs, acs, coor_2d, coor_3d, | ||||||
|      file = "results/haum/tmp_user-navigation.RData") |      file = "results/user-navigation.RData") | ||||||
| 
 | 
 | ||||||
| save(coor_2d, coor_3d, cluster, dattree, | save(coor_2d, coor_3d, cluster, dattree, | ||||||
|      file = "../../thesis/figures/data/clustering_cases.RData") |      file = "../../thesis/figures/data/clustering_cases.RData") | ||||||
| @ -6,10 +6,10 @@ | |||||||
| #          (4) Clustering | #          (4) Clustering | ||||||
| #          (5) Fit tree | #          (5) Fit tree | ||||||
| # | # | ||||||
| # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: results/haum/eventlogs_pre-corona_case-clusters.csv | # output: -- | ||||||
| # | # | ||||||
| # last mod: 2024-03-15 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -17,7 +17,7 @@ source("R_helpers.R") | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | load("results/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| # Select one year to handle number of cases | # Select one year to handle number of cases | ||||||
| dat <- dat[as.Date(dat$date.start) > "2017-12-31" & | dat <- dat[as.Date(dat$date.start) > "2017-12-31" & | ||||||
| @ -77,8 +77,8 @@ dattree18 <- data.frame(case = datcase18$case, | |||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| # centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat) | # centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat) | ||||||
| # save(centrality, file = "results/haum/tmp_centrality_2018.RData") | # save(centrality, file = "results/centrality_2018.RData") | ||||||
| load("results/haum/tmp_centrality_2018.RData") | load("results/centrality_2018.RData") | ||||||
| 
 | 
 | ||||||
| dattree18$BetweenCentrality <- centrality | dattree18$BetweenCentrality <- centrality | ||||||
| 
 | 
 | ||||||
| @ -3,10 +3,11 @@ | |||||||
| # content: (1) Read data | # content: (1) Read data | ||||||
| #          (2) Investigate variants | #          (2) Investigate variants | ||||||
| # | # | ||||||
| # input:  results/haum/eventlogs_pre-corona_case-clusters.RData | # input:  results/eventlogs_pre-corona_cleaned.RData | ||||||
| # output:  | # output: ../../thesis/figures/freq-traces.pdf | ||||||
|  | #         ../../thesis/figures/freq-traces_powerlaw.pdf | ||||||
| # | # | ||||||
| # last mod: 2024-03-12 | # last mod: 2024-03-22 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -14,7 +15,7 @@ library(bupaverse) | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/haum/eventlogs_pre-corona_cleaned.RData") | load("results/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Investigate variants --------------- | #--------------- (2) Investigate variants --------------- | ||||||
| 
 | 
 | ||||||
| @ -27,7 +28,7 @@ alog <- activitylog(dat, | |||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps  = c("start", "complete")) |                     timestamps  = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) | pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) | ||||||
| trace_explorer(alog, n_traces = 25) | trace_explorer(alog, n_traces = 25) | ||||||
| # --> sequences of artworks are just too rare | # --> sequences of artworks are just too rare | ||||||
| dev.off() | dev.off() | ||||||
| @ -54,7 +55,7 @@ y <- as.numeric(tab) | |||||||
| p1 <- lm(log(y) ~ log(x)) | p1 <- lm(log(y) ~ log(x)) | ||||||
| pre <- exp(coef(p1)[1]) * x^coef(p1)[2] | pre <- exp(coef(p1)[1]) * x^coef(p1)[2] | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/freq-traces_powerlaw.pdf", height = 3.375, | pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375, | ||||||
|     width = 3.375, pointsize = 10) |     width = 3.375, pointsize = 10) | ||||||
| par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
							
								
								
									
										93
									
								
								code/12_dfgs-case-clusters.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								code/12_dfgs-case-clusters.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,93 @@ | |||||||
|  | # 12_dfgs-case-clusters.R | ||||||
|  | # | ||||||
|  | # content: (1) Read data | ||||||
|  | #          (2) Export DFGs for clusters | ||||||
|  | # | ||||||
|  | # input:  results/user-navigation.RData | ||||||
|  | # output: ../../thesis/figures/dfg_cases_cluster1_R.pdf | ||||||
|  | #         ../../thesis/figures/dfg_cases_cluster2_R.pdf | ||||||
|  | #         ../../thesis/figures/dfg_cases_cluster3_R.pdf | ||||||
|  | #         ../../thesis/figures/dfg_cases_cluster4_R.pdf | ||||||
|  | #         ../../thesis/figures/dfg_cases_cluster5_R.pdf | ||||||
|  | # | ||||||
|  | # last mod: 2024-03-22 | ||||||
|  | 
 | ||||||
|  | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
|  | 
 | ||||||
|  | #--------------- (1) Read data --------------- | ||||||
|  | 
 | ||||||
|  | load("results/user-navigation.RData") | ||||||
|  | 
 | ||||||
|  | dat <- res | ||||||
|  | 
 | ||||||
|  | dat$start <- as.POSIXct(dat$date.start) | ||||||
|  | dat$complete <- as.POSIXct(dat$date.stop) | ||||||
|  | 
 | ||||||
|  | alog <- bupaR::activitylog(dat[dat$cluster == cluster, ], | ||||||
|  |                             case_id     = "case", | ||||||
|  |                             activity_id = "item", | ||||||
|  |                             resource_id = "path", | ||||||
|  |                             timestamps  = c("start", "complete")) | ||||||
|  | 
 | ||||||
|  | processmapR::trace_explorer(alog, n_traces = 25) | ||||||
|  | 
 | ||||||
|  | tr <- bupaR::traces(alog) | ||||||
|  | tab <- table(tr$absolute_frequency) | ||||||
|  | 
 | ||||||
|  | tab[1] / nrow(tr) | ||||||
|  | 
 | ||||||
|  | alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map() | ||||||
|  | 
 | ||||||
|  | #--------------- (2) Export DFGs for clusters --------------- | ||||||
|  | 
 | ||||||
|  | mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F") | ||||||
|  | cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info") | ||||||
|  | 
 | ||||||
|  | ns <- c(30, 20, 10, 5, 30) | ||||||
|  | 
 | ||||||
|  | for (i in 1:5) { | ||||||
|  | 
 | ||||||
|  |   alog <- bupaR::activitylog(dat[dat$cluster == i, ], | ||||||
|  |                              case_id     = "case", | ||||||
|  |                              activity_id = "item", | ||||||
|  |                              resource_id = "path", | ||||||
|  |                              timestamps  = c("start", "complete")) | ||||||
|  |    | ||||||
|  |   dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), | ||||||
|  |     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), | ||||||
|  |     sec_nodes  = processmapR::frequency("absolute"), | ||||||
|  |     type_edges = processmapR::frequency("relative", color_edges = mycols[i]), | ||||||
|  |     sec_edges  = processmapR::frequency("absolute"), | ||||||
|  |     rankdir    = "LR", | ||||||
|  |     render     = FALSE) | ||||||
|  | 
 | ||||||
|  |   processmapR::export_map(dfg, | ||||||
|  |     file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"), | ||||||
|  |     file_type = "pdf", | ||||||
|  |     title     = cl_names[i]) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # cluster 1: 50 | ||||||
|  | # cluster 2: 30 o. 20 | ||||||
|  | # cluster 3: 20 - 30 | ||||||
|  | # cluster 4: 5 | ||||||
|  | # cluster 5: 20 | ||||||
|  | 
 | ||||||
|  | get_percent_variants <- function(log, cluster, min_n) { | ||||||
|  | 
 | ||||||
|  |   alog <- bupaR::activitylog(log[log$cluster == cluster, ], | ||||||
|  |                              case_id     = "case", | ||||||
|  |                              activity_id = "item", | ||||||
|  |                              resource_id = "path", | ||||||
|  |                              timestamps  = c("start", "complete")) | ||||||
|  | 
 | ||||||
|  |   nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) / | ||||||
|  |     nrow(alog) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | perc <- numeric(5) | ||||||
|  | 
 | ||||||
|  | for (i in 1:5) { | ||||||
|  |   perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i]) | ||||||
|  | } | ||||||
|  | 
 | ||||||
| @ -1,46 +0,0 @@ | |||||||
| # 12_pm-case-clusters.py |  | ||||||
| # |  | ||||||
| # content: (1) Load data and create event log |  | ||||||
| #          (2) Infos for clusters |  | ||||||
| #          (3) Process maps for clusters |  | ||||||
| # |  | ||||||
| # input:  results/haum/eventlogs_pre-corona_item-clusters.csv |  | ||||||
| # output: results/haum/pn_infos_clusters.csv |  | ||||||
| # |  | ||||||
| # last mod: 2024-03-10 |  | ||||||
| 
 |  | ||||||
| import pm4py |  | ||||||
| import pandas as pd |  | ||||||
| 
 |  | ||||||
| from python_helpers import eval_pm, pn_infos |  | ||||||
| 
 |  | ||||||
| #--------------- (1) Load data and create event logs --------------- |  | ||||||
| 
 |  | ||||||
| dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";") |  | ||||||
| 
 |  | ||||||
| event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "event_new", |  | ||||||
|                                   timestamp_key = "date.start") |  | ||||||
| 
 |  | ||||||
| #--------------- (2) Infos for clusters --------------- |  | ||||||
| 
 |  | ||||||
| # Merge clusters into data frame |  | ||||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", |  | ||||||
|                                "simplicity", "sound", "narcs", "ntrans", |  | ||||||
|                                "nplaces", "nvariants", "mostfreq"]) |  | ||||||
| for cluster in event_log.cluster.unique().tolist(): |  | ||||||
|     eval = pd.concat([eval, pn_infos(event_log, "cluster", cluster)]) |  | ||||||
| eval = eval.sort_index() |  | ||||||
| 
 |  | ||||||
| eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";") |  | ||||||
| 
 |  | ||||||
| #--------------- (3) Process maps for clusters --------------- |  | ||||||
| 
 |  | ||||||
| for cluster in event_log.cluster.unique().tolist(): |  | ||||||
|     subdata = event_log[event_log.cluster == cluster] |  | ||||||
|     subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = .7) |  | ||||||
|     pm4py.save_vis_petri_net(subnet, subim, subfm, |  | ||||||
|        "results/processmaps/petrinet_cluster" + str(cluster) + "_cases.png") |  | ||||||
|     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm) |  | ||||||
|     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + |  | ||||||
|             str(cluster) + "_cases.png") |  | ||||||
| 
 |  | ||||||
| @ -36,8 +36,8 @@ def pn_infos_miner(log, miner): | |||||||
|         net, im, fm = pm4py.discover_petri_net_ilp(log) |         net, im, fm = pm4py.discover_petri_net_ilp(log) | ||||||
|     elif miner == "inductive": |     elif miner == "inductive": | ||||||
|         net, im, fm = pm4py.discover_petri_net_inductive(log) |         net, im, fm = pm4py.discover_petri_net_inductive(log) | ||||||
|     elif miner == "conformative": |     elif miner == "normative": | ||||||
|         net, im, fm = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml") |         net, im, fm = pm4py.read_pnml("results/normative_petrinet.pnml") | ||||||
| 
 | 
 | ||||||
|     eval = eval_append(log, net, im, fm) |     eval = eval_append(log, net, im, fm) | ||||||
|     eval.index = [miner] |     eval.index = [miner] | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user