Compare commits
	
		
			No commits in common. "cba441f08beca2a379a9f497382512e20e43f3e1" and "26f90a7fec963dacad34aced8c7f6e93ef22caa6" have entirely different histories.
		
	
	
		
			cba441f08b
			...
			26f90a7fec
		
	
		
| @ -8,8 +8,8 @@ | |||||||
| #         ../data/metadata/feiertage.csv | #         ../data/metadata/feiertage.csv | ||||||
| #         ../data/metadata/schulferien_2016-2018_NI.csv | #         ../data/metadata/schulferien_2016-2018_NI.csv | ||||||
| #         ../data/metadata/schulferien_2019-2025_NI.csv | #         ../data/metadata/schulferien_2019-2025_NI.csv | ||||||
| # output: results/raw_logfiles_<timestamp>.csv | # output: raw_logfiles_<timestamp>.csv | ||||||
| #         results/event_logfiles_<timestamp>.csv | #         event_logfiles_<timestamp>.csv | ||||||
| # | # | ||||||
| # last mod: 2024-02-23, NW | # last mod: 2024-02-23, NW | ||||||
| 
 | 
 | ||||||
| @ -29,12 +29,12 @@ folders <- dir(path) | |||||||
| datraw <- parse_logfiles(folders, path) | datraw <- parse_logfiles(folders, path) | ||||||
| # 91 corrupt lines have been found and removed from the data set | # 91 corrupt lines have been found and removed from the data set | ||||||
| 
 | 
 | ||||||
| # datraw <- read.table("results/raw_logfiles_2023-10-25_16-20-45.csv", | # datraw <- read.table("results/haum/raw_logfiles_2023-10-25_16-20-45.csv", | ||||||
| #                      sep = ";", header = TRUE) | #                      sep = ";", header = TRUE) | ||||||
| 
 | 
 | ||||||
| ## Export data | ## Export data | ||||||
| 
 | 
 | ||||||
| write.table(datraw, paste0("results/raw_logfiles_", now, ".csv"), | write.table(datraw, paste0("results/haum/raw_logfiles_", now, ".csv"), | ||||||
|             sep = ";", row.names = FALSE) |             sep = ";", row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Create event logs --------------- | #--------------- (2) Create event logs --------------- | ||||||
| @ -131,6 +131,6 @@ dat2 <- dat2[order(dat2$fileId.start, dat2$date.start, dat2$timeMs.start), ] | |||||||
| 
 | 
 | ||||||
| ## Export data | ## Export data | ||||||
| 
 | 
 | ||||||
| write.table(dat2, paste0("results/event_logfiles_", now, ".csv"), | write.table(dat2, paste0("results/haum/event_logfiles_", now, ".csv"), | ||||||
|             sep = ";", row.names = FALSE) |             sep = ";", row.names = FALSE) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -9,30 +9,16 @@ | |||||||
| #           (3.4) Artwork sequences | #           (3.4) Artwork sequences | ||||||
| #           (3.5) Topics | #           (3.5) Topics | ||||||
| # | # | ||||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| #         results/raw_logfiles_2024-02-21_16-07-33.csv | # output: | ||||||
| # output: results/figures/counts_item.pdf |  | ||||||
| #         results/figures/counts_item_firsttouch.pdf |  | ||||||
| #         results/figures/duration.pdf |  | ||||||
| #         results/figures/heatmap_start.pdf |  | ||||||
| #         results/figures/heatmap_stop.pdf |  | ||||||
| #         results/figures/cases_per_day.pdf |  | ||||||
| #         results/figures/timeMs.pdf |  | ||||||
| #         results/figures/xycoord.pdf |  | ||||||
| #         results/figures/event-dist.pdf |  | ||||||
| #         results/figures/traceexplore_trace-event.pdf |  | ||||||
| #         results/figures/ra_trace-event.pdf |  | ||||||
| #         results/figures/traceexplore_case-event.pdf |  | ||||||
| #         results/figures/bp_tod.pdf |  | ||||||
| #         results/figures/bp_wd.pdf |  | ||||||
| #         results/figures/bp_wds.pdf |  | ||||||
| #         results/figures/bp_corona.pdf |  | ||||||
| #         results/figures/traceexplore_case-artwork_often080.pdf |  | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-13 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
|  | library(lattice) | ||||||
|  | library(bupaverse) | ||||||
|  | 
 | ||||||
| # Overall Research Question: How do museum visitors interact with the | # Overall Research Question: How do museum visitors interact with the | ||||||
| # artworks presented on the MTT? | # artworks presented on the MTT? | ||||||
| 
 | 
 | ||||||
| @ -41,7 +27,7 @@ | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv", | datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                       colClasses = c("character", "character", "POSIXct", |                       colClasses = c("character", "character", "POSIXct", | ||||||
|                                      "POSIXct", "character", "integer", |                                      "POSIXct", "character", "integer", | ||||||
|                                      "numeric", "character", "character", |                                      "numeric", "character", "character", | ||||||
| @ -54,7 +40,7 @@ datlogs$event <- factor(datlogs$event, levels = c("move", "flipCard", | |||||||
|                                                   "openTopic", |                                                   "openTopic", | ||||||
|                                                   "openPopup")) |                                                   "openPopup")) | ||||||
| 
 | 
 | ||||||
| datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv", | datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                      sep = ";", header = TRUE) |                      sep = ";", header = TRUE) | ||||||
| 
 | 
 | ||||||
| # Add weekdays to data frame | # Add weekdays to data frame | ||||||
| @ -88,263 +74,43 @@ lattice::dotplot(xtabs( ~ item + topic, datlogs), auto.key = TRUE) | |||||||
| mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) | mat <- t(as.matrix(xtabs( ~ item + topic, datlogs))) | ||||||
| mat[mat == 0] <- NA | mat[mat == 0] <- NA | ||||||
| image(mat, axes = F, col = rainbow(100)) | image(mat, axes = F, col = rainbow(100)) | ||||||
|  | heatmap(t(mat)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | datlogs$start <- datlogs$date.start | ||||||
|  | datlogs$complete <- datlogs$date.stop | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Descriptives --------------- | #--------------- (2) Descriptives --------------- | ||||||
| 
 |  | ||||||
| ### Which item gets touched most often? |  | ||||||
| 
 |  | ||||||
| counts_item <- table(datlogs$item) |  | ||||||
| lattice::barchart(counts_item) |  | ||||||
| 
 |  | ||||||
| items <- unique(datlogs$item) |  | ||||||
| #items <- items[!items %in% c("504", "505")] |  | ||||||
| datart <- mtt::extract_artworks(items, |  | ||||||
|                            paste0(items, ".xml"), |  | ||||||
|                            "../data/haum/ContentEyevisit/eyevisit_cards_light/") |  | ||||||
| datart <- datart[order(datart$artwork), ] |  | ||||||
| names(counts_item) <- datart$title |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/counts_item.pdf", width = 20, height = 10, pointsize = 10) |  | ||||||
| par(mai = c(5, .6, .1, .1)) |  | ||||||
| tmp <- barplot(counts_item, las = 2, ylim = c(0, 60000), |  | ||||||
|                border = NA, col = "#434F4F") |  | ||||||
| text(tmp, counts_item + 1000, datart$artwork) |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ### Which item gets touched most often first? |  | ||||||
| 
 |  | ||||||
| datcase <- datlogs[!duplicated(datlogs$case), ] |  | ||||||
| counts_case <- table(datcase$item) |  | ||||||
| names(counts_case) <- datart$title |  | ||||||
| tmp <- barplot(counts_case, las = 2, border = "white") |  | ||||||
| text(tmp, counts_case + 100, datart$item) |  | ||||||
| 
 |  | ||||||
| counts <- rbind(counts_item, counts_case) |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/counts_item_firsttouch.pdf", |  | ||||||
|     width = 20, height = 10, pointsize = 10) |  | ||||||
| par(mai = c(5, .6, .1, .1)) |  | ||||||
| 
 |  | ||||||
| tmp <- barplot(counts, las = 2, border = NA, col = c("#434F4F", "#FF6900"), ylim = c(0, 65000)) |  | ||||||
| text(tmp, counts_item + counts_case + 1000, datart$artwork) |  | ||||||
| legend("topleft", c("Total interactions", "First interactions"), |  | ||||||
|        col = c("#434F4F", "#FF6900"), pch = 15, bty = "n") |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ### Which teasers seem to work well? |  | ||||||
| barplot(table(datlogs$topic), las = 2) |  | ||||||
| 
 |  | ||||||
| ### Dwell times/duration |  | ||||||
| datagg <- aggregate(duration ~ event + item, datlogs, mean) |  | ||||||
| datagg$ds <- datagg$duration / 1000   # in secs |  | ||||||
| 
 |  | ||||||
| lattice::bwplot(ds ~ event, datagg) |  | ||||||
| 
 |  | ||||||
| # without aggregation |  | ||||||
| lattice::bwplot(duration / 1000 / 60 ~ event, datlogs) |  | ||||||
| # in min |  | ||||||
| 
 |  | ||||||
| set.seed(1027) |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/duration.pdf", width = 5, height = 5, pointsize = 10) |  | ||||||
| lattice::bwplot(I(duration/1000/60) ~ event, datlogs[sample(nrow(datlogs), 100000), ], |  | ||||||
|        ylab = "Duration in min") |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ### Move events |  | ||||||
| 
 |  | ||||||
| datmove <- aggregate(cbind(duration, scaleSize, rotationDegree, distance, x.start, |  | ||||||
|                            y.start, x.stop, y.stop) ~ item, datlogs, |  | ||||||
|                      mean) |  | ||||||
| 
 |  | ||||||
| hist(log(datlogs$scaleSize)) |  | ||||||
| # --> better interpretable on logscale |  | ||||||
| 
 |  | ||||||
| plot(y.start ~ x.start, datmove, pch = 16, col = "gray") |  | ||||||
| points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datmove$scaleSize) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", |  | ||||||
|      xlim = c(0, 3840), ylim = c(0, 2160)) |  | ||||||
| with(datmove, text(x.start, y.start, item, col = "gray", cex = 1.5)) |  | ||||||
| with(datmove, |  | ||||||
|      arrows(x.start, y.start, x.stop, y.stop, length = 0.07, lwd = 2) |  | ||||||
|      ) |  | ||||||
| abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) |  | ||||||
| 
 |  | ||||||
| datscale <- aggregate(scaleSize ~ item, datlogs, max) |  | ||||||
| plot(y.start ~ x.start, datmove, pch = 16, col = "gray") |  | ||||||
| points(y.start ~ x.start, datmove, col = "#3CB4DC", cex = datscale$scaleSize) |  | ||||||
| 
 |  | ||||||
| plot(y.start ~ x.start, datmove, type = "n", xlab = "x", ylab = "y", |  | ||||||
|      xlim = c(0, 3840), ylim = c(0, 2160)) |  | ||||||
| #with(datmove, text(x.stop, y.stop, item)) |  | ||||||
| with(datmove, text(x.start, y.start, item)) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ### Are there certain areas of the table that are touched most often? |  | ||||||
| 
 |  | ||||||
| # heatmap |  | ||||||
| cuts <- 100 |  | ||||||
| 
 |  | ||||||
| datlogs$x.start.cat <- cut(datlogs$x.start, cuts) |  | ||||||
| datlogs$y.start.cat <- cut(datlogs$y.start, cuts) |  | ||||||
| 
 |  | ||||||
| tab <- xtabs( ~ x.start.cat + y.start.cat, datlogs) |  | ||||||
| 
 |  | ||||||
| colnames(tab) <- paste0("c", 1:cuts) |  | ||||||
| rownames(tab) <- paste0("c", 1:cuts) |  | ||||||
| 
 |  | ||||||
| heatmap(tab, Rowv = NA, Colv = NA) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| dattrim <- datlogs[datlogs$x.start < 3840 & |  | ||||||
|                    datlogs$x.start > 0 & |  | ||||||
|                    datlogs$y.start < 2160 & |  | ||||||
|                    datlogs$y.start > 0 & |  | ||||||
|                    datlogs$x.stop < 3840 & |  | ||||||
|                    datlogs$x.stop > 0 & |  | ||||||
|                    datlogs$y.stop < 2160 & |  | ||||||
|                    datlogs$y.stop > 0, ] |  | ||||||
| 
 |  | ||||||
| cuts <- 100 # 200, 100, 70, ... |  | ||||||
| 
 |  | ||||||
| # start |  | ||||||
| dattrim$x.start.cat <- cut(dattrim$x.start, cuts) |  | ||||||
| dattrim$y.start.cat <- cut(dattrim$y.start, cuts) |  | ||||||
| 
 |  | ||||||
| tab.start <- xtabs( ~ x.start.cat + y.start.cat, dattrim) |  | ||||||
| colnames(tab.start) <- NULL |  | ||||||
| rownames(tab.start) <- NULL |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/heatmap_start.pdf", width = 5, height = 5, pointsize = 10) |  | ||||||
| heatmap(tab.start, Rowv = NA, Colv = NA) |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| # stop |  | ||||||
| dattrim$x.stop.cat <- cut(dattrim$x.stop, cuts) |  | ||||||
| dattrim$y.stop.cat <- cut(dattrim$y.stop, cuts) |  | ||||||
| tab.stop <- xtabs( ~ x.stop.cat + y.stop.cat, dattrim) |  | ||||||
| colnames(tab.stop) <- NULL |  | ||||||
| rownames(tab.stop) <- NULL |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/heatmap_stop.pdf", width = 5, height = 5, pointsize = 10) |  | ||||||
| heatmap(tab.stop, Rowv = NA, Colv = NA) |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ### How many visitors per day |  | ||||||
| 
 |  | ||||||
| datlogs$date <- as.Date(datlogs$date.start) |  | ||||||
| 
 |  | ||||||
| # Interactions per day |  | ||||||
| datint <- aggregate(case ~ date, datlogs, length) |  | ||||||
| plot(datint, type = "h") |  | ||||||
| 
 |  | ||||||
| # Cases per day |  | ||||||
| datcase <- aggregate(case ~ date, datlogs, function(x) length(unique(x))) |  | ||||||
| plot(datcase, type = "h") |  | ||||||
| 
 |  | ||||||
| # Paths per day |  | ||||||
| datpath <- aggregate(path ~ date, datlogs, function(x) length(unique(x))) |  | ||||||
| plot(datpath, type = "h") |  | ||||||
| 
 |  | ||||||
| plot(path ~ date, datpath, type = "h", col = "#3CB4DC") |  | ||||||
| points(case ~ date, datcase, type = "h") |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/cases_per_day.pdf", width = 9, height = 5, pointsize = 10) |  | ||||||
| par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) |  | ||||||
| plot(case ~ date, datcase, type = "h", col = "#434F4F") |  | ||||||
| abline(v = datcase$date[datcase$date %in% c("2020-03-13", "2022-10-25")], |  | ||||||
|        col = "#FF6900", lty = 2) |  | ||||||
| text(datcase$date[datcase$date == "2020-03-13"]+470, 80, |  | ||||||
|      "Corona gap from 2020-03-13 to 2022-10-25", |  | ||||||
|      col = "#FF6900") |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ## weird behavior of timeMs |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/timeMs.pdf", width = 9, height = 6, pointsize = 10) |  | ||||||
| #par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) |  | ||||||
| #plot(timeMs.start ~ as.factor(fileId), datlogs[1:2000,], xlab = "fileId") |  | ||||||
| lattice::bwplot(timeMs.start ~ as.factor(fileId.start), datlogs[1:2000,], xlab = "", |  | ||||||
|        scales = list(x = list(rot = 90), y = list(rot = 90))) |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ## x,y-coordinates out of range |  | ||||||
| 
 |  | ||||||
| set.seed(1522) |  | ||||||
| 
 |  | ||||||
| pdf("results/figures/xycoord.pdf", width = 5, height = 5, pointsize = 10) |  | ||||||
| par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0)) |  | ||||||
| #par(mfrow = c(1, 2)) |  | ||||||
| plot(y.start ~ x.start, datlogs[sample(nrow(datlogs), 10000), ]) |  | ||||||
| abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) |  | ||||||
| #plot(y.stop ~ x.stop, datlogs) |  | ||||||
| #abline(v = c(0, 3840), h = c(0, 2160), col = "#3CB4DC", lwd = 2) |  | ||||||
| legend("bottomleft", "Random sample of 10,000", bg = "white") |  | ||||||
| legend("topleft", "4K-Display: 3840 x 2160", bg = "white") |  | ||||||
| dev.off() |  | ||||||
| 
 |  | ||||||
| ## moves |  | ||||||
| 
 |  | ||||||
| dat001 <- datlogs[which(datlogs$item == "001"), ] |  | ||||||
| 
 |  | ||||||
| index <- as.numeric(as.factor(dat001$path)) |  | ||||||
| cc <- sample(colors(), 100) |  | ||||||
| 
 |  | ||||||
| plot(y.start ~ x.start, dat001, type = "n", xlab = "x", ylab = "y", |  | ||||||
|      xlim = c(0, 3840), ylim = c(0, 2160)) |  | ||||||
| with(dat001[1:200,], arrows(x.start, y.start, x.stop, y.stop, |  | ||||||
|                             length = .07, col = cc[index])) |  | ||||||
| 
 |  | ||||||
| plot(y.start ~ x.start, dat001, xlab = "x", ylab = "y", |  | ||||||
|      xlim = c(0, 3840), ylim = c(0, 2160), pch = 16, col = "gray") |  | ||||||
| points(y.start ~ x.start, dat001, xlab = "x", ylab = "y", |  | ||||||
|      xlim = c(0, 3840), ylim = c(0, 2160), cex = dat001$scaleSize, |  | ||||||
|      col = "blue") |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| cc <- sample(colors(), 70) |  | ||||||
| 
 |  | ||||||
| dat1 <- datlogs[!duplicated(datlogs$item), ] |  | ||||||
| dat1 <- dat1[order(dat1$item), ] |  | ||||||
| 
 |  | ||||||
| plot(y.start ~ x.start, dat1, type = "n", xlim = c(-100, 4500), ylim = c(-100, 2500)) |  | ||||||
| abline(h = c(0, 2160), v = c(0, 3840), col = "lightgray") |  | ||||||
| with(dat1, points(x.start, y.start, col = cc, pch = 16)) |  | ||||||
| with(dat1, points(x.stop, y.stop, col = cc, pch = 16)) |  | ||||||
| with(dat1, arrows(x.start, y.start, x.stop, y.stop, length = .07, col = cc)) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # How many events per topic, per path, ... | # How many events per topic, per path, ... | ||||||
| # How many popups per artwork? | # How many popups per artwork? | ||||||
| 
 | 
 | ||||||
| # Number of events per artwork | # Number of events per artwork | ||||||
| tab <- xtabs( ~ item + event, datlogs) | tab <- xtabs( ~ artwork + event, datlogs) | ||||||
| addmargins(tab) | addmargins(tab) | ||||||
| 
 | 
 | ||||||
| proportions(tab, margin = "item") | proportions(tab, margin = "artwork") | ||||||
| proportions(tab, margin = "event") | proportions(tab, margin = "event") | ||||||
| 
 | 
 | ||||||
| cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] | cc <- palette.colors(palette = "Okabe-Ito")[c(3,2,4,8)] | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("../figures/event-dist.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.4,.4,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(t(proportions(tab, margin = "item")), las = 2, col = c("#78004B", "#3CB4DC", "#91C86E", "#FF6900"), | barplot(t(proportions(tab, margin = "artwork")), las = 2, col = cc, | ||||||
|         legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) |         legend.text = levels(datlogs$event), args.legend = list(x = "bottomleft", bg = "white")) | ||||||
| 
 | 
 | ||||||
| dev.off() | dev.off() | ||||||
| 
 | 
 | ||||||
| #lattice::barchart(proportions(tab, margin = "item"), las = 2) | #barchart(proportions(tab, margin = "artwork"), las = 2) | ||||||
| 
 | 
 | ||||||
| # Proportion of events | # Proportion of events | ||||||
| proportions(xtabs( ~ event, datlogs)) | proportions(xtabs( ~ event, datlogs)) | ||||||
| # Mean proportion of event per path | # Mean proportion of event per path | ||||||
| colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) | colMeans(proportions(xtabs( ~ path + event, datlogs), margin = "path")) | ||||||
| # Mean proportion of event per item | # Mean proportion of event per artwork | ||||||
| colMeans(proportions(tab, margin = "item")) | colMeans(proportions(tab, margin = "artwork")) | ||||||
| 
 | 
 | ||||||
| # Proportion of unclosed events | # Proportion of unclosed events | ||||||
| 
 | 
 | ||||||
| @ -360,17 +126,15 @@ sum(datlogs$fileId.start != datlogs$fileId.stop, na.rm = TRUE) / nrow(datlogs) | |||||||
| 
 | 
 | ||||||
| #--------------- (3.1) Check data quality --------------- | #--------------- (3.1) Check data quality --------------- | ||||||
| 
 | 
 | ||||||
| datlogs$start <- datlogs$date.start | alog <- activitylog(datlogs, | ||||||
| datlogs$complete <- datlogs$date.stop |  | ||||||
| 
 |  | ||||||
| alog <- bupaR::activitylog(datlogs, |  | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     #resource_id = "case", |                     #resource_id = "case", | ||||||
|                     resource_id = "item", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | # process_map(alog, frequency("relative")) | ||||||
|  | map_as_pdf(alog, file = "../figures/pm_trace-event.pdf") | ||||||
| 
 | 
 | ||||||
| alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) | alogf <- edeaR::filter_trace_frequency(alog, percentage = 0.9) | ||||||
| 
 | 
 | ||||||
| @ -383,7 +147,7 @@ processmapR::process_map(alogf,   # alog, | |||||||
| 
 | 
 | ||||||
| alog_no_move <- alog[alog$event != "move", ] | alog_no_move <- alog[alog$event != "move", ] | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) | pdf("../figures/traceexplore_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| set.seed(1447) | set.seed(1447) | ||||||
| processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||||
|                             sample(unique(alog_no_move$path), 400),], |                             sample(unique(alog_no_move$path), 400),], | ||||||
| @ -391,7 +155,7 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | |||||||
|                             abbreviate = T) |                             abbreviate = T) | ||||||
| dev.off() | dev.off() | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) | pdf("../figures/ra_trace-event.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| 
 | 
 | ||||||
| ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") | ra_no_move <- edeaR::resource_frequency(alog_no_move, "resource-activity") | ||||||
| levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") | levels(ra_no_move$event) <- c("flipCard", "flipCard", "openTopic", "openPopup") | ||||||
| @ -418,29 +182,29 @@ which.max(table(datlogs$artwork)) | |||||||
| which.min(table(datlogs$artwork)) | which.min(table(datlogs$artwork)) | ||||||
| which.min(table(datlogs$artwork)[-c(71,72)]) | which.min(table(datlogs$artwork)[-c(71,72)]) | ||||||
| 
 | 
 | ||||||
| alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "artwork", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog80, processmapR::frequency("relative")) | map_as_pdf(alog080, file = "../figures/pm_trace-event_080.pdf") | ||||||
| 
 | 
 | ||||||
| alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], | alog087 <- activitylog(datlogs[datlogs$artwork == "087",], | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "artwork", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog087, processmapR::frequency("relative")) | map_as_pdf(alog087, file = "../figures/pm_trace-event_087.pdf") | ||||||
| 
 | 
 | ||||||
| alog504 <- bupaR::activitylog(datlogs[datlogs$artwork == "504",], | alog504 <- activitylog(datlogs[datlogs$artwork == "504",], | ||||||
|                     case_id = "path", |                     case_id = "path", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "artwork", |                     resource_id = "artwork", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog504, processmapR::frequency("relative")) | map_as_pdf(alog504, file = "../figures/pm_trace-event_504.pdf") | ||||||
| 
 | 
 | ||||||
| #--------------- (3.3) Patterns of cases --------------- | #--------------- (3.3) Patterns of cases --------------- | ||||||
| 
 | 
 | ||||||
| @ -451,17 +215,17 @@ processmapR::process_map(alog504, processmapR::frequency("relative")) | |||||||
| # ... weekdays for "normal" and school vacation days? | # ... weekdays for "normal" and school vacation days? | ||||||
| # ... pre and post corona? | # ... pre and post corona? | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs, | alog <- activitylog(datlogs, | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event.pdf") | ||||||
| 
 | 
 | ||||||
| alog_no_move <- alog[alog$event != "move", ] | alog_no_move <- alog[alog$event != "move", ] | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) | pdf("../figures/traceexplore_case-event.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| set.seed(1050) | set.seed(1050) | ||||||
| processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | ||||||
|                             sample(unique(alog_no_move$path), 300),], |                             sample(unique(alog_no_move$path), 300),], | ||||||
| @ -469,38 +233,38 @@ processmapR::trace_explorer(alog_no_move[alog_no_move$path %in% | |||||||
|                             abbreviate = T) |                             abbreviate = T) | ||||||
| dev.off() | dev.off() | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog080, processmapR::frequency("relative")) | map_as_pdf(alog080, file = "../figures/pm_case-event_080.pdf") | ||||||
| 
 | 
 | ||||||
| alog087 <- bupaR::activitylog(datlogs[datlogs$artwork == "087",], | alog087 <- activitylog(datlogs[datlogs$artwork == "087",], | ||||||
|                        case_id = "case", |                        case_id = "case", | ||||||
|                        activity_id = "event", |                        activity_id = "event", | ||||||
|                        resource_id = "path", |                        resource_id = "path", | ||||||
|                        timestamps = c("start", "complete")) |                        timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog087, processmapR::frequency("relative")) | map_as_pdf(alog087, file = "../figures/pm_case-event_087.pdf") | ||||||
| 
 | 
 | ||||||
| ### Mornings and afternoons | ### Mornings and afternoons | ||||||
| 
 | 
 | ||||||
| datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") | datlogs$tod <- ifelse(lubridate::hour(datlogs$start) > 13, "afternoon", "morning") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[datlogs$tod == "morning",], | alog <- activitylog(datlogs[datlogs$tod == "morning",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_morning.pdf") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[datlogs$tod == "afternoon",], | alog <- activitylog(datlogs[datlogs$tod == "afternoon",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_afternoon.pdf") | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("results/figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("../figures/bp_tod.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], | barplot(proportions(xtabs( ~ tod + artwork, datlogs), margin = "tod"), #col = cc[1:2], | ||||||
| @ -513,24 +277,24 @@ dev.off() | |||||||
| 
 | 
 | ||||||
| datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") | datlogs$wd <- ifelse(datlogs$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekend",], | alog <- activitylog(datlogs[datlogs$wd == "weekend",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_weekend.pdf") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[datlogs$wd == "weekday",], | alog <- activitylog(datlogs[datlogs$wd == "weekday",], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_weekday.pdf") | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("results/figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("../figures/bp_wd.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), | barplot(proportions(xtabs( ~ wd + artwork, datlogs), margin = "wd"), | ||||||
| @ -544,24 +308,24 @@ dev.off() | |||||||
| datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") | datlogs$wds <- ifelse(!is.na(datlogs$vacation), "vacation", "school") | ||||||
| datlogs$wds[datlogs$wd == "weekend"] <- NA | datlogs$wds[datlogs$wd == "weekend"] <- NA | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "school"),], | alog <- activitylog(datlogs[which(datlogs$wds == "school"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_school.pdf") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$wds == "vacation"),], | alog <- activitylog(datlogs[which(datlogs$wds == "vacation"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_vacation.pdf") | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("results/figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("../figures/bp_wds.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, | #barplot(xtabs( ~ wds + artwork, datlogs), las = 2, beside = TRUE, | ||||||
| @ -575,24 +339,24 @@ dev.off() | |||||||
| 
 | 
 | ||||||
| datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") | datlogs$corona <- ifelse(datlogs$date < "2020-03-14", "pre", "post") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "pre"),], | alog <- activitylog(datlogs[which(datlogs$corona == "pre"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_pre-corona.pdf") | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$corona == "post"),], | alog <- activitylog(datlogs[which(datlogs$corona == "post"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "event", |                     activity_id = "event", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-event_post-corona.pdf") | ||||||
| 
 | 
 | ||||||
| # Are the same artworks looked at? | # Are the same artworks looked at? | ||||||
| pdf("results/figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) | pdf("../figures/bp_corona.pdf", height = 3.375, width = 12, pointsize = 10) | ||||||
| par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.5,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
| barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), | barplot(proportions(xtabs( ~ corona + artwork, datlogs), margin = "corona"), | ||||||
| @ -605,13 +369,13 @@ dev.off() | |||||||
| # Order in which artworks are looked at | # Order in which artworks are looked at | ||||||
| 
 | 
 | ||||||
| nart <- 5     # select 5 artworks randomly | nart <- 5     # select 5 artworks randomly | ||||||
| alog <- bupaR::activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], | alog <- activitylog(datlogs,#[datlogs$artwork %in% sample(unique(datlogs$artwork), nart), ], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "artwork", |                     activity_id = "artwork", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| #map <- processmapR::process_map(alog, processmapR::frequency("relative")) | #map <- process_map(alog, frequency("relative")) | ||||||
| 
 | 
 | ||||||
| ## select cases with Vermeer | ## select cases with Vermeer | ||||||
| length(unique(datlogs[datlogs$artwork == "080", "case"])) | length(unique(datlogs[datlogs$artwork == "080", "case"])) | ||||||
| @ -628,16 +392,16 @@ which(table(tmp$artwork) > 14000) | |||||||
| 
 | 
 | ||||||
| often080 <- names(which(table(tmp$artwork) > 14000)) | often080 <- names(which(table(tmp$artwork) > 14000)) | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[datlogs$artwork %in% often080, ], | alog <- activitylog(datlogs[datlogs$artwork %in% often080, ], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "artwork", |                     activity_id = "artwork", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-artwork_often080.pdf") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| pdf("results/figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) | pdf("../figures/traceexplore_case-artwork_often080.pdf", height = 8, width = 12, pointsize = 10) | ||||||
| 
 | 
 | ||||||
| processmapR::trace_explorer(alog, | processmapR::trace_explorer(alog, | ||||||
|                             n_traces = 30, type = "frequent", |                             n_traces = 30, type = "frequent", | ||||||
| @ -650,35 +414,40 @@ dev.off() | |||||||
| # Are there certain topics that people are interested in more than others? | # Are there certain topics that people are interested in more than others? | ||||||
| # Do these topic distributions differ for comparable artworks? | # Do these topic distributions differ for comparable artworks? | ||||||
| 
 | 
 | ||||||
| alog <- bupaR::activitylog(datlogs[which(datlogs$event == "openTopic"),], | alog <- activitylog(datlogs[which(datlogs$event == "openTopic"),], | ||||||
|                     case_id = "case", |                     case_id = "case", | ||||||
|                     activity_id = "topic", |                     activity_id = "topic", | ||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps = c("start", "complete")) |                     timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| processmapR::process_map(alog, processmapR::frequency("relative")) | map_as_pdf(alog, file = "../figures/pm_case-topic.pdf") | ||||||
| 
 | 
 | ||||||
| # Order of topics for Vermeer | # Order of topics for Vermeer | ||||||
| # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | ||||||
| #                     case_id = "case", | #                     case_id = "case", | ||||||
| #                     activity_id = "topic", | #                     activity_id = "topic", | ||||||
| #                     resource_id = "path", | #                     resource_id = "path", | ||||||
| #                     timestamps = c("start", "complete")) | #                     timestamps = c("start", "complete")) | ||||||
| # | # | ||||||
| # alog080 <- bupaR::activitylog(datlogs[datlogs$artwork == "080",], | # map_as_pdf(alog080, file = "../figures/pm_case-topic_080.pdf") | ||||||
|  | # | ||||||
|  | # | ||||||
|  | # alog080 <- activitylog(datlogs[datlogs$artwork == "080",], | ||||||
| #                        case_id = "case", | #                        case_id = "case", | ||||||
| #                        activity_id = "topicFile", | #                        activity_id = "topicFile", | ||||||
| #                        resource_id = "path", | #                        resource_id = "path", | ||||||
| #                        timestamps = c("start", "complete")) | #                        timestamps = c("start", "complete")) | ||||||
| # | # | ||||||
| # #processmapR::process_map(alog080, processmapR::frequency("relative")) | # #process_map(alog080, frequency("relative")) | ||||||
| # | # | ||||||
| # # Comparable artwork | # # Comparable artwork | ||||||
| # alog083 <- bupaR::activitylog(datlogs[datlogs$artwork == "083",], | # alog083 <- activitylog(datlogs[datlogs$artwork == "083",], | ||||||
| #                        case_id = "case", | #                        case_id = "case", | ||||||
| #                        activity_id = "topic", | #                        activity_id = "topic", | ||||||
| #                        resource_id = "path", | #                        resource_id = "path", | ||||||
| #                        timestamps = c("start", "complete")) | #                        timestamps = c("start", "complete")) | ||||||
|  | # | ||||||
|  | # map_as_pdf(alog083, file = "../figures/pm_case-topic_083.pdf") | ||||||
| 
 | 
 | ||||||
| # artworks that have the same topics than Vermeer | # artworks that have the same topics than Vermeer | ||||||
| which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | ||||||
| @ -689,13 +458,73 @@ which(rowSums(xtabs( ~ artwork + topic, datlogs[datlogs$topic %in% | |||||||
| 
 | 
 | ||||||
| for (art in c("037", "046", "062", "080", "083", "109")) { | for (art in c("037", "046", "062", "080", "083", "109")) { | ||||||
| 
 | 
 | ||||||
|   alog <- bupaR::activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], |   alog <- activitylog(datlogs[datlogs$event == "openTopic" & datlogs$artwork == art,], | ||||||
|                       case_id = "case", |                       case_id = "case", | ||||||
|                       activity_id = "topic", |                       activity_id = "topic", | ||||||
|                       resource_id = "path", |                       resource_id = "path", | ||||||
|                       timestamps = c("start", "complete")) |                       timestamps = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
|   processmapR::process_map(alog, processmapR::frequency("relative")) |   map_as_pdf(alog, file = paste0("../figures/pm_case-topic_", art, ".pdf")) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | # Angewandte Kunst, Graphik, Gemälde, Kultur | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | c("Kultur", "Kultur", "Graphik", "Gemälde", "Gemälde", "Gemälde", | ||||||
|  |   "Gemälde", "Gemälde", "Graphik", "Gemälde", "Angewandte Kunst", "", | ||||||
|  |   "Gemälde", "Angewandte Kunst", "", "", "Graphik", "Angewandte Kunst", | ||||||
|  |   "Angewandte Kunst", "Gemälde", "Angewandte Kunst", "Gemälde", "", | ||||||
|  |   "Gemälde", "Gemälde", "Gemälde", "Graphik", "Gemälde", "Gemälde", | ||||||
|  |   "Gemälde", "", "Angewandte Kunst", "Angewandte Kunst", "Gemälde", | ||||||
|  |   "Graphik", "Gemälde", "Gemälde", "Gemälde", "Gemälde", | ||||||
|  |   "Angewandte Kunst", "Gemälde", "Gemälde", "Gemälde", "Kultur", "Kultur", | ||||||
|  |   "Gemälde", "Kultur", "", "Gemälde", "", "Graphik", "Kultur", "Gemälde", | ||||||
|  |   "", "Kultur", "Gemälde", "Kultur", "Gemälde", "Gemälde", "Gemälde", | ||||||
|  |   "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", "Kultur", | ||||||
|  |   "Angewandte Kunst", "Info", "Info", "Info", "Kultur", "Kultur") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # BURSTS | ||||||
|  | which.max(table(datlogs$date)) | ||||||
|  | tmp <- datlogs[datlogs$date == "2017-02-12", ] | ||||||
|  | 
 | ||||||
|  | # number of traces per case on 2017-02-12 | ||||||
|  | rowSums(xtabs( ~ case + path, tmp) != 0) | ||||||
|  | 
 | ||||||
|  | range(tmp$start) | ||||||
|  | hours <- lubridate::hour(tmp$start) | ||||||
|  | xtabs( ~ case + hours, tmp) | ||||||
|  | 
 | ||||||
|  | # distribution of cases over the day | ||||||
|  | colSums(xtabs( ~ case + hours, tmp) != 0) | ||||||
|  | barplot(colSums(xtabs( ~ case + hours, tmp) != 0)) | ||||||
|  | 
 | ||||||
|  | aggregate(path ~ case + hours, tmp, length) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | tmp <- aggregate(path ~ case, datlogs, length) | ||||||
|  | tmp$date <- as.Date(datlogs[!duplicated(datlogs$case), "start"]) | ||||||
|  | tmp$time <- lubridate::hour(datlogs[!duplicated(datlogs$case), "start"]) | ||||||
|  | 
 | ||||||
|  | tmp[tmp$path > 200, ] | ||||||
|  | 
 | ||||||
|  | plot(path ~ time, tmp, cex = 2, col = rgb(0,0,0,.3)) | ||||||
|  | 
 | ||||||
|  | lattice::barchart(path ~ time, tmp, horizontal=F) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | ########################################################################### | ||||||
|  | # HELPER | ||||||
|  | 
 | ||||||
|  | map_as_pdf <- function(alog, file, type = frequency("relative")) { | ||||||
|  |   map <- process_map(alog, type = type) | ||||||
|  |   g <- DiagrammeR::grViz(map$x$diagram) |> DiagrammeRsvg::export_svg() |> charToRaw() | ||||||
|  |   rsvg::rsvg_pdf(g, file) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | |||||||
| @ -1,14 +1,18 @@ | |||||||
| # 03_create-petrinet.py | # 03_create-petrinet.py | ||||||
| # | # | ||||||
| # content: (1) Create places and transitions | # content: (1) Create places and transitions | ||||||
| #          (2) Normative net | #          (2) Sequential net | ||||||
|  | #          (3) Concurrent net | ||||||
| # | # | ||||||
| # input:  -- | # input:  -- | ||||||
| # output: results/normative_petrinet.pnml | # output: results/haum/conformative_petrinet_con.pnml | ||||||
| #         results/processmaps/normative_petrinet.png | #         results/processmaps/conformative_petrinet_con.png | ||||||
| #         results/processmaps/normative_bpmn.png | #         results/processmaps/conformative_bpmn_con.png | ||||||
|  | #         results/haum/conformative_petrinet_seq.pnml | ||||||
|  | #         results/processmaps/conformative_petrinet_seq.png | ||||||
|  | #         results/processmaps/conformative_bpmn_seq.png | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-06 | ||||||
| 
 | 
 | ||||||
| import pm4py | import pm4py | ||||||
| from pm4py.objects.petri_net.obj import PetriNet, Marking | from pm4py.objects.petri_net.obj import PetriNet, Marking | ||||||
| @ -58,90 +62,93 @@ t_16 = PetriNet.Transition("t_16") | |||||||
| t_17 = PetriNet.Transition("t_17") | t_17 = PetriNet.Transition("t_17") | ||||||
| t_18 = PetriNet.Transition("t_18") | t_18 = PetriNet.Transition("t_18") | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Normative net --------------- | #--------------- (2) Sequential net --------------- | ||||||
| 
 | 
 | ||||||
| net = PetriNet("new_petri_net") | net_seq = PetriNet("new_petri_net") | ||||||
| 
 | 
 | ||||||
| # Add places | # Add places | ||||||
| net.places.add(source) | net_seq.places.add(source) | ||||||
| net.places.add(sink) | net_seq.places.add(sink) | ||||||
| net.places.add(p_1) | net_seq.places.add(p_1) | ||||||
| net.places.add(p_2) | net_seq.places.add(p_2) | ||||||
| net.places.add(p_3) | net_seq.places.add(p_3) | ||||||
| net.places.add(p_4) | net_seq.places.add(p_4) | ||||||
| net.places.add(p_5) | net_seq.places.add(p_5) | ||||||
| net.places.add(p_6) | net_seq.places.add(p_6) | ||||||
| net.places.add(p_7) | net_seq.places.add(p_7) | ||||||
| net.places.add(p_8) | net_seq.places.add(p_8) | ||||||
| net.places.add(p_9) |  | ||||||
| net.places.add(p_10) |  | ||||||
| net.places.add(p_11) |  | ||||||
| net.places.add(p_12) |  | ||||||
| 
 | 
 | ||||||
| # Add transitions | # Add transitions | ||||||
| net.transitions.add(mv) | net_seq.transitions.add(mv) | ||||||
| net.transitions.add(fc) | net_seq.transitions.add(fc) | ||||||
| net.transitions.add(ot) | net_seq.transitions.add(ot) | ||||||
| net.transitions.add(op) | net_seq.transitions.add(op) | ||||||
| 
 | 
 | ||||||
| # Add hidden transitions | # Add hidden transitions | ||||||
| net.transitions.add(t_1) | net_seq.transitions.add(t_1) | ||||||
| net.transitions.add(t_2) | net_seq.transitions.add(t_2) | ||||||
| net.transitions.add(t_3) | net_seq.transitions.add(t_3) | ||||||
| net.transitions.add(t_4) | net_seq.transitions.add(t_4) | ||||||
| net.transitions.add(t_5) | net_seq.transitions.add(t_5) | ||||||
| net.transitions.add(t_6) | net_seq.transitions.add(t_6) | ||||||
| net.transitions.add(t_7) | net_seq.transitions.add(t_7) | ||||||
| net.transitions.add(t_8) | net_seq.transitions.add(t_8) | ||||||
| net.transitions.add(t_9) | net_seq.transitions.add(t_9) | ||||||
| net.transitions.add(t_10) | net_seq.transitions.add(t_10) | ||||||
| net.transitions.add(t_11) | net_seq.transitions.add(t_11) | ||||||
| net.transitions.add(t_12) | net_seq.transitions.add(t_12) | ||||||
| net.transitions.add(t_13) | net_seq.transitions.add(t_13) | ||||||
| net.transitions.add(t_14) | net_seq.transitions.add(t_14) | ||||||
| net.transitions.add(t_15) | net_seq.transitions.add(t_15) | ||||||
|  | net_seq.transitions.add(t_16) | ||||||
|  | net_seq.transitions.add(t_17) | ||||||
|  | net_seq.transitions.add(t_18) | ||||||
| 
 | 
 | ||||||
| # Add arcs | # Add arcs | ||||||
| petri_utils.add_arc_from_to(source, t_1, net) | petri_utils.add_arc_from_to(source, t_1, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_1, p_1, net) | petri_utils.add_arc_from_to(source, t_2, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_1, p_2, net) | petri_utils.add_arc_from_to(t_1, p_1, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_1, t_2, net) | petri_utils.add_arc_from_to(t_2, p_2, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_1, t_3, net) | petri_utils.add_arc_from_to(p_1, mv, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_3, p_5, net) | petri_utils.add_arc_from_to(p_2, fc, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_2, p_3, net) | petri_utils.add_arc_from_to(mv, p_3, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_3, mv, net) | petri_utils.add_arc_from_to(p_3, t_3, net_seq) | ||||||
| petri_utils.add_arc_from_to(mv, p_4, net) | petri_utils.add_arc_from_to(p_3, t_4, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_4, t_5, net) | petri_utils.add_arc_from_to(p_3, t_5, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_4, t_6, net) | petri_utils.add_arc_from_to(p_3, t_6, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_6, p_3, net) | petri_utils.add_arc_from_to(p_3, t_7, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_5, p_5, net) | petri_utils.add_arc_from_to(t_7, p_1, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_5, t_15, net) | petri_utils.add_arc_from_to(fc, p_4, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_15, sink, net) | petri_utils.add_arc_from_to(p_4, t_8, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_2, fc, net) | petri_utils.add_arc_from_to(p_4, t_9, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_2, t_8, net) | petri_utils.add_arc_from_to(p_4, t_10, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_8, p_12, net) | petri_utils.add_arc_from_to(t_9, p_1, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_12, t_15, net) | petri_utils.add_arc_from_to(t_16, p_5, net_seq) | ||||||
| petri_utils.add_arc_from_to(fc, p_6, net) | petri_utils.add_arc_from_to(t_3, p_2, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_6, t_9, net) | petri_utils.add_arc_from_to(t_5, p_6, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_9, p_12, net) | petri_utils.add_arc_from_to(t_6, p_5, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_6, t_4, net) | petri_utils.add_arc_from_to(p_6, ot, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_4, p_7, net) | petri_utils.add_arc_from_to(p_5, op, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_7, ot, net) | petri_utils.add_arc_from_to(ot, p_8, net_seq) | ||||||
| petri_utils.add_arc_from_to(ot, p_8, net) | petri_utils.add_arc_from_to(op, p_7, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_8, t_10, net) | petri_utils.add_arc_from_to(p_8, t_11, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_10, p_11, net) | petri_utils.add_arc_from_to(p_8, t_12, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_11, t_13, net) | petri_utils.add_arc_from_to(p_8, t_13, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_13, p_12, net) | petri_utils.add_arc_from_to(p_8, t_17, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_8, t_7, net) | petri_utils.add_arc_from_to(t_10, p_6, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_7, p_9, net) | petri_utils.add_arc_from_to(t_17, p_6, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_9, op, net) | petri_utils.add_arc_from_to(p_7, t_14, net_seq) | ||||||
| petri_utils.add_arc_from_to(op, p_10, net) | petri_utils.add_arc_from_to(p_7, t_15, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_10, t_11, net) | petri_utils.add_arc_from_to(p_7, t_16, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_10, t_12, net) | petri_utils.add_arc_from_to(p_7, t_18, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_12, p_9, net) | petri_utils.add_arc_from_to(t_18, p_6, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_11, p_11, net) | petri_utils.add_arc_from_to(t_13, p_5, net_seq) | ||||||
| petri_utils.add_arc_from_to(p_11, t_14, net) | petri_utils.add_arc_from_to(t_15, p_1, net_seq) | ||||||
| petri_utils.add_arc_from_to(t_14, p_7, net) | petri_utils.add_arc_from_to(t_11, p_1, net_seq) | ||||||
|  | petri_utils.add_arc_from_to(t_4, sink, net_seq) | ||||||
|  | petri_utils.add_arc_from_to(t_8, sink, net_seq) | ||||||
|  | petri_utils.add_arc_from_to(t_12, sink, net_seq) | ||||||
|  | petri_utils.add_arc_from_to(t_14, sink, net_seq) | ||||||
| 
 | 
 | ||||||
| # Add tokens | # Add tokens | ||||||
| initial_marking = Marking() | initial_marking = Marking() | ||||||
| @ -149,14 +156,116 @@ initial_marking[source] = 1 | |||||||
| final_marking = Marking() | final_marking = Marking() | ||||||
| final_marking[sink] = 1 | final_marking[sink] = 1 | ||||||
| 
 | 
 | ||||||
| pm4py.view_petri_net(net, initial_marking, final_marking) | pm4py.view_petri_net(net_seq, initial_marking, final_marking) | ||||||
| pm4py.write_pnml(net, initial_marking, final_marking, | pm4py.write_pnml(net_seq, initial_marking, final_marking, "results/haum/conformative_petrinet_seq.pnml") | ||||||
|         "results/normative_petrinet.pnml") |  | ||||||
| 
 | 
 | ||||||
| pm4py.vis.save_vis_petri_net(net, initial_marking, final_marking, | pm4py.vis.save_vis_petri_net(net_seq, initial_marking, final_marking, | ||||||
|         "results/processmaps/normative_petrinet.png") |         "results/processmaps/conformative_petrinet_seq.png") | ||||||
| 
 | 
 | ||||||
| bpmn = pm4py.convert.convert_to_bpmn(net, initial_marking, final_marking) | bpmn = pm4py.convert.convert_to_bpmn(net_seq, initial_marking, final_marking) | ||||||
| pm4py.view_bpmn(bpmn) | pm4py.view_bpmn(bpmn) | ||||||
| 
 | 
 | ||||||
| pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/normative_bpmn.png") | pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_seq.png") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #--------------- (3) Concurrent net --------------- | ||||||
|  | 
 | ||||||
|  | net_con = PetriNet("new_petri_net") | ||||||
|  | 
 | ||||||
|  | # Add places | ||||||
|  | net_con.places.add(source) | ||||||
|  | net_con.places.add(sink) | ||||||
|  | net_con.places.add(p_1) | ||||||
|  | net_con.places.add(p_2) | ||||||
|  | net_con.places.add(p_3) | ||||||
|  | net_con.places.add(p_4) | ||||||
|  | net_con.places.add(p_5) | ||||||
|  | net_con.places.add(p_6) | ||||||
|  | net_con.places.add(p_7) | ||||||
|  | net_con.places.add(p_8) | ||||||
|  | net_con.places.add(p_9) | ||||||
|  | net_con.places.add(p_10) | ||||||
|  | net_con.places.add(p_11) | ||||||
|  | net_con.places.add(p_12) | ||||||
|  | 
 | ||||||
|  | # Add transitions | ||||||
|  | net_con.transitions.add(mv) | ||||||
|  | net_con.transitions.add(fc) | ||||||
|  | net_con.transitions.add(ot) | ||||||
|  | net_con.transitions.add(op) | ||||||
|  | 
 | ||||||
|  | # Add hidden transitions | ||||||
|  | net_con.transitions.add(t_1) | ||||||
|  | net_con.transitions.add(t_2) | ||||||
|  | net_con.transitions.add(t_3) | ||||||
|  | net_con.transitions.add(t_4) | ||||||
|  | net_con.transitions.add(t_5) | ||||||
|  | net_con.transitions.add(t_6) | ||||||
|  | net_con.transitions.add(t_7) | ||||||
|  | net_con.transitions.add(t_8) | ||||||
|  | net_con.transitions.add(t_9) | ||||||
|  | net_con.transitions.add(t_10) | ||||||
|  | net_con.transitions.add(t_11) | ||||||
|  | net_con.transitions.add(t_12) | ||||||
|  | net_con.transitions.add(t_13) | ||||||
|  | net_con.transitions.add(t_14) | ||||||
|  | net_con.transitions.add(t_15) | ||||||
|  | 
 | ||||||
|  | # Add arcs | ||||||
|  | petri_utils.add_arc_from_to(source, t_1, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_1, p_1, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_1, p_2, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_1, t_2, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_1, t_3, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_3, p_5, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_2, p_3, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_3, mv, net_con) | ||||||
|  | petri_utils.add_arc_from_to(mv, p_4, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_4, t_5, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_4, t_6, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_6, p_3, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_5, p_5, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_5, t_15, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_15, sink, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_2, fc, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_2, t_8, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_8, p_12, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_12, t_15, net_con) | ||||||
|  | petri_utils.add_arc_from_to(fc, p_6, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_6, t_9, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_9, p_12, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_6, t_4, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_4, p_7, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_7, ot, net_con) | ||||||
|  | petri_utils.add_arc_from_to(ot, p_8, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_8, t_10, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_10, p_11, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_11, t_13, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_13, p_12, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_8, t_7, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_7, p_9, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_9, op, net_con) | ||||||
|  | petri_utils.add_arc_from_to(op, p_10, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_10, t_11, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_10, t_12, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_12, p_9, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_11, p_11, net_con) | ||||||
|  | petri_utils.add_arc_from_to(p_11, t_14, net_con) | ||||||
|  | petri_utils.add_arc_from_to(t_14, p_7, net_con) | ||||||
|  | 
 | ||||||
|  | # Add tokens | ||||||
|  | initial_marking = Marking() | ||||||
|  | initial_marking[source] = 1 | ||||||
|  | final_marking = Marking() | ||||||
|  | final_marking[sink] = 1 | ||||||
|  | 
 | ||||||
|  | pm4py.view_petri_net(net_con, initial_marking, final_marking) | ||||||
|  | pm4py.write_pnml(net_con, initial_marking, final_marking, "results/haum/conformative_petrinet_con.pnml") | ||||||
|  | 
 | ||||||
|  | pm4py.vis.save_vis_petri_net(net_con, initial_marking, final_marking, | ||||||
|  |         "results/processmaps/conformative_petrinet_con.png") | ||||||
|  | 
 | ||||||
|  | bpmn = pm4py.convert.convert_to_bpmn(net_con, initial_marking, final_marking) | ||||||
|  | pm4py.view_bpmn(bpmn) | ||||||
|  | 
 | ||||||
|  | pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/conformative_bpmn_con.png") | ||||||
|  | |||||||
| @ -1,24 +1,25 @@ | |||||||
| # 04_conformance-checking.py | # 04_conformance-checking.py | ||||||
| # | # | ||||||
| # content: (1) Load data and create event log | # content: (1) Load data and create event log | ||||||
| #          (2) Check against normative Petri Net | #          (2) Infos for items | ||||||
| # | # | ||||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| #         results/normative_petrinet.pnml | #         results/haum/conformative_petrinet_con.pnml | ||||||
| # output: results/eval_all-miners_complete.csv | # output: results/processmaps/dfg_complete_python.png | ||||||
|  | #         results/eval_all-miners_complete.csv | ||||||
| #         results/eval_all-miners_clean.csv | #         results/eval_all-miners_clean.csv | ||||||
| #         ../../thesis/figures/petrinet_normative.png | #         results/processmaps/petrinet_conformative.png | ||||||
| #         ../../thesis/figures/petrinet_heuristics_clean.png | #         results/processmaps/petrinet_heuristics_clean.png | ||||||
| #         ../../thesis/figures/petrinet_alpha_clean.png | #         results/processmaps/petrinet_alpha_clean.png | ||||||
| #         ../../thesis/figures/petrinet_inductive_clean.png | #         results/processmaps/petrinet_inductive_clean.png | ||||||
| #         ../../thesis/figures/petrinet_ilp_clean.png | #         results/processmaps/petrinet_ilp_clean.png | ||||||
| #         ../../thesis/figures/bpmn_normative.png | #         results/processmaps/bpmn_conformative.png | ||||||
| #         ../../thesis/figures/bpmn_inductive_clean.png | #         results/processmaps/bpmn_inductive_clean.png | ||||||
| #         ../../thesis/figures/bpmn_ilp_clean.png | #         results/processmaps/bpmn_ilp_clean.png | ||||||
| #         ../../thesis/figures/bpmn_alpha_clean.png | #         results/processmaps/bpmn_alpha_clean.png | ||||||
| #         ../../thesis/figures/bpmn_heuristics_clean.png | #         results/processmaps/bpmn_heuristics_clean.png | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-06 | ||||||
| 
 | 
 | ||||||
| import pm4py | import pm4py | ||||||
| import pandas as pd | import pandas as pd | ||||||
| @ -28,13 +29,13 @@ from python_helpers import eval_pm, pn_infos_miner | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Load data and create event logs --------------- | #--------------- (1) Load data and create event logs --------------- | ||||||
| 
 | 
 | ||||||
| dat = pd.read_csv("results/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") | dat = pd.read_csv("results/haum/event_logfiles_2024-02-21_16-07-33.csv", sep = ";") | ||||||
| 
 | 
 | ||||||
| event_log = pm4py.format_dataframe(dat, case_id = "path", | event_log = pm4py.format_dataframe(dat, case_id = "path", | ||||||
|                                    activity_key = "event", |                                    activity_key = "event", | ||||||
|                                    timestamp_key = "date.start") |                                    timestamp_key = "date.start") | ||||||
| 
 | 
 | ||||||
| ## Descriptives of log data | ###### Descriptives of log data ###### | ||||||
| 
 | 
 | ||||||
| # Distribution of events | # Distribution of events | ||||||
| event_log.event.value_counts() | event_log.event.value_counts() | ||||||
| @ -56,9 +57,9 @@ len(variants_no_move) | |||||||
| sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True)) | sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True)) | ||||||
| {k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]} | {k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]} | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Check against normative Petri Net --------------- | ###### Check against "conformative" Petri Net ###### | ||||||
| 
 | 
 | ||||||
| basenet, initial_marking, final_marking = pm4py.read_pnml("results/normative_petrinet.pnml") | basenet, initial_marking, final_marking = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml") | ||||||
| 
 | 
 | ||||||
| # TBR | # TBR | ||||||
| replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) | replayed_traces = pm4py.conformance_diagnostics_token_based_replay(event_log, basenet, initial_marking, final_marking) | ||||||
| @ -92,13 +93,23 @@ event_log[event_log["@@case_index"] == index_broken[0]].item.unique().tolist() | |||||||
| event_log[event_log["@@case_index"] == index_broken[0]]["fileId.start"].unique().tolist() | event_log[event_log["@@case_index"] == index_broken[0]]["fileId.start"].unique().tolist() | ||||||
| # --> logging error in raw file | # --> logging error in raw file | ||||||
| 
 | 
 | ||||||
|  | ## Footprints                       | ||||||
|  | from pm4py.algo.discovery.footprints import algorithm as footprints_discovery | ||||||
|  | from pm4py.visualization.footprints import visualizer as fp_visualizer | ||||||
|  | fp_log = footprints_discovery.apply(event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG) | ||||||
|  | fp_net = footprints_discovery.apply(basenet, initial_marking, final_marking) | ||||||
|  | gviz = fp_visualizer.apply(fp_net, parameters={fp_visualizer.Variants.SINGLE.value.Parameters.FORMAT: "svg"}) | ||||||
|  | fp_visualizer.view(gviz) | ||||||
|  | 
 | ||||||
|  | efg_graph = pm4py.discover_eventually_follows_graph(event_log) | ||||||
|  | 
 | ||||||
| ## Fitting different miners | ## Fitting different miners | ||||||
| 
 | 
 | ||||||
| eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||||
|                                "simplicity", "sound", "narcs", "ntrans", |                                "simplicity", "sound", "narcs", "ntrans", | ||||||
|                                "nplaces", "nvariants", "mostfreq"]) |                                "nplaces", "nvariants", "mostfreq"]) | ||||||
| 
 | 
 | ||||||
| for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: | for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]: | ||||||
|     eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) |     eval = pd.concat([eval, pn_infos_miner(event_log, miner)]) | ||||||
| 
 | 
 | ||||||
| eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") | eval.to_csv("results/eval_all-miners_complete.csv", sep = ";") | ||||||
| @ -110,7 +121,7 @@ eval_clean = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | |||||||
|                                      "simplicity", "sound", "narcs", "ntrans", |                                      "simplicity", "sound", "narcs", "ntrans", | ||||||
|                                      "nplaces", "nvariants", "mostfreq"]) |                                      "nplaces", "nvariants", "mostfreq"]) | ||||||
| 
 | 
 | ||||||
| for miner in ["normative", "alpha", "heuristics", "inductive", "ilp"]: | for miner in ["conformative", "alpha", "heuristics", "inductive", "ilp"]: | ||||||
|     eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) |     eval_clean = pd.concat([eval_clean, pn_infos_miner(event_log_clean, miner)]) | ||||||
| 
 | 
 | ||||||
| eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | ||||||
| @ -118,27 +129,28 @@ eval_clean.to_csv("results/eval_all-miners_clean.csv", sep = ";") | |||||||
| ## Directly-follows graph | ## Directly-follows graph | ||||||
| dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) | dfg, start_activities, end_activities = pm4py.discover_dfg(event_log_clean) | ||||||
| pm4py.view_dfg(dfg, start_activities, end_activities) | pm4py.view_dfg(dfg, start_activities, end_activities) | ||||||
|  | pm4py.save_vis_dfg(dfg, start_activities, end_activities, "results/processmaps/dfg_complete_python.png") | ||||||
| 
 | 
 | ||||||
| ## Export petri nets | ## Export petri nets | ||||||
| pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, | pm4py.vis.save_vis_petri_net(basenet, initial_marking, final_marking, "results/processmaps/petrinet_conformative.png") | ||||||
|         "../../thesis/figures/petrinet_normative.png") |  | ||||||
| h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean) | h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "../../thesis/figures/petrinet_heuristics_clean.png") | pm4py.vis.save_vis_petri_net(h_net, h_im, h_fm, "results/processmaps/petrinet_heuristics_clean.png") | ||||||
| a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean) | a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "../../thesis/figures/petrinet_alpha_clean.png") | pm4py.vis.save_vis_petri_net(a_net, a_im, a_fm, "results/processmaps/petrinet_alpha_clean.png") | ||||||
| i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean) | i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "../../thesis/figures/petrinet_inductive_clean.png") | pm4py.vis.save_vis_petri_net(i_net, i_im, i_fm, "results/processmaps/petrinet_inductive_clean.png") | ||||||
| ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean) | ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean) | ||||||
| pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "../../thesis/figures/petrinet_ilp_clean.png") | pm4py.vis.save_vis_petri_net(ilp_net, ilp_im, ilp_fm, "results/processmaps/petrinet_ilp_clean.png") | ||||||
| 
 | 
 | ||||||
| # convert to BPMN | # convert to BPMN | ||||||
| base_bpmn = pm4py.convert.convert_to_bpmn(basenet, initial_marking, final_marking) | base_bpmn = pm4py.convert.convert_to_bpmn(basenet, initial_marking, final_marking) | ||||||
| pm4py.vis.save_vis_bpmn(base_bpmn, "../../thesis/figures/bpmn_normative.png") | pm4py.vis.save_vis_bpmn(base_bpmn, "results/processmaps/bpmn_conformative.png") | ||||||
| i_bpmn = pm4py.convert.convert_to_bpmn(i_net, i_im, i_fm) | i_bpmn = pm4py.convert.convert_to_bpmn(i_net, i_im, i_fm) | ||||||
| pm4py.vis.save_vis_bpmn(i_bpmn, "../../thesis/figures/bpmn_inductive_clean.png") | pm4py.vis.save_vis_bpmn(i_bpmn, "results/processmaps/bpmn_inductive_clean.png") | ||||||
| ilp_bpmn = pm4py.convert.convert_to_bpmn(ilp_net, ilp_im, ilp_fm) | ilp_bpmn = pm4py.convert.convert_to_bpmn(ilp_net, ilp_im, ilp_fm) | ||||||
| pm4py.vis.save_vis_bpmn(ilp_bpmn, "../../thesis/figures/bpmn_ilp_clean.png") | pm4py.vis.save_vis_bpmn(ilp_bpmn, "results/processmaps/bpmn_ilp_clean.png") | ||||||
| a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm) | a_bpmn = pm4py.convert.convert_to_bpmn(a_net, a_im, a_fm) | ||||||
| pm4py.vis.save_vis_bpmn(a_bpmn, "../../thesis/figures/bpmn_alpha_clean.png") | pm4py.vis.save_vis_bpmn(a_bpmn, "results/processmaps/bpmn_alpha_clean.png") | ||||||
| h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm) | h_bpmn = pm4py.convert.convert_to_bpmn(h_net, h_im, h_fm) | ||||||
| pm4py.vis.save_vis_bpmn(h_bpmn, "../../thesis/figures/bpmn_heuristics_clean.png") | pm4py.vis.save_vis_bpmn(h_bpmn, "results/processmaps/bpmn_heuristics_clean.png") | ||||||
|  | 
 | ||||||
|  | |||||||
| @ -5,23 +5,22 @@ | |||||||
| #          (3) DFG for complete data | #          (3) DFG for complete data | ||||||
| #          (4) Export data frame for analyses | #          (4) Export data frame for analyses | ||||||
| # | # | ||||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| #         results/raw_logfiles_2024-02-21_16-07-33.csv | #         results/haum/raw_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: results/eventlogs_pre-corona_cleaned.RData | # output: results/haum/eventlogs_pre-corona_cleaned.RData | ||||||
| #         results/eventlogs_pre-corona_cleaned.csv | #         results/haum/eventlogs_pre-corona_cleaned.csv | ||||||
| #         ../../thesis/figures/dfg_complete_WFnet_R.pdf |  | ||||||
| # | # | ||||||
| # last mod: 2024-03-23 | # last mod: 2024-03-06 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| #--------------- (1) Look at broken trace --------------- | #--------------- (1) Look at broken trace --------------- | ||||||
| 
 | 
 | ||||||
| datraw <- read.table("results/raw_logfiles_2024-02-21_16-07-33.csv", | datraw <- read.table("results/haum/raw_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                      header = TRUE, sep = ";") |                    header = TRUE, sep = ";") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| datlogs <- read.table("results/event_logfiles_2024-02-21_16-07-33.csv", | datlogs <- read.table("results/haum/event_logfiles_2024-02-21_16-07-33.csv", | ||||||
|                       colClasses = c("character", "character", "POSIXct", |                       colClasses = c("character", "character", "POSIXct", | ||||||
|                                      "POSIXct", "character", "integer", |                                      "POSIXct", "character", "integer", | ||||||
|                                      "numeric", "character", "character", |                                      "numeric", "character", "character", | ||||||
| @ -85,7 +84,7 @@ dfg <- processmapR::process_map(alog, | |||||||
|   render     = FALSE) |   render     = FALSE) | ||||||
| 
 | 
 | ||||||
| processmapR::export_map(dfg, | processmapR::export_map(dfg, | ||||||
|   file_name = paste0("../../thesis/figures/dfg_complete_WFnet_R.pdf"), |   file_name = paste0("results/processmaps/dfg_complete_R.pdf"), | ||||||
|   file_type = "pdf") |   file_type = "pdf") | ||||||
| 
 | 
 | ||||||
| rm(tmp) | rm(tmp) | ||||||
| @ -110,10 +109,10 @@ dat <- datlogs[as.Date(datlogs$date.start) < "2020-03-13", ] | |||||||
| # Remove corrupt trace | # Remove corrupt trace | ||||||
| dat <- dat[dat$path != 106098, ] | dat <- dat[dat$path != 106098, ] | ||||||
| 
 | 
 | ||||||
| save(dat, file = "results/eventlogs_pre-corona_cleaned.RData") | save(dat, file = "results/haum/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| write.table(dat, | write.table(dat, | ||||||
|             file = "results/eventlogs_pre-corona_cleaned.csv", |             file = "results/haum/eventlogs_pre-corona_cleaned.csv", | ||||||
|             sep = ";", |             sep = ";", | ||||||
|             quote = FALSE, |             quote = FALSE, | ||||||
|             row.names = FALSE) |             row.names = FALSE) | ||||||
|  | |||||||
| @ -3,10 +3,10 @@ | |||||||
| # content: (1) Load data and create event log | # content: (1) Load data and create event log | ||||||
| #          (2) Infos for items | #          (2) Infos for items | ||||||
| # | # | ||||||
| # input:  results/eventlogs_pre-corona_cleaned.csv | # input:  results/haum/eventlogs_pre-corona_cleaned.csv | ||||||
| # output: results/pn_infos_items.csv | # output: results/haum/pn_infos_items.csv | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-06 | ||||||
| 
 | 
 | ||||||
| import pm4py | import pm4py | ||||||
| import pandas as pd | import pandas as pd | ||||||
| @ -16,7 +16,7 @@ from python_helpers import eval_pm, pn_infos | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Load data and create event logs --------------- | #--------------- (1) Load data and create event logs --------------- | ||||||
| 
 | 
 | ||||||
| dat = pd.read_csv("results/eventlogs_pre-corona_cleaned", sep = ";") | dat = pd.read_csv("results/haum/eventlogs_pre-corona_cleaned", sep = ";") | ||||||
| 
 | 
 | ||||||
| log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | ||||||
|                                   timestamp_key = "date.start") |                                   timestamp_key = "date.start") | ||||||
| @ -33,5 +33,5 @@ for item in log_path.item.unique().tolist(): | |||||||
| eval = eval.sort_index() | eval = eval.sort_index() | ||||||
| 
 | 
 | ||||||
| # Export | # Export | ||||||
| eval.to_csv("results/pn_infos_items.csv", sep = ";") | eval.to_csv("results/haum/pn_infos_items.csv", sep = ";") | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -7,12 +7,15 @@ | |||||||
| #          (2) Clustering | #          (2) Clustering | ||||||
| #          (3) Visualization with pictures | #          (3) Visualization with pictures | ||||||
| # | # | ||||||
| # input:  results/eventlogs_pre-corona_cleaned.RData | # input:  results/haum/eventlogs_pre-corona_cleaned.RData | ||||||
| #         results/pn_infos_items.csv | #         results/haum/pn_infos_items.csv | ||||||
| # output: results/eventlogs_pre-corona_item-clusters.csv | # output: results/haum/eventlogs_pre-corona_item-clusters.csv | ||||||
| #         ../../thesis/figures/data/clustering_items.RData | #         results/figures/dendrogram_items.pdf | ||||||
|  | #         results/figures/clustering_items.pdf | ||||||
|  | #         results/figures/clustering_artworks.pdf | ||||||
|  | #         results/figures/clustering_artworks.png | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-08 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -22,11 +25,11 @@ source("R_helpers.R") | |||||||
| 
 | 
 | ||||||
| #--------------- (1.1) Read log event data --------------- | #--------------- (1.1) Read log event data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/eventlogs_pre-corona_cleaned.RData") | load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| #--------------- (1.2) Read infos for PM for items --------------- | #--------------- (1.2) Read infos for PM for items --------------- | ||||||
| 
 | 
 | ||||||
| datitem <- read.table("results/pn_infos_items.csv", header = TRUE, | datitem <- read.table("results/haum/pn_infos_items.csv", header = TRUE, | ||||||
|                       sep = ";", row.names = 1) |                       sep = ";", row.names = 1) | ||||||
| 
 | 
 | ||||||
| #--------------- (1.3) Extract additional infos for clustering --------------- | #--------------- (1.3) Extract additional infos for clustering --------------- | ||||||
| @ -93,6 +96,9 @@ mycols <- c("#434F4F", "#78004B", "#FF6900", "#3CB4DC", "#91C86E", "Black") | |||||||
| 
 | 
 | ||||||
| cluster <- cutree(hc, k = k) | cluster <- cutree(hc, k = k) | ||||||
| 
 | 
 | ||||||
|  | pdf("results/figures/dendrogram_items.pdf", width = 6.5, height = 5.5, pointsize = 10) | ||||||
|  | # TODO: Move code for plots to /thesis/ | ||||||
|  | 
 | ||||||
| factoextra::fviz_dend(hc, k = k, | factoextra::fviz_dend(hc, k = k, | ||||||
|                       cex = 0.5, |                       cex = 0.5, | ||||||
|                       k_colors = mycols, |                       k_colors = mycols, | ||||||
| @ -103,6 +109,10 @@ factoextra::fviz_dend(hc, k = k, | |||||||
|                       #ggtheme = ggplot2::theme_bw() |                       #ggtheme = ggplot2::theme_bw() | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
|  | pdf("results/figures/clustering_items.pdf", width = 6.5, height = 5.5, pointsize = 10) | ||||||
|  | 
 | ||||||
| factoextra::fviz_cluster(list(data = df, cluster = cluster), | factoextra::fviz_cluster(list(data = df, cluster = cluster), | ||||||
|                          palette = mycols, |                          palette = mycols, | ||||||
|                          ellipse.type = "convex", |                          ellipse.type = "convex", | ||||||
| @ -111,6 +121,8 @@ factoextra::fviz_cluster(list(data = df, cluster = cluster), | |||||||
|                          main = "", |                          main = "", | ||||||
|                          ggtheme = ggplot2::theme_bw()) |                          ggtheme = ggplot2::theme_bw()) | ||||||
| 
 | 
 | ||||||
|  | dev.off() | ||||||
|  | 
 | ||||||
| aggregate(cbind(precision, generalizability, nvariants, duration, distance, | aggregate(cbind(precision, generalizability, nvariants, duration, distance, | ||||||
|                 scaleSize , rotationDegree, npaths, ncases, nmoves, |                 scaleSize , rotationDegree, npaths, ncases, nmoves, | ||||||
|                 nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem, |                 nflipCard, nopenTopic, nopenPopup) ~ cluster, datitem, | ||||||
| @ -126,6 +138,18 @@ item <- sprintf("%03d", as.numeric(gsub("item_([0-9]{3})", "\\1", | |||||||
| res <- merge(dat, data.frame(item, cluster), by = "item", all.x = TRUE) | res <- merge(dat, data.frame(item, cluster), by = "item", all.x = TRUE) | ||||||
| res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ] | res <- res[order(res$fileId.start, res$date.start, res$timeMs.start), ] | ||||||
| 
 | 
 | ||||||
|  | # Look at clusters | ||||||
|  | par(mfrow = c(2,2)) | ||||||
|  | vioplot::vioplot(duration ~ cluster, res) | ||||||
|  | vioplot::vioplot(distance ~ cluster, res) | ||||||
|  | vioplot::vioplot(scaleSize ~ cluster, res) | ||||||
|  | vioplot::vioplot(rotationDegree ~ cluster, res) | ||||||
|  | 
 | ||||||
|  | write.table(res, | ||||||
|  |             file = "results/haum/eventlogs_pre-corona_item-clusters.csv", | ||||||
|  |             sep = ";", | ||||||
|  |             quote = FALSE, | ||||||
|  |             row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| # DFGs for clusters | # DFGs for clusters | ||||||
| res$start <- res$date.start | res$start <- res$date.start | ||||||
| @ -138,31 +162,64 @@ for (clst in sort(unique(res$cluster))) { | |||||||
|     activity_id = "event", |     activity_id = "event", | ||||||
|     resource_id = "item", |     resource_id = "item", | ||||||
|     timestamps  = c("start", "complete")) |     timestamps  = c("start", "complete")) | ||||||
| 
 |    | ||||||
|   processmapR::process_map(alog, |   dfg <- processmapR::process_map(alog, | ||||||
|     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), |     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), | ||||||
|     sec_nodes  = processmapR::frequency("absolute"), |     sec_nodes  = processmapR::frequency("absolute"), | ||||||
|     type_edges = processmapR::frequency("relative", color_edges = "#FF6900"), |     type_edges = processmapR::frequency("relative", color_edges = "#FF6900"), | ||||||
|     sec_edges  = processmapR::frequency("absolute"), |     sec_edges  = processmapR::frequency("absolute"), | ||||||
|     rankdir    = "LR") |     rankdir    = "LR", | ||||||
|  |     render     = FALSE) | ||||||
|  | 
 | ||||||
|  |   processmapR::export_map(dfg, | ||||||
|  |     file_name = paste0("results/processmaps/dfg_cluster", clst, "_R.pdf"), | ||||||
|  |     file_type = "pdf", | ||||||
|  |     title     = paste("DFG Cluster", clst)) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #--------------- (3) Visualization with pictures --------------- | ||||||
| 
 | 
 | ||||||
| # Look at clusters | coor_2d <- cmdscale(dist_mat, k = 2) | ||||||
| par(mfrow = c(2,2)) |  | ||||||
| vioplot::vioplot(duration ~ cluster, res) |  | ||||||
| vioplot::vioplot(distance ~ cluster, res) |  | ||||||
| vioplot::vioplot(scaleSize ~ cluster, res) |  | ||||||
| vioplot::vioplot(rotationDegree ~ cluster, res) |  | ||||||
| 
 | 
 | ||||||
| write.table(res, | items <- sprintf("%03d", as.numeric(rownames(datitem))) | ||||||
|             file = "results/eventlogs_pre-corona_item-clusters.csv", |  | ||||||
|             sep = ";", |  | ||||||
|             quote = FALSE, |  | ||||||
|             row.names = FALSE) |  | ||||||
| 
 | 
 | ||||||
| # Save data for plots and tables | pdf("results/figures/clustering_artworks.pdf", height = 8, width = 8, pointsize = 16) | ||||||
|  | #png("results/figures/clustering_artworks.png", units = "in", height = 8, width = 8, pointsize = 16, res = 300) | ||||||
| 
 | 
 | ||||||
| save(hc, k, res, dist_mat, datitem, df, | par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
|      file = "../../thesis/figures/data/clustering_items.RData") | 
 | ||||||
|  | plot(coor_2d, type = "n", ylim = c(-3.7, 2.6), xlim = c(-5, 10.5), | ||||||
|  |      xlab = "", ylab = "") | ||||||
|  | 
 | ||||||
|  | for (item in items) { | ||||||
|  | 
 | ||||||
|  |   if (item == "125") { | ||||||
|  | 
 | ||||||
|  |     pic <- jpeg::readJPEG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", | ||||||
|  |                           item, "/", item, ".jpg")) | ||||||
|  |   } else { | ||||||
|  |     pic <- png::readPNG(paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/", | ||||||
|  |                         item, "/", item, ".png")) | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   img <- as.raster(pic[,,1:3]) | ||||||
|  | 
 | ||||||
|  |   x <- coor_2d[items == item, 1] | ||||||
|  |   y <- coor_2d[items == item, 2] | ||||||
|  | 
 | ||||||
|  |   points(x, y, | ||||||
|  |          col = mycols[cluster[items == item]], | ||||||
|  |          cex = 6, | ||||||
|  |          pch = 15) | ||||||
|  | 
 | ||||||
|  |   rasterImage(img, | ||||||
|  |               xleft = x - .45, | ||||||
|  |               xright = x + .45, | ||||||
|  |               ybottom = y - .2, | ||||||
|  |               ytop = y + .2) | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | legend("topright", paste("Cluster", 1:k), col = mycols, pch = 15, bty = "n") | ||||||
|  | 
 | ||||||
|  | dev.off() | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
code/08_infos-clusters.py (normal file, 45 lines)
| @ -0,0 +1,45 @@ | |||||||
|  | # 08_infos-clusters.py | ||||||
|  | # | ||||||
|  | # content: (1) Load data and create event log | ||||||
|  | #          (2) Infos for clusters | ||||||
|  | #          (3) Process maps for clusters | ||||||
|  | # | ||||||
|  | # input:  results/haum/eventlogs_pre-corona_item-clusters.csv | ||||||
|  | # output: results/haum/pn_infos_clusters.csv | ||||||
|  | # | ||||||
|  | # last mod: 2024-03-06 | ||||||
|  | 
 | ||||||
|  | import pm4py | ||||||
|  | import pandas as pd | ||||||
|  | 
 | ||||||
|  | from python_helpers import eval_pm, pn_infos | ||||||
|  | 
 | ||||||
|  | #--------------- (1) Load data and create event logs --------------- | ||||||
|  | 
 | ||||||
|  | dat = pd.read_csv("results/haum/eventlogs_pre-corona_item-clusters.csv", sep = ";") | ||||||
|  | 
 | ||||||
|  | log_path = pm4py.format_dataframe(dat, case_id = "path", activity_key = "event", | ||||||
|  |                                   timestamp_key = "date.start") | ||||||
|  | 
 | ||||||
|  | #--------------- (2) Infos for clusters --------------- | ||||||
|  | 
 | ||||||
|  | # Merge clusters into data frame | ||||||
|  | eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||||
|  |                                "simplicity", "sound", "narcs", "ntrans", | ||||||
|  |                                "nplaces", "nvariants", "mostfreq"]) | ||||||
|  | for cluster in log_path.grp.unique().tolist(): | ||||||
|  |     eval = pd.concat([eval, pn_infos(log_path, "grp", cluster)]) | ||||||
|  | eval = eval.sort_index() | ||||||
|  | 
 | ||||||
|  | eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";") | ||||||
|  | 
 | ||||||
|  | #--------------- (3) Process maps for clusters --------------- | ||||||
|  | 
 | ||||||
|  | for cluster in log_path.grp.unique().tolist(): | ||||||
|  |     subdata = log_path[log_path.grp == cluster] | ||||||
|  |     subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold=0.5) | ||||||
|  |     pm4py.save_vis_petri_net(subnet, subim, subfm, | ||||||
|  |        "results/processmaps/petrinet_cluster" + str(cluster).zfill(3) + ".png") | ||||||
|  |     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm) | ||||||
|  |     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster_" + | ||||||
|  |             str(cluster).zfill(3) + ".png") | ||||||
| @ -5,11 +5,10 @@ | |||||||
| #          (3) Select features for navigation behavior | #          (3) Select features for navigation behavior | ||||||
| #          (4) Export data frames | #          (4) Export data frames | ||||||
| # | # | ||||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: results/dataframes_case_2019.RData | # output: results/haum/eventlogs_pre-corona_case-clusters.csv | ||||||
| #         results/centrality_cases.RData |  | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-08 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -17,7 +16,7 @@ source("R_helpers.R") | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/eventlogs_pre-corona_cleaned.RData") | load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| # Select one year to handle number of cases | # Select one year to handle number of cases | ||||||
| dat <- dat[as.Date(dat$date.start) > "2018-12-31" & | dat <- dat[as.Date(dat$date.start) > "2018-12-31" & | ||||||
| @ -136,8 +135,8 @@ dattree <- data.frame(case = datcase$case, | |||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| # centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat) | # centrality <- pbapply::pbsapply(dattree$case, get_centrality, data = dat) | ||||||
| # save(centrality, file = "results/centrality_cases.RData") | # save(centrality, file = "results/haum/tmp_centrality.RData") | ||||||
| load("results/centrality_cases.RData") | load("results/haum/tmp_centrality.RData") | ||||||
| 
 | 
 | ||||||
| dattree$BetweenCentrality <- centrality | dattree$BetweenCentrality <- centrality | ||||||
| 
 | 
 | ||||||
| @ -172,5 +171,17 @@ dattree$AvDurItemNorm <- normalize(dattree$AvDurItem) | |||||||
| 
 | 
 | ||||||
| #--------------- (4) Export data frames --------------- | #--------------- (4) Export data frames --------------- | ||||||
| 
 | 
 | ||||||
| save(dat, datcase, dattree, file = "results/dataframes_case_2019.RData") | save(dat, datcase, dattree, file = "results/haum/dataframes_case_2019.RData") | ||||||
|  | 
 | ||||||
|  | write.table(datcase, | ||||||
|  |             file = "results/haum/datcase.csv", | ||||||
|  |             sep = ";", | ||||||
|  |             quote = FALSE, | ||||||
|  |             row.names = FALSE) | ||||||
|  | 
 | ||||||
|  | write.table(datcase, | ||||||
|  |             file = "results/haum/dattree.csv", | ||||||
|  |             sep = ";", | ||||||
|  |             quote = FALSE, | ||||||
|  |             row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| @ -4,18 +4,18 @@ | |||||||
| #          (2) Clustering | #          (2) Clustering | ||||||
| #          (3) Fit tree | #          (3) Fit tree | ||||||
| # | # | ||||||
| # input:  results/dataframes_case_2019.RData | # input:  results/haum/dataframes_case_2019.RData | ||||||
| # output: results/eventlogs_2019_case-clusters.csv | # output: results/haum/eventlogs_2019_case-clusters.csv | ||||||
| #         results/user-navigation.RData | #         results/haum/tmp_user-navigation.RData | ||||||
| #         ../../thesis/figures/data/clustering_cases.RData | #         ../../thesis/figures/data/clustering_cases.RData | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-15 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| #--------------- (1) Load data --------------- | #--------------- (1) Load data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/dataframes_case_2019.RData") | load("results/haum/dataframes_case_2019.RData") | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Clustering --------------- | #--------------- (2) Clustering --------------- | ||||||
| 
 | 
 | ||||||
| @ -119,13 +119,13 @@ aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, m | |||||||
| aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median) | aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~ cluster, res, median) | ||||||
| 
 | 
 | ||||||
| write.table(res, | write.table(res, | ||||||
|             file = "results/eventlogs_2019_case-clusters.csv", |             file = "results/haum/eventlogs_2019_case-clusters.csv", | ||||||
|             sep = ";", |             sep = ";", | ||||||
|             quote = FALSE, |             quote = FALSE, | ||||||
|             row.names = FALSE) |             row.names = FALSE) | ||||||
| 
 | 
 | ||||||
| save(res, dist_mat, hcs, acs, coor_2d, coor_3d, | save(res, dist_mat, hcs, acs, coor_2d, coor_3d, | ||||||
|      file = "results/user-navigation.RData") |      file = "results/haum/tmp_user-navigation.RData") | ||||||
| 
 | 
 | ||||||
| save(coor_2d, coor_3d, cluster, dattree, | save(coor_2d, coor_3d, cluster, dattree, | ||||||
|      file = "../../thesis/figures/data/clustering_cases.RData") |      file = "../../thesis/figures/data/clustering_cases.RData") | ||||||
| @ -6,10 +6,10 @@ | |||||||
| #          (4) Clustering | #          (4) Clustering | ||||||
| #          (5) Fit tree | #          (5) Fit tree | ||||||
| # | # | ||||||
| # input:  results/event_logfiles_2024-02-21_16-07-33.csv | # input:  results/haum/event_logfiles_2024-02-21_16-07-33.csv | ||||||
| # output: -- | # output: results/haum/eventlogs_pre-corona_case-clusters.csv | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-15 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -17,7 +17,7 @@ source("R_helpers.R") | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/eventlogs_pre-corona_cleaned.RData") | load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| # Select one year to handle number of cases | # Select one year to handle number of cases | ||||||
| dat <- dat[as.Date(dat$date.start) > "2017-12-31" & | dat <- dat[as.Date(dat$date.start) > "2017-12-31" & | ||||||
| @ -77,8 +77,8 @@ dattree18 <- data.frame(case = datcase18$case, | |||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| # centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat) | # centrality <- pbapply::pbsapply(dattree18$case, get_centrality, data = dat) | ||||||
| # save(centrality, file = "results/centrality_2018.RData") | # save(centrality, file = "results/haum/tmp_centrality_2018.RData") | ||||||
| load("results/centrality_2018.RData") | load("results/haum/tmp_centrality_2018.RData") | ||||||
| 
 | 
 | ||||||
| dattree18$BetweenCentrality <- centrality | dattree18$BetweenCentrality <- centrality | ||||||
| 
 | 
 | ||||||
| @ -1,93 +0,0 @@ | |||||||
| # 13_dfgs-case-clusters.R |  | ||||||
| # |  | ||||||
| # content: (1) Read data |  | ||||||
| #          (2) Export DFGs for clusters |  | ||||||
| # |  | ||||||
| # input:  results/user-navigation.RData |  | ||||||
| # output: ../../thesis/figures/dfg_cases_cluster1_R.pdf |  | ||||||
| #         ../../thesis/figures/dfg_cases_cluster2_R.pdf |  | ||||||
| #         ../../thesis/figures/dfg_cases_cluster3_R.pdf |  | ||||||
| #         ../../thesis/figures/dfg_cases_cluster4_R.pdf |  | ||||||
| #         ../../thesis/figures/dfg_cases_cluster5_R.pdf |  | ||||||
| # |  | ||||||
| # last mod: 2024-03-22 |  | ||||||
| 
 |  | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") |  | ||||||
| 
 |  | ||||||
| #--------------- (1) Read data --------------- |  | ||||||
| 
 |  | ||||||
| load("results/user-navigation.RData") |  | ||||||
| 
 |  | ||||||
| dat <- res |  | ||||||
| 
 |  | ||||||
| dat$start <- as.POSIXct(dat$date.start) |  | ||||||
| dat$complete <- as.POSIXct(dat$date.stop) |  | ||||||
| 
 |  | ||||||
| alog <- bupaR::activitylog(dat[dat$cluster == cluster, ], |  | ||||||
|                             case_id     = "case", |  | ||||||
|                             activity_id = "item", |  | ||||||
|                             resource_id = "path", |  | ||||||
|                             timestamps  = c("start", "complete")) |  | ||||||
| 
 |  | ||||||
| processmapR::trace_explorer(alog, n_traces = 25) |  | ||||||
| 
 |  | ||||||
| tr <- bupaR::traces(alog) |  | ||||||
| tab <- table(tr$absolute_frequency) |  | ||||||
| 
 |  | ||||||
| tab[1] / nrow(tr) |  | ||||||
| 
 |  | ||||||
| alog |> edeaR::filter_infrequent_flows(min_n = 20) |> processmapR::process_map() |  | ||||||
| 
 |  | ||||||
| #--------------- (2) Export DFGs for clusters --------------- |  | ||||||
| 
 |  | ||||||
| mycols <- c("#3CB4DC", "#FF6900", "#78004B", "#91C86E", "#434F4F") |  | ||||||
| cl_names <- c("Scanning", "Exploring", "Flitting", "Searching", "Info") |  | ||||||
| 
 |  | ||||||
| ns <- c(30, 20, 10, 5, 30) |  | ||||||
| 
 |  | ||||||
| for (i in 1:5) { |  | ||||||
| 
 |  | ||||||
|   alog <- bupaR::activitylog(dat[dat$cluster == i, ], |  | ||||||
|                              case_id     = "case", |  | ||||||
|                              activity_id = "item", |  | ||||||
|                              resource_id = "path", |  | ||||||
|                              timestamps  = c("start", "complete")) |  | ||||||
|    |  | ||||||
|   dfg <- processmapR::process_map(edeaR::filter_infrequent_flows(alog, min_n = ns[i]), |  | ||||||
|     type_nodes = processmapR::frequency("relative", color_scale = "Greys"), |  | ||||||
|     sec_nodes  = processmapR::frequency("absolute"), |  | ||||||
|     type_edges = processmapR::frequency("relative", color_edges = mycols[i]), |  | ||||||
|     sec_edges  = processmapR::frequency("absolute"), |  | ||||||
|     rankdir    = "LR", |  | ||||||
|     render     = FALSE) |  | ||||||
| 
 |  | ||||||
|   processmapR::export_map(dfg, |  | ||||||
|     file_name = paste0("../../thesis/figures/dfg_cases_cluster", i, "_R.pdf"), |  | ||||||
|     file_type = "pdf", |  | ||||||
|     title     = cl_names[i]) |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| # cluster 1: 50 |  | ||||||
| # cluster 2: 30 or 20 |  | ||||||
| # cluster 3: 20 - 30 |  | ||||||
| # cluster 4: 5 |  | ||||||
| # cluster 5: 20 |  | ||||||
| 
 |  | ||||||
| get_percent_variants <- function(log, cluster, min_n) { |  | ||||||
| 
 |  | ||||||
|   alog <- bupaR::activitylog(log[log$cluster == cluster, ], |  | ||||||
|                              case_id     = "case", |  | ||||||
|                              activity_id = "item", |  | ||||||
|                              resource_id = "path", |  | ||||||
|                              timestamps  = c("start", "complete")) |  | ||||||
| 
 |  | ||||||
|   nrow(edeaR::filter_infrequent_flows(alog, min_n = min_n)) / |  | ||||||
|     nrow(alog) |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| perc <- numeric(5) |  | ||||||
| 
 |  | ||||||
| for (i in 1:5) { |  | ||||||
|   perc[i] <- get_percent_variants(log = dat, cluster = i, min_n = ns[i]) |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| @ -3,11 +3,10 @@ | |||||||
| # content: (1) Read data | # content: (1) Read data | ||||||
| #          (2) Investigate variants | #          (2) Investigate variants | ||||||
| # | # | ||||||
| # input:  results/eventlogs_pre-corona_cleaned.RData | # input:  results/haum/eventlogs_pre-corona_cleaned.RData | ||||||
| # output: ../../thesis/figures/freq-traces.pdf | # output:  | ||||||
| #         ../../thesis/figures/freq-traces_powerlaw.pdf |  | ||||||
| # | # | ||||||
| # last mod: 2024-03-22 | # last mod: 2024-03-12 | ||||||
| 
 | 
 | ||||||
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/analysis/code") | ||||||
| 
 | 
 | ||||||
| @ -15,7 +14,7 @@ library(bupaverse) | |||||||
| 
 | 
 | ||||||
| #--------------- (1) Read data --------------- | #--------------- (1) Read data --------------- | ||||||
| 
 | 
 | ||||||
| load("results/eventlogs_pre-corona_cleaned.RData") | load("results/haum/eventlogs_pre-corona_cleaned.RData") | ||||||
| 
 | 
 | ||||||
| #--------------- (2) Investigate variants --------------- | #--------------- (2) Investigate variants --------------- | ||||||
| 
 | 
 | ||||||
| @ -28,7 +27,7 @@ alog <- activitylog(dat, | |||||||
|                     resource_id = "path", |                     resource_id = "path", | ||||||
|                     timestamps  = c("start", "complete")) |                     timestamps  = c("start", "complete")) | ||||||
| 
 | 
 | ||||||
| pdf("../../thesis/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) | pdf("results/figures/freq-traces.pdf", height = 7, width = 6, pointsize = 10) | ||||||
| trace_explorer(alog, n_traces = 25) | trace_explorer(alog, n_traces = 25) | ||||||
| # --> sequences of artworks are just too rare | # --> sequences of artworks are just too rare | ||||||
| dev.off() | dev.off() | ||||||
| @ -55,7 +54,7 @@ y <- as.numeric(tab) | |||||||
| p1 <- lm(log(y) ~ log(x)) | p1 <- lm(log(y) ~ log(x)) | ||||||
| pre <- exp(coef(p1)[1]) * x^coef(p1)[2] | pre <- exp(coef(p1)[1]) * x^coef(p1)[2] | ||||||
| 
 | 
 | ||||||
| pdf("../../thesis/figures/freq-traces_powerlaw.pdf", height = 3.375, | pdf("results/figures/freq-traces_powerlaw.pdf", height = 3.375, | ||||||
|     width = 3.375, pointsize = 10) |     width = 3.375, pointsize = 10) | ||||||
| par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | par(mai = c(.6,.6,.1,.1), mgp = c(2.4, 1, 0)) | ||||||
| 
 | 
 | ||||||
							
								
								
									
										46
									
								
								code/13_pm-case-clusters.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								code/13_pm-case-clusters.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | |||||||
|  | # 13_pm-case-clusters.py | ||||||
|  | # | ||||||
|  | # content: (1) Load data and create event log | ||||||
|  | #          (2) Infos for clusters | ||||||
|  | #          (3) Process maps for clusters | ||||||
|  | # | ||||||
|  | # input:  results/haum/eventlogs_2019_case-clusters_new.csv | ||||||
|  | # output: results/haum/pn_infos_clusters.csv | ||||||
|  | # | ||||||
|  | # last mod: 2024-03-10 | ||||||
|  | 
 | ||||||
|  | import pm4py | ||||||
|  | import pandas as pd | ||||||
|  | 
 | ||||||
|  | from python_helpers import eval_pm, pn_infos | ||||||
|  | 
 | ||||||
|  | #--------------- (1) Load data and create event log --------------- | ||||||
|  | 
 | ||||||
|  | dat = pd.read_csv("results/haum/eventlogs_2019_case-clusters_new.csv", sep = ";") | ||||||
|  | 
 | ||||||
|  | event_log = pm4py.format_dataframe(dat, case_id = "case", activity_key = "event_new", | ||||||
|  |                                   timestamp_key = "date.start") | ||||||
|  | 
 | ||||||
|  | #--------------- (2) Infos for clusters --------------- | ||||||
|  | 
 | ||||||
|  | # Collect the pn_infos() results of all clusters in one data frame | ||||||
|  | eval = pd.DataFrame(columns = ["fitness", "precision", "generalizability", | ||||||
|  |                                "simplicity", "sound", "narcs", "ntrans", | ||||||
|  |                                "nplaces", "nvariants", "mostfreq"]) | ||||||
|  | for cluster in event_log.cluster.unique().tolist(): | ||||||
|  |     eval = pd.concat([eval, pn_infos(event_log, "cluster", cluster)]) | ||||||
|  | eval = eval.sort_index() | ||||||
|  | 
 | ||||||
|  | eval.to_csv("results/haum/pn_infos_clusters.csv", sep = ";") | ||||||
|  | 
 | ||||||
|  | #--------------- (3) Process maps for clusters --------------- | ||||||
|  | 
 | ||||||
|  | for cluster in event_log.cluster.unique().tolist(): | ||||||
|  |     subdata = event_log[event_log.cluster == cluster] | ||||||
|  |     subnet, subim, subfm = pm4py.discover_petri_net_inductive(subdata, noise_threshold = .7) | ||||||
|  |     pm4py.save_vis_petri_net(subnet, subim, subfm, | ||||||
|  |        "results/processmaps/petrinet_cluster" + str(cluster) + "_cases.png") | ||||||
|  |     bpmn = pm4py.convert.convert_to_bpmn(subnet, subim, subfm) | ||||||
|  |     pm4py.vis.save_vis_bpmn(bpmn, "results/processmaps/bpmn_cluster" + | ||||||
|  |             str(cluster) + "_cases.png") | ||||||
|  | 
 | ||||||
| @ -36,8 +36,8 @@ def pn_infos_miner(log, miner): | |||||||
|         net, im, fm = pm4py.discover_petri_net_ilp(log) |         net, im, fm = pm4py.discover_petri_net_ilp(log) | ||||||
|     elif miner == "inductive": |     elif miner == "inductive": | ||||||
|         net, im, fm = pm4py.discover_petri_net_inductive(log) |         net, im, fm = pm4py.discover_petri_net_inductive(log) | ||||||
|     elif miner == "normative": |     elif miner == "conformative": | ||||||
|         net, im, fm = pm4py.read_pnml("results/normative_petrinet.pnml") |         net, im, fm = pm4py.read_pnml("results/haum/conformative_petrinet_con.pnml") | ||||||
| 
 | 
 | ||||||
|     eval = eval_append(log, net, im, fm) |     eval = eval_append(log, net, im, fm) | ||||||
|     eval.index = [miner] |     eval.index = [miner] | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user