257 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			257 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| # 00_current_analysis.R
 | |
| #
 | |
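| # content: (1) Read evaluation data
 | |
| #          (2) Clustering
 | |
| #          (3) Visualization with pictures
 | |
| #          (4) Read event logs
 | |
| #          (5) Frequency plot for clusters
 | |
| #          (6) DFGs for clusters
 | |
| #
 | |
| # input: results/eval_heuristics_artworks.csv
 | |
| #        results/eval_inductive_artworks.csv
 | |
| #        ../data/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv
 | |
| #        ../data/haum/ContentEyevisit/eyevisit_cards_light/ (artwork images)
 | |
| # output: ../figures/clustering_heuristics.png
 | |
| #         ../figures/processmaps/dfg_*_R.pdf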
 | |
| #
 | |
| # last mod: 2023-12-08, NW
 | |
| 
 | |
| # setwd("C:/Users/nwickelmaier/Nextcloud/Documents/MDS/2023ss/60100_master_thesis/code")
 | |
| 
 | |
#--------------- (1) Read evaluation data ---------------

# Read one evaluation table: semicolon-separated, with a header line, and the
# first column used as row names.
read_eval <- function(path) {
  read.table(path, header = TRUE, sep = ";", row.names = 1)
}

# Evaluation results per artwork, one table per mining algorithm
eval_heuristics <- read_eval("results/eval_heuristics_artworks.csv")
eval_inductive  <- read_eval("results/eval_inductive_artworks.csv")
 | |
| 
 | |
#--------------- (2) Clustering ---------------

# Fix the RNG state so that the random k-means starts are reproducible. All
# kmeans() calls below are kept in a fixed order because each one advances the
# random number stream.
set.seed(1607)

# Total within-cluster sum of squares of a k-means fit, for every k in `ks`.
#
# data: numeric data passed on to kmeans()
# ks:   vector with the numbers of clusters to try
#
# Returns a numeric vector of the same length as `ks`. The fits are run in the
# order given by `ks`.
scree_sse <- function(data, ks) {
  vapply(ks, function(k) kmeans(data, k)$tot.withinss, numeric(1))
}

# One colour per cluster, indexed by cluster number
colors <- c("#3CB4DC", "#78004B", "#91C86E", "#FF6900")

# Heuristics Miner

k1 <- kmeans(eval_heuristics, 4)

plot(generalizability ~ precision, eval_heuristics, pch = 16,
     col = colors[k1$cluster])

## Scree plot

ks <- 1:10
sse <- scree_sse(eval_heuristics, ks)

plot(sse ~ ks, type = "l")

# Inductive Miner

k2 <- kmeans(eval_inductive, 4)

plot(generalizability ~ precision, eval_inductive, pch = 16,
     col = colors[k2$cluster])

## Scree plot

ks <- 1:10
sse <- scree_sse(eval_inductive, ks)

plot(sse ~ ks, type = "l")
 | |
| 
 | |
#--------------- (3) Visualization with pictures ---------------

library(png)
library(jpeg)
library(grid)

# Read the card image of one artwork and return it as a raster object.
#
# art: artwork number (numeric); it is zero-padded to three digits to build
#      the directory and file name
#
# Artwork 125 is read from a .jpg file, every other artwork from a .png file.
read_artwork_raster <- function(art) {
  art_string <- sprintf("%03d", art)
  card_stem <- paste0("../data/haum/ContentEyevisit/eyevisit_cards_light/",
                      art_string, "/", art_string)
  pic <- if (art == 125) {
    readJPEG(paste0(card_stem, ".jpg"))
  } else {
    readPNG(paste0(card_stem, ".png"))
  }
  # Keep only the first three channels (a fourth channel, if any, is dropped)
  as.raster(pic[, , 1:3])
}

# Draw the card image of every artwork at its (precision, generalizability)
# position on the current plot.
#
# eval_data:   evaluation table with columns `precision` and
#              `generalizability`; row names are the artwork numbers
# half_width:  half of the image width in x-axis units
# half_height: half of the image height in y-axis units
# square_col:  optional vector with one colour per row of `eval_data`; if
#              given, a filled square in that colour is drawn underneath each
#              image
#
# Rows are processed by position, so the lookup does not depend on how the
# row names are formatted.
draw_artworks <- function(eval_data, half_width, half_height,
                          square_col = NULL) {
  arts <- as.numeric(rownames(eval_data))

  for (i in seq_along(arts)) {
    img <- read_artwork_raster(arts[i])

    x <- eval_data[i, "precision"]
    y <- eval_data[i, "generalizability"]

    if (!is.null(square_col)) {
      points(x, y, col = square_col[i], cex = 8, pch = 15)
    }

    rasterImage(img,
                xleft   = x - half_width,
                xright  = x + half_width,
                ybottom = y - half_height,
                ytop    = y + half_height)
  }
}

## Heuristics Miner
#pdf("../figures/clustering_heuristics.pdf", height = 8, width = 8, pointsize = 10)
png("../figures/clustering_heuristics.png", units = "in", height = 8,
    width = 8, pointsize = 10, res = 300)
par(mai = c(.6, .6, .1, .1), mgp = c(2.4, 1, 0))

# Empty plot region; squares and images are added per artwork
plot(generalizability ~ precision, eval_heuristics, type = "n",
     ylim = c(0.845, 0.98))

# k1$cluster is in the row order of eval_heuristics, so the colours can be
# passed by position
draw_artworks(eval_heuristics, half_width = .002, half_height = .004,
              square_col = colors[k1$cluster])

dev.off()

## Inductive Miner
# Drawn on the currently active graphics device (no file is opened here)
plot(generalizability ~ precision, eval_inductive, col = colors[k2$cluster],
     cex = 8, pch = 15)

draw_artworks(eval_inductive, half_width = .001, half_height = .002)
 | |
| 
 | |
#--------------- (4) Read event logs ---------------

dat <- read.table("../data/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv",
                  sep = ";", header = TRUE)

# Convert the time stamp columns to date-time objects
# NOTE(review): no time zone is given, so the session's time zone is used --
# confirm that this matches how the log was recorded
dat$date       <- as.POSIXct(dat$date)
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop  <- as.POSIXct(dat$date.stop)

# Zero-padded three-digit artwork id, e.g. 1 -> "001"
dat$artwork <- sprintf("%03d", dat$artwork)
dat$event <- factor(dat$event,
                    levels = c("move", "flipCard", "openTopic", "openPopup"))

# Weekday of the start time as a factor running from Monday to Sunday.
# The day is taken from the numeric `wday` component (0 = Sunday, ...,
# 6 = Saturday) instead of weekdays(), whose output depends on the session
# locale and only matched the former German level names on a German locale.
dat$weekdays <- factor(as.POSIXlt(dat$date.start)$wday,
                       levels = c(1:6, 0),
                       labels = c("Monday", "Tuesday", "Wednesday",
                                  "Thursday", "Friday", "Saturday",
                                  "Sunday"))
 | |
| 
 | |
| 
 | |
#--------------- (5) Frequency plot for clusters ---------------

# Only pre Corona
dat <- dat[dat$date < as.POSIXct("2020-03-13"), ]

# Number of logged events per artwork
counts_artwork <- table(dat$artwork)
dat_count <- as.data.frame(counts_artwork)
names(dat_count) <- c("artwork", "freq")

# Attach the heuristics-miner cluster to each artwork by matching artwork
# numbers. Matching by number (instead of relying on both objects having the
# same artworks in the same order) keeps the assignment correct if an artwork
# is missing on either side; such artworks get cluster NA.
cluster_idx <- match(as.numeric(as.character(dat_count$artwork)),
                     as.numeric(names(k1$cluster)))
dat_count$cluster <- k1$cluster[cluster_idx]

# Relabel the k-means cluster numbers 4, 2, 1, 3 as 4, 3, 2, 1.
# NOTE(review): this mapping presumably belongs to the cluster numbering
# obtained with the fixed seed -- re-check it whenever the clustering changes
cluster_order <- c(4, 2, 1, 3)
dat_count$cluster <- factor(dat_count$cluster, levels = cluster_order,
                            labels = 4:1)

# Sort by cluster label and frequency (both decreasing) and freeze that order
# in the factor levels so that the bars are drawn in it
dat_count <- dat_count[order(dat_count$cluster, dat_count$freq,
                             decreasing = TRUE), ]
dat_count$artwork <- factor(dat_count$artwork,
                            levels = as.character(unique(dat_count$artwork)))

# Bars use the colour of the original k-means cluster: reordering `colors` by
# `cluster_order` lines the palette up with the relabelled factor codes
barplot(freq ~ artwork, dat_count, las = 2, ylim = c(0, 60000),
        border = "white", ylab = "",
        col = colors[cluster_order][dat_count$cluster])

# compare to clusters
plot(generalizability ~ precision, eval_heuristics, type = "n",
     ylim = c(0.845, 0.98))
with(eval_heuristics, text(precision, generalizability,
                           rownames(eval_heuristics),
                           col = colors[k1$cluster]))
 | |
| 
 | |
#--------------- (6) DFGs for clusters ---------------

library(bupaverse)

# Copies of the start/stop time stamps under the names passed to
# activitylog() below
dat$start <- dat$date.start
dat$complete <- dat$date.stop

alog <- activitylog(dat,
                    case_id     = "trace",
                    activity_id = "event",
                    resource_id = "artwork",
                    timestamps  = c("start", "complete"))

# One activity log per cluster label (labels as defined in dat_count$cluster)
alog_c1 <- filter_case_condition(alog,
                    artwork %in% dat_count[dat_count$cluster == 1, "artwork"])
alog_c2 <- filter_case_condition(alog,
                    artwork %in% dat_count[dat_count$cluster == 2, "artwork"])
alog_c3 <- filter_case_condition(alog,
                    artwork %in% dat_count[dat_count$cluster == 3, "artwork"])
alog_c4 <- filter_case_condition(alog,
                    artwork %in% dat_count[dat_count$cluster == 4, "artwork"])

# Build the process map (DFG) of an activity log and export it as PDF.
#
# log:       activity log to map
# file_name: path of the PDF file to write
# title:     title passed to export_map()
#
# Returns the (unrendered) process map so that it can be reused afterwards.
export_dfg <- function(log, file_name, title) {
  dfg <- process_map(log,
            type_nodes = frequency("absolute", color_scale = "Greys"),
            sec_nodes  = frequency("relative"),
            type_edges = frequency("absolute", color_edges = "#FF6900"),
            sec_edges  = frequency("relative"),
            rankdir    = "TB",
            render     = FALSE)
  export_map(dfg,
             file_name = file_name,
             file_type = "pdf",
             title     = title)
  dfg
}

dfg_complete <- export_dfg(alog,
                           "../figures/processmaps/dfg_complete_R.pdf",
                           "DFG complete")
dfg_c1 <- export_dfg(alog_c1,
                     "../figures/processmaps/dfg_cluster1_R.pdf",
                     "DFG Cluster 1")
dfg_c2 <- export_dfg(alog_c2,
                     "../figures/processmaps/dfg_cluster2_R.pdf",
                     "DFG Cluster 2")
dfg_c3 <- export_dfg(alog_c3,
                     "../figures/processmaps/dfg_cluster3_R.pdf",
                     "DFG Cluster 3")
dfg_c4 <- export_dfg(alog_c4,
                     "../figures/processmaps/dfg_cluster4_R.pdf",
                     "DFG Cluster 4")
 | |
| 
 | |
| 
 |