Worked on analysis

parent a276d84cd6
commit 07b1f5adc4
@@ -41,7 +41,7 @@ write.table(datraw, paste0("results/haum/raw_logfiles_", now, ".csv"),

 datlogs <- create_eventlogs(datraw,
                             #xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/",
-                            glossar = FALSE)
+                            glossar = FALSE, save = TRUE)
 # 2,136,694 no change moves removed

 # OLD:
@@ -2,78 +2,148 @@

 # Read data

-dat0 <- read.table("results/haum/event_logfiles_2023-10-25_17-29-52.csv",
+dat <- read.table("results/haum/event_logfiles_2024-01-02_19-44-50.csv",
+                  colClasses = c("character", "character", "POSIXct",
+                                 "POSIXct", "character", "integer",
+                                 "numeric", "character", "character",
+                                 rep("numeric", 3), "character",
+                                 "character", rep("numeric", 11),
+                                 "character", "character"),
                   sep = ";", header = TRUE)
-dat0$date.start <- as.POSIXct(dat0$date.start)
-dat0$date.stop <- as.POSIXct(dat0$date.stop)
-dat0$artwork <- sprintf("%03d", dat0$artwork)

-table(dat0[!duplicated(dat0$trace), "event"])
+dat$event <- factor(dat$event, levels = c("move", "flipCard", "openTopic",
+                                          "openPopup"))

-proportions(table(dat0[!duplicated(dat0$trace), "event"]))
+dat$weekdays <- factor(weekdays(dat$date.start),
+                       levels = c("Montag", "Dienstag", "Mittwoch",
+                                  "Donnerstag", "Freitag", "Samstag",
+                                  "Sonntag"),
+                       labels = c("Monday", "Tuesday", "Wednesday",
+                                  "Thursday", "Friday", "Saturday",
+                                  "Sunday"))

-tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
-                                                        "openPopup"), ]
+# Select data pre Corona
+dat <- dat[as.Date(dat$date.start) < "2020-03-13", ]
+dat <- dat[dat["path"] != 81621, ]

-dat <- dat0
-i <- 1
-stop <- 1
+table(dat$event)
+proportions(table(dat$event))

-while (stop > 0) {
-  stop <- sum(!duplicated(dat$trace) & dat$event %in% c("openTopic", "openPopup"))
-  dat <- dat[!(!duplicated(dat$trace) &
-               dat$event %in% c("openTopic", "openPopup")), ]
-  print(i)
-  i <- i + 1
-  print(table(dat[!duplicated(dat$trace), "event"]))
+# Investigate paths (will separate items and give clusters of artworks!)
+length(unique(dat$path))
+
+datpath <- aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~
+                     path, dat, function(x) mean(x, na.rm = TRUE), na.action = NULL)
+
+datpath$length <- aggregate(item ~ path, dat, length)$item
+datpath$nitems <- aggregate(item ~ path, dat, function(x)
+                            length(unique(x)), na.action = NULL)$item
+datpath$ntopics <- aggregate(topic ~ path, dat,
+                             function(x) ifelse(all(is.na(x)), NA, length(unique(na.omit(x)))),
+                             na.action = NULL)$topic
+
+datpath$vacation <- aggregate(vacation ~ path, dat,
+                              function(x) ifelse(all(is.na(x)), 0, 1),
+                              na.action = NULL)$vacation
+datpath$holiday <- aggregate(holiday ~ path, dat,
+                             function(x) ifelse(all(is.na(x)), 0, 1),
+                             na.action = NULL)$holiday
+datpath$weekend <- aggregate(weekdays ~ path, dat,
+                             function(x) ifelse(any(x %in% c("Saturday", "Sunday")), 1, 0),
+                             na.action = NULL)$weekdays
+datpath$morning <- aggregate(date.start ~ path, dat,
+                             function(x) ifelse(lubridate::hour(x[1]) > 13, 0, 1),
+                             na.action = NULL)$date.start
+
+
+# Investigate cases (= interactions per time intervall)
+length(unique(dat$case))
+
+datcase <- aggregate(cbind(duration, distance, scaleSize, rotationDegree) ~
+                     case, dat, function(x) mean(x, na.rm = TRUE), na.action = NULL)
+
+datcase$length <- aggregate(item ~ case, dat, length)$item
+datcase$nitems <- aggregate(item ~ case, dat, function(x)
+                            length(unique(x)), na.action = NULL)$item
+datcase$ntopics <- aggregate(topic ~ case, dat,
+                             function(x) ifelse(all(is.na(x)), NA, length(unique(na.omit(x)))),
+                             na.action = NULL)$topic
+
+datcase$vacation <- aggregate(vacation ~ case, dat,
+                              function(x) ifelse(all(is.na(x)), 0, 1),
+                              na.action = NULL)$vacation
+datcase$holiday <- aggregate(holiday ~ case, dat,
+                             function(x) ifelse(all(is.na(x)), 0, 1),
+                             na.action = NULL)$holiday
+datcase$weekend <- aggregate(weekdays ~ case, dat,
+                             function(x) ifelse(any(x %in% c("Saturday", "Sunday")), 1, 0),
+                             na.action = NULL)$weekdays
+datcase$morning <- aggregate(date.start ~ case, dat,
+                             function(x) ifelse(lubridate::hour(x[1]) > 13, 0, 1),
+                             na.action = NULL)$date.start
+
+
+# Paths with more than one case associated
+tmp <- aggregate(case ~ path, dat, function(x) length(unique(x)))
+sum(tmp$case > 1)
+table(tmp$case)
+
+dat$date <- as.Date(dat$date.start)
+
+tmp <- aggregate(date ~ path, dat, function(x) length(unique(x)))
+sum(tmp$date > 1)
+table(tmp$date)
+tmp[tmp$date > 1, ]
+
+for (p in tmp$path[tmp$date > 1]) {
+  print(dat[dat$path == p, 3:9])
+  cat("\n\n")
 }

+dat[dat$date == "2017-02-28" & dat$item == "503", ]

 # Creating event logs

 library(bupaverse)

-names(dat)[names(dat) %in% c("date.start", "date.stop")] <- c("start",
-                                                              "complete")
+dat$start <- dat$date.start
+dat$complete <- dat$date.stop

 table(table(dat$start))
 # --> hmm...

-summary(aggregate(duration ~ trace, dat, mean))
+summary(aggregate(duration ~ path, dat, mean))

-# TODO: Find trace that has flipCard --> openPopup --> openTopic
-dato <- dat[dat$event != "move", ]
-dato_split <- split(dato, ~ trace)
-tmp <- lapply(dato_split, function(x) unique(x$event))
-#tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
-
-ids <- sapply(tmp, length) == 3
-tmp2 <- as.data.frame(do.call(rbind, tmp[ids]))
-names(tmp2) <- c("flipCard", "openTopic", "openPopup")
-
-table(tmp2$flipCard)
-table(tmp2$openTopic)
-table(tmp2$openPopup)
-
-frag_ids <- which(tmp2$openTopic == "openPopup")
-
-tmp3 <- dat[dat$trace %in% rownames(tmp2)[frag_ids], ]
-
-tmp4 <- tmp3[!tmp3$glossar == 1, ]
-
-dat6 <- rbind(dat[!dat$trace %in% rownames(tmp2)[frag_ids], ], tmp4)

 alog <- activitylog(dat,
-                    case_id = "trace",
+                    case_id = "path",
                     activity_id = "event",
-                    #resource_id = "case",
-                    resource_id = "artwork",
+                    resource_id = "item",
                     timestamps = c("start", "complete"))

-process_map(alog)
+process_map(alog,
+            type_nodes = frequency("absolute"),
+            sec_nodes = frequency("relative"),
+            type_edges = frequency("absolute"),
+            sec_edges = frequency("relative"),
+            rankdir = "LR")

+alog2 <- activitylog(dat,
+                     case_id = "case",
+                     activity_id = "event",
+                     resource_id = "item",
+                     timestamps = c("start", "complete"))
+process_map(alog2,
+            type_nodes = frequency("absolute"),
+            sec_nodes = frequency("relative"),
+            type_edges = frequency("absolute"),
+            sec_edges = frequency("relative"),
+            rankdir = "LR")

-process_map(alog, frequency("relative"))
-process_map(alog, frequency("relative_consequent"))

 library(processanimateR)

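The new datpath block above builds per-path summary features with repeated aggregate() calls. A hypothetical pandas equivalent (not part of the commit; it assumes the same event-log CSV and column names used in the Python scripts of this commit) can serve as a cross-check:

    import pandas as pd

    # Hypothetical cross-check of the datpath aggregation; column names assumed.
    dat = pd.read_csv("results/haum/event_logfiles_2024-01-02_19-44-50.csv", sep=";")
    datpath = dat.groupby("path").agg(
        duration=("duration", "mean"),            # means skip NaN, like na.rm = TRUE
        distance=("distance", "mean"),
        scaleSize=("scaleSize", "mean"),
        rotationDegree=("rotationDegree", "mean"),
        length=("item", "size"),                  # number of events on the path
        nitems=("item", "nunique"),               # distinct items touched
        ntopics=("topic", "nunique"),             # distinct topics, NaN dropped
    )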
@@ -112,30 +182,4 @@ animate_process(elog[elog$artwork %in% c("080", "054"), ],
                 mapping = token_aes(color = token_scale("artwork",
                                                         scale = "ordinal",
                                                         range = c("black", "gray"))))
-# --> not sure, yet, how to interpret this...
-
-alog080 <- activitylog(dat[dat$artwork %in% "080", ],
-                       #case_id = "case",
-                       case_id = "trace",
-                       activity_id = "event",
-                       #resource_id = "trace",
-                       resource_id = "case",
-                       timestamps = c("start", "complete"))
-
-process_map(alog080, frequency("relative"))
-
-
-
-alog054 <- activitylog(dat[dat$artwork %in% "054", ],
-                       #case_id = "case",
-                       case_id = "trace",
-                       activity_id = "event",
-                       #resource_id = "trace",
-                       resource_id = "case",
-                       timestamps = c("start", "complete"))
-
-process_map(alog054, frequency("relative"))
-
-
-

@@ -10,24 +10,7 @@ net_con.places
 net_con.transitions
 net_con.arcs

-help(pm4py.objects.petri_net.obj.Marking)
+final_marking = Marking()

-# Places
-source = PetriNet.Place("source")
-sink = PetriNet.Place("sink")
-p_1 = PetriNet.Place("p_1")
-p_2 = PetriNet.Place("p_2")
-p_3 = PetriNet.Place("p_3")
-p_4 = PetriNet.Place("p_4")
-p_5 = PetriNet.Place("p_5")
-p_6 = PetriNet.Place("p_6")
-p_7 = PetriNet.Place("p_7")
-p_8 = PetriNet.Place("p_8")
-p_9 = PetriNet.Place("p_9")
-p_10 = PetriNet.Place("p_10")
-p_11 = PetriNet.Place("p_11")
-p_12 = PetriNet.Place("p_12")
-
-
 # Add tokens for traces
 # ('flipCard', 'openTopic', 'openPopup', 'openTopic', 'move'): 14
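The replacement line relies on pm4py's Petri net object model; judging from the removed help() call, Marking lives in pm4py.objects.petri_net.obj and acts as a counter of tokens per place. A minimal sketch of how an empty marking is filled (place name chosen to match the code above):

    from pm4py.objects.petri_net.obj import PetriNet, Marking

    # Marking maps places to token counts; an empty Marking() is filled by
    # assignment, e.g. one token in the sink place to mark termination.
    sink = PetriNet.Place("sink")
    final_marking = Marking()
    final_marking[sink] = 1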
@@ -75,7 +58,8 @@ pm4py.vis.save_vis_petri_net(net_con, marking, final_marking, file_path="../figu
 marking = pm4py.generate_marking(net_con, {'p_5': 1, 'p_12' : 1})
 pm4py.vis.save_vis_petri_net(net_con, marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_1_15.png")
 #pm4py.view_petri_net(net_con, marking)
-pm4py.vis.save_vis_petri_net(net_con, final_marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_1_16.png")
+marking = pm4py.generate_marking(net_con, {'sink': 1})
+pm4py.vis.save_vis_petri_net(net_con, marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_1_16.png")
 #pm4py.view_petri_net(net_con, final_marking)

 # ('move', 'move', 'flipCard', 'move', 'openTopic', 'openPopup'): 14
@@ -110,5 +94,6 @@ marking = pm4py.generate_marking(net_con, {'p_4': 1, 'p_12' : 1})
 pm4py.vis.save_vis_petri_net(net_con, marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_2_15.png")
 marking = pm4py.generate_marking(net_con, {'p_5': 1, 'p_12' : 1})
 pm4py.vis.save_vis_petri_net(net_con, marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_2_16.png")
-pm4py.vis.save_vis_petri_net(net_con, final_marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_2_17.png")
+marking = pm4py.generate_marking(net_con, {'sink': 1})
+pm4py.vis.save_vis_petri_net(net_con, marking, final_marking, file_path="../figures/processmaps/conformative_net_con_markings_2_17.png")

@@ -15,7 +15,7 @@ dat = dat[dat["date.start"] < "2020-03-13"]

 event_log = pm4py.format_dataframe(dat, case_id='path', activity_key='event',
                                    timestamp_key='date.start')
-event_log = event_log.rename(columns={'artwork': 'case:artwork'})
+event_log = event_log.rename(columns={'item': 'case:item'})

 ###### Descrptives of log data ######

@@ -34,10 +34,10 @@ sorted_variants = dict(sorted(variants.items(), key=lambda item: item[1], revers
 {k: sorted_variants[k] for k in list(sorted_variants)[:20]}

 filtered_log = event_log[event_log["event"] != "move"]
-variants = pm4py.get_variants(filtered_log)
-len(variants)
-sorted_variants = dict(sorted(variants.items(), key=lambda item: item[1], reverse = True))
-{k: sorted_variants[k] for k in list(sorted_variants)[:20]}
+variants_no_move = pm4py.get_variants(filtered_log)
+len(variants_no_move)
+sorted_variants_no_move = dict(sorted(variants_no_move.items(), key=lambda item: item[1], reverse = True))
+{k: sorted_variants_no_move[k] for k in list(sorted_variants_no_move)[:20]}

 # Path length
 event_log.path.value_counts()
@@ -94,8 +94,11 @@ l4[index_broken]

 replayed_traces[index_broken]

-# 216295 # --> broken trace! Must be in artwork 176!!!!!
+event_log[event_log['@@case_index'] == index_broken].event
+event_log[event_log['@@case_index'] == index_broken].path
+event_log[event_log['@@case_index'] == index_broken].item
+event_log[event_log['@@case_index'] == index_broken]["fileId.start"]
+# --> logging error in file!

 from pm4py.algo.conformance.tokenreplay import algorithm as token_based_replay
 parameters_tbr = {token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.DISABLE_VARIANTS: True, token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.ENABLE_PLTR_FITNESS: True}
@@ -156,9 +159,9 @@ pm4py.save_vis_dfg(dfg, start_activities, end_activities, '../figures/processmap

 ## Heuristics Miner
 h_net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
-h_eval = eval_pm(event_log, h_net, im, fm)
 pm4py.vis.view_petri_net(h_net, im, fm)
-pm4py.vis.save_vis_petri_net(h_net, im, fm, "../figures/processmaps/pn_heuristics_complete.png")
+pm4py.vis.save_vis_petri_net(h_net, im, fm, "../figures/processmaps/petrinet_heuristics_complete.png")
+h_eval = eval_pm(event_log, h_net, im, fm)

 is_sound = pm4py.check_soundness(h_net, im, fm)
 is_sound[0]
@@ -172,7 +175,7 @@ len(h_net.places)
 from pm4py.visualization.petri_net import visualizer as pn_visualizer
 parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
 gviz = pn_visualizer.apply(h_net, im, fm, parameters=parameters, variant=pn_visualizer.Variants.FREQUENCY, log=event_log)
-pn_visualizer.save(gviz, "../figures/processmaps/pn_heuristics_complete_decorated.png")
+pn_visualizer.save(gviz, "../figures/processmaps/petrinet_heuristics_complete_decorated.png")

 # convert to BPMN
 bpmn = pm4py.convert.convert_to_bpmn(h_net, im, fm)
@@ -180,9 +183,9 @@ pm4py.vis.view_bpmn(bpmn)

 ## Alpha Miner
 a_net, im, fm = pm4py.discover_petri_net_alpha(event_log)
-a_eval = eval_pm(event_log, a_net, im, fm)
 pm4py.vis.view_petri_net(a_net, im, fm)
-pm4py.vis.save_vis_petri_net(a_net, im, fm, "../figures/processmaps/pn_alpha_complete.png")
+pm4py.vis.save_vis_petri_net(a_net, im, fm, "../figures/processmaps/petrinet_alpha_complete.png")
+a_eval = eval_pm(event_log, a_net, im, fm)

 is_sound = pm4py.check_soundness(a_net, im, fm)
 is_sound[0]
@@ -193,9 +196,9 @@ len(a_net.places)

 ## Inductive Miner
 i_net, im, fm = pm4py.discover_petri_net_inductive(event_log)
-i_eval = eval_pm(event_log, i_net, im, fm)
 pm4py.vis.view_petri_net(i_net, im, fm)
-pm4py.vis.save_vis_petri_net(i_net, im, fm, "../figures/processmaps/pn_induction_complete.png")
+pm4py.vis.save_vis_petri_net(i_net, im, fm, "../figures/processmaps/petrinet_induction_complete.png")
+i_eval = eval_pm(event_log, i_net, im, fm)

 # as process tree (does not work for heuristics miner!)
 pt = pm4py.discover_process_tree_inductive(event_log)
@@ -217,9 +220,9 @@ pm4py.view_bpmn(bpmn)
 from pm4py.algo.conformance.tokenreplay import algorithm as token_based_replay
 parameters_tbr = {token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.DISABLE_VARIANTS: True, token_based_replay.Variants.TOKEN_REPLAY.value.Parameters.ENABLE_PLTR_FITNESS: True}
 replayed_traces, place_fitness, trans_fitness, unwanted_activities = token_based_replay.apply(event_log, i_net,
                                                                                               im,
                                                                                               fm,
                                                                                               parameters=parameters_tbr)

 l1 = list()
 l2 = list()
@@ -232,15 +235,29 @@ for i in range(len(replayed_traces)):
     l4.append(replayed_traces[i]["transitions_with_problems"])

 np.mean(l1)
+set(l1)
+index_broken = l1.index(1)
 np.mean(l2)
+set(l2)
+l2.index(1)
 set(l3)
 l4.count([])

+l3[index_broken]
+l4[index_broken]
+
+replayed_traces[index_broken]
+
+event_log[event_log['@@case_index'] == index_broken].event
+event_log[event_log['@@case_index'] == index_broken].path
+event_log[event_log['@@case_index'] == index_broken].item
+event_log[event_log['@@case_index'] == index_broken]["fileId.start"]

 ## ILP Miner
 ilp_net, im, fm = pm4py.discover_petri_net_ilp(event_log)
-ilp_eval = eval_pm(event_log, ilp_net, im, fm)
 pm4py.vis.view_petri_net(ilp_net, im, fm)
-pm4py.vis.save_vis_petri_net(ilp_net, im, fm, "../figures/processmaps/pn_ilp_complete.png")
+pm4py.vis.save_vis_petri_net(ilp_net, im, fm, "../figures/processmaps/petrinet_ilp_complete.png")
+ilp_eval = eval_pm(event_log, ilp_net, im, fm)

 is_sound = pm4py.check_soundness(ilp_net, im, fm)
 is_sound[0]
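For context on the added diagnostics: each entry of replayed_traces is a dictionary of per-trace replay results, so l1 through l4 presumably collect fields like the one visible in the loop above (transitions_with_problems). A short sketch (not part of the commit) using keys that pm4py's token-based replay does return:

    # Inspect the single misfitting trace located via index_broken.
    t = replayed_traces[index_broken]
    print(t["trace_is_fit"], t["trace_fitness"], t["missing_tokens"],
          t["remaining_tokens"], t["transitions_with_problems"])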
@@ -257,6 +274,27 @@ eval

 eval.to_csv("results/eval_all-miners_complete.csv", sep=";")

+## Without broken trace
+event_log_clean = event_log[event_log['@@case_index'] != index_broken]
+h_net, h_im, h_fm = pm4py.discover_petri_net_heuristics(event_log_clean)
+a_net, a_im, a_fm = pm4py.discover_petri_net_alpha(event_log_clean)
+i_net, i_im, i_fm = pm4py.discover_petri_net_inductive(event_log_clean)
+ilp_net, ilp_im, ilp_fm = pm4py.discover_petri_net_ilp(event_log_clean)
+
+baseline_eval = eval_pm(event_log_clean, basenet, initial_marking, final_marking)
+h_eval = eval_pm(event_log_clean, h_net, h_im, h_fm)
+a_eval = eval_pm(event_log_clean, a_net, a_im, a_fm)
+i_eval = eval_pm(event_log_clean, i_net, i_im, i_fm)
+ilp_eval = eval_pm(event_log_clean, ilp_net, ilp_im, ilp_fm)
+
+eval = pd.DataFrame(np.row_stack([baseline_eval, h_eval, a_eval, i_eval, ilp_eval]))
+eval.columns = ["fitness", "precision", "generalizability", "simplicity"]
+eval.index = ["conformative", "heuristics", "alpha", "inductive", "ilp"]
+eval
+
+eval.to_csv("results/eval_all-miners_clean.csv", sep=";")
+

 ###### Process Mining - individual artworks ######

 def pm_artworks(miner):
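eval_pm() is called throughout but defined outside the visible hunks. Given that its results are tabulated as fitness, precision, generalizability, and simplicity, it presumably wraps pm4py's four standard model-quality metrics; a sketch under that assumption (the actual definition lives elsewhere in the script):

    import numpy as np
    import pm4py
    from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
    from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator

    # Assumed shape of the eval_pm helper used above.
    def eval_pm(log, net, im, fm):
        fitness = pm4py.fitness_token_based_replay(log, net, im, fm)["log_fitness"]
        precision = pm4py.precision_token_based_replay(log, net, im, fm)
        generalization = generalization_evaluator.apply(log, net, im, fm)
        simplicity = simplicity_evaluator.apply(net)
        return np.array([fitness, precision, generalization, simplicity])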
@@ -308,40 +346,4 @@ for miner in ["heuristics", "inductive", "alpha", "ilp"]:

 eval_art = pm_artworks(miner = "inductive")

-##### Clustering ######
-
-## KMeans
-
-#eval_artworks = eval_art[eval_art.nettype == "alldata"].iloc[:,range(1,5)]
-eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:,range(1,5)]
-
-kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks)
-
-#from sklearn.manifold import MDS
-#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks))
-
-coord = eval_artworks
-coord["clusters"] = kmeans.labels_
-
-for i in coord.clusters.unique():
-    #plt.scatter(coord[coord.clusters == i].iloc[:,0], coord[coord.clusters == i].iloc[:,1],
-    plt.scatter(coord[coord.clusters == i].iloc[:,1], coord[coord.clusters == i].iloc[:,2],
-    #plt.scatter(coord[coord.clusters == i].iloc[:,2], coord[coord.clusters == i].iloc[:,4],
-                label = i)
-plt.legend()
-plt.show()
-
-### Scree plot
-
-sse = {}
-for k in range(1, 10):
-    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(eval_artworks[["precision", "generalizability"]])
-    #data["clusters"] = kmeans.labels_
-    #print(data["clusters"])
-    sse[k] = kmeans.inertia_  # Inertia: Sum of distances of samples to their closest cluster center
-plt.figure()
-plt.plot(list(sse.keys()), list(sse.values()))
-plt.xlabel("Number of clusters")
-plt.ylabel("SSE")
-plt.show()

code/pm_navigation-behavior.py (new file, 57 lines)
@@ -0,0 +1,57 @@
+%reset
+
+import pm4py
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from pm4py.visualization.petri_net import visualizer as pn_visualizer
+parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
+
+###### Load data and create event logs ######
+
+dat = pd.read_csv("results/haum/event_logfiles_2024-01-02_19-44-50.csv", sep = ";")
+dat = dat[dat["date.start"] < "2020-03-13"]
+dat = dat[dat["path"] != 81621]  # exclude broken trace
+# --> only pre corona (before artworks were updated)
+
+event_log = pm4py.format_dataframe(dat, case_id='case', activity_key='event',
+                                   timestamp_key='date.start')
+
+event_log.event.value_counts()
+event_log.event.value_counts(normalize=True)
+
+dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
+pm4py.view_dfg(dfg, start_activities, end_activities)
+
+#filtered_log = pm4py.filter_event_attribute_values(event_log, 'item', [80])
+
+i_net, im, fm = pm4py.discover_petri_net_inductive(event_log)
+pm4py.vis.view_petri_net(i_net, im, fm)
+gviz = pn_visualizer.apply(i_net, im, fm, parameters=parameters,
+                           variant=pn_visualizer.Variants.FREQUENCY,
+                           log=event_log)
+pn_visualizer.view(gviz)
+
+len(i_net.places)
+len(i_net.transitions)
+len(i_net.arcs)
+
+a_net, im, fm = pm4py.discover_petri_net_alpha(event_log)
+pm4py.vis.view_petri_net(a_net, im, fm)
+gviz = pn_visualizer.apply(a_net, im, fm, parameters=parameters,
+                           variant=pn_visualizer.Variants.FREQUENCY,
+                           log=event_log)
+pn_visualizer.view(gviz)
+
+
+len(a_net.places)
+len(a_net.transitions)
+len(a_net.arcs)
+
+h_net, im, fm = pm4py.discover_petri_net_heuristics(event_log)
+pm4py.vis.view_petri_net(h_net, im, fm)
+
+len(h_net.places)
+len(h_net.transitions)
+len(h_net.arcs)
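The size checks at the end of this new file repeat the same three len() calls per net; a hypothetical helper (not in the commit, assumes the nets discovered above are in scope) condenses the comparison:

    def net_size(net):
        # A pm4py PetriNet exposes its places, transitions, and arcs as sets.
        return {"places": len(net.places),
                "transitions": len(net.transitions),
                "arcs": len(net.arcs)}

    for name, net in [("inductive", i_net), ("alpha", a_net), ("heuristics", h_net)]:
        print(name, net_size(net))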
code/trace-clustering.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+from sklearn.cluster import KMeans
+
+import matplotlib.pyplot as plt
+
+
+##### Clustering ######
+
+## KMeans
+
+#eval_artworks = eval_art[eval_art.nettype == "alldata"].iloc[:,range(1,5)]
+eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:,range(1,5)]
+
+kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks)
+
+#from sklearn.manifold import MDS
+#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks))
+
+coord = eval_artworks
+coord["clusters"] = kmeans.labels_
+
+for i in coord.clusters.unique():
+    #plt.scatter(coord[coord.clusters == i].iloc[:,0], coord[coord.clusters == i].iloc[:,1],
+    plt.scatter(coord[coord.clusters == i].iloc[:,1], coord[coord.clusters == i].iloc[:,2],
+    #plt.scatter(coord[coord.clusters == i].iloc[:,2], coord[coord.clusters == i].iloc[:,4],
+                label = i)
+plt.legend()
+plt.show()
+
+### Scree plot
+
+sse = {}
+for k in range(1, 10):
+    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(eval_artworks[["precision", "generalizability"]])
+    #data["clusters"] = kmeans.labels_
+    #print(data["clusters"])
+    sse[k] = kmeans.inertia_  # Inertia: Sum of distances of samples to their closest cluster center
+plt.figure()
+plt.plot(list(sse.keys()), list(sse.values()))
+plt.xlabel("Number of clusters")
+plt.ylabel("SSE")
+plt.show()
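As committed, trace-clustering.py references eval_art, which pm_artworks() produces in the process-mining script, so the file is not runnable on its own. Hypothetical glue (the CSV path is an assumption, not from the commit) would make it standalone:

    import pandas as pd

    # Assumption: the per-artwork evaluation from pm_artworks() was saved to disk;
    # the file name below is illustrative only.
    eval_art = pd.read_csv("results/eval_artworks_inductive.csv", sep=";")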