diff --git a/code/00_current-analysis.R b/code/00_current-analysis.R index d66cbfc..b87e05a 100644 --- a/code/00_current-analysis.R +++ b/code/00_current-analysis.R @@ -9,7 +9,7 @@ # (3.4) Artwork sequences # (3.5) Topics # -# input: results/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv +# input: results/haum/event_logfiles_glossar_2023-12-28_09-49-43.csv # output: # # last mod: 2023-11-15, NW @@ -27,16 +27,25 @@ library(bupaverse) #--------------- (1) Read data --------------- -dat <- read.table("results/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv", - sep = ";", header = TRUE) -dat$date <- as.POSIXct(dat$date) -dat$date.start <- as.POSIXct(dat$date.start) -dat$date.stop <- as.POSIXct(dat$date.stop) -dat$artwork <- sprintf("%03d", dat$artwork) +dat <- read.table("results/haum/event_logfiles_glossar_2023-12-28_09-49-43.csv", + sep = ";", header = TRUE, + colClasses = c("POSIXct", "character", "integer", + "integer", "numeric", "integer", + "character", "character", "character", + "character", "POSIXct", "POSIXct", + "numeric", "numeric", "numeric", + "integer", "character", + rep("numeric", 11), "integer", + "character", "character", "logical", + "logical", "logical", "character", + "character")) + +dat$date <- NULL +# TODO: Remove, after rerunning preprocessing + dat$event <- factor(dat$event, levels = c("move", "flipCard", "openTopic", "openPopup")) # Add weekdays to data frame - dat$weekdays <- factor(weekdays(dat$date.start), levels = c("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag", "Samstag", diff --git a/code/00_pm.py b/code/00_pm.py index 206b450..1e8166d 100644 --- a/code/00_pm.py +++ b/code/00_pm.py @@ -18,6 +18,8 @@ dat = dat[dat.date < "2020-03-13"] event_log = pm4py.format_dataframe(dat, case_id='trace', activity_key='event', timestamp_key='date.start') +# event_log = pm4py.format_dataframe(dat, case_id='trace', activity_key='event', +# timestamp_key='date.stop', start_timestamp_key='date.start') event_log = event_log.rename(columns={'artwork': 'case:artwork'}) #event_log = pm4py.convert_to_event_log(dat_log) # deprecated @@ -48,6 +50,15 @@ pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_heuristics_ is_sound = pm4py.check_soundness(net, im, fm) is_sound[0] +len(is_sound[1]["s_c_net"].arcs) +# 46 +len(is_sound[1]["s_c_net"].transitions) +# 23 +len(is_sound[1]["s_c_net"].places) +# 10 + + + # decorated petri net from pm4py.visualization.petri_net import visualizer as pn_visualizer parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"} @@ -67,6 +78,10 @@ pm4py.vis.save_vis_petri_net(net, im, fm, "../figures/processmaps/pn_alpha_compl is_sound = pm4py.check_soundness(net, im, fm) is_sound[0] +len(is_sound[1]["s_c_net"].arcs) +len(is_sound[1]["s_c_net"].transitions) +len(is_sound[1]["s_c_net"].places) + ## Inductive Miner net, im, fm = pm4py.discover_petri_net_inductive(event_log) i_eval = eval_pm(event_log, net, im, fm) @@ -99,9 +114,6 @@ eval.to_csv("results/eval_all-miners_complete.csv", sep=";") ###### Process Mining - individual artworks ###### -net, im, fm = pm4py.discover_petri_net_heuristics(event_log) -#net, im, fm = pm4py.discover_petri_net_inductive(event_log) - def pm_artworks(miner): retval1 = np.empty((len(event_log["case:artwork"].unique()), 4)) @@ -129,8 +141,8 @@ def pm_artworks(miner): subnet, subim, subfm = pm4py.discover_petri_net_alpha(subdata) elif miner == "ilp": subnet, subim, subfm = pm4py.discover_petri_net_ilp(subdata) - pm4py.save_vis_petri_net(subnet, subim, subfm, - "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png") + #pm4py.save_vis_petri_net(subnet, subim, subfm, + # "../figures/processmaps/artworks/petrinet_" + miner + "_" + str(artwork).zfill(3) + ".png") retval1[i] = eval_pm(subdata, net, im, fm) retval2[i] = eval_pm(subdata, subnet, subim, subfm) @@ -149,17 +161,21 @@ for miner in ["heuristics", "inductive", "alpha", "ilp"]: eval_art = pm_artworks(miner = miner) eval_art.to_csv("results/eval_artworks_" + miner + ".csv", sep=";") +eval_art = pm_artworks(miner = "inductive") ##### Clustering ###### ## KMeans -kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_art) +#eval_artworks = eval_art[eval_art.nettype == "alldata"].iloc[:,range(1,5)] +eval_artworks = eval_art[eval_art.nettype == "subdata"].iloc[:,range(1,5)] + +kmeans = KMeans(n_clusters=4, max_iter=1000).fit(eval_artworks) #from sklearn.manifold import MDS -#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_art)) +#coord = pd.DataFrame(MDS(normalized_stress='auto').fit_transform(eval_artworks)) -coord = eval_art +coord = eval_artworks coord["clusters"] = kmeans.labels_ for i in coord.clusters.unique(): @@ -174,7 +190,7 @@ plt.show() sse = {} for k in range(1, 10): - kmeans = KMeans(n_clusters=k, max_iter=1000).fit(eval_art[["precision", "generalizability"]]) + kmeans = KMeans(n_clusters=k, max_iter=1000).fit(eval_artworks[["precision", "generalizability"]]) #data["clusters"] = kmeans.labels_ #print(data["clusters"]) sse[k] = kmeans.inertia_ # Inertia: Sum of distances of samples to their closest cluster center @@ -183,3 +199,4 @@ plt.plot(list(sse.keys()), list(sse.values())) plt.xlabel("Number of clusters") plt.ylabel("SSE") plt.show() +