From fb35fcfe4af3780fb6cef59d41994c4f9b0a23e9 Mon Sep 17 00:00:00 2001 From: nwickel Date: Mon, 6 Nov 2023 09:15:03 +0100 Subject: [PATCH] Updated analysis and slides after fixing add_case --- code/00_current-anaylsis.R | 8 ++++---- code/01_preprocessing_haum.R | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/code/00_current-anaylsis.R b/code/00_current-anaylsis.R index 2db5af9..ec0fae9 100644 --- a/code/00_current-anaylsis.R +++ b/code/00_current-anaylsis.R @@ -29,7 +29,7 @@ library(bupaverse) #--------------- (1) Read data --------------- -dat <- read.table("../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv", +dat <- read.table("../data/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv", sep = ";", header = TRUE) dat$date <- as.POSIXct(dat$date) dat$date.start <- as.POSIXct(dat$date.start) @@ -312,10 +312,10 @@ table(tmp$artwork) # select the ones most often (I am aiming for 10...) barplot(table(tmp$artwork)) -abline(h = 12000, col = "red") -which(table(tmp$artwork) > 12000) +abline(h = 14000, col = "red") +which(table(tmp$artwork) > 14000) -often080 <- names(which(table(tmp$artwork) > 12000)) +often080 <- names(which(table(tmp$artwork) > 14000)) alog <- activitylog(dat[dat$artwork %in% often080, ], case_id = "case", diff --git a/code/01_preprocessing_haum.R b/code/01_preprocessing_haum.R index ea8e5f3..4df1f24 100644 --- a/code/01_preprocessing_haum.R +++ b/code/01_preprocessing_haum.R @@ -42,9 +42,9 @@ datraw <- read.table("../data/haum/raw_logfiles_2023-10-25_16-20-45.csv", datlogs <- create_eventlogs(datraw, xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/", - glossar = TRUE) + glossar = TRUE, save = TRUE) -artworks <- unique(na.omit(datlogs$artwork)) +artworks <- unique(datlogs$artwork) topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"), xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")