# Patch summary (free text ahead of the first `diff --git` header).
#
# NOTE(review): the line breaks of this patch have been collapsed -- file
# headers, hunk headers and hunk bodies all sit on one physical line -- so the
# original line structure has to be restored before it can be applied.
#
# code/00_current-anaylsis.R
#   * Hunk @@ -29,7 +29,7 @@: read.table() now reads
#     event_logfiles_glossar_2023-11-03_17-46-28.csv instead of
#     event_logfiles_glossar_2023-10-29_10-26-42.csv (same sep/header args).
#   * Hunk @@ -312,10 +312,10 @@: the artwork-frequency cut-off is raised from
#     12000 to 14000 in all three places it is used: the red abline() on the
#     barplot, the which() inspection call, and the `often080` selection that
#     feeds activitylog(). The in-code comment says the author is "aiming for
#     10" artworks; presumably the new cut-off restores that count for the new
#     input file -- TODO confirm.
#
# code/01_preprocessing_haum.R
#   * Hunk @@ -42,9 +42,9 @@: create_eventlogs() is now called with an
#     additional `save = TRUE` argument (presumably persists the generated
#     event log; verify against the definition of create_eventlogs()).
#   * `artworks` is now unique(datlogs$artwork) without na.omit().
#     NOTE(review): if datlogs$artwork can still contain NA, the NA is kept by
#     unique() and paste0(artworks, ".xml") turns it into the file name
#     "NA.xml" that is passed to extract_topics() -- confirm the column is
#     NA-free when glossar = TRUE.
diff --git a/code/00_current-anaylsis.R b/code/00_current-anaylsis.R index 2db5af9..ec0fae9 100644 --- a/code/00_current-anaylsis.R +++ b/code/00_current-anaylsis.R @@ -29,7 +29,7 @@ library(bupaverse) #--------------- (1) Read data --------------- -dat <- read.table("../data/haum/event_logfiles_glossar_2023-10-29_10-26-42.csv", +dat <- read.table("../data/haum/event_logfiles_glossar_2023-11-03_17-46-28.csv", sep = ";", header = TRUE) dat$date <- as.POSIXct(dat$date) dat$date.start <- as.POSIXct(dat$date.start) @@ -312,10 +312,10 @@ table(tmp$artwork) # select the ones most often (I am aiming for 10...) barplot(table(tmp$artwork)) -abline(h = 12000, col = "red") -which(table(tmp$artwork) > 12000) +abline(h = 14000, col = "red") +which(table(tmp$artwork) > 14000) -often080 <- names(which(table(tmp$artwork) > 12000)) +often080 <- names(which(table(tmp$artwork) > 14000)) alog <- activitylog(dat[dat$artwork %in% often080, ], case_id = "case", diff --git a/code/01_preprocessing_haum.R b/code/01_preprocessing_haum.R index ea8e5f3..4df1f24 100644 --- a/code/01_preprocessing_haum.R +++ b/code/01_preprocessing_haum.R @@ -42,9 +42,9 @@ datraw <- read.table("../data/haum/raw_logfiles_2023-10-25_16-20-45.csv", datlogs <- create_eventlogs(datraw, xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/", - glossar = TRUE) + glossar = TRUE, save = TRUE) -artworks <- unique(na.omit(datlogs$artwork)) +artworks <- unique(datlogs$artwork) topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"), xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")