Worked on closing events and checking traces; needs more love still

This commit is contained in:
2023-10-18 12:57:15 +02:00
parent 9efa1d10a9
commit 5ab8496056
5 changed files with 60 additions and 16 deletions
+4 -5
View File
@@ -11,21 +11,20 @@ path <- "../data/haum/LogFiles/"
folders <- dir(path)
# parse raw log files
datraw <- parse_logfiles(folders, path)
#datraw <- parse_logfiles(folders, path)
datraw <- read.table("../data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv",
sep = ";", header = TRUE)
# export data
write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
sep = ";", row.names = FALSE)
#save(datraw, file = paste0("../data/haum/datraw_", now, ".RData"))
#load("../data/haum/datraw_2023-09-23_01-31-30.RData")
artworks <- unique(na.omit(datraw$artwork))
# convert to log events
datlogs <- create_eventlogs(datraw,
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
artworks <- unique(na.omit(datlogs$artwork))
artworks <- artworks[!artworks %in% c("504", "505")]
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
+3 -2
View File
@@ -47,7 +47,8 @@ for (i in seq_len(nrow(sf))) {
# data sets... Not important here, since I only do NI.
# load (small) event log data set
dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
#dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
dat <- read.table("data/haum/event_logfiles_small_2023-10-15_10-08-43.csv",
sep = ";", header = TRUE)
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop <- as.POSIXct(dat$date.stop)
@@ -64,7 +65,7 @@ dat2 <- merge(dat1, sfdat, by = "date", all.x = TRUE)
## Export data
write.table(dat2,
file = "data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
file = "data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
sep = ";", row.names = FALSE)
# TODO: Maybe add infos about artworks?
+35 -5
View File
@@ -4,14 +4,14 @@
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
# sep = ";", header = TRUE)
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
sep = ";", header = TRUE)
dat0$date <- as.Date(dat0$date)
dat0$date.start <- as.POSIXct(dat0$date.start)
dat0$date.stop <- as.POSIXct(dat0$date.stop)
dat0$artwork <- sprintf("%03d", dat0$artwork)
# TODO: Write a functions that closes events spanning different log files
# TODO: Write a function that closes events spanning different log files
# OR: Remove openTopic and OpenPopup events that do not start with a
# flipCard (AND openPopup events without openTopic event beforehand)
@@ -48,16 +48,46 @@ table(table(dat$start))
summary(aggregate(duration ~ trace, dat, mean))
# remove fragmented traces
tab <- xtabs( ~ trace + event, dat)
alog <- activitylog(dat,
# Flag fragmented traces using the trace-by-event count table `tab`.
# A trace is flagged when it contains
#   - an openPopup but no flipCard, or
#   - an openTopic but no flipCard, or
#   - an openPopup but no openTopic.
# All traces are tested at once instead of growing `fragments` row by row
# inside a loop.
has_flip <- tab[, "flipCard"] != 0
has_topic <- tab[, "openTopic"] != 0
has_popup <- tab[, "openPopup"] != 0
is_fragment <- (has_popup & !has_flip) |
  (has_topic & !has_flip) |
  (has_popup & !has_topic)
fragments <- rownames(tab)[is_fragment]
# Keep only the events whose trace was not flagged above
datrm <- dat[!dat$trace %in% fragments, ]
# TODO: Find trace that has flipCard --> openPopup --> openTopic
# Look for traces that contain exactly three distinct events once the
# "move" events are dropped.
dato <- datrm[datrm$event != "move", ]
trace_ids <- unique(dato$trace)
# Distinct events of each trace, in order of first appearance
tmp <- lapply(trace_ids, function(x) unique(dato[dato$trace == x, "event"]))
names(tmp) <- trace_ids
# lengths() always yields an integer vector, also for an empty list
ids <- lengths(tmp) == 3
# Stack the matching event sequences, one row per trace
do.call(rbind, tmp[ids])
# TODO:
# Fragmentary traces: for trace 4591, the openTopic event for topic 1 is in
# the raw log files but is probably removed in close_events(). How can I
# prevent that? How can I fix the traces and eventIds that do not match?
ct <- c(4591, 5937, 7080, 8412, 8279)
datrm[datrm$trace %in% ct, 1:10]
### WHY?????
alog <- activitylog(datrm,
case_id = "trace",
activity_id = "event",
#resource_id = "case",
resource_id = "artwork",
timestamps = c("start", "complete"))
# --> have not understood yet which is which...
process_map(alog)
process_map(alog, frequency("relative"))
+4 -4
View File
@@ -2,10 +2,10 @@
# Read data
dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
sep = ";", header = TRUE)
# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
# sep = ";", header = TRUE)
# dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
# sep = ";", header = TRUE)
dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
sep = ";", header = TRUE)
dat$date <- as.Date(dat$date)
dat$date.start <- as.POSIXct(dat$date.start)
dat$date.stop <- as.POSIXct(dat$date.stop)