Worked on closing events and checking traces; needs more love still

Nora Wickelmaier 2023-10-18 12:57:15 +02:00
parent 9efa1d10a9
commit 5ab8496056
5 changed files with 60 additions and 16 deletions

View File

@@ -181,6 +181,20 @@ details.
 UPDATE: By now I remove all events that span more than one log file. This
 lets me improve speed considerably.
+UPDATE: Infos from Philipp:
+"I also just went through the code from back then. The logging works like
+this: once the application starts, a new log file is created every 10
+minutes. The start time from which the duration is calculated is reset
+each time. Duration is therefore not 'duration since the application
+started' but 'duration since the logger restarted'. So your assumption is
+correct - there should be no durations > 10 minutes. The first entry of a
+log file can be anything between 0 and 10 minutes (depending on whether
+the table was in use at the time of the new logging interval). So if a
+case is spread over 2+ logs, you have to add 10 minutes per log file
+after the first to the duration for it to add up."
 ## Left padding of file IDs
 The file names of the raw log files are automatically generated and contain
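A minimal sketch of the correction Philipp describes, assuming raw-log columns case, fileId, and duration and durations logged in milliseconds (both assumptions that would need checking against the output of parse_logfiles()); the notes above currently remove such events instead of correcting them:

## Sketch (assumed column names): add 10 minutes per additional log file
## to the logged durations of a case that spans several log files
fix_durations <- function(dat, interval = 10 * 60 * 1000) {  # 10 min in ms (assumed unit)
  dat <- dat[order(dat$case, dat$fileId), ]
  ## 0 for entries in the first log file of a case, 1 for the second, ...
  nfiles_before <- ave(as.integer(factor(dat$fileId)), dat$case,
                       FUN = function(x) match(x, unique(x)) - 1)
  dat$duration <- dat$duration + nfiles_before * interval
  dat
}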

View File

@@ -11,21 +11,20 @@ path <- "../data/haum/LogFiles/"
 folders <- dir(path)
 # parse raw log files
-datraw <- parse_logfiles(folders, path)
+#datraw <- parse_logfiles(folders, path)
+datraw <- read.table("../data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv",
+                     sep = ";", header = TRUE)
 # export data
 write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
             sep = ";", row.names = FALSE)
-#save(datraw, file = paste0("../data/haum/datraw_", now, ".RData"))
-#load("../data/haum/datraw_2023-09-23_01-31-30.RData")
-artworks <- unique(na.omit(datraw$artwork))
 # convert to log events
 datlogs <- create_eventlogs(datraw,
                             xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
 artworks <- unique(na.omit(datlogs$artwork))
 artworks <- artworks[!artworks %in% c("504", "505")]
 topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
                          xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")

View File

@@ -47,7 +47,8 @@ for (i in seq_len(nrow(sf))) {
 # data sets... Not important here, since I only do NI.
 # load (small) event log data set
-dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
+#dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
+dat <- read.table("data/haum/event_logfiles_small_2023-10-15_10-08-43.csv",
                   sep = ";", header = TRUE)
 dat$date.start <- as.POSIXct(dat$date.start)
 dat$date.stop <- as.POSIXct(dat$date.stop)
@@ -64,7 +65,7 @@ dat2 <- merge(dat1, sfdat, by = "date", all.x = TRUE)
 ## Export data
 write.table(dat2,
-            file = "data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
+            file = "data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
             sep = ";", row.names = FALSE)
 # TODO: Maybe add infos about artworks?

View File

@@ -4,14 +4,14 @@
 # dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
 #                    sep = ";", header = TRUE)
-dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
+dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
                    sep = ";", header = TRUE)
 dat0$date <- as.Date(dat0$date)
 dat0$date.start <- as.POSIXct(dat0$date.start)
 dat0$date.stop <- as.POSIXct(dat0$date.stop)
 dat0$artwork <- sprintf("%03d", dat0$artwork)
-# TODO: Write a functions that closes events spanning different log files
+# TODO: Write a function that closes events spanning different log files
 #       OR: Remove openTopic and OpenPopup events that do not start with a
 #       flipCard (AND openPopup events without openTopic event beforehand)
@@ -48,16 +48,46 @@ table(table(dat$start))
 summary(aggregate(duration ~ trace, dat, mean))
-alog <- activitylog(dat,
+# remove fragmented traces
+tab <- xtabs( ~ trace + event, dat)
+fragments <- NULL
+for (i in seq_len(nrow(tab))) {
+  if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
+    fragments <- c(fragments, rownames(tab)[i])
+  } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
+    fragments <- c(fragments, rownames(tab)[i])
+  } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
+    fragments <- c(fragments, rownames(tab)[i])
+  }
+}
+datrm <- dat[!dat$trace %in% fragments, ]
+# TODO: Find trace that has flipCard --> openPopup --> openTopic
+dato <- datrm[datrm$event != "move", ]
+tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
+names(tmp) <- unique(dato$trace)
+ids <- sapply(tmp, length) == 3
+do.call(rbind, tmp[ids])
+# TODO:
+# fragmentary traces: for 4591 openTopic for topic 1 is in the raw log files, but gets
+# probably removed in close_events(); how can I prevent that? How can I fix
+# the traces and eventIds that do not match correctly ???
+ct <- c(4591, 5937, 7080, 8412, 8279)
+datrm[datrm$trace %in% ct, 1:10]
+### WHY?????
+alog <- activitylog(datrm,
                     case_id = "trace",
                     activity_id = "event",
                     #resource_id = "case",
                     resource_id = "artwork",
                     timestamps = c("start", "complete"))
+# --> have not understood, yet, which is what...
 process_map(alog)
 process_map(alog, frequency("relative"))
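The lapply()/unique() check above only looks at which events occur in a trace, not at their order. A minimal sketch for inspecting the actual order (reusing the columns trace, event, and start from above; not from the commit itself):

## Sketch: collapse each trace's events, sorted by start time, into one string
dato_sorted <- dato[order(dato$trace, dato$start), ]
seqs <- tapply(dato_sorted$event, dato_sorted$trace,
               function(x) paste(x, collapse = " -> "))
## traces in which an openPopup directly follows a flipCard
head(seqs[grepl("flipCard -> openPopup", seqs)])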

View File

@@ -2,10 +2,10 @@
 # Read data
-dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
-                  sep = ";", header = TRUE)
-# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
+# dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
 #                   sep = ";", header = TRUE)
+dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
+                  sep = ";", header = TRUE)
 dat$date <- as.Date(dat$date)
 dat$date.start <- as.POSIXct(dat$date.start)
 dat$date.stop <- as.POSIXct(dat$date.stop)