Worked on closing events and checking traces; needs more love still
This commit is contained in:
parent
9efa1d10a9
commit
5ab8496056
14
README.Rmd
14
README.Rmd
@ -181,6 +181,20 @@ details.
|
||||
UPDATE: For now, I remove all events that span more than one log file. This
|
||||
lets me improve speed considerably.
|
||||
|
||||
UPDATE: Information from Philipp:
|
||||
|
||||
"Bin außerdem gerade den Code von damals durchgegangen. Das Logging läuft
|
||||
so: Mit Start der Anwendung wird alle 10 Minuten ein neues Logfile
|
||||
erstellt. Die Startzeit, von der aus die Duration berechnet wird, wird
|
||||
jeweils neu gesetzt. Duration ist also nicht "Dauer seit Start der
|
||||
Anwendung" sondern "Dauer seit Restart des Loggers". Deine Vermutung ist
|
||||
also richtig - es sollte keine Durations >10 Minuten geben. Der erste
|
||||
Eintrag eines Logfiles kann alles zwischen 0 und 10 Minuten sein (je
|
||||
nachdem, ob der Tisch zum Zeitpunkt des neuen Logging-Intervalls in
|
||||
Benutzung war). Wenn ein Case also über 2+ Logs verteilt ist, musst du auf
|
||||
die Duration jeweils 10 Minuten pro Logfile nach dem ersten addieren, damit
|
||||
es passt."
|
||||
|
||||
## Left padding of file IDs
|
||||
|
||||
The file names of the raw log files are automatically generated and contain
|
||||
|
@ -11,21 +11,20 @@ path <- "../data/haum/LogFiles/"
|
||||
folders <- dir(path)
|
||||
|
||||
# parse raw log files
|
||||
datraw <- parse_logfiles(folders, path)
|
||||
#datraw <- parse_logfiles(folders, path)
|
||||
datraw <- read.table("../data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv",
|
||||
sep = ";", header = TRUE)
|
||||
|
||||
# export data
|
||||
write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
#save(datraw, file = paste0("../data/haum/datraw_", now, ".RData"))
|
||||
#load("../data/haum/datraw_2023-09-23_01-31-30.RData")
|
||||
artworks <- unique(na.omit(datraw$artwork))
|
||||
|
||||
# convert to log events
|
||||
datlogs <- create_eventlogs(datraw,
|
||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
|
||||
artworks <- unique(na.omit(datlogs$artwork))
|
||||
artworks <- artworks[!artworks %in% c("504", "505")]
|
||||
|
||||
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
|
||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
|
||||
|
@ -47,7 +47,8 @@ for (i in seq_len(nrow(sf))) {
|
||||
# data sets... Not important here, since I only do NI.
|
||||
|
||||
# load (small) event log data set
|
||||
dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
|
||||
#dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
|
||||
dat <- read.table("data/haum/event_logfiles_small_2023-10-15_10-08-43.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat$date.start <- as.POSIXct(dat$date.start)
|
||||
dat$date.stop <- as.POSIXct(dat$date.stop)
|
||||
@ -64,7 +65,7 @@ dat2 <- merge(dat1, sfdat, by = "date", all.x = TRUE)
|
||||
## Export data
|
||||
|
||||
write.table(dat2,
|
||||
file = "data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
file = "data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
|
||||
sep = ";", row.names = FALSE)
|
||||
|
||||
# TODO: Maybe add infos about artworks?
|
||||
|
@ -4,14 +4,14 @@
|
||||
|
||||
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
# sep = ";", header = TRUE)
|
||||
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
|
||||
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat0$date <- as.Date(dat0$date)
|
||||
dat0$date.start <- as.POSIXct(dat0$date.start)
|
||||
dat0$date.stop <- as.POSIXct(dat0$date.stop)
|
||||
dat0$artwork <- sprintf("%03d", dat0$artwork)
|
||||
|
||||
# TODO: Write a functions that closes events spanning different log files
|
||||
# TODO: Write a function that closes events spanning different log files
|
||||
# OR: Remove openTopic and openPopup events that do not start with a
|
||||
# flipCard (AND openPopup events without openTopic event beforehand)
|
||||
|
||||
@ -48,16 +48,46 @@ table(table(dat$start))
|
||||
|
||||
summary(aggregate(duration ~ trace, dat, mean))
|
||||
|
||||
# remove fragmented traces
|
||||
tab <- xtabs( ~ trace + event, dat)
|
||||
|
||||
alog <- activitylog(dat,
|
||||
fragments <- NULL
|
||||
|
||||
for (i in seq_len(nrow(tab))) {
|
||||
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
}
|
||||
}
|
||||
datrm <- dat[!dat$trace %in% fragments, ]
|
||||
|
||||
# TODO: Find trace that has flipCard --> openPopup --> openTopic
|
||||
dato <- datrm[datrm$event != "move", ]
|
||||
tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
|
||||
names(tmp) <- unique(dato$trace)
|
||||
|
||||
ids <- sapply(tmp, length) == 3
|
||||
do.call(rbind, tmp[ids])
|
||||
|
||||
# TODO:
|
||||
# fragmentary traces: for 4591 openTopic for topic 1 is in the raw log files, but gets
|
||||
# probably removed in close_events(); how can I prevent that? How can I fix
|
||||
# the traces and eventIds that do not match correctly ???
|
||||
ct <- c(4591, 5937, 7080, 8412, 8279)
|
||||
datrm[datrm$trace %in% ct, 1:10]
|
||||
|
||||
### WHY?????
|
||||
|
||||
alog <- activitylog(datrm,
|
||||
case_id = "trace",
|
||||
activity_id = "event",
|
||||
#resource_id = "case",
|
||||
resource_id = "artwork",
|
||||
timestamps = c("start", "complete"))
|
||||
|
||||
# --> have not understood yet which is what...
|
||||
|
||||
process_map(alog)
|
||||
|
||||
process_map(alog, frequency("relative"))
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
# Read data
|
||||
|
||||
dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
sep = ";", header = TRUE)
|
||||
# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
|
||||
# sep = ";", header = TRUE)
|
||||
# dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
# sep = ";", header = TRUE)
|
||||
dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat$date <- as.Date(dat$date)
|
||||
dat$date.start <- as.POSIXct(dat$date.start)
|
||||
dat$date.stop <- as.POSIXct(dat$date.stop)
|
||||
|
Loading…
Reference in New Issue
Block a user