Worked on closing events and checking traces; needs more love still
parent 9efa1d10a9
commit 5ab8496056

README.Rmd (14 changed lines)
@@ -181,6 +181,20 @@ details.
 UPDATE: For now, I remove all events that span more than one log file. This
 lets me improve speed considerably.
 
+UPDATE: Info from Philipp:
+
+"I also just went through the code from back then. The logging works like
+this: once the application starts, a new log file is created every 10
+minutes. The start time from which the duration is calculated is reset each
+time. Duration is therefore not 'time since the application started' but
+'time since the logger restarted'. So your assumption is correct - there
+should be no durations > 10 minutes. The first entry of a log file can be
+anything between 0 and 10 minutes (depending on whether the table was in
+use when the new logging interval started). So if a case is spread over 2+
+log files, you have to add 10 minutes per log file after the first to the
+duration for it to be correct."
+
 ## Left padding of file IDs
 
 The file names of the raw log files are automatically generated and contain
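Side note (not part of this commit): Philipp's explanation implies a simple duration fix once the number of log files a case spans is known. A minimal sketch, assuming hypothetical columns fileId.start and fileId.stop indexing the log file in which an event starts and stops, and a duration column in seconds (names and units are assumptions, not from the package):

# add 10 minutes per log file after the first, as described above
# (fileId.start / fileId.stop are assumed column names)
extra_logs <- pmax(dat$fileId.stop - dat$fileId.start, 0)
dat$duration.corrected <- dat$duration + extra_logs * 10 * 60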
@@ -11,21 +11,20 @@ path <- "../data/haum/LogFiles/"
 folders <- dir(path)
 
 # parse raw log files
-datraw <- parse_logfiles(folders, path)
+#datraw <- parse_logfiles(folders, path)
+datraw <- read.table("../data/haum/raw_logfiles_small_2023-09-26_13-50-20.csv",
+                     sep = ";", header = TRUE)
 
 # export data
 write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
             sep = ";", row.names = FALSE)
-#save(datraw, file = paste0("../data/haum/datraw_", now, ".RData"))
-#load("../data/haum/datraw_2023-09-23_01-31-30.RData")
-artworks <- unique(na.omit(datraw$artwork))
 
 # convert to log events
 datlogs <- create_eventlogs(datraw,
                             xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
 
 artworks <- unique(na.omit(datlogs$artwork))
 artworks <- artworks[!artworks %in% c("504", "505")]
 
 topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
                          xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
@@ -47,7 +47,8 @@ for (i in seq_len(nrow(sf))) {
 # data sets... Not important here, since I only do NI.
 
 # load (small) event log data set
-dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
+#dat <- read.table("data/haum/event_logfiles_2023-09-23_01-31-30.csv",
+dat <- read.table("data/haum/event_logfiles_small_2023-10-15_10-08-43.csv",
                   sep = ";", header = TRUE)
 dat$date.start <- as.POSIXct(dat$date.start)
 dat$date.stop <- as.POSIXct(dat$date.stop)
@@ -64,7 +65,7 @@ dat2 <- merge(dat1, sfdat, by = "date", all.x = TRUE)
 ## Export data
 
 write.table(dat2,
-            file = "data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
+            file = "data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
             sep = ";", row.names = FALSE)
 
 # TODO: Maybe add infos about artworks?
@@ -4,14 +4,14 @@
 
 # dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
 #                    sep = ";", header = TRUE)
-dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
+dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
                    sep = ";", header = TRUE)
 dat0$date <- as.Date(dat0$date)
 dat0$date.start <- as.POSIXct(dat0$date.start)
 dat0$date.stop <- as.POSIXct(dat0$date.stop)
 dat0$artwork <- sprintf("%03d", dat0$artwork)
 
-# TODO: Write a functions that closes events spanning different log files
+# TODO: Write a function that closes events spanning different log files
 # OR: Remove openTopic and OpenPopup events that do not start with a
 # flipCard (AND openPopup events without openTopic event beforehand)
 
@@ -48,16 +48,46 @@ table(table(dat$start))
 
 summary(aggregate(duration ~ trace, dat, mean))
 
+# remove fragmented traces
+tab <- xtabs( ~ trace + event, dat)
+
+fragments <- NULL
+
+for (i in seq_len(nrow(tab))) {
+  if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
+    fragments <- c(fragments, rownames(tab)[i])
+  } else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
+    fragments <- c(fragments, rownames(tab)[i])
+  } else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
+    fragments <- c(fragments, rownames(tab)[i])
+  }
+}
+datrm <- dat[!dat$trace %in% fragments, ]
+
+# TODO: Find trace that has flipCard --> openPopup --> openTopic
+dato <- datrm[datrm$event != "move", ]
+tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
+names(tmp) <- unique(dato$trace)
+
+ids <- sapply(tmp, length) == 3
+do.call(rbind, tmp[ids])
+
+# TODO:
+# fragmentary traces: for 4591, openTopic for topic 1 is in the raw log files, but
+# probably gets removed in close_events(); how can I prevent that? How can I fix
+# the traces and eventIds that do not match correctly?
+ct <- c(4591, 5937, 7080, 8412, 8279)
+datrm[datrm$trace %in% ct, 1:10]
+
+### WHY?????
+
-alog <- activitylog(dat,
+alog <- activitylog(datrm,
                     case_id = "trace",
                     activity_id = "event",
                     #resource_id = "case",
                     resource_id = "artwork",
                     timestamps = c("start", "complete"))
 
-# --> have not understood, yet, which ist what...
 
 process_map(alog)
 
 process_map(alog, frequency("relative"))
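The fragment check added above works row by row; the same three criteria can also be expressed vectorized on the contingency table. A minimal sketch of that alternative (not part of this commit, untested against the actual data):

# same criteria as the loop: openPopup or openTopic without any flipCard,
# or openPopup without any openTopic, marks a trace as fragmented
tab <- xtabs( ~ trace + event, dat)
is_fragment <- (tab[, "openPopup"] != 0 & tab[, "flipCard"] == 0) |
               (tab[, "openTopic"] != 0 & tab[, "flipCard"] == 0) |
               (tab[, "openPopup"] != 0 & tab[, "openTopic"] == 0)
datrm <- dat[!dat$trace %in% rownames(tab)[is_fragment], ]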
@@ -2,10 +2,10 @@
 
 # Read data
 
-dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
-                  sep = ";", header = TRUE)
-# dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-09-25_09-56-34.csv",
-#                   sep = ";", header = TRUE)
+# dat <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
+#                   sep = ";", header = TRUE)
+dat <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-15_10-08-43.csv",
+                  sep = ";", header = TRUE)
 dat$date <- as.Date(dat$date)
 dat$date.start <- as.POSIXct(dat$date.start)
 dat$date.stop <- as.POSIXct(dat$date.stop)