Tried out current state of mtt; parsing for 8o8m still not working correctly
This commit is contained in:
parent
d4e9676dd3
commit
fb448f01a9
@ -10,17 +10,26 @@ folders <- dir("../data/8o8m/LogFiles/")
|
||||
|
||||
# parse raw log files
|
||||
datraw <- parse_logfiles(folders, path = "../data/8o8m/LogFiles/")
|
||||
artworks <- unique(na.omit(datraw$artwork))
|
||||
#artworks <- unique(na.omit(datraw$artwork))
|
||||
|
||||
# export data
|
||||
write.table(datraw, paste0("../data/8o8m/raw_logfiles_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
|
||||
datraw2 <- datraw[!is.na(datraw$artwork), ]
|
||||
#datraw[is.na(datraw$artwork), ]
|
||||
datraw <- datraw[!is.na(datraw$artwork), ]
|
||||
# TODO: Why is this happening?
|
||||
|
||||
# convert to log events
|
||||
datlogs <- create_eventlogs(datraw2, xmlpath = "../data/8o8m/Content8o8m/")
|
||||
datlogs <- create_eventlogs(datraw, xmlpath = "../data/8o8m/Content8o8m/")
|
||||
|
||||
artworks <- unique(datlogs$artwork)
|
||||
|
||||
|
||||
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, "_en.xml"),
|
||||
xmlpath = "../data/8o8m/Content8o8m/")
|
||||
# TODO: What is wrong with the German XML files that the topics are
|
||||
# extracted like this? (It works fine for the English versions...)
|
||||
|
||||
# export data
|
||||
write.table(datlogs, paste0("../data/8o8m/event_logfiles_", now, ".csv"),
|
||||
|
@ -24,20 +24,25 @@ now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
#--------------- (1) Parse raw log files ---------------
|
||||
|
||||
path <- "../data/haum/LogFiles/"
|
||||
folders <- dir(path)
|
||||
#path <- "../data/haum/LogFiles/"
|
||||
#folders <- dir(path)
|
||||
#folders <- "2016"
|
||||
|
||||
datraw <- parse_logfiles(folders, path)
|
||||
#datraw <- parse_logfiles(folders, path)
|
||||
|
||||
datraw <- read.table("../data/haum/raw_logfiles_2023-10-25_16-20-45.csv",
|
||||
sep = ";", header = TRUE)
|
||||
|
||||
## Export data
|
||||
|
||||
write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
#write.table(datraw, paste0("../data/haum/raw_logfiles_small_", now, ".csv"),
|
||||
# sep = ";", row.names = FALSE)
|
||||
|
||||
#--------------- (2) Create event logs ---------------
|
||||
|
||||
datlogs <- create_eventlogs(datraw,
|
||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/",
|
||||
glossar = TRUE)
|
||||
|
||||
artworks <- unique(na.omit(datlogs$artwork))
|
||||
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
|
||||
@ -95,12 +100,14 @@ for (i in seq_len(nrow(sf))) {
|
||||
|
||||
## Merge data
|
||||
|
||||
dat1 <- merge(datlogs_topics, hd, by.x = "date.start", by.y = "date", all.x = TRUE)
|
||||
dat2 <- merge(dat1, sfdat, by.x = "date.start", by.y = "date", all.x = TRUE)
|
||||
datlogs_topics$date <- as.Date(datlogs_topics$date.start)
|
||||
|
||||
dat1 <- merge(datlogs_topics, hd, by.x = "date", by.y = "date", all.x = TRUE)
|
||||
dat2 <- merge(dat1, sfdat, by.x = "date", by.y = "date", all.x = TRUE)
|
||||
|
||||
## Export data
|
||||
|
||||
write.table(dat2, paste0("../data/haum/event_logfiles_", now, ".csv"),
|
||||
write.table(dat2, paste0("../data/haum/event_logfiles_glossar_", now, ".csv"),
|
||||
sep = ";", row.names = FALSE)
|
||||
|
||||
# TODO: Maybe add infos about artworks?
|
||||
|
@ -2,22 +2,14 @@
|
||||
|
||||
# Read data
|
||||
|
||||
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
||||
# sep = ";", header = TRUE)
|
||||
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-19_18-25-26.csv",
|
||||
dat0 <- read.table("../data/haum/event_logfiles_2023-10-25_17-29-52.csv",
|
||||
sep = ";", header = TRUE)
|
||||
dat0$date <- as.Date(dat0$date)
|
||||
dat0$date.start <- as.POSIXct(dat0$date.start)
|
||||
dat0$date.stop <- as.POSIXct(dat0$date.stop)
|
||||
dat0$artwork <- sprintf("%03d", dat0$artwork)
|
||||
|
||||
# TODO: Write a function that closes events spanning different log files
|
||||
# OR: Remove openTopic and OpenPopup events that do not start with a
|
||||
# flipCard (AND openPopup events without openTopic event beforehand)
|
||||
|
||||
table(dat0[!duplicated(dat0$trace), "event"])
|
||||
# flipCard move openPopup openTopic
|
||||
# 45528 247718 981 3457
|
||||
|
||||
proportions(table(dat0[!duplicated(dat0$trace), "event"]))
|
||||
|
||||
tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
|
||||
@ -48,38 +40,28 @@ table(table(dat$start))
|
||||
|
||||
summary(aggregate(duration ~ trace, dat, mean))
|
||||
|
||||
# remove fragmented traces
|
||||
tab <- xtabs( ~ trace + event, dat)
|
||||
|
||||
fragments <- NULL
|
||||
|
||||
for (i in seq_len(nrow(tab))) {
|
||||
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
||||
fragments <- c(fragments, rownames(tab)[i])
|
||||
}
|
||||
}
|
||||
datrm <- dat[!dat$trace %in% fragments, ]
|
||||
|
||||
# TODO: Find trace that has flipCard --> openPopup --> openTopic
|
||||
dato <- datrm[datrm$event != "move", ]
|
||||
tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
|
||||
names(tmp) <- unique(dato$trace)
|
||||
dato <- dat[dat$event != "move", ]
|
||||
dato_split <- split(dato, ~ trace)
|
||||
tmp <- lapply(dato_split, function(x) unique(x$event))
|
||||
#tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
|
||||
|
||||
ids <- sapply(tmp, length) == 3
|
||||
do.call(rbind, tmp[ids])
|
||||
tmp2 <- as.data.frame(do.call(rbind, tmp[ids]))
|
||||
names(tmp2) <- c("flipCard", "openTopic", "openPopup")
|
||||
|
||||
# TODO:
|
||||
# fragmentary traces: for 4591 openTopic for topic 1 is in the raw log files, but gets
|
||||
# probably removed in close_events(); how can I prevent that? How can I fix
|
||||
# the traces and eventIds that do not match correctly ???
|
||||
ct <- c(4591, 5937, 7080, 8412, 8279)
|
||||
datrm[datrm$trace %in% ct, 1:10]
|
||||
table(tmp2$flipCard)
|
||||
table(tmp2$openTopic)
|
||||
table(tmp2$openPopup)
|
||||
|
||||
frag_ids <- which(tmp2$openTopic == "openPopup")
|
||||
|
||||
tmp3 <- dat[dat$trace %in% rownames(tmp2)[frag_ids], ]
|
||||
|
||||
tmp4 <- tmp3[!tmp3$glossar == 1, ]
|
||||
|
||||
dat6 <- rbind(dat[!dat$trace %in% rownames(tmp2)[frag_ids], ], tmp4)
|
||||
|
||||
### WHY?????
|
||||
|
||||
alog <- activitylog(dat,
|
||||
case_id = "trace",
|
||||
|
Loading…
Reference in New Issue
Block a user