Tried out current state of mtt; parsing for 8o8m still not working correctly
This commit is contained in:
parent
d4e9676dd3
commit
fb448f01a9
@ -10,17 +10,26 @@ folders <- dir("../data/8o8m/LogFiles/")
|
|||||||
|
|
||||||
# parse raw log files
|
# parse raw log files
|
||||||
datraw <- parse_logfiles(folders, path = "../data/8o8m/LogFiles/")
|
datraw <- parse_logfiles(folders, path = "../data/8o8m/LogFiles/")
|
||||||
artworks <- unique(na.omit(datraw$artwork))
|
#artworks <- unique(na.omit(datraw$artwork))
|
||||||
|
|
||||||
# export data
|
# export data
|
||||||
write.table(datraw, paste0("../data/8o8m/raw_logfiles_", now, ".csv"),
|
write.table(datraw, paste0("../data/8o8m/raw_logfiles_", now, ".csv"),
|
||||||
sep = ";", row.names = FALSE)
|
sep = ";", row.names = FALSE)
|
||||||
|
|
||||||
datraw2 <- datraw[!is.na(datraw$artwork), ]
|
#datraw[is.na(datraw$artwork), ]
|
||||||
|
datraw <- datraw[!is.na(datraw$artwork), ]
|
||||||
# TODO: Why is this happening?
|
# TODO: Why is this happening?
|
||||||
|
|
||||||
# convert to log events
|
# convert to log events
|
||||||
datlogs <- create_eventlogs(datraw2, xmlpath = "../data/8o8m/Content8o8m/")
|
datlogs <- create_eventlogs(datraw, xmlpath = "../data/8o8m/Content8o8m/")
|
||||||
|
|
||||||
|
artworks <- unique(datlogs$artwork)
|
||||||
|
|
||||||
|
|
||||||
|
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, "_en.xml"),
|
||||||
|
xmlpath = "../data/8o8m/Content8o8m/")
|
||||||
|
# TODO: What is wrong with the German XML files that the topics are
|
||||||
|
# extracted like this? (It works fine for the English versions...)
|
||||||
|
|
||||||
# export data
|
# export data
|
||||||
write.table(datlogs, paste0("../data/8o8m/event_logfiles_", now, ".csv"),
|
write.table(datlogs, paste0("../data/8o8m/event_logfiles_", now, ".csv"),
|
||||||
|
@ -24,20 +24,25 @@ now <- format(Sys.time(), "%Y-%m-%d_%H-%M-%S")
|
|||||||
|
|
||||||
#--------------- (1) Parse raw log files ---------------
|
#--------------- (1) Parse raw log files ---------------
|
||||||
|
|
||||||
path <- "../data/haum/LogFiles/"
|
#path <- "../data/haum/LogFiles/"
|
||||||
folders <- dir(path)
|
#folders <- dir(path)
|
||||||
|
#folders <- "2016"
|
||||||
|
|
||||||
datraw <- parse_logfiles(folders, path)
|
#datraw <- parse_logfiles(folders, path)
|
||||||
|
|
||||||
|
datraw <- read.table("../data/haum/raw_logfiles_2023-10-25_16-20-45.csv",
|
||||||
|
sep = ";", header = TRUE)
|
||||||
|
|
||||||
## Export data
|
## Export data
|
||||||
|
|
||||||
write.table(datraw, paste0("../data/haum/raw_logfiles_", now, ".csv"),
|
#write.table(datraw, paste0("../data/haum/raw_logfiles_small_", now, ".csv"),
|
||||||
sep = ";", row.names = FALSE)
|
# sep = ";", row.names = FALSE)
|
||||||
|
|
||||||
#--------------- (2) Create event logs ---------------
|
#--------------- (2) Create event logs ---------------
|
||||||
|
|
||||||
datlogs <- create_eventlogs(datraw,
|
datlogs <- create_eventlogs(datraw,
|
||||||
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/")
|
xmlpath = "../data/haum/ContentEyevisit/eyevisit_cards_light/",
|
||||||
|
glossar = TRUE)
|
||||||
|
|
||||||
artworks <- unique(na.omit(datlogs$artwork))
|
artworks <- unique(na.omit(datlogs$artwork))
|
||||||
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
|
topics <- extract_topics(artworks, xmlfiles = paste0(artworks, ".xml"),
|
||||||
@ -95,12 +100,14 @@ for (i in seq_len(nrow(sf))) {
|
|||||||
|
|
||||||
## Merge data
|
## Merge data
|
||||||
|
|
||||||
dat1 <- merge(datlogs_topics, hd, by.x = "date.start", by.y = "date", all.x = TRUE)
|
datlogs_topics$date <- as.Date(datlogs_topics$date.start)
|
||||||
dat2 <- merge(dat1, sfdat, by.x = "date.start", by.y = "date", all.x = TRUE)
|
|
||||||
|
dat1 <- merge(datlogs_topics, hd, by.x = "date", by.y = "date", all.x = TRUE)
|
||||||
|
dat2 <- merge(dat1, sfdat, by.x = "date", by.y = "date", all.x = TRUE)
|
||||||
|
|
||||||
## Export data
|
## Export data
|
||||||
|
|
||||||
write.table(dat2, paste0("../data/haum/event_logfiles_", now, ".csv"),
|
write.table(dat2, paste0("../data/haum/event_logfiles_glossar_", now, ".csv"),
|
||||||
sep = ";", row.names = FALSE)
|
sep = ";", row.names = FALSE)
|
||||||
|
|
||||||
# TODO: Maybe add infos about artworks?
|
# TODO: Maybe add infos about artworks?
|
||||||
|
@ -2,22 +2,14 @@
|
|||||||
|
|
||||||
# Read data
|
# Read data
|
||||||
|
|
||||||
# dat0 <- read.table("../data/haum/event_logfiles_metadata_2023-09-23_01-31-30.csv",
|
dat0 <- read.table("../data/haum/event_logfiles_2023-10-25_17-29-52.csv",
|
||||||
# sep = ";", header = TRUE)
|
|
||||||
dat0 <- read.table("../data/haum/event_logfiles_small_metadata_2023-10-19_18-25-26.csv",
|
|
||||||
sep = ";", header = TRUE)
|
sep = ";", header = TRUE)
|
||||||
dat0$date <- as.Date(dat0$date)
|
|
||||||
dat0$date.start <- as.POSIXct(dat0$date.start)
|
dat0$date.start <- as.POSIXct(dat0$date.start)
|
||||||
dat0$date.stop <- as.POSIXct(dat0$date.stop)
|
dat0$date.stop <- as.POSIXct(dat0$date.stop)
|
||||||
dat0$artwork <- sprintf("%03d", dat0$artwork)
|
dat0$artwork <- sprintf("%03d", dat0$artwork)
|
||||||
|
|
||||||
# TODO: Write a function that closes events spanning different log files
|
|
||||||
# OR: Remove openTopic and OpenPopup events that do not start with a
|
|
||||||
# flipCard (AND openPopup events without openTopic event beforehand)
|
|
||||||
|
|
||||||
table(dat0[!duplicated(dat0$trace), "event"])
|
table(dat0[!duplicated(dat0$trace), "event"])
|
||||||
# flipCard move openPopup openTopic
|
|
||||||
# 45528 247718 981 3457
|
|
||||||
proportions(table(dat0[!duplicated(dat0$trace), "event"]))
|
proportions(table(dat0[!duplicated(dat0$trace), "event"]))
|
||||||
|
|
||||||
tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
|
tmp <- dat0[!duplicated(dat0$trace) & dat0$event %in% c("openTopic",
|
||||||
@ -48,38 +40,28 @@ table(table(dat$start))
|
|||||||
|
|
||||||
summary(aggregate(duration ~ trace, dat, mean))
|
summary(aggregate(duration ~ trace, dat, mean))
|
||||||
|
|
||||||
# remove fragmented traces
|
|
||||||
tab <- xtabs( ~ trace + event, dat)
|
|
||||||
|
|
||||||
fragments <- NULL
|
|
||||||
|
|
||||||
for (i in seq_len(nrow(tab))) {
|
|
||||||
if (tab[i, "openPopup"] != 0 & tab[i, "flipCard"] == 0) {
|
|
||||||
fragments <- c(fragments, rownames(tab)[i])
|
|
||||||
} else if (tab[i, "openTopic"] != 0 & tab[i, "flipCard"] == 0) {
|
|
||||||
fragments <- c(fragments, rownames(tab)[i])
|
|
||||||
} else if (tab[i, "openPopup"] != 0 & tab[i, "openTopic"] == 0) {
|
|
||||||
fragments <- c(fragments, rownames(tab)[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
datrm <- dat[!dat$trace %in% fragments, ]
|
|
||||||
|
|
||||||
# TODO: Find trace that has flipCard --> openPopup --> openTopic
|
# TODO: Find trace that has flipCard --> openPopup --> openTopic
|
||||||
dato <- datrm[datrm$event != "move", ]
|
dato <- dat[dat$event != "move", ]
|
||||||
tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
|
dato_split <- split(dato, ~ trace)
|
||||||
names(tmp) <- unique(dato$trace)
|
tmp <- lapply(dato_split, function(x) unique(x$event))
|
||||||
|
#tmp <- lapply(unique(dato$trace), function(x) unique(dato[dato$trace == x, "event"]))
|
||||||
|
|
||||||
ids <- sapply(tmp, length) == 3
|
ids <- sapply(tmp, length) == 3
|
||||||
do.call(rbind, tmp[ids])
|
tmp2 <- as.data.frame(do.call(rbind, tmp[ids]))
|
||||||
|
names(tmp2) <- c("flipCard", "openTopic", "openPopup")
|
||||||
|
|
||||||
# TODO:
|
table(tmp2$flipCard)
|
||||||
# fragmentary traces: for 4591 openTopic for topic 1 is in the raw log files, but gets
|
table(tmp2$openTopic)
|
||||||
# probably removed in close_events(); how can I prevent that? How can I fix
|
table(tmp2$openPopup)
|
||||||
# the traces and eventIds that do not match correctly ???
|
|
||||||
ct <- c(4591, 5937, 7080, 8412, 8279)
|
frag_ids <- which(tmp2$openTopic == "openPopup")
|
||||||
datrm[datrm$trace %in% ct, 1:10]
|
|
||||||
|
tmp3 <- dat[dat$trace %in% rownames(tmp2)[frag_ids], ]
|
||||||
|
|
||||||
|
tmp4 <- tmp3[!tmp3$glossar == 1, ]
|
||||||
|
|
||||||
|
dat6 <- rbind(dat[!dat$trace %in% rownames(tmp2)[frag_ids], ], tmp4)
|
||||||
|
|
||||||
### WHY?????
|
|
||||||
|
|
||||||
alog <- activitylog(dat,
|
alog <- activitylog(dat,
|
||||||
case_id = "trace",
|
case_id = "trace",
|
||||||
|
Loading…
Reference in New Issue
Block a user