2019-05-31 13:57:43 +02:00
|
|
|
|
/**
|
2019-06-06 16:17:58 +02:00
|
|
|
|
*
|
|
|
|
|
* make screenshots and log errors to
|
2019-05-31 13:57:43 +02:00
|
|
|
|
* console
|
|
|
|
|
* (c) 2019 - Leibniz-Institut für Wissensmedien
|
2019-06-06 16:17:58 +02:00
|
|
|
|
*
|
2019-05-31 13:57:43 +02:00
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
const puppeteer = require('puppeteer');
const fs_bare = require("fs") // required for fs-extra
const fs = require("fs-extra")
const path = require("path")
const { fileURLToPath, pathToFileURL } = require("url")
|
|
|
|
|
// Directory the script was started from.
const start_dir = process.cwd();

// File system path of the entry document: <start_dir>/lib/index.html.
const start_file = path.join(start_dir, "lib", "index.html");

// The entry document as a file: URL.
// pathToFileURL is used rather than path.join, because path.join treats
// "file:///" like a directory name: it collapses the slashes (and switches to
// backslashes on Windows), so the result is not a well-formed URL.
const start_file_uri = pathToFileURL(start_file).href;

// Page events that makeScreenshot forwards to logPageEvent.
const events = ["error", "pageerror"];
|
|
|
|
|
/**
 * Write the payload of a page event to the console.
 *
 * Events that carry no payload are ignored. Both `undefined` and `null` are
 * treated as "no payload", so a null payload does not raise a TypeError when
 * it is converted to a string.
 *
 * @param {*} event - value passed to the listener by the event emitter
 */
function logPageEvent(event) {
  if (event == null) {
    return;
  }
  console.log(event.toString());
}
|
|
|
|
|
|
|
|
|
|
/**
 * Work out where the thumbnail for a document is stored.
 *
 * - `<dir>/index.html`  ->  `<dir>/thumbnail.png`
 * - `<dir>/<name>.html` ->  `<dir>/thumbnails/<name>.png`
 *
 * The URL is converted with fileURLToPath so that the result is an absolute,
 * percent-decoded path on every platform, and the target is built with path
 * functions so that only the file name (never a directory that happens to
 * contain the same text) is replaced.
 *
 * @param {string} href - file: URL of the document
 * @returns {{dir: string, image: string}} directory that has to exist and
 *   the full path of the image file
 */
function thumbnailTarget(href) {
  const file = fileURLToPath(href);
  const dir = path.dirname(file);
  const name = path.parse(file).name;

  if (name === "index") {
    return { dir, image: path.join(dir, "thumbnail.png") };
  }

  const thumbnails = path.join(dir, "thumbnails");
  return { dir: thumbnails, image: path.join(thumbnails, `${name}.png`) };
}

/**
 * Open a document in a headless browser, log page errors to the console and
 * store a 1024x624 screenshot next to the document.
 *
 * @param {string} href - file: URL of the document to capture
 * @returns {Promise<void>} resolves once the image is written and the
 *   browser is closed
 */
async function makeScreenshot(href) {
  const { dir, image } = thumbnailTarget(href);

  const browser = await puppeteer.launch({
    // The switch needs two ASCII hyphens; with a typographic dash the
    // browser does not recognise it.
    args: ["--allow-file-access-from-files"],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1024, height: 624 });

    // register events
    for (const name of events) {
      page.on(name, logPageEvent);
    }
    page.once("load", logPageEvent);

    await page.goto(href);
    page.removeAllListeners();

    // The directory has to exist before the screenshot is written, so the
    // promise must be awaited.
    await fs.ensureDir(dir);
    await page.screenshot({ path: image });
  } finally {
    // Close the browser even when loading or capturing failed.
    await browser.close();
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collect all navigational links in all documents.
 *
 * Opens `href`, reads the target of every `a.wrapper` element and appends
 * each local (file:) target to `reflist`. Targets ending in "index.html" are
 * followed recursively.
 *
 * A target that is already in `reflist` is skipped. Each target is added
 * before it is followed, so documents that link to each other cannot cause
 * endless recursion, and no document is listed twice.
 *
 * @param {string} href - URL of the document to scan
 * @param {string[]} reflist - list the found URLs are appended to (mutated)
 * @returns {Promise<void>}
 */
async function collectLinks(href, reflist) {
  const browser = await puppeteer.launch();
  let targets;
  try {
    const page = await browser.newPage();
    await page.goto(href);
    targets = await page.$$eval("a.wrapper", (anchors) =>
      anchors.map((anchor) => anchor.href)
    );
  } finally {
    // Close the browser before descending, so that only one browser is open
    // at a time and none is left behind when loading fails.
    await browser.close();
  }

  for (const linkText of targets) {
    if (typeof linkText !== "string") continue;
    if (!linkText.startsWith("file:")) continue;
    if (linkText.endsWith("#")) continue;
    if (reflist.includes(linkText)) continue;

    reflist.push(linkText);
    if (linkText.endsWith("index.html")) {
      await collectLinks(linkText, reflist);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// Entry point: collect every document reachable from the start file and
// take a screenshot of each of them, one after the other.
(async function () {
  const reflist = [];
  const linkText = "file:///" + start_file.replace(/\\/g, "/");
  reflist.push(linkText);
  await collectLinks(start_file_uri, reflist);

  // Sort by number of path segments, deepest documents first.
  reflist.sort((a, b) => b.split("/").length - a.split("/").length);

  for (const [i, ref] of reflist.entries()) {
    await makeScreenshot(ref);
    console.log(i, ref);
  }
})().catch((err) => {
  // Without a handler a failure is only an unhandled rejection; report it
  // and make the process exit with a non-zero status.
  console.error(err);
  process.exitCode = 1;
});
|