/** * * make screenshots and log errors to * console * (c) 2019 - Leibniz-Insitut für Wissensmedien * */ const puppeteer = require('puppeteer'); const fs_bare = require("fs") // required for fs-extra const fs = require("fs-extra") const path = require("path") const start_dir = process.cwd() const start_file = path.join(start_dir,"lib","index.html") const start_file_uri = path.join("file:///", start_file ) // define events and log them const events = ["error","pageerror"] function logPageEvent(event){ if (event !== undefined){ console.log(event.toString()) } } async function makeScreenshot(href){ const browser = await puppeteer.launch({args: [ '–allow-file-access-from-files', ],}); const page = await browser.newPage(); await page.setViewport({width: 1024,height : 624}) // register events for (var i = 0; i < events.length; i++) { page.on(events[i],logPageEvent) } page.once("load",logPageEvent) await page.goto(href) href = href.replace("file:///","") const fname = path.parse(href).name let fpath if (fname != "index"){ image_url = href.replace(fname + ".html" ,"thumbnails/" + fname + ".png") fpath = href.replace(fname + ".html", "thumbnails") } else{ image_url = href.replace(fname + ".html" ,"thumbnail.png") fpath = href.replace(fname + ".html", "") } // image_url = image_url.replace("file:///","") // fpath = fpath.replace("file:///","") page.removeAllListeners() fs.ensureDir(fpath) await page.screenshot({path: image_url}); await browser.close(); } /** * * collect all navigational links in all documents * * */ async function collectLinks(href,reflist) { const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.goto(href) let hrefs = await page.$$('a.wrapper') for (var i=0; i < hrefs.length; i++) { let hrefValue = await hrefs[i].getProperty('href') let linkText = await hrefValue.jsonValue(); if (!linkText.startsWith("file:")) { continue; } if(linkText.endsWith("#")) continue; if(linkText.endsWith("index.html")){ await collectLinks(linkText,reflist) } reflist.push(linkText) } await browser.close() } (async function(){ var reflist = [] let linkText = "file:///" + start_file.replace(/\\/g,"/") reflist.push(linkText) await collectLinks(start_file_uri,reflist) // sort by path length to get depth first reflist.sort(function(a,b){ let al = a.split("/").length let bl = b.split("/").length if (al < bl) {return 1 } if (al > bl) {return -1 } if (al == bl) {return 0 } }) for (var i=0;i