From a4ef41ceffa0fbb95cc26912c1eebd259624ea24 Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Fri, 8 May 2020 20:10:20 -0700 Subject: [PATCH] Trigger builds when includes are modified (#60) This triggers a build if any of the includes of a spec are modified by a pull request. This is in preparation for adding support for Bikeshed includes. ReSpec includes should already be supported. --- lib/mixins/fetchable.js | 4 ++ lib/models/pr.js | 14 +++-- lib/scan-includes.js | 111 ++++++++++++++++++++++++++++++++++++++++ lib/services.js | 6 +++ test/scan-includes.js | 87 +++++++++++++++++++++++++++++++ 5 files changed, 218 insertions(+), 4 deletions(-) create mode 100644 lib/scan-includes.js create mode 100644 test/scan-includes.js diff --git a/lib/mixins/fetchable.js b/lib/mixins/fetchable.js index 9d00a0a..0e114b2 100644 --- a/lib/mixins/fetchable.js +++ b/lib/mixins/fetchable.js @@ -65,6 +65,10 @@ module.exports = (superclass) => class extends superclass { return `https://raw.githubusercontent.com/${ this.file_path }`; } + get github_repo_url() { + return `https://raw.githubusercontent.com/${ this.owner }/${ this.repo }/${ this.sha }/`; + } + get cdn_url() { // This is necessary to serve content with correct HTTP headers return `https://rawcdn.githack.com/${ this.file_path }`; } diff --git a/lib/models/pr.js b/lib/models/pr.js index 179b9a3..8361f9e 100644 --- a/lib/models/pr.js +++ b/lib/models/pr.js @@ -11,6 +11,7 @@ const Config = require("./config"); const GithubAPI = require("../github-api"); const GH_API = require("../GH_API"); const WattsiClient = require("../wattsi-client"); +const scanIncludes = require("../scan-includes"); class PR { constructor(id, installation) { @@ -34,6 +35,7 @@ class PR { return this.setupApi(options) .then(_ => this.requestConfig()) .then(_ => this.requestPR()) + .then(_ => this.requestIncludes()) .then(_ => this.requestMergeBase()) .then(_ => this.getPreviewFiles()); } @@ -201,13 +203,17 @@ class PR { return !(//.test(this.body)); } + async requestIncludes() { + this.includes = await scanIncludes(Head.fromPR(this).github_repo_url, this.config.src_file, this.config.type); + } + touchesSrcFile() { - let relFiles = this.relevantSrcFiles; - return this.files.some(f => relFiles.indexOf(f.filename) > -1); + let relFiles = this.getRelevantSrcFiles(); + return this.files.some(f => relFiles.includes(f.filename)); } - get relevantSrcFiles() { - return [this.config.src_file]; + getRelevantSrcFiles() { + return [this.config.src_file, ...(this.includes || [])]; } getPreviewFiles() { diff --git a/lib/scan-includes.js b/lib/scan-includes.js new file mode 100644 index 0000000..4e41599 --- /dev/null +++ b/lib/scan-includes.js @@ -0,0 +1,111 @@ +const realFetchUrl = require("./fetch-url"); +const GITHUB_SERVICE = require("./services").GITHUB; + +/** Resolves path relative to scopeUrl, with the restriction that path can't use + * any components that might go "above" the base URL. + * + * @param {string} scopeUrl + * @param {string} path + * + * @returns {string|undefined} `undefined` if either URL can't be parsed, or if + * the path navigated "up" in any way. + */ +function resolveSubUrl(scopeUrl, path) { + if (!scopeUrl.endsWith('/')) { + throw new Error(`${JSON.stringify(scopeUrl)} isn't a good scope URL.`) + } + if (path.startsWith('/') || + path.includes('../') || + // Block schemes, a bit more aggressively than necessary. + path.includes(':')) { + return undefined; + } + + let resolvedUrl; + try { + resolvedUrl = new URL(path, scopeUrl).href; + } catch { + return undefined; + } + if (!resolvedUrl.startsWith(scopeUrl)) { + throw new Error(`Path restrictions in resolveSubUrl(${JSON.stringify(scopeUrl)}, ${JSON.stringify(path)}) failed to catch an upward navigation.`); + } + return resolvedUrl; + +} + +/** Scans for recursively included paths from a root specification. + * + * @param {string} repositoryUrl The base URL of the repository. The function + * won't fetch anything outside of this scope. This must end with a `/`. + * @param {string} rootSpec A path relative to the `repositoryUrl` pointing to + * the root specification in the `specType` format. + * @param {"bikeshed"|"respec"|"html"|"wattsi"} specType + * + * @returns {Promise>} Paths relative to `repositoryUrl` that are + * recursively included by the specification, not including the specification itself. + */ +module.exports = async function scanIncludes(repositoryUrl, rootSpec, specType, fetchUrl = realFetchUrl) { + if (!repositoryUrl.endsWith('/')) { + throw new Error(`${repositoryUrl} must end with a '/'.`); + } + let includeRE; + switch (specType) { + case "bikeshed": + // https://tabatkins.github.io/bikeshed/#including + includeRE = /^path:([^\n]+)$/gm; + break; + case "respec": + // https://github.com/w3c/respec/wiki/data--include + includeRE = /data-include=["']?([^"'>]+)["']?/g; + break; + case "html": + case "wattsi": + // We don't support include scanning in pure HTML or Wattsi specs. + return new Set(); + default: + throw new Error(`Unexpected specification type: ${JSON.stringify(specType)}`); + } + + /** All the paths that successfully fetched go here. This is the result of + * the overall scan. */ + const recursiveIncludes = new Set(); + /** All the files we ever fetched, for the purpose of short-circuiting. */ + const everFetched = new Set(); + + /** Fetches path relative to repositoryUrl, adds it to recursiveIncludes if + * the fetch worked, and then recursively scans any includes found in the + * response. + */ + async function scanOneFile(path) { + if (everFetched.has(path)) return; + everFetched.add(path); + + const resolvedUrl = resolveSubUrl(repositoryUrl, path); + if (resolvedUrl === undefined) return; + + let body; + try { + body = await fetchUrl(resolvedUrl, GITHUB_SERVICE); + console.log(`scanIncludes: Fetched ${resolvedUrl}.`) + } catch (err) { + // This include scan can have false positives, so if one + // fails to fetch, we assume it's not a real include and + // just ignore the fetch failure. + console.log(`scanIncludes: Fetching ${resolvedUrl} failed: ${err.stack || JSON.stringify(err)}`) + return; + } + recursiveIncludes.add(path); + // Intentionally serial since Node's treatment of connection pooling is + // hard to figure out from the documentation: + for (const match of body.matchAll(includeRE)) { + await scanOneFile(match[1].trim()); + } + } + console.log(`scanIncludes: Recursively scanning ${rootSpec} in ${repositoryUrl} for includes.`) + await scanOneFile(rootSpec); + + recursiveIncludes.delete(rootSpec); + console.log(`scanIncludes: Found includes: ${JSON.stringify([...recursiveIncludes])}`) + return [...recursiveIncludes].sort(); +} diff --git a/lib/services.js b/lib/services.js index b925c6d..bae766e 100644 --- a/lib/services.js +++ b/lib/services.js @@ -21,3 +21,9 @@ exports.WATTSI = { description: "Wattsi Server is the web service used to build the WHATWG HTML spec.", url: "https://github.com/domenic/wattsi-server" }; + +exports.GITHUB = { + name: "GitHub", + description: "Files hosted on GitHub are recursively fetched and scanned in order to find includes.", + url: "https://www.github.com/" +}; diff --git a/test/scan-includes.js b/test/scan-includes.js new file mode 100644 index 0000000..1f58f04 --- /dev/null +++ b/test/scan-includes.js @@ -0,0 +1,87 @@ +const assert = require('assert'); +const scanIncludes = require('../lib/scan-includes'); +const GITHUB_SERVICE = require("../lib/services").GITHUB; + +function fakeFetch(results) { + return async function fetch(url, service) { + assert.equal(service, GITHUB_SERVICE); + await new Promise(resolve => setImmediate(resolve)); + const result = results[url]; + assert(result); + if (result.body !== undefined) { + return result.body; + } + throw new Error(result.error); + } +} + +suite('scanIncludes', function () { + test('No includes', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "spec.bs", "bikeshed", fakeFetch({ + "https://github.example/repo/spec.bs": { body: "bikeshed stuff" }, + })), + []); + }); + test('None succeed', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "spec.bs", "bikeshed", fakeFetch({ + "https://github.example/repo/spec.bs": { error: "" }, + })), + []); + }); + test('3 levels', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "spec.bs", "bikeshed", fakeFetch({ + "https://github.example/repo/spec.bs": { body: "path: helper.inc" }, + "https://github.example/repo/helper.inc": { body: "path: helper2.inc\njunk\npath: doesntexist.inc" }, + "https://github.example/repo/helper2.inc": { body: "path: helper3.inc " }, + "https://github.example/repo/helper3.inc": { body: "" }, + "https://github.example/repo/doesntexist.inc": { error: "404" }, + })), + [ + "helper.inc", + "helper2.inc", + "helper3.inc", + ]); + }); + test('Outside repository', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "spec.bs", "bikeshed", fakeFetch({ + "https://github.example/repo/spec.bs": { body: "path: ../helper.inc\npath: //otherserver.example/helper2.inc\nhttps://yet.another.server.example/helper3.inc" }, + "https://github.example/helper.inc": { body: "path: https://github.example/repo/poison.inc" }, + "https://otherserver.example/helper2.inc": { body: "path: https://github.example/repo/poison.inc" }, + "https://yet.another.server.example/helper3.inc": { body: "path: https://github.example/repo/poison.inc" }, + "https://github.example/repo/poison.inc": { body: "Shouldn't fetch this." }, + })), + []); + }); + test('Bad URL bits inside repository', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "spec.bs", "bikeshed", fakeFetch({ + "https://github.example/repo/spec.bs": { body: "path: ../repo/poison.inc\npath: /repo/poison.inc\npath: //github.example/repo/poison.inc\npath: https://github.example/repo/poison.inc" }, + "https://github.example/repo/poison.inc": { body: "" }, + })), + []); + }); + test('include loop should terminate', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "spec.bs", "bikeshed", fakeFetch({ + "https://github.example/repo/spec.bs": { body: "path: loop.inc" }, + "https://github.example/repo/loop.inc": { body: "path: spec.bs" }, + })), + ["loop.inc"]); + }); + test('respec-style includes', async function () { + assert.deepStrictEqual(await scanIncludes("https://github.example/repo/", "index.html", "respec", fakeFetch({ + "https://github.example/repo/index.html": { + body: ` +
+
+
+
` }, + "https://github.example/repo/single.html": { body: "" }, + "https://github.example/repo/double.html": { body: "" }, + })), + [ + "double.html", + "single.html", + ]); + }); +});