From f5d42cb0d3051e20ddc3c3b3dc49b4c5d7b57506 Mon Sep 17 00:00:00 2001 From: "Thomas F. K. Jorna" Date: Wed, 7 May 2025 15:15:36 +0200 Subject: [PATCH 01/36] feat(serialization): add first pass at serialization --- core/lib/editor/htm.ts | 58 + core/lib/editor/prosemirror-rehype.ts | 5 + core/lib/editor/renderToHTML.tsx | 344 ++++++ core/lib/editor/to-html.test.ts | 41 + core/package.json | 4 + packages/context-editor/src/schemas/image.ts | 1 + pnpm-lock.yaml | 1082 +++++++++++++++--- 7 files changed, 1368 insertions(+), 167 deletions(-) create mode 100644 core/lib/editor/htm.ts create mode 100644 core/lib/editor/prosemirror-rehype.ts create mode 100644 core/lib/editor/renderToHTML.tsx create mode 100644 core/lib/editor/to-html.test.ts diff --git a/core/lib/editor/htm.ts b/core/lib/editor/htm.ts new file mode 100644 index 0000000000..d902f4cb08 --- /dev/null +++ b/core/lib/editor/htm.ts @@ -0,0 +1,58 @@ +import { baseSchema } from "context-editor/schemas"; +import { JSDOM } from "jsdom"; +import { DOMParser, DOMSerializer, Node } from "prosemirror-model"; +import rehypeFormat from "rehype-format"; +import rehypeParse from "rehype-parse"; +import rehypeStringify from "rehype-stringify"; +import { unified } from "unified"; + +export const renderNodeToHTML = (node: Record): string => { + const base = Node.fromJSON(baseSchema, node); + + const dom = new JSDOM(); + const document = dom.window.document; + + const fragment = DOMSerializer.fromSchema(baseSchema).serializeFragment(base.content, { + document, + }); + + const container = document.createElement("div"); + container.appendChild(fragment); + + return container.innerHTML; +}; + +export const fromHTMLToNode = (html: string) => { + const dom = new JSDOM(html); + + const node = DOMParser.fromSchema(baseSchema).parse(dom.window.document); + + return node; +}; + +export const processEditorHTML = ( + html: string, + opts?: { + plugins?: any[]; + settings?: { + fragment?: boolean; + pretty?: boolean; + }; + } +) => { + const processor = unified().use(rehypeParse, opts?.settings); + + if (opts?.settings?.pretty) { + processor.use(rehypeFormat); + } + if (opts?.plugins) { + opts.plugins.forEach((plugin) => { + processor.use(plugin); + }); + } + + return { + html: async () => String(await processor.use(rehypeStringify).process(html)), + processor, + }; +}; diff --git a/core/lib/editor/prosemirror-rehype.ts b/core/lib/editor/prosemirror-rehype.ts new file mode 100644 index 0000000000..cf74612a8c --- /dev/null +++ b/core/lib/editor/prosemirror-rehype.ts @@ -0,0 +1,5 @@ +import { baseSchema } from "context-editor/schemas"; +import { MarkdownExtension } from "prosemirror-remark"; +import { ProseMirrorUnified } from "prosemirror-unified"; + +export const prosemirrorUnified = new ProseMirrorUnified([new MarkdownExtension()]); diff --git a/core/lib/editor/renderToHTML.tsx b/core/lib/editor/renderToHTML.tsx new file mode 100644 index 0000000000..f79b1e2c04 --- /dev/null +++ b/core/lib/editor/renderToHTML.tsx @@ -0,0 +1,344 @@ +/* eslint-disable no-underscore-dangle, react/no-array-index-key, react/prop-types, global-require */ +import "server-only"; + +import fs from "fs"; +import path from "path"; + +import React from "react"; +import { editorSchema, renderStatic } from "components/Editor"; +import ReactDOMServer from "react-dom/server"; +import { DocJson } from "types"; + +import { intersperse, unique } from "utils/arrays"; + +import { renderNotesForListing } from "../../../utils/notes"; +import SimpleNotesList from "./SimpleNotesList"; +import { NotesData, PubMetadata } from "./types"; +import { digestCitation, getAffiliations, getDedupedAffliations } from "./util"; + +const nonExportableNodeTypes = ["discussion"]; +const katexCdnPrefix = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.13.18/"; + +// This script is provided by the "cjk-fonts" Web Fonts project that we manage from here: +// https://fonts.adobe.com/my_fonts#web_projects-section +const loadCjkFontsScript = ` +(function(d) { + var config = { + kitId: 'seb8nix', + scriptTimeout: 3000, + async: true + }, + h=d.documentElement,t=setTimeout(function(){h.className=h.className.replace(/\bwf-loading\b/g,"")+" wf-inactive";},config.scriptTimeout),tk=d.createElement("script"),f=false,s=d.getElementsByTagName("script")[0],a;h.className+=" wf-loading";tk.src='https://use.typekit.net/'+config.kitId+'.js';tk.async=true;tk.onload=tk.onreadystatechange=function(){a=this.readyState;if(f||a&&a!="complete"&&a!="loaded")return;f=true;clearTimeout(t);try{Typekit.load(config)}catch(e){}};s.parentNode.insertBefore(tk,s) +})(document); +`; + +const createCss = () => { + const entrypoint = path.join(__dirname, "styles", "printDocument.scss"); + const cssPath = path.join(__dirname, "styles", "printDocument.css"); + // HACK(ian): We use node-sass to build a CSS bundle that is used by our HTML/PDF exports. + // Unfortunately, the export task runs in a thread managed by the worker_threads API, which + // node-sass does not support (see https://github.com/sass/node-sass/issues/2746). So we will + // just generate the bundle once per Heroku deploy and save it to a file. + if (!fs.existsSync(cssPath)) { + const sass = require("sass"); + const nodeModulesPath = path.join(process.env.PWD!, "node_modules"); + const clientPath = path.join(process.env.PWD!, "client"); + const entrypointContents = fs.readFileSync(entrypoint).toString(); + const data = "$PUBPUB_EXPORT: true;\n" + entrypointContents; + const css = sass + .renderSync({ + data, + includePaths: [nodeModulesPath, clientPath], + importer: (url) => { + if (url.startsWith("~")) { + return { file: path.join(nodeModulesPath, url.slice(1)) }; + } + return null; + }, + }) + .css.toString() + // Find all things like url(fonts/KaTeX_whatever) and replace them with a version that + // is loaded from an external CDN. + .replace( + /url\((fonts\/KaTeX_(?:[A-z0-9\-_]*?).(?:[A-z0-9]+))\)/g, + (_, fontPath) => `url(${katexCdnPrefix + fontPath})` + ); + fs.writeFileSync(cssPath, css); + } + return fs.readFileSync(cssPath).toString(); +}; + +const staticCss = createCss(); + +const filterNonExportableNodes = (nodes) => + nodes.filter((n) => !nonExportableNodeTypes.includes(n.type)); + +const addAttrsToNodes = (newAttrs, matchNodeTypes, nodes) => + nodes.map((node) => { + if (matchNodeTypes.includes(node.type)) { + return { + ...node, + attrs: { + ...node.attrs, + ...(typeof newAttrs === "function" ? newAttrs(node) || {} : newAttrs), + }, + }; + } + if (node.content) { + return { + ...node, + content: addAttrsToNodes(newAttrs, matchNodeTypes, node.content), + }; + } + return node; + }); + +const getCitationLinkage = (unstructuredValue, structuredValue, nodeId = null) => { + const digest = digestCitation(unstructuredValue, structuredValue); + return { + inlineElementId: `citation-${digest}${nodeId ? "-" + nodeId : ""}-inline`, + bottomElementId: `citation-${digest}-bottom`, + }; +}; + +const getFootnoteLinkage = (index) => { + return { + inlineElementId: `fn-${index}-inline`, + bottomElementId: `fn-${index}-bottom`, + }; +}; + +const addHrefsToNotes = (nodes) => { + let footnoteIndex = -1; + return addAttrsToNodes( + (node) => { + if (node.type === "citation") { + const { inlineElementId, bottomElementId } = getCitationLinkage( + node.attrs.unstructuredValue, + node.attrs.value, + node.attrs.id + ); + return { + href: `#${bottomElementId}`, + id: inlineElementId, + }; + } + if (node.type === "footnote") { + footnoteIndex++; + const { inlineElementId, bottomElementId } = getFootnoteLinkage(footnoteIndex); + return { + href: `#${bottomElementId}`, + id: inlineElementId, + }; + } + return {}; + }, + ["citation", "footnote"], + nodes + ); +}; + +const blankIframes = (nodes) => + addAttrsToNodes( + { + url: "data:text/html;charset=utf-8,%3Chtml%3E%3Cbody%20frameborder%3D%220%22%20style%3D%22background-color%3A%23ccc%3Bborder%3A0%3Btext-align%3Acenter%3B%22%3EVisit%20the%20web%20version%20of%20this%20article%20to%20view%20interactive%20content.%3C%2Fbody%3E%3C%2Fhtml%3E", + height: "50", + }, + ["iframe"], + nodes + ); + +const renderDetails = ({ updatedDateString, publishedDateString, doi, license, pubUrl }) => { + const showUpdatedDate = updatedDateString && updatedDateString !== publishedDateString; + return ( + <> + {showUpdatedDate && ( +
+ Updated on: {updatedDateString} +
+ )} + {doi ? ( +
+ DOI: + {`https://doi.org/${doi}`} +
+ ) : ( +
+ URL: + {pubUrl} +
+ )} + {license && ( +
+ License:  + + {license.full} {license.summary && `(${license.summary})`} + +
+ )} + + ); +}; + +const getHeadingItems = (metadata: PubMetadata) => { + const { primaryCollectionKind, primaryCollectionTitle, publisher, communityTitle } = metadata; + if (primaryCollectionKind === "book" || primaryCollectionKind === "conference") { + // For books and conferences, prefer showing the publisher string to the Community title + return [publisher || communityTitle, primaryCollectionTitle]; + } + return [communityTitle, primaryCollectionTitle]; +}; + +const renderHeadingItems = (metadata: PubMetadata) => { + const items = unique(getHeadingItems(metadata).filter((x): x is string => !!x)); + return intersperse(items, " • "); +}; + +const renderFrontMatter = (metadata: PubMetadata) => { + const { + updatedDateString, + publishedDateString, + doi, + title, + pubUrl, + accentColor, + attributions, + publisher, + license, + } = metadata; + + const affiliations = getDedupedAffliations(attributions); + + return ( +
+

{renderHeadingItems(metadata)}

+

+ {title} +

+ {attributions.length > 0 && ( +
+

+ {attributions.map((attr, index) => { + const { + user: { fullName }, + } = attr; + const affs = getAffiliations(attr); + return ( + + {fullName} + {affs?.length > 0 && + affs.map((affiliation, affIndex) => ( + + {1 + affiliations.indexOf(affiliation)} + {affs.length > 1 && + affIndex < affs.length - 1 && + ","} + + ))} + + ); + })} +

+ {affiliations.length > 0 && ( +
+ {affiliations.map((aff, index) => ( + + {index + 1} + {aff} + {index < affiliations.length - 1 && ", "} + + ))} +
+ )} +
+ )} + {publisher &&

{publisher}

} +
+ {publishedDateString && ( +
+ Published on: {publishedDateString} +
+ )} + {renderDetails({ + updatedDateString, + publishedDateString, + doi, + pubUrl, + license, + })} +
+
+ ); +}; + +type RenderStaticHtmlOptions = { + pubDoc: DocJson; + pubMetadata: PubMetadata; + notesData: NotesData; + pagedTarget: boolean; +}; + +export const renderStaticHtml = async (options: RenderStaticHtmlOptions) => { + const { pubDoc, pubMetadata, notesData, pagedTarget } = options; + const { title, nodeLabels, citationInlineStyle } = pubMetadata; + const { footnotes, citations, noteManager, renderedStructuredValues } = notesData; + + const renderedNotes = renderNotesForListing({ + footnotes, + citations, + citationInlineStyle, + renderedStructuredValues, + }); + + const renderableNodes = pagedTarget + ? [filterNonExportableNodes, addHrefsToNotes, blankIframes] + .filter((x): x is (nodes: any) => any => !!x) + .reduce((nodes, fn) => fn(nodes), pubDoc.content) + : [filterNonExportableNodes, addHrefsToNotes] + .filter((x): x is (nodes: any) => any => !!x) + .reduce((nodes, fn) => fn(nodes), pubDoc.content); + + const docContent = renderStatic({ + schema: editorSchema, + doc: { type: "doc", content: renderableNodes }, + noteManager, + nodeLabels, + }); + + return ReactDOMServer.renderToStaticMarkup( + + + {title} + +