From 27db872311ed5e6e8664eba4d1b8becc72db0c4f Mon Sep 17 00:00:00 2001 From: Damien Chantelouve Date: Fri, 29 Mar 2024 15:40:32 +0100 Subject: [PATCH 1/5] Add support for inner content / text for blocks that don't have inner blocks --- index.html | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/index.html b/index.html index a966d4e..b0e31b4 100644 --- a/index.html +++ b/index.html @@ -128,6 +128,20 @@

WPHTML Converter

* @returns {string} */ const parseBlock = (block) => { + + // Add support for RichText + const innerHTML = block?.innerHTML; + const innerBlocks = block?.innerBlocks; + let innerContentAttr = block.blockName === 'core/button' ? 'text' : 'content'; + + // Add content only if there is no innerBlocks & some innerHTML + if ( innerHTML && !innerBlocks?.length ) { + + // only innerText is needed + const innerText = new DOMParser().parseFromString(innerHTML, "text/html").body.textContent || ""; + block.attrs[innerContentAttr] = innerText; + } + let data = `['${block.blockName}',${JSON.stringify(block.attrs, null, "")},[`; block.innerBlocks?.forEach((innerBlock) => { From 4d69e84c0dcc1c4d028113ab5c3ef77c248b399a Mon Sep 17 00:00:00 2001 From: Damien Chantelouve Date: Fri, 29 Mar 2024 15:44:51 +0100 Subject: [PATCH 2/5] Fix indent using tabs --- index.html | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/index.html b/index.html index b0e31b4..d1edec0 100644 --- a/index.html +++ b/index.html @@ -129,19 +129,19 @@

WPHTML Converter

*/ const parseBlock = (block) => { - // Add support for RichText - const innerHTML = block?.innerHTML; - const innerBlocks = block?.innerBlocks; - let innerContentAttr = block.blockName === 'core/button' ? 'text' : 'content'; - - // Add content only if there is no innerBlocks & some innerHTML - if ( innerHTML && !innerBlocks?.length ) { - - // only innerText is needed - const innerText = new DOMParser().parseFromString(innerHTML, "text/html").body.textContent || ""; - block.attrs[innerContentAttr] = innerText; - } - + // Add support for RichText + const innerHTML = block?.innerHTML; + const innerBlocks = block?.innerBlocks; + let innerContentAttr = block.blockName === 'core/button' ? 'text' : 'content'; + + // Add content only if there is no innerBlocks & some innerHTML + if (innerHTML && !innerBlocks?.length) { + + // only innerText is needed + const innerText = new DOMParser().parseFromString(innerHTML, "text/html").body.textContent || ""; + block.attrs[ innerContentAttr ] = innerText; + } + let data = `['${block.blockName}',${JSON.stringify(block.attrs, null, "")},[`; block.innerBlocks?.forEach((innerBlock) => { From 05e823bbc690cb9b0292eae129b6b34ebf8c83e0 Mon Sep 17 00:00:00 2001 From: Damien Chantelouve Date: Fri, 29 Mar 2024 16:49:42 +0100 Subject: [PATCH 3/5] Add inner html support (filter some elements to prevent issues) --- index.html | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/index.html b/index.html index d1edec0..9afe8eb 100644 --- a/index.html +++ b/index.html @@ -130,16 +130,45 @@

WPHTML Converter

const parseBlock = (block) => { // Add support for RichText - const innerHTML = block?.innerHTML; + let innerHTML = block?.innerHTML; const innerBlocks = block?.innerBlocks; let innerContentAttr = block.blockName === 'core/button' ? 'text' : 'content'; + // Ideally we should get those infos from each block.json but we hardcode it for simplicity + let blockTagNames = ''; + switch (block.blockName) { + case 'core/heading': + blockTagNames = 'h1,h2,h3,h4,h5,h6'; + break; + + case 'core/paragraph': + blockTagNames = 'p'; + break; + + case 'core/button': + blockTagNames = 'div,a'; + break; + + default: + break; + } + const unAllowedTags = [ + 'div', + 'script', + 'style', + ]; + // Add content only if there is no innerBlocks & some innerHTML if (innerHTML && !innerBlocks?.length) { - // only innerText is needed - const innerText = new DOMParser().parseFromString(innerHTML, "text/html").body.textContent || ""; - block.attrs[ innerContentAttr ] = innerText; + // Remove root tags from blocks + const blockTags = blockTagNames.split(','); + blockTags.map(tag => innerHTML = removeRootTag(tag, innerHTML)); + + // Remove unallowed tags + unAllowedTags.forEach(tag => innerHTML = removeTag(tag, innerHTML)); + + block.attrs[ innerContentAttr ] = innerHTML; } let data = `['${block.blockName}',${JSON.stringify(block.attrs, null, "")},[`; @@ -153,6 +182,32 @@

WPHTML Converter

return data; }; + /** + * Remove root tag from html (helper taken from RichText code) + * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L291 + */ + const removeRootTag = ( tag, html ) => { + const openingTagRegexp = RegExp( '^<' + tag + '[^>]*>', 'gim' ); + const closingTagRegexp = RegExp( '</' + tag + '>$', 'gim' ); + + return html + .replace( openingTagRegexp, '' ) + .replace( closingTagRegexp, '' ); + } + + /** + * Remove specific tag from html (helper taken from RichText code) + * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L300 + */ + const removeTag = ( tag, html ) => { + const openingTagRegexp = RegExp( '<' + tag + '>', 'gim' ); + const closingTagRegexp = RegExp( '</' + tag + '>', 'gim' ); + + return html + .replace( openingTagRegexp, '' ) + .replace( closingTagRegexp, '' ); + } + /** * Convert WPHTML from the page's textarea into a string representation of * the JavaScript object. From 38d27ba3b43a12e4176f2891d847b91aa5e88129 Mon Sep 17 00:00:00 2001 From: Damien Chantelouve Date: Fri, 29 Mar 2024 16:50:35 +0100 Subject: [PATCH 4/5] Fix indent --- index.html | 108 ++++++++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/index.html b/index.html index 9afe8eb..2e11b45 100644 --- a/index.html +++ b/index.html @@ -134,39 +134,39 @@

WPHTML Converter

const innerBlocks = block?.innerBlocks; let innerContentAttr = block.blockName === 'core/button' ? 'text' : 'content'; - // Ideally we should get those infos from each block.json but we hardcode it for simplicity - let blockTagNames = ''; - switch (block.blockName) { - case 'core/heading': - blockTagNames = 'h1,h2,h3,h4,h5,h6'; - break; - - case 'core/paragraph': - blockTagNames = 'p'; - break; - - case 'core/button': - blockTagNames = 'div,a'; - break; - - default: - break; - } - const unAllowedTags = [ - 'div', - 'script', - 'style', - ]; + // Ideally we should get those infos from each block.json but we hardcode it for simplicity + let blockTagNames = ''; + switch (block.blockName) { + case 'core/heading': + blockTagNames = 'h1,h2,h3,h4,h5,h6'; + break; + + case 'core/paragraph': + blockTagNames = 'p'; + break; + + case 'core/button': + blockTagNames = 'div,a'; + break; + + default: + break; + } + const unAllowedTags = [ + 'div', + 'script', + 'style', + ]; // Add content only if there is no innerBlocks & some innerHTML if (innerHTML && !innerBlocks?.length) { - // Remove root tags from blocks - const blockTags = blockTagNames.split(','); - blockTags.map(tag => innerHTML = removeRootTag(tag, innerHTML)); - - // Remove unallowed tags - unAllowedTags.forEach(tag => innerHTML = removeTag(tag, innerHTML)); + // Remove root tags from blocks + const blockTags = blockTagNames.split(','); + blockTags.map(tag => innerHTML = removeRootTag(tag, innerHTML)); + + // Remove unallowed tags + unAllowedTags.forEach(tag => innerHTML = removeTag(tag, innerHTML)); block.attrs[ innerContentAttr ] = innerHTML; } @@ -182,31 +182,31 @@

WPHTML Converter

return data; }; - /** - * Remove root tag from html (helper taken from RichText code) - * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L291 - */ - const removeRootTag = ( tag, html ) => { - const openingTagRegexp = RegExp( '^<' + tag + '[^>]*>', 'gim' ); - const closingTagRegexp = RegExp( '</' + tag + '>$', 'gim' ); - - return html - .replace( openingTagRegexp, '' ) - .replace( closingTagRegexp, '' ); - } - - /** - * Remove specific tag from html (helper taken from RichText code) - * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L300 - */ - const removeTag = ( tag, html ) => { - const openingTagRegexp = RegExp( '<' + tag + '>', 'gim' ); - const closingTagRegexp = RegExp( '</' + tag + '>', 'gim' ); - - return html - .replace( openingTagRegexp, '' ) - .replace( closingTagRegexp, '' ); - } + /** + * Remove root tag from html (helper taken from RichText code) + * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L291 + */ + const removeRootTag = ( tag, html ) => { + const openingTagRegexp = RegExp( '^<' + tag + '[^>]*>', 'gim' ); + const closingTagRegexp = RegExp( '</' + tag + '>$', 'gim' ); + + return html + .replace( openingTagRegexp, '' ) + .replace( closingTagRegexp, '' ); + } + + /** + * Remove specific tag from html (helper taken from RichText code) + * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L300 + */ + const removeTag = ( tag, html ) => { + const openingTagRegexp = RegExp( '<' + tag + '>', 'gim' ); + const closingTagRegexp = RegExp( '</' + tag + '>', 'gim' ); + + return html + .replace( openingTagRegexp, '' ) + .replace( closingTagRegexp, '' ); + } /** * Convert
WPHTML from the page's textarea into a string representation of From 95154ac2c8a6be84968ccf8fbed24fc6e66320c0 Mon Sep 17 00:00:00 2001 From: Damien Chantelouve Date: Fri, 29 Mar 2024 16:51:17 +0100 Subject: [PATCH 5/5] Fix indent --- index.html | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/index.html b/index.html index 2e11b45..b1d24c4 100644 --- a/index.html +++ b/index.html @@ -183,22 +183,22 @@

WPHTML Converter

}; /** - * Remove root tag from html (helper taken from RichText code) - * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L291 - */ + * Remove root tag from html (helper taken from RichText code) + * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L291 + */ const removeRootTag = ( tag, html ) => { - const openingTagRegexp = RegExp( '^<' + tag + '[^>]*>', 'gim' ); - const closingTagRegexp = RegExp( '</' + tag + '>$', 'gim' ); + const openingTagRegexp = RegExp( '^<' + tag + '[^>]*>', 'gim' ); + const closingTagRegexp = RegExp( '</' + tag + '>$', 'gim' ); - return html - .replace( openingTagRegexp, '' ) - .replace( closingTagRegexp, '' ); + return html + .replace( openingTagRegexp, '' ) + .replace( closingTagRegexp, '' ); } /** - * Remove specific tag from html (helper taken from RichText code) - * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L300 - */ + * Remove specific tag from html (helper taken from RichText code) + * @link https://github.com/WordPress/gutenberg/blob/32a90263edaa9f6e6f0a9675722febeb2055143a/packages/block-editor/src/components/rich-text/native/index.native.js#L300 + */ const removeTag = ( tag, html ) => { const openingTagRegexp = RegExp( '<' + tag + '>', 'gim' ); const closingTagRegexp = RegExp( '</' + tag + '>', 'gim' );