diff --git a/scripts/concatMdByToc.js b/scripts/concatMdByToc.js index 0d8d9dbbe4fcc..aef3c1c7da628 100644 --- a/scripts/concatMdByToc.js +++ b/scripts/concatMdByToc.js @@ -126,7 +126,7 @@ const main = () => { let mergedStr = ""; fileList.forEach((filePath) => { - mergedStr += `${handleSingleMd(`.${filePath}`)}\n\n`; + mergedStr += `${handleSingleMd(`./${filePath}`)}\n\n`; }); const variables = JSON.parse( diff --git a/scripts/filterCloudDoc.js b/scripts/filterCloudDoc.js index 8211fcc0e1d38..6661002398883 100644 --- a/scripts/filterCloudDoc.js +++ b/scripts/filterCloudDoc.js @@ -1,5 +1,5 @@ import { - getAllMdList, + getAllCloudMdList, copySingleFileSync, copyFileWithCustomContentSync, copyDirectoryWithCustomContentSync, @@ -25,9 +25,9 @@ const extractFilefromList = ( }; const main = () => { - const filteredLinkList = getAllMdList("TOC-tidb-cloud.md"); + const allFilePaths = getAllCloudMdList(); - extractFilefromList(filteredLinkList, ".", "./tmp"); + extractFilefromList(allFilePaths, "./", "./tmp"); copySingleFileSync("TOC-tidb-cloud.md", "./tmp/TOC.md"); copyDirectoryWithCustomContentSync( "./tidb-cloud/", diff --git a/scripts/filterCloudInitFiles.js b/scripts/filterCloudInitFiles.js index e83a3ee3ba99c..06cc7781862f9 100644 --- a/scripts/filterCloudInitFiles.js +++ b/scripts/filterCloudInitFiles.js @@ -1,20 +1,14 @@ import * as fs from "fs"; import path from "path"; +import { getAllCloudMdList } from "./utils.js"; -// Read the TOC file -const tocContent = fs.readFileSync("TOC-tidb-cloud.md", "utf8"); +const allFilePaths = getAllCloudMdList(); -// Regular expression to match markdown links -const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g; - -// Set to store unique file paths +// Set to store filtered file paths const filePaths = new Set(); -// Extract all file paths from markdown links -let match; -while ((match = linkRegex.exec(tocContent)) !== null) { - const filePath = match[2]; - +// Filter the file paths +for (const filePath of allFilePaths) { // Skip external links (starting with http/https) if (filePath.startsWith("http")) { continue; @@ -25,9 +19,6 @@ while ((match = linkRegex.exec(tocContent)) !== null) { continue; } - // Remove leading slash if present - const cleanPath = filePath.startsWith("/") ? filePath.slice(1) : filePath; - // Skip files in tidb-cloud folder if (cleanPath.startsWith("tidb-cloud/")) { continue; diff --git a/scripts/filterNonCloudDoc.js b/scripts/filterNonCloudDoc.js index aae0eda35766e..768a5281200cc 100644 --- a/scripts/filterNonCloudDoc.js +++ b/scripts/filterNonCloudDoc.js @@ -3,6 +3,7 @@ import { copySingleFileSync, copyFileWithCustomContentSync, removeCustomContent, + CLOUD_TOC_LIST, } from "./utils.js"; const contentHandler = (content = "") => { @@ -17,7 +18,7 @@ const extractFilefromList = ( fileList.forEach((filePath = "") => { if ( filePath.includes(`/tidb-cloud/`) || - filePath.includes(`TOC-tidb-cloud.md`) + CLOUD_TOC_LIST.some((tocFile) => filePath.includes(tocFile)) ) { return; } diff --git a/scripts/filterUpdateFiles.js b/scripts/filterUpdateFiles.js index 6f60c9b29f3aa..eaed3279ed2e2 100755 --- a/scripts/filterUpdateFiles.js +++ b/scripts/filterUpdateFiles.js @@ -2,6 +2,7 @@ import * as fs from "fs"; import path from "path"; import axios from "axios"; import { Octokit } from "octokit"; +import { CLOUD_TOC_LIST, getAllCloudMdList } from "./utils.js"; const GH_TOKEN = process.env.GH_TOKEN || ""; @@ -92,68 +93,29 @@ const deleteFile = (targetFile) => { } }; -// read toc file and parse the file paths -const parseTOCFile = (tocPath) => { - try { - if (!fs.existsSync(tocPath)) { - console.log(`TOC file not found: ${tocPath}`); - return new Set(); - } - - const content = fs.readFileSync(tocPath, "utf8"); - const filePaths = new Set(); - - // use regex to match the file paths in markdown links - // match [text](path) format - const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g; - let match; - - while ((match = linkRegex.exec(content)) !== null) { - const link = match[2]; - // only process links ending with .md - if (link.endsWith(".md")) { - // remove ./ or / at the beginning to ensure path consistency - const normalizedPath = link.replace(/^\.?\//, ""); - filePaths.add(normalizedPath); - } - } - - console.log(`Found ${filePaths.size} files in TOC: ${tocPath}`); - if (filePaths.size > 0) { - console.log( - "Files in TOC:", - Array.from(filePaths).slice(0, 5).join(", "), - filePaths.size > 5 ? `... and ${filePaths.size - 5} more` : "" - ); - } - return filePaths; - } catch (error) { - console.error(`Error parsing TOC file ${tocPath}:`, error); - return new Set(); - } -}; - // get the file list from the toc file const getCloudTOCFiles = () => { - // check ./tmp/TOC-tidb-cloud.md first - const tmpTocPath = "./tmp/TOC-tidb-cloud.md"; - const localTocPath = "TOC-tidb-cloud.md"; + const tmpTocFiles = getAllCloudMdList([ + "./tmp/TOC-tidb-cloud.md", + "./tmp/TOC-tidb-cloud-starter.md", + "./tmp/TOC-tidb-cloud-essential.md", + ]); + const tocFiles = getAllCloudMdList(CLOUD_TOC_LIST); - let tocFiles = parseTOCFile(tmpTocPath); + // Convert to Set + const tmpTocFilesSet = new Set(tmpTocFiles); + const tocFilesSet = new Set(tocFiles); - // if not found in /tmp, check the current directory - if (tocFiles.size === 0) { - console.log(`No files found in ${tmpTocPath}, trying ${localTocPath}`); - tocFiles = parseTOCFile(localTocPath); - } + // Use tmpTocFiles if not empty, otherwise use tocFiles + const finalTocFiles = tmpTocFilesSet.size > 0 ? tmpTocFilesSet : tocFilesSet; - if (tocFiles.size === 0) { + if (finalTocFiles.size === 0) { console.log( "Warning: No TOC file found or no files in TOC. All .md files will be processed." ); } - return tocFiles; + return finalTocFiles; }; // filter the files in tmp directory by the toc file diff --git a/scripts/utils.js b/scripts/utils.js index 2a727cb5acbbf..9d2160321604a 100644 --- a/scripts/utils.js +++ b/scripts/utils.js @@ -59,13 +59,34 @@ const filterLink = (srcList = []) => { }; export const getAllMdList = (tocFile) => { + if (!fs.existsSync(tocFile)) { + console.log(`TOC file not found: ${tocFile}`); + return []; + } + const tocFileContent = fs.readFileSync(tocFile); const mdAst = generateMdAstFromFile(tocFileContent); const linkList = extractLinkNodeFromAst(mdAst); - const filteredLinkList = filterLink(linkList); + const filteredLinkList = filterLink(linkList).map((link) => + link.replace(/^\.?\//, "") + ); return filteredLinkList; }; +export const CLOUD_TOC_LIST = [ + "TOC-tidb-cloud.md", + "TOC-tidb-cloud-essential.md", + "TOC-tidb-cloud-starter.md", +]; + +export const getAllCloudMdList = (tocFiles = CLOUD_TOC_LIST) => { + // Get all MD files from multiple TOCs and deduplicate + const allFilteredLinkLists = tocFiles.map((tocFile) => getAllMdList(tocFile)); + const flattenedList = allFilteredLinkLists.flat(); + const allFilePaths = [...new Set(flattenedList)]; // Deduplicate + return allFilePaths; +}; + const checkDestDir = (destPath) => { const dir = path.dirname(destPath);