/**
 * Parse frontmatter from MDX content
 *
 * Recognizes a leading block delimited by `---` markers and reads it as flat
 * `key: value` lines. This is intentionally not a YAML parser: nesting, lists
 * and multi-line values are not supported, and every value is kept as a string.
 *
 * @param content - Raw MDX source, possibly starting with a frontmatter block
 * @returns The parsed key/value pairs and the content with the frontmatter
 *   block (and any leading whitespace after it) removed. When the content does
 *   not start with `---`, or the block is never closed, the frontmatter is
 *   empty and the content is returned untouched.
 */
export function parseFrontmatter(content: string): {
  frontmatter: Record<string, string>;
  content: string;
} {
  const delimiter = "---";

  if (!content.startsWith(delimiter)) {
    return { frontmatter: {}, content };
  }

  // Prefer a closing delimiter that starts a line. Looking for the first
  // "---" anywhere would cut the block short at a value such as
  // `title: A --- B` and leak the rest of the frontmatter into the body.
  let blockEnd = content.indexOf(`\n${delimiter}`, delimiter.length);
  let bodyStart = blockEnd + 1 + delimiter.length;

  if (blockEnd === -1) {
    // No line-start delimiter: fall back to the first inline occurrence so
    // single-line input such as `--- title: x --- body` keeps working.
    blockEnd = content.indexOf(delimiter, delimiter.length);
    if (blockEnd === -1) {
      // Unterminated block: treat the whole input as content
      return { frontmatter: {}, content };
    }
    bodyStart = blockEnd + delimiter.length;
  }

  const frontmatter: Record<string, string> = {};

  for (const rawLine of content.slice(delimiter.length, blockEnd).split("\n")) {
    // trim() also drops the "\r" left behind by CRLF line endings
    const line = rawLine.trim();
    if (!line) continue;

    // Split on the first colon only, so values may themselves contain colons
    const colonIndex = line.indexOf(":");
    if (colonIndex <= 0) continue;

    const key = line.slice(0, colonIndex).trim();
    const value = line.slice(colonIndex + 1).trim();

    // Strip one pair of surrounding quotes, but only when both ends match
    frontmatter[key] = value.replace(/^(["'])(.*)\1$/, "$2");
  }

  return { frontmatter, content: content.slice(bodyStart).trimStart() };
}
/**
 * Table of contents item extracted from MDX headings
 */
export interface TOCItem {
  id: string;
  text: string;
  level: number;
  children?: TOCItem[];
}

/* ========== CONTENT TYPES =========== */

/**
 * All recognized content types in the system
 * Each type is mapped to:
 * - Source directory: content/{type}
 * - Output directory: static/content/{type}
 * - Metadata file: static/content-meta/{type}/index.json
 */
export type ContentType = "docs" | "blog" | "policy" | "dev" | "llm-docs";

// NOTE(review): "llm-docs" is a valid ContentType but is not listed here;
// confirm the omission is intentional before iterating this list.
export const CONTENT_TYPES: ContentType[] = ["docs", "blog", "policy", "dev"];

/**
 * Base metadata interface that all content types extend
 * This metadata is generated during preprocessing and stored with the content
 */
export interface ContentMeta {
  title: string;
  description: string;
  path: string;
  slug: string;
  type: ContentType;
  route: string; // Full URL route for cross-referencing with search results
}

/**
 * Core content interface that combines metadata with content
 * The meta and content are loaded from JSON, with MDX processed on demand
 *
 * `T` selects the metadata shape; it defaults to the base `ContentMeta` so a
 * bare `Content` reference remains valid.
 */
export interface Content<T extends ContentMeta = ContentMeta> {
  meta: T; // Typed, validated metadata
  content: string; // MDX with frontmatter stripped out

  // MDX structure expected by components (used in MDXRenderer)
  mdx: {
    code: string; // Compiled MDX code
    // Values are `unknown` so consumers must narrow before use
    frontmatter: Record<string, unknown>; // Extracted frontmatter
    tableOfContents: TOCItem[]; // Table of contents extracted from headings
  };
}

/* ========== BLOG CONTENT TYPES =========== */

/**
 * Blog-specific metadata extends the base ContentMeta
 */
export interface BlogMeta extends ContentMeta {
  date: string; // Publication date in YYYY-MM-DD format
  author: string; // Author name
  readTime: string; // Estimated reading time
  lastUpdated: string; // Last update date
}

export type BlogContent = Content<BlogMeta>;
The plugin automatically handles compilation and metadata extraction +```mdx +--- +title: My Blog Post +description: A description of the post +date: "2024-01-15" +author: "Author Name" +readTime: "5 min read" +--- +``` ### Build-time vs Runtime @@ -55,7 +60,64 @@ Your content here... ### Type Safety -TypeScript types are provided in `app/types/mdx.d.ts` for proper autocomplete and type checking. +TypeScript types are provided in: +- `app/types/mdx.d.ts` - types for MDX imports + +## Content Plugin (`content.ts`) + +Scans the content directory and maintains metadata about all MDX files for listing and querying. + +### Features + +- **Directory scanning**: Scans `content/` on startup with parallel processing +- **Metadata extraction**: Builds metadata for all MDX files (title, description, slug, route, etc.) +- **Virtual module**: Exposes meta via `virtual:content-meta` +- **Hot Module Replacement**: Meta is automatically updated when files change + +### Usage + +Access content metadata via the virtual module: + +```typescript +// @ts-expect-error - virtual module resolved by vite plugin +import { blogPosts, allContent } from "virtual:content-meta"; + +// blogPosts: BlogMeta[] - blog posts sorted by date (newest first) +// allContent: ContentMeta[] - all MDX content entries + +blogPosts.forEach(post => { + console.log(post.title, post.date, post.route); +}); +``` + +### Content Types + +The plugin recognizes content types based on directory structure: +- `content/blog/` → type: "blog" +- `content/docs/` → type: "docs" +- `content/policy/` → type: "policy" +- `content/dev/` → type: "dev" + +### Blog Metadata + +Blog posts include additional fields: +- `date`: Publication date +- `author`: Author name +- `readTime`: Estimated reading time +- `lastUpdated`: Last update date + +### Adding New Content + +1. Create a new `.mdx` file in the appropriate `content/` subdirectory +2. Add frontmatter with required fields +3. 
The meta is automatically updated during development (HMR) +4. Use `blogPosts` or `allContent` to list and query content + +### Type Safety + +TypeScript types are provided in: +- `app/types/virtual-content-meta.d.ts` - types for the virtual module +- `app/lib/content/types.ts` - `ContentMeta` and `BlogMeta` interfaces ## Images Plugin (`images.ts`) diff --git a/cloud/vite-plugins/content.ts b/cloud/vite-plugins/content.ts new file mode 100644 index 0000000000..c89683c57d --- /dev/null +++ b/cloud/vite-plugins/content.ts @@ -0,0 +1,283 @@ +/** + * Vite plugin for content meta management + * + * This plugin scans the content directory for MDX files and maintains + * metadata about all content for listing and querying. + * + * Features: + * - Scans content directory on startup + * - Builds metadata for all MDX files (title, description, slug, etc.) + * - Exposes meta via virtual:content-meta module + * - Supports Hot Module Replacement (HMR) in development + * + * Usage: + * ```typescript + * import { blogPosts, allContent } from "virtual:content-meta"; + * + * // blogPosts: BlogMeta[] - blog posts sorted by date (newest first) + * // allContent: ContentMeta[] - all MDX content entries + * ``` + */ + +import type { Plugin } from "vite"; +import fs from "node:fs"; +import type { Dirent } from "node:fs"; +import path from "node:path"; +import type { + ContentType, + ContentMeta, + BlogMeta, +} from "../app/lib/content/types"; +import { parseFrontmatter } from "../app/lib/content/frontmatter"; + +const VIRTUAL_MODULE_ID = "virtual:content-meta"; +// The "\0" prefix is a Vite convention that marks this as a virtual module, +// preventing Vite from trying to resolve it as a real file path +const RESOLVED_VIRTUAL_MODULE_ID = "\0" + VIRTUAL_MODULE_ID; + +/** Content meta keyed by absolute file path */ +const contentMeta = new Map(); + +export interface ViteContentOptions { + contentDir: string; +} + +/** + * Derive content type from file path relative to content directory + 
/**
 * Top-level content directories that map to a content type.
 * A Map is used instead of an object literal so that directory names such as
 * "constructor" cannot resolve to inherited Object.prototype members.
 */
const CONTENT_TYPE_BY_DIRECTORY: ReadonlyMap<string, ContentType> = new Map<
  string,
  ContentType
>([
  ["blog", "blog"],
  ["docs", "docs"],
  ["policy", "policy"],
  ["dev", "dev"],
]);

/**
 * Derive content type from file path relative to content directory
 *
 * @param contentDir - Root content directory
 * @param filePath - Path of the MDX file
 * @returns The type named by the first directory segment below `contentDir`,
 *   or "docs" when that segment is not a known content directory
 */
function getContentType(contentDir: string, filePath: string): ContentType {
  const [firstDir = ""] = path.relative(contentDir, filePath).split(path.sep);
  return CONTENT_TYPE_BY_DIRECTORY.get(firstDir) ?? "docs";
}

/**
 * Build a meta entry from an MDX file
 *
 * @param contentDir - Root content directory
 * @param filePath - Path of the MDX file
 * @param frontmatter - Flat key/value frontmatter parsed from the file
 * @returns Base metadata, extended with blog fields when the file is a blog post
 */
function buildMetaEntry(
  contentDir: string,
  filePath: string,
  frontmatter: Record<string, string>,
): ContentMeta | BlogMeta {
  // Relative to the configured content directory (not process.cwd()), without
  // the extension, and always with forward slashes so the value is identical
  // on every platform.
  const urlPath = path
    .relative(contentDir, filePath)
    .replace(/\.mdx$/, "")
    .split(path.sep)
    .join("/");
  const contentType = getContentType(contentDir, filePath);
  const slug = path.basename(filePath, ".mdx");
  // NOTE(review): the route is built from the basename only, so files in
  // different subdirectories that share a basename get the same route.
  const route = `/${contentType}/${slug}`;

  // `||` rather than `??`: an empty frontmatter value should fall back too
  const baseEntry: ContentMeta = {
    title: frontmatter.title || slug,
    description: frontmatter.description || "",
    path: urlPath,
    slug,
    type: contentType,
    route,
  };

  if (contentType !== "blog") {
    return baseEntry;
  }

  // Add blog-specific fields if this is a blog post
  const blogEntry: BlogMeta = {
    ...baseEntry,
    date: frontmatter.date || "",
    author: frontmatter.author || "",
    readTime: frontmatter.readTime || "",
    lastUpdated: frontmatter.lastUpdated || "",
  };
  return blogEntry;
}

/**
 * Update the meta entry for a single MDX file
 * Used for both initial scan and HMR updates
 *
 * Failures are logged rather than thrown so one bad file does not abort the
 * scan. When the file no longer exists its entry is removed, so deleted
 * content does not keep appearing in the generated module.
 *
 * @param contentDir - Root content directory
 * @param filePath - Path of the MDX file; also the key in the meta map
 */
async function updateMetaEntry(
  contentDir: string,
  filePath: string,
): Promise<void> {
  try {
    const rawContent = await fs.promises.readFile(filePath, "utf-8");
    const { frontmatter } = parseFrontmatter(rawContent);
    contentMeta.set(filePath, buildMetaEntry(contentDir, filePath, frontmatter));
  } catch (error: unknown) {
    const code =
      typeof error === "object" && error !== null && "code" in error
        ? (error as { code?: unknown }).code
        : undefined;
    if (code === "ENOENT") {
      // The file is gone: drop any metadata recorded for it earlier
      contentMeta.delete(filePath);
    }
    console.error(`[content] Error updating meta for ${filePath}:`, error);
  }
}
/**
 * Recursively scan a directory for MDX files and populate the content meta
 *
 * Entries of a directory are processed in batches of `concurrency`; each batch
 * must finish before the next one starts. The limit applies per directory:
 * a subdirectory handled inside a batch runs its own batches, so the total
 * number of in-flight operations can exceed the limit.
 *
 * @param contentDir - Root content directory, passed through to the meta update
 * @param currentDir - Directory to scan in this call
 */
async function scanContentDirectory(
  contentDir: string,
  currentDir: string,
): Promise<void> {
  const concurrency = 20;
  const entries = await fs.promises.readdir(currentDir, {
    withFileTypes: true,
  });

  const processEntry = async (entry: Dirent): Promise<void> => {
    const fullPath = path.join(currentDir, entry.name);

    if (entry.isDirectory()) {
      await scanContentDirectory(contentDir, fullPath);
      return;
    }

    if (entry.isFile() && entry.name.endsWith(".mdx")) {
      await updateMetaEntry(contentDir, fullPath);
    }
  };

  // Process entries in parallel batches with concurrency limit
  for (let start = 0; start < entries.length; start += concurrency) {
    const batch = entries.slice(start, start + concurrency);
    await Promise.all(batch.map(processEntry));
  }
}

/**
 * Print content statistics based on types in the map
 *
 * Logs one line of the form
 * `[content] <verb> virtual module: <count> <type>, ..., <n> total entries <info>`.
 * Type names are printed as-is: appending "s" would produce "docss" or
 * "policys" for the existing content types.
 *
 * @param contentMeta - Meta entries to summarize; only `type` is read
 * @param verb - Leading verb of the log line
 * @param additionalInfo - Optional suffix, e.g. a duration
 */
function printContentStats(
  contentMeta: ReadonlyMap<string, { type: string }>,
  verb: "Generated" | "Updated",
  additionalInfo?: string,
): void {
  // Count entries by type
  const typeCounts = new Map<string, number>();
  for (const entry of contentMeta.values()) {
    typeCounts.set(entry.type, (typeCounts.get(entry.type) ?? 0) + 1);
  }

  const total = contentMeta.size;
  const totalString = `${total} total entr${total !== 1 ? "ies" : "y"}`;

  // Join the total with the per-type parts so an empty map does not produce
  // a dangling ", " separator
  const summary = [
    ...Array.from(typeCounts, ([type, count]) => `${count} ${type}`),
    totalString,
  ].join(", ");
  const infoString = additionalInfo ? ` ${additionalInfo}` : "";

  console.log(`[content] ${verb} virtual module: ${summary}${infoString}`);
}
/**
 * Generate the virtual module code from the content meta
 *
 * @returns ES module source exporting `blogPosts` (blog entries, newest
 *   first) and `allContent` (every entry, ordered by path)
 */
function generateMetaModule(): string {
  // Map insertion order follows the completion order of the parallel scan,
  // so sort by path to make the generated module reproducible.
  const allEntries = Array.from(contentMeta.values()).sort((a, b) =>
    a.path < b.path ? -1 : a.path > b.path ? 1 : 0,
  );

  // A missing or unparseable date counts as the epoch: a NaN timestamp would
  // make the comparator return NaN and leave the order unspecified.
  const publishedAt = (entry: BlogMeta): number => {
    const timestamp = new Date(entry.date || 0).getTime();
    return Number.isNaN(timestamp) ? 0 : timestamp;
  };

  // Filter and sort blog posts by date (newest first); the sort is stable,
  // so posts sharing a date keep the path order established above
  const blogPosts = allEntries
    .filter((entry): entry is BlogMeta => entry.type === "blog")
    .sort((a, b) => publishedAt(b) - publishedAt(a));

  return `
/** @type {import('../app/lib/content/types').BlogMeta[]} */
export const blogPosts = ${JSON.stringify(blogPosts, null, 2)};
/** @type {import('../app/lib/content/types').ContentMeta[]} */
export const allContent = ${JSON.stringify(allEntries, null, 2)};
  `.trim();
}

/**
 * Scan content directory and build meta
 *
 * Logs a warning and does nothing when the directory does not exist.
 *
 * @param contentDir - Root content directory to scan
 */
async function buildContentMeta(contentDir: string): Promise<void> {
  if (!fs.existsSync(contentDir)) {
    console.warn(`[content] Content directory not found: ${contentDir}`);
    return;
  }

  console.log(`[content] Building content meta from: ${contentDir}`);
  const startTime = Date.now();
  await scanContentDirectory(contentDir, contentDir);
  const duration = Date.now() - startTime;
  printContentStats(contentMeta, "Generated", `in ${duration}ms`);
}

/**
 * Whether `candidate` lies strictly inside `directory`.
 * Compares path segments, so a sibling such as "content-old" is not mistaken
 * for a child of "content" the way a plain string-prefix check would.
 */
function isWithinDirectory(directory: string, candidate: string): boolean {
  const relative = path.relative(directory, candidate);
  return (
    relative !== "" &&
    relative !== ".." &&
    !relative.startsWith(`..${path.sep}`) &&
    !path.isAbsolute(relative)
  );
}

/**
 * Create the content meta plugin
 *
 * @param options - Plugin options; `contentDir` is the directory to scan
 * @returns A Vite plugin that serves the `virtual:content-meta` module
 * @throws Error when `contentDir` is missing or empty
 */
export function viteContent(options: ViteContentOptions): Plugin {
  if (!options.contentDir) {
    throw new Error(
      "[vite-plugin-content] contentDir option is required and must be a non-empty string",
    );
  }

  const contentDir = path.resolve(options.contentDir);
  let isBuild = false;

  return {
    name: "vite-plugin-content",

    // Detect build vs serve mode
    config(_config, { command }) {
      isBuild = command === "build";
    },

    async configureServer() {
      // Scan content directory on startup
      await buildContentMeta(contentDir);
    },

    async buildStart() {
      // Scan content directory during build (only in build mode); in serve
      // mode configureServer has already performed the scan
      if (isBuild) {
        await buildContentMeta(contentDir);
      }
    },

    // Resolve virtual module imports
    resolveId(id) {
      if (id === VIRTUAL_MODULE_ID) {
        return RESOLVED_VIRTUAL_MODULE_ID;
      }
    },

    // Load the virtual module content
    load(id) {
      if (id === RESOLVED_VIRTUAL_MODULE_ID) {
        return generateMetaModule();
      }
    },

    // Enable HMR for content meta updates
    async handleHotUpdate({ file, server }) {
      // Resolve to the platform's native form so the key matches the one the
      // directory scan stored for this file.
      // NOTE(review): presumably Vite reports forward-slash paths here even
      // on Windows — confirm against the Vite version in use.
      const changedFile = path.resolve(file);

      if (
        !changedFile.endsWith(".mdx") ||
        !isWithinDirectory(contentDir, changedFile)
      ) {
        return;
      }

      console.log(`[content] Updating meta for ${file}`);

      // Update meta entry for the changed file
      await updateMetaEntry(contentDir, changedFile);

      // Invalidate the virtual meta module so it regenerates
      const metaModule = server.moduleGraph.getModuleById(
        RESOLVED_VIRTUAL_MODULE_ID,
      );
      if (metaModule) {
        server.moduleGraph.invalidateModule(metaModule);
      }
    },
  };
}
(colonIndex > 0) { - const key = trimmedLine.slice(0, colonIndex).trim(); - const value = trimmedLine.slice(colonIndex + 1).trim(); - frontmatter[key] = value.replace(/^["'](.*)["']$/, "$1"); - } - }); - - return { frontmatter, content: cleanContent }; - } - - return { frontmatter: {}, content }; -} - -/** - * Extract table of contents from MDX content - */ -function extractTOC(content: string): Array<{ - id: string; - text: string; - level: number; -}> { - const headingRegex = /^(#{1,6})\s+(.+)$/gm; - const toc: Array<{ id: string; text: string; level: number }> = []; - let match; - - while ((match = headingRegex.exec(content)) !== null) { - const level = match[1].length; - const text = match[2].trim(); - const id = text - .toLowerCase() - .replace(/[^a-z0-9]+/g, "-") - .replace(/^-|-$/g, ""); - - toc.push({ id, text, level }); - } - - return toc; -} +import { parseFrontmatter } from "../app/lib/content/frontmatter"; +import { extractTOC } from "../app/lib/content/toc"; export function viteMDX(): Plugin { return { diff --git a/cloud/vite.config.ts b/cloud/vite.config.ts index 3201ecfae9..fbb5f0e45b 100644 --- a/cloud/vite.config.ts +++ b/cloud/vite.config.ts @@ -5,10 +5,12 @@ import { tanstackStart } from "@tanstack/react-start/plugin/vite"; import viteReact from "@vitejs/plugin-react"; import path from "path"; import { viteMDX } from "./vite-plugins/mdx"; +import { viteContent } from "./vite-plugins/content"; import { viteImages } from "./vite-plugins/images"; import { defineConfig } from "vite"; export default defineConfig(() => { + const contentDir = path.resolve(process.cwd(), "content"); return { server: { port: 3000, @@ -17,6 +19,7 @@ export default defineConfig(() => { tsConfigPaths({ projects: ["./tsconfig.json"], }), + viteContent({ contentDir }), viteMDX(), viteImages({ viteEnvironments: ["client"] }), cloudflare({ viteEnvironment: { name: "ssr" } }), @@ -33,12 +36,13 @@ export default defineConfig(() => { retryDelay: 0, maxRedirects: 5, failOnError: 
true, - filter: (page) => page.path.startsWith("/docs"), + filter: (page: { path: string }) => + page.path.startsWith("/docs") || page.path.startsWith("/blog"), // todo(sebastian): Consider post-processing sitemap/pages to set the changefreq. // When using autoStaticPathsDiscovery, you can't set the sitemap changefreq or // other sitemap options per page—frequency can only be set on a per-page basis if you provide // an explicit pages array. For auto-discovered pages, control over frequency is not available. - onSuccess: ({ page }) => { + onSuccess: ({ page }: { page: { path: string } }) => { console.log(`Rendered ${page.path}!`); return { sitemap: { changefreq: "daily" } }; },