Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions cloud/app/lib/content/route-config.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ export interface ContentRouteOptions<TMeta extends ContentMeta> {

/** Content type for Open Graph (defaults to "website") */
ogType?: "website" | "article";
/** Robots directive (e.g., "noindex, nofollow") */
robots?: string;
/** Function to generate social card image path from meta */
getImagePath?: (meta: TMeta) => string;
}
Expand Down Expand Up @@ -120,7 +118,6 @@ export function createContentRouteConfig<TMeta extends ContentMeta>(
head: createContentHead<TMeta>({
allMetas,
ogType: options.ogType,
robots: options.robots,
getImagePath: options.getImagePath,
}),

Expand Down Expand Up @@ -194,7 +191,6 @@ function isBlogMeta(meta: ContentMeta): meta is BlogMeta {
interface CreateContentHeadOptions<TMeta extends ContentMeta> {
allMetas: TMeta[];
ogType?: "website" | "article";
robots?: string;
getImagePath?: (meta: TMeta) => string;
}

Expand All @@ -209,7 +205,7 @@ interface CreateContentHeadOptions<TMeta extends ContentMeta> {
function createContentHead<TMeta extends ContentMeta>(
options: CreateContentHeadOptions<TMeta>,
) {
const { allMetas, ogType = "website", robots, getImagePath } = options;
const { allMetas, ogType = "website", getImagePath } = options;

return (ctx: {
match: { pathname: string };
Expand Down Expand Up @@ -245,11 +241,6 @@ function createContentHead<TMeta extends ContentMeta>(
{ name: "description", content: meta.description },
];

// Add robots if specified
if (robots) {
metaTags.push({ name: "robots", content: robots });
}

// Add Open Graph tags
metaTags.push(
...generateOpenGraphMeta({
Expand Down
221 changes: 221 additions & 0 deletions cloud/app/lib/robots.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import { describe, it, expect } from "vitest";
import {
parseSitemapForUrlsWithoutChangefreq,
generateRobotsTxt,
} from "./robots";

// Test suite for the sitemap parser: URLs lacking a <changefreq> tag are
// treated as low-priority and their pathnames are returned for robots.txt
// Disallow rules. The root path "/" is always excluded.
describe("parseSitemapForUrlsWithoutChangefreq", () => {
  it("returns empty array for empty sitemap", () => {
    expect(parseSitemapForUrlsWithoutChangefreq("")).toEqual([]);
  });

  it("returns empty array for sitemap with no url blocks", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual([]);
  });

  it("extracts paths from URLs without changefreq", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://example.com/page1</loc>
</url>
<url>
<loc>https://example.com/page2</loc>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual([
      "/page1",
      "/page2",
    ]);
  });

  it("excludes URLs that have changefreq", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://example.com/static</loc>
</url>
<url>
<loc>https://example.com/dynamic</loc>
<changefreq>weekly</changefreq>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual(["/static"]);
  });

  // Any changefreq value (daily/monthly/never/...) excludes the URL —
  // the parser only cares about the tag's presence, not its value.
  it("handles changefreq with different values", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>https://example.com/daily</loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc>https://example.com/monthly</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>https://example.com/never</loc>
<changefreq>never</changefreq>
</url>
<url>
<loc>https://example.com/no-freq</loc>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual(["/no-freq"]);
  });

  it("is case-insensitive for changefreq tag", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>https://example.com/upper</loc>
<CHANGEFREQ>weekly</CHANGEFREQ>
</url>
<url>
<loc>https://example.com/mixed</loc>
<ChangeFreq>weekly</ChangeFreq>
</url>
<url>
<loc>https://example.com/none</loc>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual(["/none"]);
  });

  // "/" must never be disallowed even when it has no changefreq entry.
  it("excludes root path /", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>https://example.com/</loc>
</url>
<url>
<loc>https://example.com/page</loc>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual(["/page"]);
  });

  it("handles nested paths", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>https://example.com/docs/getting-started</loc>
</url>
<url>
<loc>https://example.com/blog/2024/post</loc>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual([
      "/docs/getting-started",
      "/blog/2024/post",
    ]);
  });

  // Malformed <loc> values surface as errors (via new URL()) rather than
  // being silently dropped — a broken sitemap should fail the build.
  it("throws on invalid URL in loc", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>not-a-valid-url</loc>
</url>
</urlset>`;
    expect(() => parseSitemapForUrlsWithoutChangefreq(sitemap)).toThrow();
  });

  // Only <changefreq> affects inclusion; <priority>/<lastmod> are ignored.
  it("handles url blocks with other tags", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>https://example.com/with-priority</loc>
<priority>0.8</priority>
<lastmod>2024-01-01</lastmod>
</url>
<url>
<loc>https://example.com/with-freq</loc>
<priority>0.5</priority>
<changefreq>weekly</changefreq>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual([
      "/with-priority",
    ]);
  });

  it("handles multiline url blocks", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<loc>https://example.com/multiline</loc>
<priority>0.5</priority>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual([
      "/multiline",
    ]);
  });

  // A <url> block with no <loc> cannot yield a path and is skipped.
  it("skips url blocks without loc", () => {
    const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url>
<priority>0.5</priority>
</url>
<url>
<loc>https://example.com/valid</loc>
</url>
</urlset>`;
    expect(parseSitemapForUrlsWithoutChangefreq(sitemap)).toEqual(["/valid"]);
  });
});

// Test suite for robots.txt rendering: a fixed "User-agent: * / Allow: /"
// preamble, one Disallow line per path, then a Sitemap directive.
describe("generateRobotsTxt", () => {
  const siteUrl = "https://example.com";
  const sitemapUrl = "https://example.com/sitemap.xml";

  it("generates robots.txt with no disallow paths", () => {
    const result = generateRobotsTxt([], siteUrl, sitemapUrl);
    // No Disallow section at all — just the preamble and Sitemap line.
    expect(result).toBe(`# robots.txt for https://example.com
User-agent: *
Allow: /

Sitemap: https://example.com/sitemap.xml`);
  });

  it("generates robots.txt with single disallow path", () => {
    const result = generateRobotsTxt(["/private"], siteUrl, sitemapUrl);
    expect(result).toBe(`# robots.txt for https://example.com
User-agent: *
Allow: /
Disallow: /private

Sitemap: https://example.com/sitemap.xml`);
  });

  // Disallow lines preserve the input order of the paths.
  it("generates robots.txt with multiple disallow paths", () => {
    const result = generateRobotsTxt(
      ["/private", "/admin", "/internal"],
      siteUrl,
      sitemapUrl,
    );
    expect(result).toBe(`# robots.txt for https://example.com
User-agent: *
Allow: /
Disallow: /private
Disallow: /admin
Disallow: /internal

Sitemap: https://example.com/sitemap.xml`);
  });

  it("uses provided sitemap URL", () => {
    const result = generateRobotsTxt(
      [],
      "https://mirascope.com",
      "https://mirascope.com/sitemap.xml",
    );
    expect(result).toContain("Sitemap: https://mirascope.com/sitemap.xml");
  });
});
42 changes: 42 additions & 0 deletions cloud/app/lib/robots.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/**
* Pure functions for robots.txt generation from sitemap data
*/

/**
 * Parse sitemap XML and extract URLs that don't have a changefreq tag.
 * URLs without changefreq are considered low-priority for crawling.
 * The root path "/" is excluded as changefreq doesn't apply globally.
 *
 * This is a lightweight regex scan, not a full XML parser:
 * - <url>, <loc>, and <changefreq> tags are matched case-insensitively
 *   (previously only <changefreq> was).
 * - <loc> content may span multiple lines; surrounding whitespace is trimmed.
 * - <url> blocks with a missing or empty <loc> are skipped.
 *
 * @param sitemapXml - Raw sitemap XML content.
 * @returns Pathnames (e.g. "/docs/intro") of URLs lacking <changefreq>.
 * @throws TypeError when a non-empty <loc> value is not a valid absolute URL,
 *         so broken sitemap data fails loudly instead of being dropped.
 */
export function parseSitemapForUrlsWithoutChangefreq(
  sitemapXml: string,
): string[] {
  // Each <url>...</url> block; non-greedy so adjacent blocks don't merge.
  const urlBlocks = sitemapXml.match(/<url>[\s\S]*?<\/url>/gi) ?? [];

  return (
    urlBlocks
      // Keep only blocks with no <changefreq> tag (any casing or value).
      .filter((block) => !/<changefreq>[\s\S]*?<\/changefreq>/i.test(block))
      // Extract the <loc> text; [\s\S] + trim handles multi-line values.
      .map((block) => block.match(/<loc>([\s\S]*?)<\/loc>/i)?.[1]?.trim())
      // Skip blocks with a missing or empty <loc>.
      .filter((loc): loc is string => loc !== undefined && loc !== "")
      // new URL() validates the location and yields its pathname.
      .map((loc) => new URL(loc).pathname)
      // exclude root path
      .filter((pathname) => pathname !== "/")
  );
}

/**
 * Generate robots.txt content from a list of disallow paths.
 *
 * Output layout: a comment header, a wildcard user-agent section that allows
 * everything except the given paths, a blank separator line, then the
 * Sitemap directive.
 *
 * @param disallowPaths - Paths to emit as `Disallow:` rules, in order.
 * @param siteUrl - Site origin, used only in the header comment.
 * @param sitemapUrl - Absolute URL for the `Sitemap:` directive.
 * @returns The complete robots.txt body (no trailing newline).
 */
export function generateRobotsTxt(
  disallowPaths: string[],
  siteUrl: string,
  sitemapUrl: string,
): string {
  // Build the file line-by-line; the empty string renders as the blank
  // separator between the rules section and the Sitemap directive.
  const lines: string[] = [
    `# robots.txt for ${siteUrl}`,
    "User-agent: *",
    "Allow: /",
    ...disallowPaths.map((path) => `Disallow: ${path}`),
    "",
    `Sitemap: ${sitemapUrl}`,
  ];

  return lines.join("\n");
}
21 changes: 1 addition & 20 deletions cloud/app/lib/seo/head.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,6 @@ describe("createPageHead", () => {
content: "summary_large_image",
});

// No robots
expect(findMeta(result.meta, "robots")).toBeUndefined();

// No scripts (not an article)
expect(result.scripts).toBeUndefined();
});
Expand Down Expand Up @@ -446,23 +443,7 @@ describe("createPageHead", () => {
});
});

describe("Test Case 7: With robots noindex", () => {
it("includes robots meta tag", () => {
const result = createPageHead({
route: "/dev/tools",
title: "Dev Tools",
description: "Development tools",
robots: "noindex, nofollow",
});

expect(findMeta(result.meta, "robots")).toEqual({
name: "robots",
content: "noindex, nofollow",
});
});
});

describe("Test Case 8: Empty description", () => {
describe("Test Case 7: Empty description", () => {
it("handles empty description", () => {
const result = createPageHead({
route: "/empty-desc",
Expand Down
10 changes: 1 addition & 9 deletions cloud/app/lib/seo/head.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { BASE_URL } from "@/app/lib/site";

/**
* Head metadata entry for routes.
* Supports title, name-based meta tags (e.g., description, robots),
* Supports title, name-based meta tags (e.g., description),
* property-based meta tags (e.g., og:title, twitter:card), and charset.
*/
export type HeadMetaEntry =
Expand Down Expand Up @@ -70,8 +70,6 @@ export interface PageHeadOptions {
ogType?: "website" | "article";
/** Custom image path or URL for social cards */
image?: string;
/** Robots directive (e.g., "noindex, nofollow") */
robots?: string;
/** Article metadata for blog posts */
article?: ArticleMeta;
}
Expand Down Expand Up @@ -252,7 +250,6 @@ export function createPageHead(options: PageHeadOptions): HeadResult {
description,
ogType = "website",
image,
robots,
article,
} = options;

Expand All @@ -273,11 +270,6 @@ export function createPageHead(options: PageHeadOptions): HeadResult {
{ name: "description", content: description },
];

// Add robots if specified
if (robots) {
metaTags.push({ name: "robots", content: robots });
}

// Add Open Graph tags
metaTags.push(
...generateOpenGraphMeta({
Expand Down
6 changes: 4 additions & 2 deletions cloud/public/robots.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# https://www.robotstxt.org/robotstxt.html
# robots.txt for development (overwritten by vite build)
User-agent: *
Disallow: /
Allow: /

Sitemap: https://mirascope.com/sitemap.xml
Loading