feat: add PDF watermark functionality (v1.8.0)

Libres-coder · Libres-coder · commit ab95f98dab25 · 2025-12-18T15:47:33.000+08:00
- Add text watermark with custom font size, color, opacity, rotation
- Add image watermark support (PNG/JPG)
- Support 5 positions: center, top-left, top-right, bottom-left, bottom-right
- Support page selection for targeted watermarking
- New MCP tools: add_watermark, add_image_watermark
- Total tools: 22
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,10 +9,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Planned
 
-- Semantic search with vector embeddings
-- PDF merge/split functionality
-- PDF encryption/decryption
-- Performance optimization with streaming and parallel processing
+- PDF compression optimization
+- Streaming for large files
+- LRU caching
+
+## [1.8.0] - 2025-12-18
+
+### 💧 PDF Watermark Release
+
+#### ✨ New Features
+
+- ✅ **Text Watermark** - Add customizable text watermarks to PDF
+- ✅ **Image Watermark** - Add PNG/JPG images as watermarks
+- ✅ **Position Control** - 5 positions: center, top-left, top-right, bottom-left, bottom-right
+- ✅ **Page Selection** - Apply watermark to specific pages
+
+#### 📦 New Components
+
+- `PDFUtils.addWatermark()` - Add text watermark with custom font size, color, opacity, rotation
+- `PDFUtils.addImageWatermark()` - Add image watermark (PNG/JPG support)
+- MCP tools: `add_watermark`, `add_image_watermark`
+
+#### 🎨 Watermark Options
+
+- Text, opacity, fontSize, rotation, color (RGB), position, pages
+
+#### 🔢 Total MCP Tools: 22
+
+- PDF: 10 tools (+2)
+- Word: 2 tools
+- Excel: 2 tools
+- PowerPoint: 2 tools
+- OCR: 2 tools
+- Semantic: 2 tools
+- Batch: 2 tools
+
+---
 
 ## [1.7.0] - 2025-12-17
 
diff --git a/README.md b/README.md
@@ -129,7 +129,7 @@ const results = await parser.searchText('slides.pptx', '关键词');
 }
 ```
 
-### 可用工具（20 个）
+### 可用工具（22 个）
 
 | 类别 | 工具 | 描述 |
 |------|------|------|
@@ -141,6 +141,8 @@ const results = await parser.searchText('slides.pptx', '关键词');
 | | `merge_pdf` | 合并多个 PDF |
 | | `split_pdf` | 拆分为单页 |
 | | `extract_pdf_pages` | 提取指定页码 |
+| | `add_watermark` | 添加文字水印 |
+| | `add_image_watermark` | 添加图片水印 |
 | **Word** | `extract_word` | 提取文本/HTML |
 | | `search_word` | 文本搜索 |
 | **Excel** | `extract_excel` | 提取数据 |
@@ -160,6 +162,7 @@ const results = await parser.searchText('slides.pptx', '关键词');
 
 | 版本 | 功能 |
 |------|------|
+| v1.8.0 | 💧 PDF 水印（文字/图片水印） |
 | v1.7.0 | 📦 批量处理（并行处理多文件） |
 | v1.6.0 | 🧠 语义搜索（AI 向量嵌入） |
 | v1.5.0 | 📄 PDF 合并/拆分/提取 |
diff --git a/examples/watermark-examples.ts b/examples/watermark-examples.ts
@@ -0,0 +1,67 @@
+/**
+ * PDF 水印功能示例
+ */
+
+import { PDFUtils } from 'parseflow-core';
+import path from 'path';
+
+const pdfUtils = new PDFUtils();
+
+// Example 1: add a text watermark to a PDF and log the message returned by pdfUtils.addWatermark.
+async function addTextWatermark() {
+  const result = await pdfUtils.addWatermark(
+    'path/to/input.pdf', // source PDF (placeholder path)
+    'path/to/output.pdf', // destination PDF (placeholder path)
+    {
+      text: 'CONFIDENTIAL',
+      opacity: 0.3,
+      fontSize: 48,
+      rotation: 45,
+      position: 'center',
+    }
+  );
+  console.log(result.message);
+}
+
+// Example 2: text watermark with a custom color; the result of addWatermark is awaited but not used.
+async function addColoredWatermark() {
+  await pdfUtils.addWatermark(
+    'path/to/input.pdf', // source PDF (placeholder path)
+    'path/to/output.pdf', // destination PDF (placeholder path)
+    {
+      text: 'DRAFT',
+      opacity: 0.5,
+      fontSize: 60,
+      rotation: 0,
+      color: { r: 1, g: 0, b: 0 }, // red
+      position: 'bottom-right',
+    }
+  );
+}
+
+// Example 3: add a text watermark to selected pages only; every other option is left unset.
+async function addWatermarkToPages() {
+  await pdfUtils.addWatermark(
+    'path/to/input.pdf', // source PDF (placeholder path)
+    'path/to/output.pdf', // destination PDF (placeholder path)
+    {
+      text: 'INTERNAL',
+      pages: [1, 3, 5], // only pages 1, 3 and 5
+    }
+  );
+}
+
+// Example 4: add an image watermark via pdfUtils.addImageWatermark; the result is awaited but not used.
+async function addImageWatermark() {
+  await pdfUtils.addImageWatermark(
+    'path/to/input.pdf', // source PDF (placeholder path)
+    'path/to/output.pdf', // destination PDF (placeholder path)
+    {
+      imagePath: 'path/to/logo.png', // watermark image (placeholder path)
+      opacity: 0.2,
+      position: 'center',
+    }
+  );
+}
+
+export { addTextWatermark, addColoredWatermark, addWatermarkToPages, addImageWatermark };
diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json
@@ -1,8 +1,8 @@
 {
   "name": "parseflow-mcp-server",
-  "version": "1.7.1",
+  "version": "1.8.0",
   "mcpName": "io.github.libres-coder/parseflow",
-  "description": "AI-powered document parsing with 20 tools: PDF, Word, Excel, PowerPoint, OCR, semantic search, and batch processing",
+  "description": "AI-powered document parsing with 22 tools: PDF (watermark, merge, split), Word, Excel, PowerPoint, OCR, semantic search, and batch processing",
   "keywords": [
     "mcp",
     "mcp-server",
diff --git a/packages/mcp-server/src/tools/index.ts b/packages/mcp-server/src/tools/index.ts
@@ -87,6 +87,10 @@ export class ToolHandler {
           return await this.batchExtract(args);
         case 'batch_search':
           return await this.batchSearch(args);
+        case 'add_watermark':
+          return await this.addWatermark(args);
+        case 'add_image_watermark':
+          return await this.addImageWatermark(args);
         default:
           throw new Error(`Unknown tool: ${name}`);
       }
@@ -589,6 +593,103 @@ export class ToolHandler {
           required: ['paths', 'query'],
         },
       },
+      {
+        name: 'add_watermark',
+        description:
+          'Add text watermark to PDF pages. Supports custom text, opacity, rotation, color, and position.',
+        inputSchema: {
+          type: 'object',
+          properties: {
+            inputPath: {
+              type: 'string',
+              description: 'Absolute path to the input PDF file',
+            },
+            outputPath: {
+              type: 'string',
+              description: 'Absolute path for the output PDF file',
+            },
+            text: {
+              type: 'string',
+              description: 'Watermark text (default: "WATERMARK")',
+            },
+            opacity: {
+              type: 'number',
+              description: 'Opacity value between 0 and 1 (default: 0.3)',
+              default: 0.3,
+            },
+            fontSize: {
+              type: 'number',
+              description: 'Font size in points (default: 48)',
+              default: 48,
+            },
+            rotation: {
+              type: 'number',
+              description: 'Rotation angle in degrees (default: 45)',
+              default: 45,
+            },
+            color: {
+              type: 'object',
+              description: 'RGB color object with r, g, b values (0-1)',
+              properties: {
+                r: { type: 'number' },
+                g: { type: 'number' },
+                b: { type: 'number' },
+              },
+            },
+            position: {
+              type: 'string',
+              description: 'Watermark position: center, top-left, top-right, bottom-left, bottom-right',
+              enum: ['center', 'top-left', 'top-right', 'bottom-left', 'bottom-right'],
+              default: 'center',
+            },
+            pages: {
+              type: 'array',
+              items: { type: 'number' },
+              description: 'Array of page numbers to watermark (1-indexed). If not specified, all pages',
+            },
+          },
+          required: ['inputPath', 'outputPath'],
+        },
+      },
+      {
+        name: 'add_image_watermark',
+        description:
+          'Add image watermark to PDF pages. Supports PNG and JPG images with custom opacity and position.',
+        inputSchema: {
+          type: 'object',
+          properties: {
+            inputPath: {
+              type: 'string',
+              description: 'Absolute path to the input PDF file',
+            },
+            outputPath: {
+              type: 'string',
+              description: 'Absolute path for the output PDF file',
+            },
+            imagePath: {
+              type: 'string',
+              description: 'Absolute path to the watermark image (PNG or JPG)',
+            },
+            opacity: {
+              type: 'number',
+              description: 'Opacity value between 0 and 1 (default: 0.3)',
+              default: 0.3,
+            },
+            position: {
+              type: 'string',
+              description: 'Watermark position: center, top-left, top-right, bottom-left, bottom-right',
+              enum: ['center', 'top-left', 'top-right', 'bottom-left', 'bottom-right'],
+              default: 'center',
+            },
+            pages: {
+              type: 'array',
+              items: { type: 'number' },
+              description: 'Array of page numbers to watermark (1-indexed). If not specified, all pages',
+            },
+          },
+          required: ['inputPath', 'outputPath', 'imagePath'],
+        },
+      },
     ];
   }
 
@@ -1253,4 +1354,80 @@ ${r.matches.map((m) => `   • ${m.text}${m.context ? ` [${m.context}]` : ''}`).
       ],
     };
   }
+
+  /**
+   * Text watermark tool: resolves inputPath and outputPath through this.pathResolver, forwards the optional text, opacity, fontSize, rotation, color, position and pages arguments to this.pdfUtils.addWatermark, and returns a single text item reporting result.message, both resolved paths, result.pageCount and the watermark text.
+   */
+  private async addWatermark(args: Record<string, unknown>): Promise<{ content: TextContent[] }> {
+    const inputPath = this.pathResolver.resolve(args.inputPath as string); // cast only; no runtime type check in this method
+    const outputPath = this.pathResolver.resolve(args.outputPath as string);
+    const text = args.text as string | undefined; // optional; the summary below prints 'WATERMARK' when this is falsy
+    const opacity = args.opacity as number | undefined;
+    const fontSize = args.fontSize as number | undefined;
+    const rotation = args.rotation as number | undefined;
+    const color = args.color as { r: number; g: number; b: number } | undefined;
+    const position = args.position as 'center' | 'top-left' | 'top-right' | 'bottom-left' | 'bottom-right' | undefined;
+    const pages = args.pages as number[] | undefined;
+
+    logger.info('Adding watermark to PDF', { inputPath, outputPath, text });
+
+    const result = await this.pdfUtils.addWatermark(inputPath, outputPath, { // undefined options are passed through as-is
+      text,
+      opacity,
+      fontSize,
+      rotation,
+      color,
+      position,
+      pages,
+    });
+
+    return {
+      content: [
+        {
+          type: 'text',
+          text: `✅ ${result.message}
+
+📄 Input: ${inputPath}
+📝 Output: ${outputPath}
+📊 Pages: ${result.pageCount}
+💧 Watermark: "${text || 'WATERMARK'}"`,
+        },
+      ],
+    };
+  }
+
+  /**
+   * Image watermark tool: resolves inputPath, outputPath and imagePath through this.pathResolver, forwards them with the optional opacity, position and pages arguments to this.pdfUtils.addImageWatermark, and returns a single text item reporting result.message, the resolved paths and result.pageCount.
+   */
+  private async addImageWatermark(args: Record<string, unknown>): Promise<{ content: TextContent[] }> {
+    const inputPath = this.pathResolver.resolve(args.inputPath as string); // cast only; no runtime type check in this method
+    const outputPath = this.pathResolver.resolve(args.outputPath as string);
+    const imagePath = this.pathResolver.resolve(args.imagePath as string); // the image path goes through the same resolver as the PDFs
+    const opacity = args.opacity as number | undefined;
+    const position = args.position as 'center' | 'top-left' | 'top-right' | 'bottom-left' | 'bottom-right' | undefined;
+    const pages = args.pages as number[] | undefined;
+
+    logger.info('Adding image watermark to PDF', { inputPath, outputPath, imagePath });
+
+    const result = await this.pdfUtils.addImageWatermark(inputPath, outputPath, { // undefined options are passed through as-is
+      imagePath,
+      opacity,
+      position,
+      pages,
+    });
+
+    return {
+      content: [
+        {
+          type: 'text',
+          text: `✅ ${result.message}
+
+📄 Input: ${inputPath}
+📝 Output: ${outputPath}
+📊 Pages: ${result.pageCount}
+🖼️ Watermark Image: ${imagePath}`,
+        },
+      ],
+    };
+  }
 }
diff --git a/packages/pdf-parser-core/package.json b/packages/pdf-parser-core/package.json
@@ -1,6 +1,6 @@
 {
   "name": "parseflow-core",
-  "version": "1.7.0",
+  "version": "1.8.0",
   "description": "Document parsing library for ParseFlow - Extract text and data from PDF, Word (docx), and Excel (xlsx) files",
   "type": "module",
   "main": "dist/index.js",
diff --git a/packages/pdf-parser-core/src/PDFUtils.ts b/packages/pdf-parser-core/src/PDFUtils.ts
diff --git a/packages/pdf-parser-core/src/index.ts b/packages/pdf-parser-core/src/index.ts

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "parseflow-core",`
`3`		`- "version": "1.7.0",`
	`3`	`+ "version": "1.8.0",`
`4`	`4`	`"description": "Document parsing library for ParseFlow - Extract text and data from PDF, Word (docx), and Excel (xlsx) files",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "dist/index.js",`