Skip to content

Commit ab95f98

Browse files
committed
feat: add PDF watermark functionality (v1.8.0)
- Add text watermark with custom font, color, opacity, rotation
- Add image watermark support (PNG/JPG)
- Support 5 positions: center, top-left, top-right, bottom-left, bottom-right
- Support page selection for targeted watermarking
- New MCP tools: add_watermark, add_image_watermark
- Total tools: 22
1 parent 3b91ab1 commit ab95f98

File tree

8 files changed

+483
-11
lines changed

8 files changed

+483
-11
lines changed

CHANGELOG.md

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Planned
1111

12-
- Semantic search with vector embeddings
13-
- PDF merge/split functionality
14-
- PDF encryption/decryption
15-
- Performance optimization with streaming and parallel processing
12+
- PDF compression optimization
13+
- Streaming for large files
14+
- LRU caching
15+
16+
## [1.8.0] - 2025-12-18
17+
18+
### 💧 PDF Watermark Release
19+
20+
#### ✨ New Features
21+
22+
- **Text Watermark** - Add customizable text watermarks to PDF
23+
- **Image Watermark** - Add PNG/JPG images as watermarks
24+
- **Position Control** - 5 positions: center, top-left, top-right, bottom-left, bottom-right
25+
- **Page Selection** - Apply watermark to specific pages
26+
27+
#### 📦 New Components
28+
29+
- `PDFUtils.addWatermark()` - Add text watermark with custom font, color, opacity, rotation
30+
- `PDFUtils.addImageWatermark()` - Add image watermark (PNG/JPG support)
31+
- MCP tools: `add_watermark`, `add_image_watermark`
32+
33+
#### 🎨 Watermark Options
34+
35+
- Text, opacity, fontSize, rotation, color (RGB), position, pages
36+
37+
#### 🔢 Total MCP Tools: 22
38+
39+
- PDF: 10 tools (+2)
40+
- Word: 2 tools
41+
- Excel: 2 tools
42+
- PowerPoint: 2 tools
43+
- OCR: 2 tools
44+
- Semantic: 2 tools
45+
- Batch: 2 tools
46+
47+
---
1648

1749
## [1.7.0] - 2025-12-17
1850

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ const results = await parser.searchText('slides.pptx', '关键词');
129129
}
130130
```
131131

132-
### 可用工具(20 个)
132+
### 可用工具(22 个)
133133

134134
| 类别 | 工具 | 描述 |
135135
|------|------|------|
@@ -141,6 +141,8 @@ const results = await parser.searchText('slides.pptx', '关键词');
141141
| | `merge_pdf` | 合并多个 PDF |
142142
| | `split_pdf` | 拆分为单页 |
143143
| | `extract_pdf_pages` | 提取指定页码 |
144+
| | `add_watermark` | 添加文字水印 |
145+
| | `add_image_watermark` | 添加图片水印 |
144146
| **Word** | `extract_word` | 提取文本/HTML |
145147
| | `search_word` | 文本搜索 |
146148
| **Excel** | `extract_excel` | 提取数据 |
@@ -160,6 +162,7 @@ const results = await parser.searchText('slides.pptx', '关键词');
160162

161163
| 版本 | 功能 |
162164
|------|------|
165+
| v1.8.0 | 💧 PDF 水印(文字/图片水印) |
163166
| v1.7.0 | 📦 批量处理(并行处理多文件) |
164167
| v1.6.0 | 🧠 语义搜索(AI 向量嵌入) |
165168
| v1.5.0 | 📄 PDF 合并/拆分/提取 |

examples/watermark-examples.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* PDF watermark feature examples
3+
*/
4+
5+
import { PDFUtils } from 'parseflow-core';
6+
import path from 'path';
7+
8+
const pdfUtils = new PDFUtils();
9+
10+
// Example 1: add a text watermark — "CONFIDENTIAL" at 30% opacity, size 48, rotated 45, centered — then log the returned message.
11+
async function addTextWatermark() {
12+
const result = await pdfUtils.addWatermark(
13+
'path/to/input.pdf',
14+
'path/to/output.pdf',
15+
{
16+
text: 'CONFIDENTIAL',
17+
opacity: 0.3,
18+
fontSize: 48,
19+
rotation: 45,
20+
position: 'center',
21+
}
22+
);
23+
console.log(result.message);
24+
}
25+
26+
// Example 2: watermark with a custom color — an unrotated "DRAFT" in the bottom-right corner; the result is not inspected.
27+
async function addColoredWatermark() {
28+
await pdfUtils.addWatermark(
29+
'path/to/input.pdf',
30+
'path/to/output.pdf',
31+
{
32+
text: 'DRAFT',
33+
opacity: 0.5,
34+
fontSize: 60,
35+
rotation: 0,
36+
color: { r: 1, g: 0, b: 0 }, // red (RGB components given as 0-1 values)
37+
position: 'bottom-right',
38+
}
39+
);
40+
}
41+
42+
// Example 3: watermark only selected pages; every option other than text and pages is omitted.
43+
async function addWatermarkToPages() {
44+
await pdfUtils.addWatermark(
45+
'path/to/input.pdf',
46+
'path/to/output.pdf',
47+
{
48+
text: 'INTERNAL',
49+
pages: [1, 3, 5], // only pages 1, 3 and 5
50+
}
51+
);
52+
}
53+
54+
// Example 4: add an image watermark — a PNG logo at 20% opacity, centered; the result is not inspected.
55+
async function addImageWatermark() {
56+
await pdfUtils.addImageWatermark(
57+
'path/to/input.pdf',
58+
'path/to/output.pdf',
59+
{
60+
imagePath: 'path/to/logo.png',
61+
opacity: 0.2,
62+
position: 'center',
63+
}
64+
);
65+
}
66+
67+
export { addTextWatermark, addColoredWatermark, addWatermarkToPages, addImageWatermark };

packages/mcp-server/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"name": "parseflow-mcp-server",
3-
"version": "1.7.1",
3+
"version": "1.8.0",
44
"mcpName": "io.github.libres-coder/parseflow",
5-
"description": "AI-powered document parsing with 20 tools: PDF, Word, Excel, PowerPoint, OCR, semantic search, and batch processing",
5+
"description": "AI-powered document parsing with 22 tools: PDF (watermark, merge, split), Word, Excel, PowerPoint, OCR, semantic search, and batch processing",
66
"keywords": [
77
"mcp",
88
"mcp-server",

packages/mcp-server/src/tools/index.ts

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ export class ToolHandler {
8787
return await this.batchExtract(args);
8888
case 'batch_search':
8989
return await this.batchSearch(args);
90+
case 'add_watermark':
91+
return await this.addWatermark(args);
92+
case 'add_image_watermark':
93+
return await this.addImageWatermark(args);
9094
default:
9195
throw new Error(`Unknown tool: ${name}`);
9296
}
@@ -589,6 +593,103 @@ export class ToolHandler {
589593
required: ['paths', 'query'],
590594
},
591595
},
596+
{
597+
name: 'add_watermark',
598+
description:
599+
'Add text watermark to PDF pages. Supports custom text, opacity, rotation, color, and position.',
600+
inputSchema: {
601+
type: 'object',
602+
properties: {
603+
inputPath: {
604+
type: 'string',
605+
description: 'Absolute path to the input PDF file',
606+
},
607+
outputPath: {
608+
type: 'string',
609+
description: 'Absolute path for the output PDF file',
610+
},
611+
text: {
612+
type: 'string',
613+
description: 'Watermark text (default: "WATERMARK")',
614+
},
615+
opacity: {
616+
type: 'number',
617+
description: 'Opacity value between 0 and 1 (default: 0.3)',
618+
default: 0.3,
619+
},
620+
fontSize: {
621+
type: 'number',
622+
description: 'Font size in points (default: 48)',
623+
default: 48,
624+
},
625+
rotation: {
626+
type: 'number',
627+
description: 'Rotation angle in degrees (default: 45)',
628+
default: 45,
629+
},
630+
color: {
631+
type: 'object',
632+
description: 'RGB color object with r, g, b values (0-1)',
633+
properties: {
634+
r: { type: 'number' },
635+
g: { type: 'number' },
636+
b: { type: 'number' },
637+
},
638+
},
639+
position: {
640+
type: 'string',
641+
description: 'Watermark position: center, top-left, top-right, bottom-left, bottom-right',
642+
enum: ['center', 'top-left', 'top-right', 'bottom-left', 'bottom-right'],
643+
default: 'center',
644+
},
645+
pages: {
646+
type: 'array',
647+
items: { type: 'number' },
648+
description: 'Array of page numbers to watermark (1-indexed). If not specified, all pages',
649+
},
650+
},
651+
required: ['inputPath', 'outputPath'],
652+
},
653+
},
654+
{
655+
name: 'add_image_watermark',
656+
description:
657+
'Add image watermark to PDF pages. Supports PNG and JPG images with custom opacity and position.',
658+
inputSchema: {
659+
type: 'object',
660+
properties: {
661+
inputPath: {
662+
type: 'string',
663+
description: 'Absolute path to the input PDF file',
664+
},
665+
outputPath: {
666+
type: 'string',
667+
description: 'Absolute path for the output PDF file',
668+
},
669+
imagePath: {
670+
type: 'string',
671+
description: 'Absolute path to the watermark image (PNG or JPG)',
672+
},
673+
opacity: {
674+
type: 'number',
675+
description: 'Opacity value between 0 and 1 (default: 0.3)',
676+
default: 0.3,
677+
},
678+
position: {
679+
type: 'string',
680+
description: 'Watermark position: center, top-left, top-right, bottom-left, bottom-right',
681+
enum: ['center', 'top-left', 'top-right', 'bottom-left', 'bottom-right'],
682+
default: 'center',
683+
},
684+
pages: {
685+
type: 'array',
686+
items: { type: 'number' },
687+
description: 'Array of page numbers to watermark (1-indexed). If not specified, all pages',
688+
},
689+
},
690+
required: ['inputPath', 'outputPath', 'imagePath'],
691+
},
692+
},
592693
];
593694
}
594695

@@ -1253,4 +1354,80 @@ ${r.matches.map((m) => ` • ${m.text}${m.context ? ` [${m.context}]` : ''}`).
12531354
],
12541355
};
12551356
}
1357+
1358+
/**
1359+
* Text watermark tool: handles the `add_watermark` call.
*
* Resolves `args.inputPath` and `args.outputPath` through `this.pathResolver`,
* then passes the optional `text`, `opacity`, `fontSize`, `rotation`, `color`,
* `position` and `pages` arguments to `this.pdfUtils.addWatermark` as-is.
* The arguments are only type-cast here, not validated; any value left
* `undefined` is forwarded as `undefined`.
*
* @param args - Raw tool arguments keyed by name.
* @returns A single text content item summarizing the result message, the
*   resolved input/output paths, the reported page count and the watermark
*   text (shown as "WATERMARK" when `text` is missing or empty).
1360+
*/
1361+
private async addWatermark(args: Record<string, unknown>): Promise<{ content: TextContent[] }> {
1362+
const inputPath = this.pathResolver.resolve(args.inputPath as string);
1363+
const outputPath = this.pathResolver.resolve(args.outputPath as string);
1364+
const text = args.text as string | undefined;
1365+
const opacity = args.opacity as number | undefined;
1366+
const fontSize = args.fontSize as number | undefined;
1367+
const rotation = args.rotation as number | undefined;
1368+
const color = args.color as { r: number; g: number; b: number } | undefined;
1369+
const position = args.position as 'center' | 'top-left' | 'top-right' | 'bottom-left' | 'bottom-right' | undefined;
1370+
const pages = args.pages as number[] | undefined;
1371+
1372+
logger.info('Adding watermark to PDF', { inputPath, outputPath, text });
1373+
1374+
const result = await this.pdfUtils.addWatermark(inputPath, outputPath, {
1375+
text,
1376+
opacity,
1377+
fontSize,
1378+
rotation,
1379+
color,
1380+
position,
1381+
pages,
1382+
});
1383+
1384+
return {
1385+
content: [
1386+
{
1387+
type: 'text',
1388+
text: `✅ ${result.message}
1389+
1390+
📄 Input: ${inputPath}
1391+
📝 Output: ${outputPath}
1392+
📊 Pages: ${result.pageCount}
1393+
💧 Watermark: "${text || 'WATERMARK'}"`,
1394+
},
1395+
],
1396+
};
1397+
}
1398+
1399+
/**
1400+
* Image watermark tool: handles the `add_image_watermark` call.
*
* Resolves `args.inputPath`, `args.outputPath` and `args.imagePath` through
* `this.pathResolver`, then passes the resolved image path together with the
* optional `opacity`, `position` and `pages` arguments to
* `this.pdfUtils.addImageWatermark`. The arguments are only type-cast here,
* not validated.
*
* @param args - Raw tool arguments keyed by name.
* @returns A single text content item summarizing the result message, the
*   resolved input/output paths, the reported page count and the resolved
*   watermark image path.
1401+
*/
1402+
private async addImageWatermark(args: Record<string, unknown>): Promise<{ content: TextContent[] }> {
1403+
const inputPath = this.pathResolver.resolve(args.inputPath as string);
1404+
const outputPath = this.pathResolver.resolve(args.outputPath as string);
1405+
const imagePath = this.pathResolver.resolve(args.imagePath as string);
1406+
const opacity = args.opacity as number | undefined;
1407+
const position = args.position as 'center' | 'top-left' | 'top-right' | 'bottom-left' | 'bottom-right' | undefined;
1408+
const pages = args.pages as number[] | undefined;
1409+
1410+
logger.info('Adding image watermark to PDF', { inputPath, outputPath, imagePath });
1411+
1412+
const result = await this.pdfUtils.addImageWatermark(inputPath, outputPath, {
1413+
imagePath,
1414+
opacity,
1415+
position,
1416+
pages,
1417+
});
1418+
1419+
return {
1420+
content: [
1421+
{
1422+
type: 'text',
1423+
text: `✅ ${result.message}
1424+
1425+
📄 Input: ${inputPath}
1426+
📝 Output: ${outputPath}
1427+
📊 Pages: ${result.pageCount}
1428+
🖼️ Watermark Image: ${imagePath}`,
1429+
},
1430+
],
1431+
};
1432+
}
12561433
}

packages/pdf-parser-core/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "parseflow-core",
3-
"version": "1.7.0",
3+
"version": "1.8.0",
44
"description": "Document parsing library for ParseFlow - Extract text and data from PDF, Word (docx), and Excel (xlsx) files",
55
"type": "module",
66
"main": "dist/index.js",

0 commit comments

Comments
 (0)