Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
778 changes: 655 additions & 123 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@
"@semantic-release/changelog": "^6.0.3",
"@semantic-release/exec": "^7.1.0",
"@semantic-release/git": "^10.0.1",
"@types/jsdom": "^21.1.7",
"@types/mdast": "^4.0.4",
"@types/node": "^24.0.1",
"@types/turndown": "^5.0.5",
"@types/unist": "^3.0.0",
"@typescript-eslint/eslint-plugin": "^8.34.0",
"@typescript-eslint/parser": "^8.34.0",
Expand Down Expand Up @@ -108,11 +110,15 @@
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.12.3",
"@mozilla/readability": "^0.6.0",
"commander": "^14.0.0",
"glob": "^11.0.3",
"jsdom": "^26.1.0",
"node-html-parser": "^7.0.1",
"remark": "^15.0.0",
"remark-parse": "^11.0.0",
"remark-stringify": "^11.0.0",
"turndown": "^7.2.0",
"unified": "^11.0.0",
"unist-util-visit": "^5.0.0"
},
Expand Down
4 changes: 2 additions & 2 deletions src/cli.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ describe('CLI Entry Point', () => {
it('should set action handlers for commands', async () => {
await import('./cli.js');

// Should call action 9 times (once for each command: convert, move, split, join, merge, index, barrel, toc, validate)
expect(mockAction).toHaveBeenCalledTimes(9);
// Should call action 10 times (once for each command: clip, convert, move, split, join, merge, index, barrel, toc, validate)
expect(mockAction).toHaveBeenCalledTimes(10);
});

it('should add help text for convert command', async () => {
Expand Down
63 changes: 63 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env node

import { Command } from 'commander';
import { clipCommand } from './commands/clip.js';
import { convertCommand } from './commands/convert.js';
import { indexCommand } from './commands/index.js';
import { joinCommand } from './commands/join.js';
Expand All @@ -17,6 +18,68 @@ program
.description('CLI for markdown file operations with intelligent link refactoring')
.version('0.1.0');

// Register the `clip` subcommand (web clipper): accepts one or more URLs (or, with
// --batch, files listing URLs) and hands the parsed arguments/options to `clipCommand`.
program
  .command('clip')
  .description('Convert web pages to markdown (web clipper)')
  .argument('<urls...>', 'URLs to clip or paths to files containing URLs (use --batch)')
  .option('-o, --output <file>', 'Output file name (single URL only)')
  .option('--output-dir <dir>', 'Output directory for clipped files')
  .option('--batch', 'Process multiple URLs from input files')
  .option(
    '--strategy <strategy>',
    'Extraction strategy: auto|readability|manual|full|structured',
    'auto'
  )
  .option(
    '--image-strategy <strategy>',
    'Image handling: skip|link-only|download|base64',
    'link-only'
  )
  .option('--image-dir <dir>', 'Directory for downloaded images', './images')
  .option('--selectors <selectors>', 'CSS selectors for manual extraction (comma-separated)')
  .option('--no-frontmatter', 'Skip frontmatter generation')
  // Commander calls an option parser as parser(value, previous), where `previous` is the
  // default value the first time the option is seen. Passing bare `parseInt` would therefore
  // use the default as the radix (e.g. parseInt('60000', 30000) -> NaN), so wrap it and
  // forward only the value with an explicit base-10 radix.
  .option(
    '--timeout <ms>',
    'Request timeout in milliseconds',
    (value) => Number.parseInt(value, 10),
    30000
  )
  .option('--user-agent <agent>', 'Custom User-Agent string')
  .option('--headers <headers>', 'Custom HTTP headers (JSON format)')
  .option('--cookies <file>', 'Path to cookies file')
  .option('--no-follow-redirects', 'Don\'t follow HTTP redirects')
  // Same wrapper as --timeout: with bare `parseInt` the default 5 became the radix, so
  // `--max-redirects 10` parsed as 5 and `--max-redirects 7` parsed as NaN.
  .option(
    '--max-redirects <count>',
    'Maximum redirects to follow',
    (value) => Number.parseInt(value, 10),
    5
  )
  .option('-d, --dry-run', 'Show what would be clipped without creating files')
  .option('-v, --verbose', 'Show detailed output with processing information')
  .option('--json', 'Output results in JSON format')
  .addHelpText(
    'after',
    `
Examples:
$ markmv clip https://example.com/article
$ markmv clip https://example.com/article -o article.md
$ markmv clip urls.txt --batch --output-dir ./clipped
$ markmv clip https://docs.site.com --strategy manual --selectors "article,.content"
$ markmv clip https://blog.com/post --strategy readability --image-strategy download
$ markmv clip https://example.com --dry-run --verbose

Extraction Strategies:
auto Automatically choose best strategy based on content
readability Mozilla Readability algorithm (best for articles/blogs)
manual Extract using custom CSS selectors
full Extract entire page content
structured Use Schema.org and semantic markup

Image Strategies:
skip Don't process images at all
link-only Keep images as external links (fastest)
download Download images locally and update paths
base64 Embed small images as base64 (increases file size)

Advanced Features:
--headers '{"Authorization": "Bearer token"}' Custom headers for auth
--cookies cookies.txt Use cookies for protected content
--selectors "article,.post-content,main" Custom content selectors
--timeout 60000 Extended timeout for slow sites
--user-agent "Custom Bot 1.0" Custom user agent string`
  )
  .action(clipCommand);

program
.command('convert')
.description('Convert markdown link formats and path resolution')
Expand Down
Loading
Loading