diff --git a/README.md b/README.md index c48dbfac..7980278c 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,31 @@ For contributors, regenerate manifests with: ./scripts/publish.sh ``` +### OpenCode + +This repository includes `opencode.json` for first-class OpenCode integration. + +The configuration is auto-generated from the skills/ directory and includes: + +- Skill discovery configuration (`skills/` directory, Agent Skills format) +- All skill entries with names, descriptions, and paths +- AGENTS context file reference (`agents/AGENTS.md`) +- MCP server settings aligned with other agent configurations + +**For users:** + +OpenCode will automatically discover skills from this repository when it's added as a skill source. + +**For contributors:** + +Regenerate `opencode.json` with: + +```bash +./scripts/publish.sh +``` + +The generation script (`scripts/generate_opencode_config.py`) ensures OpenCode configuration stays consistent with Claude, Cursor, and Gemini formats. + ## Skills This repository contains a few skills to get you started. You can also contribute your own skills to the repository. diff --git a/opencode.json b/opencode.json new file mode 100644 index 00000000..b48dc0a3 --- /dev/null +++ b/opencode.json @@ -0,0 +1,89 @@ +{ + "name": "huggingface-skills", + "version": "1.0.1", + "description": "Agent Skills for AI/ML tasks including dataset creation, model training, evaluation, and research paper publishing on Hugging Face Hub", + "skills": { + "discovery": { + "locations": [ + "skills/" + ], + "format": "agent-skills" + }, + "entries": [ + { + "name": "hf-cli", + "description": "Hugging Face Hub CLI (`hf`) for downloading, uploading, and managing repositories, models, datasets, and Spaces on the Hugging Face Hub. 
Replaces now deprecated `huggingface-cli` command.", + "path": "skills/hf-cli" + }, + { + "name": "hugging-face-dataset-viewer", + "description": "Use this skill for Hugging Face Dataset Viewer API workflows that fetch subset/split metadata, paginate rows, search text, apply filters, download parquet URLs, and read size or statistics.", + "path": "skills/hugging-face-dataset-viewer" + }, + { + "name": "hugging-face-datasets", + "description": "Create and manage datasets on Hugging Face Hub. Supports initializing repos, defining configs/system prompts, streaming row updates, and SQL-based dataset querying/transformation. Designed to work alongside HF MCP server for comprehensive dataset workflows.", + "path": "skills/hugging-face-datasets" + }, + { + "name": "hugging-face-evaluation", + "description": "Add and manage evaluation results in Hugging Face model cards. Supports extracting eval tables from README content, importing scores from Artificial Analysis API, and running custom model evaluations with vLLM/lighteval. Works with the model-index metadata format.", + "path": "skills/hugging-face-evaluation" + }, + { + "name": "hugging-face-jobs", + "description": "This skill should be used when users want to run any workload on Hugging Face Jobs infrastructure. Covers UV scripts, Docker-based jobs, hardware selection, cost estimation, authentication with tokens, secrets management, timeout configuration, and result persistence. Designed for general-purpose compute workloads including data processing, inference, experiments, batch jobs, and any Python-based tasks. 
Should be invoked for tasks involving cloud compute, GPU workloads, or when users mention running jobs on Hugging Face infrastructure without local setup.", + "path": "skills/hugging-face-jobs" + }, + { + "name": "hugging-face-model-trainer", + "description": "This skill should be used when users want to train or fine-tune language models using TRL (Transformer Reinforcement Learning) on Hugging Face Jobs infrastructure. Covers SFT, DPO, GRPO and reward modeling training methods, plus GGUF conversion for local deployment. Includes guidance on the TRL Jobs package, UV scripts with PEP 723 format, dataset preparation and validation, hardware selection, cost estimation, Trackio monitoring, Hub authentication, and model persistence. Should be invoked for tasks involving cloud GPU training, GGUF conversion, or when users mention training on Hugging Face Jobs without local GPU setup.", + "path": "skills/hugging-face-model-trainer" + }, + { + "name": "hugging-face-paper-pages", + "description": "Look up and read Hugging Face paper pages in markdown, and use the papers API for structured metadata such as authors, linked models/datasets/spaces, Github repo and project page. Use when the user shares a Hugging Face paper page URL, an arXiv URL or ID, or asks to summarize, explain, or analyze an AI research paper.", + "path": "skills/hugging-face-paper-pages" + }, + { + "name": "hugging-face-paper-publisher", + "description": "Publish and manage research papers on Hugging Face Hub. Supports creating paper pages, linking papers to models/datasets, claiming authorship, and generating professional markdown-based research articles.", + "path": "skills/hugging-face-paper-publisher" + }, + { + "name": "hugging-face-tool-builder", + "description": "Use this skill when the user wants to build tool/scripts or achieve a task where using data from the Hugging Face API would help. This is especially useful when chaining or combining API calls or the task will be repeated/automated. 
This Skill creates a reusable script to fetch, enrich or process data.", + "path": "skills/hugging-face-tool-builder" + }, + { + "name": "hugging-face-trackio", + "description": "Track and visualize ML training experiments with Trackio. Use when logging metrics during training (Python API), firing alerts for training diagnostics, or retrieving/analyzing logged metrics (CLI). Supports real-time dashboard visualization, alerts with webhooks, HF Space syncing, and JSON output for automation.", + "path": "skills/hugging-face-trackio" + }, + { + "name": "hugging-face-vision-trainer", + "description": "Trains and fine-tunes vision models for object detection (D-FINE, RT-DETR v2, DETR, YOLOS), image classification (timm models — MobileNetV3, MobileViT, ResNet, ViT/DINOv3 — plus any Transformers classifier), and SAM/SAM2 segmentation using Hugging Face Transformers on Hugging Face Jobs cloud GPUs. Covers COCO-format dataset preparation, Albumentations augmentation, mAP/mAR evaluation, accuracy metrics, SAM segmentation with bbox/point prompts, DiceCE loss, hardware selection, cost estimation, Trackio monitoring, and Hub persistence. Use when users mention training object detection, image classification, SAM, SAM2, segmentation, image matting, DETR, D-FINE, RT-DETR, ViT, timm, MobileNet, ResNet, bounding box models, or fine-tuning vision models on Hugging Face Jobs.", + "path": "skills/hugging-face-vision-trainer" + }, + { + "name": "gradio", + "description": "Build Gradio web UIs and demos in Python. Use when creating or editing Gradio apps, components, event listeners, layouts, or chatbots.", + "path": "skills/huggingface-gradio" + }, + { + "name": "transformers-js", + "description": "Use Transformers.js to run state-of-the-art machine learning models directly in JavaScript/TypeScript. 
#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
"""Generate OpenCode configuration from existing repo metadata.

Outputs:
- opencode.json

Design goals:
- Keep OpenCode configuration in sync with other agent formats.
- Reuse .claude-plugin/plugin.json as primary metadata source.
- Discover skills from skills/*/SKILL.md.
- Include AGENTS context from agents/AGENTS.md.
- Align MCP server settings with .mcp.json / gemini-extension.json.
"""

import argparse
import json
import re
import sys
from pathlib import Path

# This script lives in scripts/, so the repository root is one level up.
ROOT = Path(__file__).resolve().parent.parent
CLAUDE_PLUGIN_MANIFEST = ROOT / ".claude-plugin" / "plugin.json"
GEMINI_EXTENSION = ROOT / "gemini-extension.json"
MCP_CONFIG = ROOT / ".mcp.json"  # currently unused; kept for parity with sibling generators
AGENTS_CONTEXT = ROOT / "agents" / "AGENTS.md"
OPENCODE_CONFIG = ROOT / "opencode.json"

DEFAULT_MCP_SERVER_NAME = "huggingface-skills"
DEFAULT_MCP_URL = "https://huggingface.co/mcp?login"


def load_json(path: Path) -> dict:
    """Load *path* as JSON, raising FileNotFoundError with a clear message."""
    if not path.exists():
        raise FileNotFoundError(f"Missing required file: {path}")
    return json.loads(path.read_text(encoding="utf-8"))


def parse_frontmatter(text: str) -> dict[str, str]:
    """Parse the YAML frontmatter block at the start of *text*.

    Deliberately not a full YAML parser: handles ``key: value`` pairs,
    double-quoted scalars, block-scalar indicators (``>``/``|`` plus
    chomping variants), and indented continuation lines — enough for
    SKILL.md frontmatter. Multi-line values are folded with single spaces.

    Returns an empty dict when no ``---`` delimiters are found.
    """
    match = re.search(r"^---\s*\n(.*?)\n---\s*", text, re.DOTALL)
    if not match:
        return {}
    data: dict[str, str] = {}
    current_key: str | None = None
    for line in match.group(1).splitlines():
        if current_key and line[:1].isspace():
            # Indented continuation of a multi-line value: fold onto the
            # previous key. The previous implementation dropped these lines,
            # truncating multi-line skill descriptions and making --check
            # diverge from any config containing them.
            data[current_key] = (data[current_key] + " " + line.strip()).strip()
            continue
        if ":" not in line:
            continue
        key, value = line.split(":", 1)
        current_key = key.strip()
        raw = value.strip().strip('"')
        if raw in {">", ">-", ">+", "|", "|-", "|+"}:
            # Block-scalar indicator: the real value arrives on the
            # following indented continuation lines.
            raw = ""
        data[current_key] = raw
    return data


def collect_skills() -> list[dict[str, str]]:
    """Collect skill metadata from every skills/*/SKILL.md frontmatter."""
    skills: list[dict[str, str]] = []
    for skill_md in sorted(ROOT.glob("skills/*/SKILL.md")):
        meta = parse_frontmatter(skill_md.read_text(encoding="utf-8"))
        name = meta.get("name", "").strip()
        if not name:
            # A SKILL.md without a name cannot be registered; skip it.
            continue
        skills.append({
            "name": name,
            "description": meta.get("description", "").strip(),
            "path": str(skill_md.parent.relative_to(ROOT)),
        })
    return skills


def get_mcp_server_url() -> str:
    """Return the MCP server URL from gemini-extension.json, or the default.

    Falls back to DEFAULT_MCP_URL when the file is missing or malformed so
    the generated config stays aligned with the other agent formats.
    """
    try:
        gemini = load_json(GEMINI_EXTENSION)
    except (FileNotFoundError, json.JSONDecodeError):
        # The previous version caught KeyError here, which dict.get never
        # raises; a malformed JSON file is the realistic failure mode.
        return DEFAULT_MCP_URL
    servers = gemini.get("mcpServers", {})
    return servers.get(DEFAULT_MCP_SERVER_NAME, {}).get("url", DEFAULT_MCP_URL)


def build_opencode_config() -> dict:
    """Build the opencode.json configuration dict from repo metadata."""
    try:
        claude = load_json(CLAUDE_PLUGIN_MANIFEST)
    except FileNotFoundError:
        claude = {}

    config = {
        "name": claude.get("name", "huggingface-skills"),
        "version": claude.get("version", "1.0.0"),
        "description": claude.get(
            "description", "Hugging Face ecosystem skills for OpenCode"
        ),
        "skills": {
            "discovery": {
                "locations": ["skills/"],
                "format": "agent-skills",
            },
            # collect_skills already yields exactly name/description/path.
            "entries": collect_skills(),
        },
        "context": {
            "agentsFile": "agents/AGENTS.md" if AGENTS_CONTEXT.exists() else None
        },
        "mcpServers": {
            DEFAULT_MCP_SERVER_NAME: {
                "url": get_mcp_server_url(),
                "description": "Hugging Face MCP server for dataset/model operations",
            }
        },
    }

    # Drop the context section entirely when there is no AGENTS file, so the
    # generated JSON never contains a null agentsFile entry.
    if not config["context"]["agentsFile"]:
        del config["context"]

    return config


def write_opencode_config(config: dict) -> None:
    """Write opencode.json with 2-space indentation and a trailing newline."""
    OPENCODE_CONFIG.write_text(
        json.dumps(config, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )


def main() -> int:
    """CLI entry point. Returns a process exit code (0 on success)."""
    parser = argparse.ArgumentParser(
        description="Generate OpenCode configuration from skill metadata"
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check if generated config matches existing (exit 1 if different)",
    )
    args = parser.parse_args()

    try:
        new_config = build_opencode_config()

        if args.check:
            if not OPENCODE_CONFIG.exists():
                print(f"Missing: {OPENCODE_CONFIG}", file=sys.stderr)
                print("Run: ./scripts/publish.sh", file=sys.stderr)
                return 1
            if load_json(OPENCODE_CONFIG) != new_config:
                print(f"Outdated: {OPENCODE_CONFIG}", file=sys.stderr)
                print("Run: ./scripts/publish.sh", file=sys.stderr)
                return 1
            print(f"✓ {OPENCODE_CONFIG.name} is up to date")
            return 0

        write_opencode_config(new_config)
        print(f"Generated: {OPENCODE_CONFIG}")
        return 0
    except Exception as e:  # top-level CLI boundary: report and exit nonzero
        print(f"Error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())