diff --git a/README.md b/README.md index d0a89a9..a92cc77 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,29 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy - ROCm-only tests carry `@pytest.mark.rocm`; run with `pytest --run-rocm tests/rocm_controller`. - Markers: `rocm` (needs ROCm stack) and `large_memory` (opt-in locally). +### MCP endpoint (experimental) + +- Start a simple JSON-RPC server on stdin/stdout: + ```bash + keep-gpu-mcp-server + ``` +- Example request (one per line): + ```json + {"id": 1, "method": "start_keep", "params": {"gpu_ids": [0], "vram": "512MB", "interval": 60, "busy_threshold": 20}} + ``` +- Methods: `start_keep`, `stop_keep` (optional `job_id`, default stops all), `status` (optional `job_id`), `list_gpus` (basic info). +- Minimal client config (stdio MCP): + ```yaml + servers: + keepgpu: + command: ["keep-gpu-mcp-server"] + adapter: stdio + ``` + ## Contributing Contributions are welcome—especially around ROCm support, platform fallbacks, and scheduler-specific recipes. Open an issue or PR if you hit edge cases on your cluster. +See `docs/contributing.md` for dev setup, test commands, and PR tips. ## Credits diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..9754a33 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,62 @@ +# Contributing & Development + +Thanks for helping improve KeepGPU! This page collects the key commands and +expectations so you can get productive quickly and avoid surprises in CI. + +## Setup + +- Clone and install dev extras: + ```bash + git clone https://github.com/Wangmerlyn/KeepGPU.git + cd KeepGPU + pip install -e ".[dev]" # add .[rocm] if you need ROCm SMI + ``` +- Ensure you have the right torch build for your platform (CUDA/ROCm/CPU). +- Optional: install `nvidia-ml-py` (CUDA) or `rocm-smi` (ROCm) for telemetry. + +## Tests + +- Fast CUDA suite: + ```bash + pytest tests/cuda_controller tests/global_controller \ + tests/utilities/test_platform_manager.py tests/test_cli_thresholds.py + ``` +- ROCm-only tests are marked `rocm` and skipped by default; run with: + ```bash + pytest --run-rocm tests/rocm_controller + ``` +- MCP + utilities: + ```bash + pytest tests/mcp tests/utilities/test_gpu_info.py + ``` +- All tests honor markers `rocm` and `large_memory`; avoid enabling + `large_memory` in CI. + +## Lint/format + +- Run pre-commit hooks locally before pushing: + ```bash + pre-commit run --all-files + ``` + +## MCP server (experimental) + +- Start: `keep-gpu-mcp-server` (stdin/stdout JSON-RPC) +- Methods: `start_keep`, `stop_keep`, `status`, `list_gpus` +- Example request: + ```json + {"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}} + ``` + +## Pull requests + +- Keep changesets focused; small commits are welcome. +- Add/adjust tests for new behavior; skip GPU-specific tests in CI by way of markers. +- Update docs/README when behavior or interfaces change. +- Stick to the existing style (Typer CLI, Rich logging) and keep code paths + simple—avoid over-engineering. + +## Support + +- Issues/PRs: https://github.com/Wangmerlyn/KeepGPU +- Code of Conduct: see `CODE_OF_CONDUCT.rst` diff --git a/docs/getting-started.md b/docs/getting-started.md index f3949fe..00b8f55 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -45,6 +45,38 @@ understand the minimum knobs you need to keep a GPU occupied. 
- Fast CUDA checks: `pytest tests/cuda_controller tests/global_controller tests/utilities/test_platform_manager.py tests/test_cli_thresholds.py` - ROCm-only tests are marked `rocm`; run with `pytest --run-rocm tests/rocm_controller`. +## MCP endpoint (experimental) + +For automation clients that speak JSON-RPC (MCP-style), KeepGPU ships a tiny +stdin/stdout server: + +```bash +keep-gpu-mcp-server +# each request is a single JSON line; example: +echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server +``` + +Supported methods: +- `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)` +- `status(job_id?)` +- `stop_keep(job_id?)` (no job_id stops all) +- `list_gpus()` (basic info) + +### Example MCP client config (stdio) + +If your agent expects an MCP server definition, a minimal stdio config looks like: + +```yaml +servers: + keepgpu: + description: "KeepGPU MCP server" + command: ["keep-gpu-mcp-server"] + adapter: stdio +``` + +Tools exposed: `start_keep`, `stop_keep`, `status`, `list_gpus`. Each request is +a single JSON line; see above for an example payload. + === "Editable dev install" ```bash git clone https://github.com/Wangmerlyn/KeepGPU.git diff --git a/mkdocs.yml b/mkdocs.yml index 6c3a200..d26c9b8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -23,6 +23,8 @@ nav: - Reference: - CLI Reference: reference/cli.md - API Reference: reference/api.md + - Project: + - Contributing: contributing.md plugins: - search diff --git a/pyproject.toml b/pyproject.toml index dedf135..c5b859b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ [project.scripts] keep-gpu = "keep_gpu.cli:app" +keep-gpu-mcp-server = "keep_gpu.mcp.server:main" [project.optional-dependencies] dev = [ diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py new file mode 100644 index 0000000..47a06f4 --- /dev/null +++ b/src/keep_gpu/mcp/server.py @@ -0,0 +1,157 @@ +""" +Minimal MCP-style JSON-RPC server for KeepGPU. + +The server reads JSON lines from stdin and writes JSON responses to stdout. 
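+Each response echoes the request "id" and carries either a "result" payload or
+an "error" object with a "message" string.
+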
+Supported methods: + - start_keep(gpu_ids, vram, interval, busy_threshold, job_id) + - stop_keep(job_id=None) # None stops all + - status(job_id=None) # None lists all +""" + +from __future__ import annotations + +import atexit +import json +import sys +import uuid +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional + +from keep_gpu.global_gpu_controller.global_gpu_controller import GlobalGPUController +from keep_gpu.utilities.gpu_info import get_gpu_info +from keep_gpu.utilities.logger import setup_logger + +logger = setup_logger(__name__) + + +@dataclass +class Session: + controller: GlobalGPUController + params: Dict[str, Any] + + +class KeepGPUServer: + def __init__( + self, + controller_factory: Optional[Callable[..., GlobalGPUController]] = None, + ) -> None: + self._sessions: Dict[str, Session] = {} + self._controller_factory = controller_factory or GlobalGPUController + atexit.register(self.shutdown) + + def start_keep( + self, + gpu_ids: Optional[List[int]] = None, + vram: str = "1GiB", + interval: int = 300, + busy_threshold: int = -1, + job_id: Optional[str] = None, + ) -> Dict[str, Any]: + job_id = job_id or str(uuid.uuid4()) + if job_id in self._sessions: + raise ValueError(f"job_id {job_id} already exists") + + controller = self._controller_factory( + gpu_ids=gpu_ids, + interval=interval, + vram_to_keep=vram, + busy_threshold=busy_threshold, + ) + controller.keep() + self._sessions[job_id] = Session( + controller=controller, + params={ + "gpu_ids": gpu_ids, + "vram": vram, + "interval": interval, + "busy_threshold": busy_threshold, + }, + ) + logger.info("Started keep session %s on GPUs %s", job_id, gpu_ids) + return {"job_id": job_id} + + def stop_keep(self, job_id: Optional[str] = None) -> Dict[str, Any]: + if job_id: + session = self._sessions.pop(job_id, None) + if session: + session.controller.release() + logger.info("Stopped keep session %s", job_id) + return {"stopped": [job_id]} + return {"stopped": [], "message": "job_id not found"} + + stopped_ids = list(self._sessions.keys()) + for job_id in stopped_ids: + session = self._sessions.pop(job_id) + session.controller.release() + if stopped_ids: + logger.info("Stopped sessions: %s", stopped_ids) + return {"stopped": stopped_ids} + + def status(self, job_id: Optional[str] = None) -> Dict[str, Any]: + if job_id: + session = self._sessions.get(job_id) + if not session: + return {"active": False, "job_id": job_id} + return { + "active": True, + "job_id": job_id, + "params": session.params, + } + return { + "active_jobs": [ + {"job_id": jid, "params": sess.params} + for jid, sess in self._sessions.items() + ] + } + + def list_gpus(self) -> Dict[str, Any]: + """Return detailed GPU info (id, name, memory, utilization).""" + infos = get_gpu_info() + return {"gpus": infos} + + def shutdown(self) -> None: + try: + self.stop_keep(None) + except Exception: # pragma: no cover - defensive + # Avoid noisy errors during interpreter teardown + return + + +def _handle_request(server: KeepGPUServer, payload: Dict[str, Any]) -> Dict[str, Any]: + method = payload.get("method") + params = payload.get("params", {}) or {} + req_id = payload.get("id") + try: + if method == "start_keep": + result = server.start_keep(**params) + elif method == "stop_keep": + result = server.stop_keep(**params) + elif method == "status": + result = server.status(**params) + elif method == "list_gpus": + result = server.list_gpus() + else: + raise ValueError(f"Unknown method: {method}") + return {"id": req_id, "result": 
result} + except Exception as exc: # pragma: no cover - defensive + logger.exception("Request failed") + return {"id": req_id, "error": {"message": str(exc)}} + + +def main() -> None: + server = KeepGPUServer() + for line in sys.stdin: + line = line.strip() + if not line: + continue + try: + payload = json.loads(line) + response = _handle_request(server, payload) + except Exception as exc: + response = {"error": {"message": str(exc)}} + sys.stdout.write(json.dumps(response) + "\n") + sys.stdout.flush() + + +if __name__ == "__main__": + main() diff --git a/src/keep_gpu/utilities/gpu_info.py b/src/keep_gpu/utilities/gpu_info.py new file mode 100644 index 0000000..babf540 --- /dev/null +++ b/src/keep_gpu/utilities/gpu_info.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +from typing import Any, Dict, List + +import torch + +from keep_gpu.utilities.logger import setup_logger + +logger = setup_logger(__name__) + + +def _query_nvml() -> List[Dict[str, Any]]: + import pynvml + + pynvml.nvmlInit() + infos: List[Dict[str, Any]] = [] + try: + count = pynvml.nvmlDeviceGetCount() + for idx in range(count): + handle = pynvml.nvmlDeviceGetHandleByIndex(idx) + mem = pynvml.nvmlDeviceGetMemoryInfo(handle) + util = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu + name = pynvml.nvmlDeviceGetName(handle) + if isinstance(name, bytes): + name = name.decode(errors="ignore") + infos.append( + { + "id": idx, + "platform": "cuda", + "name": name, + "memory_total": int(mem.total), + "memory_used": int(mem.used), + "utilization": int(util), + } + ) + finally: + try: + pynvml.nvmlShutdown() + except Exception: + pass + return infos + + +def _query_rocm() -> List[Dict[str, Any]]: + try: + import rocm_smi # type: ignore + except Exception as exc: # pragma: no cover - env-specific + logger.debug("rocm_smi import failed: %s", exc) + return [] + + infos: List[Dict[str, Any]] = [] + current_device = None + try: + rocm_smi.rsmi_init() + if torch.cuda.is_available(): + current_device = torch.cuda.current_device() + # Use torch to enumerate devices for names/memory + count = torch.cuda.device_count() if torch.cuda.is_available() else 0 + for idx in range(count): + util = None + try: + util = int(rocm_smi.rsmi_dev_busy_percent_get(idx)) + except Exception as exc: + logger.debug("ROCm util query failed for %s: %s", idx, exc) + + try: + torch.cuda.set_device(idx) + free, total = torch.cuda.mem_get_info() + used = total - free + except Exception: + total = used = None + + try: + name = torch.cuda.get_device_name(idx) + except Exception: + name = f"rocm:{idx}" + + infos.append( + { + "id": idx, + "platform": "rocm", + "name": name, + "memory_total": int(total) if total is not None else None, + "memory_used": int(used) if used is not None else None, + "utilization": util, + } + ) + finally: + if current_device is not None: + try: + torch.cuda.set_device(current_device) + except Exception: + pass + try: + rocm_smi.rsmi_shut_down() + except Exception: + pass + return infos + + +def _query_torch() -> List[Dict[str, Any]]: + infos: List[Dict[str, Any]] = [] + if not torch.cuda.is_available(): + return infos + current_device = torch.cuda.current_device() + try: + count = torch.cuda.device_count() + for idx in range(count): + torch.cuda.set_device(idx) + try: + free, total = torch.cuda.mem_get_info() + used = total - free + except Exception: + total = used = None + try: + name = torch.cuda.get_device_name(idx) + except Exception: + name = f"cuda:{idx}" + infos.append( + { + "id": idx, + "platform": "cuda" if 
torch.version.hip is None else "rocm", + "name": name, + "memory_total": int(total) if total is not None else None, + "memory_used": int(used) if used is not None else None, + "utilization": None, + } + ) + except Exception as exc: # pragma: no cover - defensive + logger.debug("Torch GPU info failed: %s", exc) + finally: + try: + torch.cuda.set_device(current_device) + except Exception: + pass + return infos + + +def get_gpu_info() -> List[Dict[str, Any]]: + """ + Return a list of GPU info dicts: id, platform, name, memory_total, memory_used, utilization. + Tries NVML first (CUDA), then ROCm SMI, then falls back to torch.cuda data. + """ + try: + infos = _query_nvml() + if infos: + return infos + except Exception as exc: + logger.debug("NVML info failed: %s", exc) + + try: + infos = _query_rocm() + if infos: + return infos + except Exception as exc: + logger.debug("ROCm info failed: %s", exc) + + return _query_torch() + + +__all__ = ["get_gpu_info"] diff --git a/tests/conftest.py b/tests/conftest.py index 22d1680..33fd177 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,11 +11,6 @@ def pytest_addoption(parser): ) -def pytest_configure(config): - config.addinivalue_line("markers", "rocm: tests that require ROCm stack") - config.addinivalue_line("markers", "large_memory: tests that use large VRAM") - - def pytest_collection_modifyitems(config, items): if config.getoption("--run-rocm"): return diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py new file mode 100644 index 0000000..494deb4 --- /dev/null +++ b/tests/mcp/test_server.py @@ -0,0 +1,95 @@ +from keep_gpu.mcp.server import KeepGPUServer, _handle_request + + +class DummyController: + def __init__(self, gpu_ids=None, interval=0, vram_to_keep=None, busy_threshold=0): + self.gpu_ids = gpu_ids + self.interval = interval + self.vram_to_keep = vram_to_keep + self.busy_threshold = busy_threshold + self.kept = False + self.released = False + + def keep(self): + self.kept = True + + def release(self): + self.released = True + + +def dummy_factory(**kwargs): + return DummyController(**kwargs) + + +def test_start_status_stop_cycle(): + server = KeepGPUServer(controller_factory=dummy_factory) + res = server.start_keep(gpu_ids=[1], vram="2GiB", interval=5, busy_threshold=20) + job_id = res["job_id"] + + status = server.status(job_id) + assert status["active"] + assert status["params"]["gpu_ids"] == [1] + assert status["params"]["vram"] == "2GiB" + assert status["params"]["interval"] == 5 + assert status["params"]["busy_threshold"] == 20 + + stopped = server.stop_keep(job_id) + assert job_id in stopped["stopped"] + assert server.status(job_id)["active"] is False + + +def test_stop_all(): + server = KeepGPUServer(controller_factory=dummy_factory) + job_a = server.start_keep()["job_id"] + job_b = server.start_keep()["job_id"] + + stopped = server.stop_keep() + assert set(stopped["stopped"]) == {job_a, job_b} + assert server.status(job_a)["active"] is False + assert server.status(job_b)["active"] is False + + +def test_list_gpus(): + server = KeepGPUServer(controller_factory=dummy_factory) + info = server.list_gpus() + assert "gpus" in info + + +def test_end_to_end_jsonrpc(): + server = KeepGPUServer(controller_factory=dummy_factory) + # start_keep + req = { + "id": 1, + "method": "start_keep", + "params": {"gpu_ids": [0], "vram": "256MB", "interval": 1, "busy_threshold": 5}, + } + resp = _handle_request(server, req) + assert "result" in resp and "job_id" in resp["result"] + job_id = resp["result"]["job_id"] + + # status + 
status_req = {"id": 2, "method": "status", "params": {"job_id": job_id}} + status_resp = _handle_request(server, status_req) + assert status_resp["result"]["active"] is True + + # stop_keep + stop_req = {"id": 3, "method": "stop_keep", "params": {"job_id": job_id}} + stop_resp = _handle_request(server, stop_req) + assert job_id in stop_resp["result"]["stopped"] + + +def test_status_all(): + server = KeepGPUServer(controller_factory=dummy_factory) + job_a = server.start_keep(gpu_ids=[0])["job_id"] + job_b = server.start_keep(gpu_ids=[1])["job_id"] + + status = server.status() + assert "active_jobs" in status + assert len(status["active_jobs"]) == 2 + + job_statuses = {job["job_id"]: job for job in status["active_jobs"]} + assert job_a in job_statuses + assert job_b in job_statuses + assert job_statuses[job_a]["params"]["gpu_ids"] == [0] + assert job_statuses[job_b]["params"]["gpu_ids"] == [1] + assert "controller" not in job_statuses[job_a] diff --git a/tests/utilities/test_gpu_info.py b/tests/utilities/test_gpu_info.py new file mode 100644 index 0000000..f07eae9 --- /dev/null +++ b/tests/utilities/test_gpu_info.py @@ -0,0 +1,126 @@ +import sys + +import pytest + +from keep_gpu.utilities import gpu_info + + +class DummyNVMLMemory: + def __init__(self, total: int, used: int): + self.total = total + self.used = used + + +class DummyNVMLUtil: + def __init__(self, gpu: int): + self.gpu = gpu + + +@pytest.mark.skipif( + not hasattr(gpu_info, "torch") or not gpu_info.torch.cuda.is_available(), + reason="CUDA not available for NVML path", +) +def test_get_gpu_info_nvml(monkeypatch): + class DummyNVML: + def __init__(self): + self.shutdown_calls = 0 + + @staticmethod + def nvmlInit(): + return None + + @staticmethod + def nvmlDeviceGetCount(): + return 1 + + @staticmethod + def nvmlDeviceGetHandleByIndex(index): + assert index == 0 + return "handle" + + @staticmethod + def nvmlDeviceGetMemoryInfo(handle): + return DummyNVMLMemory(total=2048, used=1024) + + @staticmethod + def nvmlDeviceGetUtilizationRates(handle): + return DummyNVMLUtil(gpu=55) + + @staticmethod + def nvmlDeviceGetName(handle): + return b"Mock GPU" + + def nvmlShutdown(self): + self.shutdown_calls += 1 + + dummy_nvml = DummyNVML() + monkeypatch.setitem(sys.modules, "pynvml", dummy_nvml) + + infos = gpu_info.get_gpu_info() + assert len(infos) == 1 + info = infos[0] + assert info["name"] == "Mock GPU" + assert info["memory_total"] == 2048 + assert info["memory_used"] == 1024 + assert info["utilization"] == 55 + + +@pytest.mark.rocm +def test_get_gpu_info_rocm(monkeypatch): + # remove nvml so ROCm path is used + monkeypatch.setitem(sys.modules, "pynvml", None) + + class DummyTorchCuda: + @staticmethod + def is_available(): + return True + + @staticmethod + def device_count(): + return 1 + + @staticmethod + def mem_get_info(): + return (50, 100) + + @staticmethod + def get_device_name(idx): + return f"ROCm {idx}" + + @staticmethod + def set_device(idx): + return None + + monkeypatch.setattr( + gpu_info, + "torch", + type( + "T", (), {"cuda": DummyTorchCuda, "version": type("V", (), {"hip": "6.0"})} + ), + ) + + class DummyROCM: + calls = 0 + + @classmethod + def rsmi_init(cls): + cls.calls += 1 + + @classmethod + def rsmi_dev_busy_percent_get(cls, idx): + assert idx == 0 + return 77 + + @classmethod + def rsmi_shut_down(cls): + cls.calls += 1 + + monkeypatch.setitem(sys.modules, "rocm_smi", DummyROCM) + + infos = gpu_info.get_gpu_info() + assert len(infos) == 1 + info = infos[0] + assert info["platform"] == "rocm" + assert 
info["utilization"] == 77 + assert info["memory_total"] == 100 + assert info["memory_used"] == 50