diff --git a/README.md b/README.md index 5ea35126d..e13a7f150 100644 --- a/README.md +++ b/README.md @@ -21,29 +21,36 @@ Directory also leverages [CSIT](https://github.com/agntcy/csit) for continuous s ADS enables several key capabilities for the agentic AI ecosystem: - **Capability-Based Discovery**: Agents publish structured metadata describing their -functional characteristics as described by the [OASF](https://github.com/agntcy/oasf). -The system organizes this information using hierarchical taxonomies, -enabling efficient matching of capabilities to requirements. + functional characteristics as described by the [OASF](https://github.com/agntcy/oasf). + The system organizes this information using hierarchical taxonomies, + enabling efficient matching of capabilities to requirements. - **Verifiable Claims**: While agent capabilities are often subjectively evaluated, -ADS provides cryptographic mechanisms for data integrity and provenance tracking. -This allows users to make informed decisions about agent selection. + ADS provides cryptographic mechanisms for data integrity and provenance tracking. + This allows users to make informed decisions about agent selection. - **Semantic Linkage**: Components can be securely linked to create various relationships -like version histories for evolutionary development, collaborative partnerships where -complementary skills solve complex problems, and dependency chains for composite agent workflows. + like version histories for evolutionary development, collaborative partnerships where + complementary skills solve complex problems, and dependency chains for composite agent workflows. - **Distributed Architecture**: Built on proven distributed systems principles, -ADS uses content-addressing for global uniqueness and implements distributed hash tables (DHT) -for scalable content discovery and synchronization across decentralized networks. 
+ ADS uses content-addressing for global uniqueness and implements distributed hash tables (DHT) + for scalable content discovery and synchronization across decentralized networks. - **Tooling and Integration**: Provides a suite of command-line tools, SDKs, and APIs -to facilitate interaction with the system, enabling developers to manage Directory -records and node operations programmatically. + to facilitate interaction with the system, enabling developers to manage Directory + records and node operations programmatically. - **Security and Trust**: Incorporates robust security measures including -cryptographic signing, verification of claims, secure communication protocols, and access controls -to ensure the integrity and authenticity of Directory records and nodes. + cryptographic signing, verification of claims, secure communication protocols, and access controls + to ensure the integrity and authenticity of Directory records and nodes. ## Documentation Check the [Documentation](https://docs.agntcy.org/dir/overview/) for a full walkthrough of all the Directory features. +## Trust Ranking (Reference PoC) + +This repository includes a **reference-only trust ranking extension** for directory results, +implemented as an optional add-on under `extensions/trust_ranking/`. 
+ +Details: `extensions/trust_ranking/REFERENCE.md` + ## Source tree - [proto](./proto) - gRPC specification for data models and services diff --git a/examples/directory_sample.json b/examples/directory_sample.json new file mode 100644 index 000000000..a5fbd4d05 --- /dev/null +++ b/examples/directory_sample.json @@ -0,0 +1,131 @@ +{ + "agents": [ + { + "id": "agent_alpha_clean", + "name": "Alpha Services", + "url": "https://alpha.example", + "capabilities": ["book", "quote", "support"], + "contact": "ops@alpha.example", + "updated_at": "2025-12-20", + "domain_verified": true, + "key_present": true, + "handshake_fail_ratio": 0.01, + "rate_limit_violations": 0, + "complaint_flags": 0 + }, + { + "id": "agent_beta_clean", + "name": "Beta Concierge", + "url": "https://beta.example", + "capabilities": ["schedule", "cancel", "status"], + "contact": "support@beta.example", + "updated_at": "2025-12-10", + "domain_verified": true, + "key_present": true, + "handshake_fail_ratio": 0.03, + "rate_limit_violations": 1, + "complaint_flags": 0 + }, + { + "id": "agent_gamma_ok", + "name": "Gamma Helper", + "url": "https://gamma.example", + "capabilities": ["info"], + "contact": "hello@gamma.example", + "updated_at": "2025-10-01", + "domain_verified": false, + "key_present": true, + "handshake_fail_ratio": 0.08, + "rate_limit_violations": 2, + "complaint_flags": 1 + }, + { + "id": "agent_delta_sparse", + "name": "Delta Agent", + "url": "https://delta.example", + "capabilities": [], + "updated_at": "2025-07-15", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.12, + "rate_limit_violations": 3, + "complaint_flags": 0 + }, + { + "id": "agent_epsilon_missing_contact", + "name": "Epsilon Tools", + "url": "https://epsilon.example", + "capabilities": ["quote"], + "updated_at": "2025-11-05", + "domain_verified": true, + "key_present": false, + "handshake_fail_ratio": 0.06, + "rate_limit_violations": 0, + "complaint_flags": 0 + }, + { + "id": 
"agent_zeta_suspicious", + "name": "Zeta Fast-Track", + "url": "https://zeta.example", + "capabilities": ["book", "pay", "refund"], + "contact": "contact@zeta.example", + "updated_at": "2024-12-01", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.45, + "rate_limit_violations": 25, + "complaint_flags": 7 + }, + { + "id": "agent_eta_suspicious", + "name": "ETA Deals", + "url": "https://eta.example", + "capabilities": ["book"], + "contact": "admin@eta.example", + "updated_at": "2025-01-10", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.33, + "rate_limit_violations": 12, + "complaint_flags": 4 + }, + { + "id": "agent_theta_clean", + "name": "Theta Support", + "url": "https://theta.example", + "capabilities": ["support", "status"], + "contact": "help@theta.example", + "updated_at": "2025-12-28", + "domain_verified": true, + "key_present": true, + "handshake_fail_ratio": 0.00, + "rate_limit_violations": 0, + "complaint_flags": 0 + }, + { + "id": "agent_iota_ok", + "name": "Iota Scheduling", + "url": "https://iota.example", + "capabilities": ["schedule"], + "contact": "team@iota.example", + "updated_at": "2025-09-09", + "domain_verified": false, + "key_present": true, + "handshake_fail_ratio": 0.10, + "rate_limit_violations": 1, + "complaint_flags": 0 + }, + { + "id": "agent_kappa_broken", + "name": "Kappa Broken Link", + "url": "", + "capabilities": ["book"], + "updated_at": "2023-06-01", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.60, + "rate_limit_violations": 40, + "complaint_flags": 10 + } + ] +} diff --git a/examples/directory_sample_degraded.json b/examples/directory_sample_degraded.json new file mode 100644 index 000000000..9b4ed4e01 --- /dev/null +++ b/examples/directory_sample_degraded.json @@ -0,0 +1,131 @@ +{ + "agents": [ + { + "id": "agent_alpha_clean", + "name": "Alpha Services", + "url": "https://alpha.example", + "capabilities": ["book", "quote", 
"support"], + "contact": "ops@alpha.example", + "updated_at": "2025-12-20", + "domain_verified": true, + "key_present": true, + "handshake_fail_ratio": 0.01, + "rate_limit_violations": 0, + "complaint_flags": 0 + }, + { + "id": "agent_beta_clean", + "name": "Beta Concierge", + "url": "https://beta.example", + "capabilities": ["schedule", "cancel", "status"], + "contact": "support@beta.example", + "updated_at": "2025-12-10", + "domain_verified": true, + "key_present": true, + "handshake_fail_ratio": 0.03, + "rate_limit_violations": 1, + "complaint_flags": 0 + }, + { + "id": "agent_gamma_ok", + "name": "Gamma Helper", + "url": "https://gamma.example", + "capabilities": ["info"], + "contact": "hello@gamma.example", + "updated_at": "2023-06-01", + "domain_verified": false, + "key_present": true, + "handshake_fail_ratio": 0.56, + "rate_limit_violations": 30, + "complaint_flags": 12 + }, + { + "id": "agent_delta_sparse", + "name": "Delta Agent", + "url": "https://delta.example", + "capabilities": [], + "updated_at": "2025-07-15", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.12, + "rate_limit_violations": 3, + "complaint_flags": 0 + }, + { + "id": "agent_epsilon_missing_contact", + "name": "Epsilon Tools", + "url": "https://epsilon.example", + "capabilities": ["quote"], + "updated_at": "2025-11-05", + "domain_verified": true, + "key_present": false, + "handshake_fail_ratio": 0.06, + "rate_limit_violations": 0, + "complaint_flags": 0 + }, + { + "id": "agent_zeta_suspicious", + "name": "Zeta Fast-Track", + "url": "https://zeta.example", + "capabilities": ["book", "pay", "refund"], + "contact": "contact@zeta.example", + "updated_at": "2024-12-01", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.45, + "rate_limit_violations": 25, + "complaint_flags": 7 + }, + { + "id": "agent_eta_suspicious", + "name": "ETA Deals", + "url": "https://eta.example", + "capabilities": ["book"], + "contact": "admin@eta.example", + 
"updated_at": "2025-01-10", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.33, + "rate_limit_violations": 12, + "complaint_flags": 4 + }, + { + "id": "agent_theta_clean", + "name": "Theta Support", + "url": "https://theta.example", + "capabilities": ["support", "status"], + "contact": "help@theta.example", + "updated_at": "2025-12-28", + "domain_verified": true, + "key_present": true, + "handshake_fail_ratio": 0.00, + "rate_limit_violations": 0, + "complaint_flags": 0 + }, + { + "id": "agent_iota_ok", + "name": "Iota Scheduling", + "url": "https://iota.example", + "capabilities": ["schedule"], + "contact": "team@iota.example", + "updated_at": "2025-09-09", + "domain_verified": false, + "key_present": true, + "handshake_fail_ratio": 0.10, + "rate_limit_violations": 1, + "complaint_flags": 0 + }, + { + "id": "agent_kappa_broken", + "name": "Kappa Broken Link", + "url": "", + "capabilities": ["book"], + "updated_at": "2023-06-01", + "domain_verified": false, + "key_present": false, + "handshake_fail_ratio": 0.60, + "rate_limit_violations": 40, + "complaint_flags": 10 + } + ] +} diff --git a/extensions/trust_ranking/REFERENCE.md b/extensions/trust_ranking/REFERENCE.md new file mode 100644 index 000000000..84541f3da --- /dev/null +++ b/extensions/trust_ranking/REFERENCE.md @@ -0,0 +1,238 @@ +# Trust Ranking Extension (Reference PoC) + +> **Status:** Reference-only implementation +> **Scope:** Demonstration and discussion +> **Non-goals:** Security guarantees, standards, production readiness + +This document describes a **reference trust-ranking extension** that can be used +*alongside* the AGNTCY directory. It is intentionally optional, additive, and +non-authoritative. + +## Overview + +The AGNTCY directory enables capability-based discovery: + +> “Find agents that can do X.” + +As ecosystems grow, this becomes insufficient on its own. 
+When many agents claim the same capability, consumers need additional signals +to decide *which* agent to try first. + +This reference extension demonstrates how **trust-related signals** could +influence ranking decisions without changing directory semantics or protocol +behavior. + +## Background + +Identity verification answers “who is this agent?” +Trust-related signals answer “how reliable does this agent appear to be?” + +Both dimensions matter. +This reference focuses on ranking based on the latter. + +## What this extension is + +- A **sidecar** ranking module +- A **toy scoring model** using simple heuristics +- A **runnable demo** that produces explainable results +- A way to explore *interfaces*, not prescribe policy + +## What this extension is not + +- Not a security system +- Not a standard +- Not a recommendation for production use +- Not a source of truth for trust decisions + +All trust logic here is local, subjective, and replaceable. + +## Architecture + +``` +User / Client + | + v +Directory Search → [Capable Agents] + | + v + Trust Ranking (optional) + | + v + [Ranked Results] +``` + +- The AGNTCY directory remains unchanged. +- Ranking occurs **after** discovery. +- Consumers opt in by choosing to apply a ranker. + +## Scoring Model (Reference Only) + +The reference implementation evaluates four dimensions. +Weights are fixed and chosen for clarity, not optimality. + +### 1. Completeness (35%) + +**What:** Is the agent profile reasonably complete? + +**Signals:** + +- presence of `id`, `name`, `url` +- non-empty `capabilities` +- contact information +- `updated_at` timestamp + +**Rationale:** Complete profiles are easier to understand and maintain. + +### 2. Verification (25%) + +**What:** Are basic identity signals present? + +**Signals:** + +- `domain_verified` +- `key_present` + +**Rationale:** Verification raises the cost of impersonation, even if imperfect. + +### 3. Freshness (20%) + +**What:** Is the profile actively maintained? 
+ +**Signals:** + +- recently updated +- moderately recent +- stale + +**Rationale:** Abandoned profiles correlate with broken integrations. + +### 4. Behavior (20%) + +**What:** Are there basic indicators of operational reliability? + +**Signals (simulated in demo):** + +- handshake failure ratio +- rate limit violations +- complaint flags + +**Rationale:** Past behavior is often predictive of future reliability. + +## Final Score + +Scores are combined into a 0–100 range and capped below 100 +to avoid implying certainty. + +**Trust bands:** + +- **Green:** high confidence +- **Yellow:** medium confidence +- **Red:** low confidence + +Scores are accompanied by **human-readable reasons** to make +ranking decisions inspectable. + +## Usage + +### Run the demo + +```bash +python scripts/run_trust_ranking.py --top 10 +``` + +### JSON Output + +```bash +python scripts/run_trust_ranking.py --json > ranked.json +``` + +Each returned agent may include: + +```json +"trust": { + "score": 77.0, + "band": "yellow", + "reasons": [ + "Profile is somewhat complete", + "Updated this quarter", + "No rate limit violations" + ] +} +``` + +### Example Output (Illustrative) + +``` +1. Theta Support + id: agent_theta_clean + url: https://theta.example + trust: 99.0 (green) + reason: Profile is complete; Recently updated; Low handshake failure rate + +2. Alpha Services + id: agent_alpha_clean + url: https://alpha.example + trust: 99.0 (green) + reason: Profile is complete; Recently updated; Domain verified +``` + +Output is dependent on local scoring logic and input data; results are not authoritative. + +## Integration Patterns + +### Pattern 1: Client-side ranking (recommended) + +```python +# Fetch capable agents from a directory +agents = directory_search(...) 
+ +# Apply optional trust ranking locally +ranked_agents = rank_agents(agents) + +# Select a preferred agent +selected = ranked_agents[0] +``` + +- No server or protocol changes required +- Multiple ranking models can coexist +- Trust preferences remain local to the client + + + +### Pattern 2: Proxy service + +A proxy queries the directory, applies ranking, and returns ordered results. +Useful for shared logic, but introduces centralization tradeoffs. + +### Pattern 3: Directory plugin (future) + +Trust ranking as an optional server-side hook. +This requires community discussion and governance alignment. + +## Limitations + +This PoC intentionally omits: + +- adversarial robustness and Sybil resistance +- cryptographic binding of behavior to identity +- adaptive or context-dependent weighting +- trust decay, recovery, or volatility +- cross-observer reputation aggregation +- production concerns (scale, abuse, monitoring) + +These omissions are deliberate. + +## Purpose + +This reference exists to support discussion around: + +- where trust-based ranking should live +- how ranking logic can remain optional +- how explanations improve transparency +- how ecosystems avoid a single “trust authority” + +Feedback and alternative approaches are encouraged. + +## License + +Apache 2.0 (same as AGNTCY dir) diff --git a/extensions/trust_ranking/interface.py b/extensions/trust_ranking/interface.py new file mode 100644 index 000000000..f716b45f2 --- /dev/null +++ b/extensions/trust_ranking/interface.py @@ -0,0 +1,28 @@ +""" +Trust ranking interface (reference only). + +This defines the minimal contract a trust ranker must implement. +It is intentionally simple and non-prescriptive. +""" + +from typing import List, Dict, Any + + +def rank_agents( + agents: List[Dict[str, Any]], + query: Dict[str, Any] | None = None, + context: Dict[str, Any] | None = None, +) -> List[Dict[str, Any]]: + """ + Rank a list of agent directory entries. 
# Weights of the four scoring dimensions (toy values). They sum to 1.0.
_WEIGHT_COMPLETENESS = 0.35
_WEIGHT_FRESHNESS = 0.20
_WEIGHT_VERIFICATION = 0.25
_WEIGHT_BEHAVIOR = 0.20

# Final scores are capped just below 100 to avoid "perfect trust" optics in a PoC.
_MAX_SCORE = 99.0

# Directory fields that count towards profile completeness.
_PROFILE_FIELDS = ("id", "name", "url", "capabilities", "contact", "updated_at")


def _parse_date_yyyy_mm_dd(s: str | None) -> datetime | None:
    """
    Parse a ``YYYY-MM-DD`` string into a UTC-aware datetime (midnight).

    Returns None for missing, empty, non-string or malformed values, so a
    directory entry carrying e.g. an integer timestamp cannot crash the ranker.
    """
    if not isinstance(s, str) or not s:
        return None
    try:
        return datetime.strptime(s, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    except ValueError:
        return None


def _clamp(x: float, lo: float, hi: float) -> float:
    """Limit x to the closed interval [lo, hi]."""
    return max(lo, min(hi, x))


def _is_missing(value: Any) -> bool:
    """Return True when a profile field carries no usable information."""
    if value is None:
        return True
    if isinstance(value, str):
        # Whitespace-only strings are as uninformative as empty ones.
        return not value.strip()
    if isinstance(value, (list, tuple, set, frozenset, dict)):
        return not value
    return False


def _completeness(agent: Dict[str, Any]) -> Tuple[float, List[str]]:
    """
    Completeness based on presence of common directory fields.

    Returns:
        (score, reasons) where score is the fraction (0..1) of the fields in
        _PROFILE_FIELDS that are populated. Reasons start with a summary line,
        followed by a "Missing: ..." line naming up to three absent fields.
    """
    missing = [k for k in _PROFILE_FIELDS if _is_missing(agent.get(k))]
    score = (len(_PROFILE_FIELDS) - len(missing)) / float(len(_PROFILE_FIELDS))

    if score >= 0.9:
        reasons = ["Profile is complete"]
    elif score >= 0.6:
        reasons = ["Profile is somewhat complete"]
    else:
        reasons = ["Profile is missing key fields"]

    if missing:
        reasons.append("Missing: " + ", ".join(missing[:3]) + ("..." if len(missing) > 3 else ""))

    return score, reasons


def _freshness(agent: Dict[str, Any], now: datetime | None = None) -> Tuple[float, List[str]]:
    """
    Freshness score (0..1) based on ``updated_at``. Newer is better.

    Parameters:
        agent: directory entry; ``updated_at`` is expected as ``YYYY-MM-DD``.
        now: reference time. Defaults to the current UTC time; a naive
            datetime is interpreted as UTC.

    Returns:
        (score, reasons). Missing/invalid dates and dates more than one
        calendar day in the future both score 0.2.
    """
    dt = _parse_date_yyyy_mm_dd(agent.get("updated_at"))
    if not dt:
        return 0.2, ["No valid updated_at date"]

    if now is None:
        now = datetime.now(timezone.utc)
    elif now.tzinfo is None:
        now = now.replace(tzinfo=timezone.utc)

    days = (now - dt).days

    # A date in the future is a self-reported claim that cannot be true, so it
    # must not earn the best bucket. One day of slack absorbs publishers whose
    # local calendar date is ahead of UTC.
    if days < -1:
        return 0.2, ["updated_at is in the future"]

    # Simple buckets
    if days <= 30:
        return 1.0, ["Recently updated"]
    if days <= 120:
        return 0.7, ["Updated this quarter"]
    if days <= 365:
        return 0.4, ["Updated within a year"]

    return 0.1, ["Stale profile"]


def _verification(agent: Dict[str, Any]) -> Tuple[float, List[str]]:
    """
    Verification score (0..1) from the ``domain_verified`` (0.6) and
    ``key_present`` (0.4) flags. Returns (score, reasons), one reason per flag.
    """
    score = 0.0
    reasons = []

    if bool(agent.get("domain_verified")):
        score += 0.6
        reasons.append("Domain verified")
    else:
        reasons.append("Domain not verified")

    if bool(agent.get("key_present")):
        score += 0.4
        reasons.append("Key present")
    else:
        reasons.append("No key")

    return score, reasons


def _as_count(value: Any, default: int = 0) -> float:
    """
    Coerce a counter-like value to a number.

    Absent or unparseable values yield ``default``. Infinite values are kept
    as infinities (``int()`` cannot represent them) so that they are
    penalised by the caller instead of raising or being treated as zero.
    """
    if value is None:
        return default
    try:
        return int(value)
    except OverflowError:
        return float(value)
    except (TypeError, ValueError):
        return default


def _behavior(agent: Dict[str, Any]) -> Tuple[float, List[str]]:
    """
    Behavior score (0..1) from simulated operational hints.

    Lower handshake failure ratio, fewer rate limit violations and fewer
    complaint flags are better. Absent or unparseable values fall back to
    defaults (0.10 failure ratio, 0 violations, 0 complaints).

    Returns:
        (score, reasons); reasons may be empty when nothing stands out.
    """
    raw_ratio = agent.get("handshake_fail_ratio")
    try:
        fail_ratio = float(raw_ratio) if raw_ratio is not None else 0.10
    except (TypeError, ValueError, OverflowError):
        fail_ratio = 0.10

    violations = _as_count(agent.get("rate_limit_violations"))
    complaints = _as_count(agent.get("complaint_flags"))

    # Convert to penalties (toy)
    # Fail ratio: 0.0 -> 0 penalty, 0.5 -> heavy penalty
    fail_pen = _clamp(fail_ratio / 0.5, 0.0, 1.0)

    # Violations: 0 -> 0 penalty, 25+ -> heavy penalty
    viol_pen = _clamp(violations / 25.0, 0.0, 1.0)

    # Complaints: 0 -> 0 penalty, 10+ -> heavy penalty
    comp_pen = _clamp(complaints / 10.0, 0.0, 1.0)

    penalty = 0.5 * fail_pen + 0.3 * viol_pen + 0.2 * comp_pen
    score = 1.0 - _clamp(penalty, 0.0, 1.0)

    reasons = []
    if fail_ratio >= 0.30:
        reasons.append("High handshake failure rate")
    elif fail_ratio <= 0.05:
        reasons.append("Low handshake failure rate")

    if violations >= 10:
        reasons.append("Many rate limit violations")
    elif violations == 0:
        reasons.append("No rate limit violations")

    if complaints >= 3:
        reasons.append("Multiple complaint flags")
    elif complaints == 0:
        reasons.append("No complaint flags")

    return score, reasons


def _band(score_0_100: float) -> str:
    """Map a 0..100 score to a trust band: green (>= 80), yellow (>= 50), red."""
    if score_0_100 >= 80:
        return "green"
    if score_0_100 >= 50:
        return "yellow"
    return "red"


def _top_reasons(reasons: List[str], limit: int = 3) -> List[str]:
    """Return the first ``limit`` distinct, non-blank reasons, preserving order."""
    out = []
    seen = set()
    for r in reasons:
        r = r.strip()
        if not r or r in seen:
            continue
        out.append(r)
        seen.add(r)
        if len(out) >= limit:
            break
    return out


def rank_agents(
    agents: List[Dict[str, Any]],
    query: Dict[str, Any] | None = None,
    context: Dict[str, Any] | None = None,
) -> List[Dict[str, Any]]:
    """
    Toy ranker. Produces, for every agent:
        trust.score    0..100 (capped at 99.0)
        trust.band     green|yellow|red
        trust.reasons[] (top 3, human-readable)

    Parameters:
        agents: agent-like dicts from a directory. Each entry is shallow-copied;
            the caller's dicts are never modified.
        query: accepted for interface compatibility; unused by this ranker.
        context: optional. If it is a dict holding a ``datetime`` under the
            key ``"now"``, that value is used as the reference time for
            freshness, which makes scores reproducible. Otherwise the current
            UTC time is used.

    Returns:
        Ranked list (descending trust.score, then name, then id).
    """
    now = None
    if isinstance(context, dict) and isinstance(context.get("now"), datetime):
        now = context["now"]

    scored: List[Tuple[float, Dict[str, Any]]] = []

    for agent in agents:
        a = dict(agent)

        comp, comp_r = _completeness(a)
        fresh, fresh_r = _freshness(a, now)
        ver, ver_r = _verification(a)
        beh, beh_r = _behavior(a)

        score_0_1 = (
            _WEIGHT_COMPLETENESS * comp
            + _WEIGHT_FRESHNESS * fresh
            + _WEIGHT_VERIFICATION * ver
            + _WEIGHT_BEHAVIOR * beh
        )
        score_0_100 = round(_clamp(score_0_1, 0.0, 1.0) * 100.0, 1)
        score_0_100 = min(score_0_100, _MAX_SCORE)

        # Build an explanation that covers different categories:
        # completeness, freshness, then behavior when it has something to
        # say, otherwise verification.
        preferred = comp_r[:1] + fresh_r[:1] + (beh_r[:1] or ver_r[:1])

        # Fill remaining slots from everything else, preserving uniqueness
        reasons = _top_reasons(preferred + comp_r + fresh_r + ver_r + beh_r, limit=3)

        a["trust"] = {
            "score": score_0_100,
            "band": _band(score_0_100),
            "reasons": reasons,
        }

        scored.append((score_0_100, a))

    # Deterministic ordering: score desc, then name/id asc. str() keeps the
    # tie-break from crashing on non-string names or ids.
    scored.sort(
        key=lambda t: (
            -t[0],
            str(t[1].get("name") or "").lower(),
            str(t[1].get("id") or "").lower(),
        )
    )
    return [a for _, a in scored]
class TestReferenceRanker(unittest.TestCase):
    """Behavioural checks for the reference trust ranker's public contract."""

    def setUp(self):
        # Imported here so the fixture dates can be derived from the clock
        # without touching the module's import block.
        from datetime import datetime, timezone

        # "Fresh" fixtures must stay fresh no matter when the suite runs, so
        # their date is computed instead of hard-coded.
        today = datetime.now(timezone.utc).date().isoformat()

        # Minimal "good" agent: complete + verified + fresh + clean behavior
        self.good_agent = {
            "id": "agent_good",
            "name": "Good Agent",
            "url": "https://good.example",
            "capabilities": ["book"],
            "contact": "ops@good.example",
            "updated_at": today,
            "domain_verified": True,
            "key_present": True,
            "handshake_fail_ratio": 0.0,
            "rate_limit_violations": 0,
            "complaint_flags": 0,
        }

        # Minimal "bad" agent: missing fields + stale + unverified + bad behavior.
        # A fixed date in the past only ever gets staler, so it is safe to pin.
        self.bad_agent = {
            "id": "agent_bad",
            "name": "Bad Agent",
            "url": "",
            "capabilities": [],
            "updated_at": "2023-01-01",
            "domain_verified": False,
            "key_present": False,
            "handshake_fail_ratio": 0.60,
            "rate_limit_violations": 40,
            "complaint_flags": 10,
        }

        # Two agents with identical score inputs except name/id/url, used to
        # test the deterministic tie-break.
        self.tie_a = {
            "id": "agent_tie_a",
            "name": "Alpha",
            "url": "https://tie.example/a",
            "capabilities": ["info"],
            "contact": "a@tie.example",
            "updated_at": today,
            "domain_verified": True,
            "key_present": True,
            "handshake_fail_ratio": 0.01,
            "rate_limit_violations": 0,
            "complaint_flags": 0,
        }
        self.tie_b = dict(self.tie_a)
        self.tie_b["id"] = "agent_tie_b"
        self.tie_b["name"] = "Beta"
        self.tie_b["url"] = "https://tie.example/b"

    def test_good_agent_scores_high(self):
        ranked = rank_agents([self.good_agent])
        trust = ranked[0].get("trust", {})
        self.assertIn("score", trust)
        self.assertIn("band", trust)
        self.assertIn("reasons", trust)

        # "High" threshold. Adjust if you change weights later.
        self.assertGreaterEqual(trust["score"], 80.0)
        self.assertEqual(trust["band"], "green")
        self.assertIsInstance(trust["reasons"], list)
        self.assertGreaterEqual(len(trust["reasons"]), 1)

    def test_bad_agent_scores_low(self):
        ranked = rank_agents([self.bad_agent])
        trust = ranked[0].get("trust", {})
        self.assertLessEqual(trust["score"], 49.9)
        self.assertEqual(trust["band"], "red")

    def test_ranking_orders_by_score_desc(self):
        ranked = rank_agents([self.bad_agent, self.good_agent])
        self.assertEqual(ranked[0]["id"], "agent_good")
        self.assertEqual(ranked[-1]["id"], "agent_bad")

        # Explicit score ordering check
        top_score = ranked[0]["trust"]["score"]
        bottom_score = ranked[-1]["trust"]["score"]
        self.assertGreaterEqual(top_score, bottom_score)

    def test_deterministic_tie_break(self):
        # With identical scores the expected ordering is:
        # score desc, then name asc, then id asc (the ranker's sort key).
        ranked = rank_agents([self.tie_b, self.tie_a])
        self.assertEqual(ranked[0]["id"], "agent_tie_a")
        self.assertEqual(ranked[1]["id"], "agent_tie_b")

        # Run again to ensure repeatability
        ranked2 = rank_agents([self.tie_b, self.tie_a])
        self.assertEqual([a["id"] for a in ranked], [a["id"] for a in ranked2])

    def test_input_is_not_mutated(self):
        # The ranker annotates copies; the caller's records must stay untouched.
        rank_agents([self.good_agent])
        self.assertNotIn("trust", self.good_agent)
def _load_agents(path: Path) -> list[dict]:
    """
    Read the directory JSON at ``path`` and return its ``agents`` list.

    Raises:
        OSError: the file cannot be read.
        ValueError: the content is not valid JSON, the top level is not an
            object, ``agents`` is not a list, or an entry is not an object.
    """
    data = json.loads(path.read_text(encoding="utf-8"))
    # A top-level array or scalar has no .get(); report it as a format error
    # instead of leaking an AttributeError message to the user.
    if not isinstance(data, dict):
        raise ValueError("Input JSON must be an object containing an 'agents' list")
    agents = data.get("agents")
    if not isinstance(agents, list):
        raise ValueError("Input JSON must contain an 'agents' list")
    for index, entry in enumerate(agents):
        if not isinstance(entry, dict):
            raise ValueError(f"agents[{index}] must be a JSON object")
    return agents


def main(argv: list[str] | None = None) -> int:
    """
    Run the trust ranking demo.

    Parameters:
        argv: command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` (argparse behaviour) when None.

    Returns:
        Process exit code: 0 on success, 2 when the input is missing or invalid.
    """
    parser = argparse.ArgumentParser(description="Trust ranking PoC runner (reference only)")
    parser.add_argument(
        "--input",
        default="examples/directory_sample.json",
        help="Path to JSON file containing {'agents': [...]}",
    )
    parser.add_argument("--top", type=int, default=10, help="How many results to print")
    parser.add_argument("--json", action="store_true", help="Output full ranked list as JSON")
    args = parser.parse_args(argv)

    # Relative paths are resolved against the repo root first (so the default
    # works from any directory), then against the current working directory.
    input_path = (REPO_ROOT / args.input).resolve()
    if not input_path.exists():
        cwd_path = Path(args.input).resolve()
        if cwd_path.exists():
            input_path = cwd_path
    if not input_path.exists():
        print(f"ERROR: input file not found: {input_path}", file=sys.stderr)
        return 2

    # Top-level boundary of a CLI: any failure becomes a message and exit code.
    try:
        agents = _load_agents(input_path)
        ranked = rank_agents(agents)
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 2

    if args.json:
        print(json.dumps({"agents": ranked}, indent=2, ensure_ascii=False))
        return 0

    # Negative --top values print nothing rather than slicing from the end.
    top_n = max(0, min(args.top, len(ranked)))

    print("Trust Ranking PoC (reference only)")
    print(f"Input: {args.input}")
    print(f"Results: top {top_n} of {len(ranked)}")
    print("")

    for i, a in enumerate(ranked[:top_n], start=1):
        trust = a.get("trust") or {}
        score = trust.get("score", "n/a")
        band = trust.get("band", "n/a")
        reasons = trust.get("reasons", [])
        name = a.get("name") or a.get("id") or "(unnamed)"
        url = a.get("url") or ""

        reasons_str = "; ".join(reasons) if isinstance(reasons, list) else str(reasons)

        # Detail labels are right-aligned so the colons line up.
        print(f"{i:>2}. {name}")
        print(f"    id: {a.get('id')}")
        print(f"   url: {url}")
        print(f" trust: {score} ({band})")
        print(f"reason: {reasons_str}")
        print("")

    return 0