From 22c024b972f35c758bee405071bc2ddad5c87ced Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Wed, 20 Aug 2025 14:33:29 +0530 Subject: [PATCH 01/28] add: standard library imports --- modelcontextprotocol/settings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modelcontextprotocol/settings.py b/modelcontextprotocol/settings.py index a1c4ab6..4b5d876 100644 --- a/modelcontextprotocol/settings.py +++ b/modelcontextprotocol/settings.py @@ -1,6 +1,10 @@ """Configuration settings for the application.""" +import requests +from typing import Any, Dict, Optional +from urllib.parse import urlencode from pydantic_settings import BaseSettings + from version import __version__ as MCP_VERSION From 18c4b04ebf0af88454798be00b3a4b523f2b5815 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Wed, 20 Aug 2025 14:34:21 +0530 Subject: [PATCH 02/28] add: static methods for atlan api requests --- modelcontextprotocol/settings.py | 66 ++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/modelcontextprotocol/settings.py b/modelcontextprotocol/settings.py index 4b5d876..fb081e7 100644 --- a/modelcontextprotocol/settings.py +++ b/modelcontextprotocol/settings.py @@ -16,6 +16,7 @@ class Settings(BaseSettings): ATLAN_AGENT_ID: str = "NA" ATLAN_AGENT: str = "atlan-mcp" ATLAN_MCP_USER_AGENT: str = f"Atlan MCP Server {MCP_VERSION}" + ATLAN_TYPEDEF_API_ENDPOINT: Optional[str] = "/api/meta/types/typedefs/" @property def headers(self) -> dict: @@ -26,6 +27,71 @@ def headers(self) -> dict: "X-Atlan-Agent-Id": self.ATLAN_AGENT_ID, "X-Atlan-Client-Origin": self.ATLAN_AGENT, } + + @staticmethod + def build_api_url(path: str, query_params: Optional[Dict[str, Any]] = None) -> str: + current_settings = Settings() + if not current_settings: + raise ValueError( + "Atlan API URL (ATLAN_API_URL) is not configured in settings." + ) + + base_url = current_settings.ATLAN_BASE_URL.rstrip("/") + + if ( + path + and not path.startswith("/") + and not base_url.endswith("/") + and not path.startswith(("http://", "https://")) + ): + full_path = f"{base_url}/{path.lstrip('/')}" + elif path.startswith(("http://", "https://")): + full_path = path + else: + full_path = f"{base_url}{path}" + + if query_params: + active_query_params = { + k: v for k, v in query_params.items() if v is not None + } + if active_query_params: + query_string = urlencode(active_query_params) + return f"{full_path}?{query_string}" + return full_path + + @staticmethod + def get_atlan_typedef_api_endpoint(param: str) -> str: + current_settings = Settings() + if not current_settings.ATLAN_TYPEDEF_API_ENDPOINT: + raise ValueError( + "Default API endpoint for typedefs (api_endpoint) is not configured in settings." + ) + + return Settings.build_api_url( + path=current_settings.ATLAN_TYPEDEF_API_ENDPOINT, query_params={"type": param} + ) + + @staticmethod + def make_request(url: str) -> Optional[Dict[str, Any]]: + current_settings = Settings() + headers = { + "Authorization": f"Bearer {current_settings.ATLAN_API_KEY}", + "x-atlan-client-origin": "atlan-search-app", + } + try: + response = requests.get( + url, + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"Failed to make request to {url}: {response.status_code} {response.text}" + ) + return response.json() + except Exception as e: + raise Exception(f"Failed to make request to {url}: {e}") + + class Config: env_file = ".env" From ff5f8089473949e026bd8f787b511b602fe855d2 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Wed, 20 Aug 2025 14:35:18 +0530 Subject: [PATCH 03/28] export: get_custom_metadata_context tool --- modelcontextprotocol/tools/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelcontextprotocol/tools/__init__.py b/modelcontextprotocol/tools/__init__.py index 5e057c5..81dbf5a 100644 --- a/modelcontextprotocol/tools/__init__.py +++ b/modelcontextprotocol/tools/__init__.py @@ -2,6 +2,7 @@ from .dsl import get_assets_by_dsl from .lineage import traverse_lineage from .assets import update_assets +from .custom_metadata_context import get_custom_metadata_context from .glossary import ( create_glossary_category_assets, create_glossary_assets, @@ -21,6 +22,7 @@ "get_assets_by_dsl", "traverse_lineage", "update_assets", + "get_custom_metadata_context", "create_glossary_category_assets", "create_glossary_assets", "create_glossary_term_assets", From 56d4e6fb13d646f66bff0060baf480acd98ab188 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Wed, 20 Aug 2025 14:35:40 +0530 Subject: [PATCH 04/28] add: tool to fetch custom metadata context --- .../tools/custom_metadata_context.py | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 modelcontextprotocol/tools/custom_metadata_context.py diff --git a/modelcontextprotocol/tools/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py new file mode 100644 index 0000000..9fb8ee4 --- /dev/null +++ b/modelcontextprotocol/tools/custom_metadata_context.py @@ -0,0 +1,162 @@ +import logging +from typing import Any, Dict, List, Optional +from settings import Settings + +logger = logging.getLogger(__name__) + + +def process_business_metadata( + bm_def: Dict[str, Any], +) -> Dict[str, Any]: + """ + Generates context prompt for a given Atlan business metadata definition. + + Args: + bm_def: A dictionary representing the business metadata definition. + Expected keys: 'displayName', 'description', 'attributeDefs'. + + Returns: + A list containing a single string: the formatted semantic search prompt, + and a list containing the metadata dictionary. + """ + bm_def_name_for_prompt = bm_def.get("name", "N/A") + bm_def_display_name = bm_def.get("displayName", "N/A") + description_for_prompt = bm_def.get("description", "No description available.") + + attribute_defs = bm_def.get("attributeDefs", []) + guid = bm_def.get("guid") + + # For prompt: comma separated attribute names and descriptions + attributes_list_for_prompt: List[str] = [] + if attribute_defs: + for attr in attribute_defs: + attr_name = attr.get("displayName", attr.get("name", "Unnamed attribute")) + attr_desc = attr.get( + "description", "No description" + ) # As per schema: names and descriptions + attributes_list_for_prompt.append(f"{str(attr_name)}:{str(attr_desc)}") + attributes_str_for_prompt = ( + ", ".join(attributes_list_for_prompt) if attributes_list_for_prompt else "None" + ) + + # For metadata: list of attribute objects + parsed_attributes_for_metadata: List[Dict[str, Any]] = [] + if attribute_defs: + for attr_def_item in attribute_defs: + base_description = attr_def_item.get("description", "") + + # Check for enum enrichment and enhance description + enum_enrichment = attr_def_item.get("enumEnrichment") + enhanced_description = base_description + if enum_enrichment and enum_enrichment.get("values"): + enum_values = enum_enrichment["values"] + if enum_values: + # Create comma-separated quoted values + quoted_values = ", ".join([f"'{value}'" for value in enum_values]) + enum_suffix = f" This attribute can have enum values: {quoted_values}." + enhanced_description = f"{base_description}{enum_suffix}".strip() + + attribute_metadata = { + "name": attr_def_item.get("name"), + "display_name": attr_def_item.get("displayName"), + "data_type": attr_def_item.get( + "typeName" + ), # Assuming typeName is data_type + "description": enhanced_description, + } + + # Include enum enrichment data if present + if enum_enrichment: + attribute_metadata["enumEnrichment"] = enum_enrichment + + parsed_attributes_for_metadata.append(attribute_metadata) + + metadata: Dict[str, Any] = { + "name": bm_def_name_for_prompt, + "display_name": bm_def_display_name, + "description": description_for_prompt, + "attributes": parsed_attributes_for_metadata, + } + + prompt = f"""{bm_def_display_name}|{description_for_prompt}|{attributes_str_for_prompt} + +This is a business metadata used in the data catalog to add more information to an asset""" + + return {"prompt": prompt, "metadata": metadata, "id": guid} + + +def get_custom_metadata_context() -> Dict[str, Any]: + display_name: str = "Business Metadata" + business_metadata_results: List[Dict[str, Any]] = [] + + try: + # Fetch enum definitions for enrichment + enum_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="ENUM") + enum_response: Optional[Dict[str, Any]] = Settings.make_request(enum_endpoint) + enum_lookup: Dict[str, Dict[str, Any]] = {} + if enum_response: + enum_defs = enum_response.get("enumDefs", []) + for enum_def in enum_defs: + enum_name = enum_def.get("name", "") + if enum_name: + enum_lookup[enum_name] = { + "guid": enum_def.get("guid", ""), + "description": enum_def.get("description", ""), + "values": [ + element.get("value", "") + for element in enum_def.get("elementDefs", []) + ], + "elementDefs": enum_def.get("elementDefs", []), + "version": enum_def.get("version", 1), + "createTime": enum_def.get("createTime", 0), + "updateTime": enum_def.get("updateTime", 0), + } + + # Fetch business metadata definitions + business_metadata_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="BUSINESS_METADATA") + business_metadata_response: Optional[Dict[str, Any]] = Settings.make_request(business_metadata_endpoint) + if business_metadata_response is None: + logger.error( + f"Service: Failed to make request to {business_metadata_endpoint} for {display_name}. No data returned." + ) + return [] + + business_metadata_defs: List[Dict[str, Any]] = business_metadata_response.get("businessMetadataDefs", []) + + # Enrich business metadata with enum information before processing + for business_metadata_def in business_metadata_defs: + # Enrich each business metadata definition with enum data + attribute_defs = business_metadata_def.get("attributeDefs", []) + for attribute in attribute_defs: + options = attribute.get("options", {}) + is_enum = options.get("isEnum") == "true" + + if is_enum: + enum_type = options.get("enumType", "") + if enum_type and enum_type in enum_lookup: + enum_def = enum_lookup[enum_type] + attribute["enumEnrichment"] = { + "status": "ENRICHED", + "enumType": enum_type, + "enumGuid": enum_def["guid"], + "enumDescription": enum_def["description"], + "enumVersion": enum_def["version"], + "values": enum_def["values"], + "elementDefs": enum_def["elementDefs"], + "enrichedTimestamp": None, + } + + # Process the enriched business metadata + business_metadata_results.append(process_business_metadata(business_metadata_def)) + + except Exception as e: + logger.error( + f"Service: Error fetching or processing {display_name}: {e}", + exc_info=True, + ) + return [] + + logger.info( + f"Fetched {len(business_metadata_results)} {display_name} definitions with enum enrichment." + ) + return business_metadata_results \ No newline at end of file From 99b8d55ee24b450abdbdb0543f475509a0506667 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Wed, 20 Aug 2025 14:36:22 +0530 Subject: [PATCH 05/28] add: custom_metadata_context tool registration --- modelcontextprotocol/server.py | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index 080e9b9..a2dea84 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -7,6 +7,7 @@ get_assets_by_dsl, traverse_lineage, update_assets, + get_custom_metadata_context, create_glossary_category_assets, create_glossary_assets, create_glossary_term_assets, @@ -678,6 +679,60 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: return create_glossary_category_assets(categories) +@mcp.tool() +def custom_metadata_context() -> List[Dict[str, Any]]: + """ + Get custom metadata context for business metadata definitions in Atlan. + + This tool provides comprehensive information about all business metadata definitions + available in the Atlan tenant, including their attributes, descriptions, and + enum values. This context is essential when users refer to custom metadata in + their queries, as it helps the LLM understand the structure and available options + for business metadata. + + Returns: + List[Dict[str, Any]]: List of business metadata definitions, each containing: + - prompt: Formatted string with metadata information for LLM context + - metadata: Detailed metadata structure including: + - name: Internal name of the business metadata + - display_name: Human-readable display name + - description: Description of the business metadata + - attributes: List of attribute definitions with: + - name: Attribute internal name + - display_name: Attribute display name + - data_type: Data type of the attribute + - description: Attribute description (enhanced with enum values if applicable) + - enumEnrichment: Enum information if the attribute is an enum type + - id: GUID of the business metadata definition + + Examples: + # Get all custom metadata context + context = get_custom_metadata_context_tool() + + # The returned data helps understand available business metadata like: + # - Data Quality metadata with attributes like "Accuracy Score", "Completeness" + # - Data Classification with attributes like "Sensitivity Level", "Data Category" + # - Business Context with attributes like "Business Owner", "Data Steward" + + # Each attribute may have enum values, for example: + # Sensitivity Level: 'Public', 'Internal', 'Confidential', 'Restricted' + # Data Category: 'PII', 'Financial', 'Operational', 'Marketing' + + Use Cases: + - Understanding available business metadata when users ask about custom metadata + - Providing context for business metadata attributes and their possible values + - Helping users understand what business metadata can be applied to assets + - Supporting queries about data classification, quality metrics, and business context + """ + try: + return get_custom_metadata_context() + except Exception as e: + return { + "error": f"Failed to fetch custom metadata context: {str(e)}", + "context": [] + } + + def main(): mcp.run() From 8652d58c19964607d8993f22de763fca54e92c57 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:18:09 +0530 Subject: [PATCH 06/28] remove: get_custom_metadata_context tool --- modelcontextprotocol/tools/__init__.py | 4 +- .../tools/custom_metadata_context.py | 162 ------------------ 2 files changed, 2 insertions(+), 164 deletions(-) delete mode 100644 modelcontextprotocol/tools/custom_metadata_context.py diff --git a/modelcontextprotocol/tools/__init__.py b/modelcontextprotocol/tools/__init__.py index 81dbf5a..7cf6ae3 100644 --- a/modelcontextprotocol/tools/__init__.py +++ b/modelcontextprotocol/tools/__init__.py @@ -2,7 +2,7 @@ from .dsl import get_assets_by_dsl from .lineage import traverse_lineage from .assets import update_assets -from .custom_metadata_context import get_custom_metadata_context +from .custom_metadata_detector import detect_custom_metadata_trigger from .glossary import ( create_glossary_category_assets, create_glossary_assets, @@ -22,7 +22,7 @@ "get_assets_by_dsl", "traverse_lineage", "update_assets", - "get_custom_metadata_context", + "detect_custom_metadata_trigger", "create_glossary_category_assets", "create_glossary_assets", "create_glossary_term_assets", diff --git a/modelcontextprotocol/tools/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py deleted file mode 100644 index 9fb8ee4..0000000 --- a/modelcontextprotocol/tools/custom_metadata_context.py +++ /dev/null @@ -1,162 +0,0 @@ -import logging -from typing import Any, Dict, List, Optional -from settings import Settings - -logger = logging.getLogger(__name__) - - -def process_business_metadata( - bm_def: Dict[str, Any], -) -> Dict[str, Any]: - """ - Generates context prompt for a given Atlan business metadata definition. - - Args: - bm_def: A dictionary representing the business metadata definition. - Expected keys: 'displayName', 'description', 'attributeDefs'. - - Returns: - A list containing a single string: the formatted semantic search prompt, - and a list containing the metadata dictionary. - """ - bm_def_name_for_prompt = bm_def.get("name", "N/A") - bm_def_display_name = bm_def.get("displayName", "N/A") - description_for_prompt = bm_def.get("description", "No description available.") - - attribute_defs = bm_def.get("attributeDefs", []) - guid = bm_def.get("guid") - - # For prompt: comma separated attribute names and descriptions - attributes_list_for_prompt: List[str] = [] - if attribute_defs: - for attr in attribute_defs: - attr_name = attr.get("displayName", attr.get("name", "Unnamed attribute")) - attr_desc = attr.get( - "description", "No description" - ) # As per schema: names and descriptions - attributes_list_for_prompt.append(f"{str(attr_name)}:{str(attr_desc)}") - attributes_str_for_prompt = ( - ", ".join(attributes_list_for_prompt) if attributes_list_for_prompt else "None" - ) - - # For metadata: list of attribute objects - parsed_attributes_for_metadata: List[Dict[str, Any]] = [] - if attribute_defs: - for attr_def_item in attribute_defs: - base_description = attr_def_item.get("description", "") - - # Check for enum enrichment and enhance description - enum_enrichment = attr_def_item.get("enumEnrichment") - enhanced_description = base_description - if enum_enrichment and enum_enrichment.get("values"): - enum_values = enum_enrichment["values"] - if enum_values: - # Create comma-separated quoted values - quoted_values = ", ".join([f"'{value}'" for value in enum_values]) - enum_suffix = f" This attribute can have enum values: {quoted_values}." - enhanced_description = f"{base_description}{enum_suffix}".strip() - - attribute_metadata = { - "name": attr_def_item.get("name"), - "display_name": attr_def_item.get("displayName"), - "data_type": attr_def_item.get( - "typeName" - ), # Assuming typeName is data_type - "description": enhanced_description, - } - - # Include enum enrichment data if present - if enum_enrichment: - attribute_metadata["enumEnrichment"] = enum_enrichment - - parsed_attributes_for_metadata.append(attribute_metadata) - - metadata: Dict[str, Any] = { - "name": bm_def_name_for_prompt, - "display_name": bm_def_display_name, - "description": description_for_prompt, - "attributes": parsed_attributes_for_metadata, - } - - prompt = f"""{bm_def_display_name}|{description_for_prompt}|{attributes_str_for_prompt} - -This is a business metadata used in the data catalog to add more information to an asset""" - - return {"prompt": prompt, "metadata": metadata, "id": guid} - - -def get_custom_metadata_context() -> Dict[str, Any]: - display_name: str = "Business Metadata" - business_metadata_results: List[Dict[str, Any]] = [] - - try: - # Fetch enum definitions for enrichment - enum_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="ENUM") - enum_response: Optional[Dict[str, Any]] = Settings.make_request(enum_endpoint) - enum_lookup: Dict[str, Dict[str, Any]] = {} - if enum_response: - enum_defs = enum_response.get("enumDefs", []) - for enum_def in enum_defs: - enum_name = enum_def.get("name", "") - if enum_name: - enum_lookup[enum_name] = { - "guid": enum_def.get("guid", ""), - "description": enum_def.get("description", ""), - "values": [ - element.get("value", "") - for element in enum_def.get("elementDefs", []) - ], - "elementDefs": enum_def.get("elementDefs", []), - "version": enum_def.get("version", 1), - "createTime": enum_def.get("createTime", 0), - "updateTime": enum_def.get("updateTime", 0), - } - - # Fetch business metadata definitions - business_metadata_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="BUSINESS_METADATA") - business_metadata_response: Optional[Dict[str, Any]] = Settings.make_request(business_metadata_endpoint) - if business_metadata_response is None: - logger.error( - f"Service: Failed to make request to {business_metadata_endpoint} for {display_name}. No data returned." - ) - return [] - - business_metadata_defs: List[Dict[str, Any]] = business_metadata_response.get("businessMetadataDefs", []) - - # Enrich business metadata with enum information before processing - for business_metadata_def in business_metadata_defs: - # Enrich each business metadata definition with enum data - attribute_defs = business_metadata_def.get("attributeDefs", []) - for attribute in attribute_defs: - options = attribute.get("options", {}) - is_enum = options.get("isEnum") == "true" - - if is_enum: - enum_type = options.get("enumType", "") - if enum_type and enum_type in enum_lookup: - enum_def = enum_lookup[enum_type] - attribute["enumEnrichment"] = { - "status": "ENRICHED", - "enumType": enum_type, - "enumGuid": enum_def["guid"], - "enumDescription": enum_def["description"], - "enumVersion": enum_def["version"], - "values": enum_def["values"], - "elementDefs": enum_def["elementDefs"], - "enrichedTimestamp": None, - } - - # Process the enriched business metadata - business_metadata_results.append(process_business_metadata(business_metadata_def)) - - except Exception as e: - logger.error( - f"Service: Error fetching or processing {display_name}: {e}", - exc_info=True, - ) - return [] - - logger.info( - f"Fetched {len(business_metadata_results)} {display_name} definitions with enum enrichment." - ) - return business_metadata_results \ No newline at end of file From bf9ccb985c20e76a1caeccc0b5570d8959d6a8f6 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:19:20 +0530 Subject: [PATCH 07/28] add: cache manager for perisisting custom metadata context between multiple MCP calls --- modelcontextprotocol/utils/cache_manager.py | 158 ++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 modelcontextprotocol/utils/cache_manager.py diff --git a/modelcontextprotocol/utils/cache_manager.py b/modelcontextprotocol/utils/cache_manager.py new file mode 100644 index 0000000..451be82 --- /dev/null +++ b/modelcontextprotocol/utils/cache_manager.py @@ -0,0 +1,158 @@ +""" +Advanced cache management using singleton pattern for persistent caching across MCP tool calls. + +This module provides a thread-safe singleton cache manager that can be used as an alternative +to the module-level caching approach. It offers more control and flexibility for cache management. +""" + +import logging +import threading +import time +from typing import Any, Dict, List, Optional, TypeVar, Generic, Callable + +logger = logging.getLogger(__name__) + +T = TypeVar('T') + + +class SingletonCacheManager(Generic[T]): + """ + Thread-safe singleton cache manager for persistent data caching across MCP tool calls. + + This class ensures that cache data persists across multiple tool calls within the same + MCP server process. + """ + + # Private class variables to store instances and lock + _instances: Dict[str, 'SingletonCacheManager'] = {} + _lock = threading.Lock() + + # Create or return existing singleton instance for the given cache name + def __new__(cls, cache_name: str, ttl_seconds: float = 300.0): + """ + Create or return existing singleton instance for the given cache name. + + Args: + cache_name: Unique identifier for this cache instance + ttl_seconds: Time-to-live for cached data in seconds + """ + # Lock to ensure thread-safe access to instances + with cls._lock: + if cache_name not in cls._instances: + instance = super().__new__(cls) + instance._initialized = False + cls._instances[cache_name] = instance + return cls._instances[cache_name] + + def __init__(self, cache_name: str, ttl_seconds: float = 300.0): + """ + Initialize the cache manager (only runs once per cache_name). + + Args: + cache_name: Unique identifier for this cache instance + ttl_seconds: Time-to-live for cached data in seconds + """ + if self._initialized: + return + + self.cache_name = cache_name + self.ttl_seconds = ttl_seconds + self._cache_data: Optional[T] = None + self._cache_timestamp: Optional[float] = None + self._data_lock = threading.Lock() + self._initialized = True + + logger.debug(f"Initialized singleton cache manager: {cache_name} (TTL: {ttl_seconds}s)") + + def get_or_fetch(self, fetch_function: Callable[[], T]) -> T: + """ + Get cached data or fetch fresh data if cache is invalid. + + Args: + fetch_function: Function to call to fetch fresh data when cache is invalid + + Returns: + Cached or freshly fetched data + """ + with self._data_lock: + current_time = time.time() + + # Check if cache is valid + if self._is_cache_valid(current_time): + logger.debug(f"Using cached data from {self.cache_name}") + return self._cache_data + + # Fetch fresh data + logger.debug(f"Fetching fresh data for {self.cache_name}") + try: + fresh_data = fetch_function() + self._cache_data = fresh_data + self._cache_timestamp = current_time + logger.info(f"Updated cache {self.cache_name} with fresh data") + return fresh_data + except Exception as e: + logger.error(f"Failed to fetch fresh data for {self.cache_name}: {e}") + # Return stale cache if available + if self._cache_data is not None: + logger.warning(f"Returning stale cached data for {self.cache_name}") + return self._cache_data + raise + + def _is_cache_valid(self, current_time: float) -> bool: + """Check if the current cache is valid based on TTL.""" + return (self._cache_data is not None and + self._cache_timestamp is not None and + (current_time - self._cache_timestamp) < self.ttl_seconds) + + def invalidate(self) -> None: + """Clear the cached data, forcing a fresh fetch on next access.""" + with self._data_lock: + self._cache_data = None + self._cache_timestamp = None + logger.info(f"Invalidated cache: {self.cache_name}") + + def get_cache_info(self) -> Dict[str, Any]: + """ + Get information about the current cache state. + + Returns: + Dict containing cache metadata + """ + with self._data_lock: + current_time = time.time() + cache_size = len(self._cache_data) if isinstance(self._cache_data, (list, dict, str)) else 1 if self._cache_data else 0 + + return { + "cache_name": self.cache_name, + "cache_size": cache_size, + "cache_timestamp": self._cache_timestamp, + "current_time": current_time, + "ttl_seconds": self.ttl_seconds, + "is_valid": self._is_cache_valid(current_time), + "age_seconds": current_time - self._cache_timestamp if self._cache_timestamp else None + } + + @classmethod + def get_all_cache_info(cls) -> Dict[str, Dict[str, Any]]: + """Get information about all active cache instances.""" + with cls._lock: + return {name: instance.get_cache_info() for name, instance in cls._instances.items()} + + @classmethod + def invalidate_all(cls) -> None: + """Invalidate all cache instances.""" + with cls._lock: + for instance in cls._instances.values(): + instance.invalidate() + logger.info("Invalidated all cache instances") + + +# Convenience function for custom metadata caching +def get_custom_metadata_cache_manager() -> SingletonCacheManager[List[Dict[str, Any]]]: + """ + Get the singleton cache manager for custom metadata context. + + Returns: + Singleton cache manager instance for custom metadata + """ + return SingletonCacheManager[List[Dict[str, Any]]]("custom_metadata_context", ttl_seconds=900.0) From 8f0eaf47b18a01ddc19cbcc58eb4a23b9c3d17c2 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:20:02 +0530 Subject: [PATCH 08/28] add: utility function to fetch all custom metadata context from a tenant --- .../utils/custom_metadata_context.py | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 modelcontextprotocol/utils/custom_metadata_context.py diff --git a/modelcontextprotocol/utils/custom_metadata_context.py b/modelcontextprotocol/utils/custom_metadata_context.py new file mode 100644 index 0000000..70738e7 --- /dev/null +++ b/modelcontextprotocol/utils/custom_metadata_context.py @@ -0,0 +1,165 @@ +import logging +from typing import Any, Dict, List, Optional +from settings import Settings + +logger = logging.getLogger(__name__) + + +def process_business_metadata( + bm_def: Dict[str, Any], +) -> Dict[str, Any]: + """ + Generates context prompt for a given Atlan business metadata definition. + + Args: + bm_def: A dictionary representing the business metadata definition. + Expected keys: 'displayName', 'description', 'attributeDefs'. + + Returns: + A list containing a single string: the formatted semantic search prompt, + and a list containing the metadata dictionary. + """ + bm_def_name_for_prompt = bm_def.get("name", "N/A") + bm_def_display_name = bm_def.get("displayName", "N/A") + description_for_prompt = bm_def.get("description", "No description available.") + + attribute_defs = bm_def.get("attributeDefs", []) + guid = bm_def.get("guid") + + # For prompt: comma separated attribute names and descriptions + attributes_list_for_prompt: List[str] = [] + if attribute_defs: + for attr in attribute_defs: + attr_name = attr.get("displayName", attr.get("name", "Unnamed attribute")) + attr_desc = attr.get( + "description", "No description" + ) # As per schema: names and descriptions + attributes_list_for_prompt.append(f"{str(attr_name)}:{str(attr_desc)}") + attributes_str_for_prompt = ( + ", ".join(attributes_list_for_prompt) if attributes_list_for_prompt else "None" + ) + + # For metadata: list of attribute objects + parsed_attributes_for_metadata: List[Dict[str, Any]] = [] + if attribute_defs: + for attr_def_item in attribute_defs: + base_description = attr_def_item.get("description", "") + + # Check for enum enrichment and enhance description + enum_enrichment = attr_def_item.get("enumEnrichment") + enhanced_description = base_description + if enum_enrichment and enum_enrichment.get("values"): + enum_values = enum_enrichment["values"] + if enum_values: + # Create comma-separated quoted values + quoted_values = ", ".join([f"'{value}'" for value in enum_values]) + enum_suffix = f" This attribute can have enum values: {quoted_values}." + enhanced_description = f"{base_description}{enum_suffix}".strip() + + attribute_metadata = { + "name": attr_def_item.get("name"), + "display_name": attr_def_item.get("displayName"), + "data_type": attr_def_item.get( + "typeName" + ), # Assuming typeName is data_type + "description": enhanced_description, + } + + # Include enum enrichment data if present + if enum_enrichment: + attribute_metadata["enumEnrichment"] = enum_enrichment + + parsed_attributes_for_metadata.append(attribute_metadata) + + metadata: Dict[str, Any] = { + "name": bm_def_name_for_prompt, + "display_name": bm_def_display_name, + "description": description_for_prompt, + "attributes": parsed_attributes_for_metadata, + } + + prompt = f"""{bm_def_display_name}|{description_for_prompt}|{attributes_str_for_prompt} + +This is a business metadata used in the data catalog to add more information to an asset""" + + return {"prompt": prompt, "metadata": metadata, "id": guid} + + +def get_custom_metadata_context() -> Dict[str, Any]: + display_name: str = "Business Metadata" + business_metadata_results: List[Dict[str, Any]] = [] + + try: + # Fetch enum definitions for enrichment + enum_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="ENUM") + enum_response: Optional[Dict[str, Any]] = Settings.make_request(enum_endpoint) + enum_lookup: Dict[str, Dict[str, Any]] = {} + if enum_response: + enum_defs = enum_response.get("enumDefs", []) + for enum_def in enum_defs: + enum_name = enum_def.get("name", "") + if enum_name: + enum_lookup[enum_name] = { + "guid": enum_def.get("guid", ""), + "description": enum_def.get("description", ""), + "values": [ + element.get("value", "") + for element in enum_def.get("elementDefs", []) + ], + "elementDefs": enum_def.get("elementDefs", []), + "version": enum_def.get("version", 1), + "createTime": enum_def.get("createTime", 0), + "updateTime": enum_def.get("updateTime", 0), + } + + # Fetch business metadata definitions + business_metadata_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="BUSINESS_METADATA") + business_metadata_response: Optional[Dict[str, Any]] = Settings.make_request(business_metadata_endpoint) + if business_metadata_response is None: + logger.error( + f"Service: Failed to make request to {business_metadata_endpoint} for {display_name}. No data returned." + ) + return [] + + business_metadata_defs: List[Dict[str, Any]] = business_metadata_response.get("businessMetadataDefs", []) + + # Enrich business metadata with enum information before processing + for business_metadata_def in business_metadata_defs: + # Enrich each business metadata definition with enum data + attribute_defs = business_metadata_def.get("attributeDefs", []) + for attribute in attribute_defs: + options = attribute.get("options", {}) + is_enum = options.get("isEnum") == "true" + + if is_enum: + enum_type = options.get("enumType", "") + if enum_type and enum_type in enum_lookup: + enum_def = enum_lookup[enum_type] + attribute["enumEnrichment"] = { + "status": "ENRICHED", + "enumType": enum_type, + "enumGuid": enum_def["guid"], + "enumDescription": enum_def["description"], + "enumVersion": enum_def["version"], + "values": enum_def["values"], + "elementDefs": enum_def["elementDefs"], + "enrichedTimestamp": None, + } + + # Process the enriched business metadata + business_metadata_results.append(process_business_metadata(business_metadata_def)) + + except Exception as e: + logger.error( + f"Service: Error fetching or processing {display_name}: {e}", + exc_info=True, + ) + return [] + + logger.info( + f"Fetched {len(business_metadata_results)} {display_name} definitions with enum enrichment." + ) + return business_metadata_results + +if __name__ == "__main__": + print(get_custom_metadata_context()) \ No newline at end of file From 7eb389fec4a7e8143b1cfdde3d9b4786e0fb68b5 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:21:11 +0530 Subject: [PATCH 09/28] add: detect_custom_metadata_trigger tool --- .../tools/custom_metadata_detector.py | 60 +++++++ .../utils/custom_metadata_detector.py | 169 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 modelcontextprotocol/tools/custom_metadata_detector.py create mode 100644 modelcontextprotocol/utils/custom_metadata_detector.py diff --git a/modelcontextprotocol/tools/custom_metadata_detector.py b/modelcontextprotocol/tools/custom_metadata_detector.py new file mode 100644 index 0000000..21a8903 --- /dev/null +++ b/modelcontextprotocol/tools/custom_metadata_detector.py @@ -0,0 +1,60 @@ +import logging +from typing import Any, Dict +from utils.custom_metadata_detector import detect_custom_metadata_with_singleton + +logger = logging.getLogger(__name__) + + +def detect_custom_metadata_trigger(query_text: str) -> Dict[str, Any]: + """ + Detect custom metadata triggers from natural language queries. + + This function analyzes natural language text to identify when users are referencing + custom metadata (business metadata) and automatically provides context about + available custom metadata definitions. + + Args: + query_text (str): Natural language query text to analyze for custom metadata references + + Returns: + Dict[str, Any]: Dictionary containing: + - detected: Boolean indicating if custom metadata was detected + - context: Custom metadata context if detected (list of metadata definitions) + - detection_reasons: List of reasons why custom metadata was detected + - suggested_attributes: List of suggested custom metadata attributes + + Examples: + # Query mentioning data classification + result = detect_custom_metadata_trigger("Find all tables with sensitive data classification") + + # Query about data quality + result = detect_custom_metadata_trigger("Show me assets with poor data quality scores") + + # Query about business ownership + result = detect_custom_metadata_trigger("Which datasets have John as the business owner?") + + # Query about compliance + result = detect_custom_metadata_trigger("Find all PII data that needs GDPR compliance review") + """ + logger.info(f"Detecting custom metadata triggers in query: {query_text[:100]}...") + + try: + result = detect_custom_metadata_with_singleton(query_text) + + if result["detected"]: + logger.info(f"Custom metadata detected with reasons: {result['detection_reasons']}") + context_count = len(result.get("context", [])) + logger.info(f"Provided {context_count} custom metadata definitions for context enrichment") + else: + logger.debug("No custom metadata triggers detected in the query") + + return result + + except Exception as e: + logger.error(f"Error detecting custom metadata triggers: {str(e)}") + return { + "detected": False, + "context": None, + "detection_reasons": [], + "error": str(e) + } diff --git a/modelcontextprotocol/utils/custom_metadata_detector.py b/modelcontextprotocol/utils/custom_metadata_detector.py new file mode 100644 index 0000000..3070453 --- /dev/null +++ b/modelcontextprotocol/utils/custom_metadata_detector.py @@ -0,0 +1,169 @@ +""" +This demonstrates how to use the SingletonCacheManager for persistent caching +across MCP tool calls as an alternative to module-level caching. +""" + +import logging +from typing import Any, Dict, List +from utils.custom_metadata_context import get_custom_metadata_context +from utils.cache_manager import get_custom_metadata_cache_manager + +logger = logging.getLogger(__name__) + + +class CustomMetadataDetectorWithSingleton: + """ + Custom metadata detector using singleton cache manager for persistent caching. + + This version uses the SingletonCacheManager to maintain cache across multiple + tool calls, providing better control and thread safety compared to module-level caching. + """ + + # Common keywords that indicate custom metadata usage + CUSTOM_METADATA_KEYWORDS = { + 'business metadata', 'custom metadata', 'custom metadata filters','business attributes', + 'data classification', 'data quality', + 'business context', 'metadata attributes', 'business properties', + 'custom attributes', 'business tags', 'data governance' + } + + def __init__(self): + """Initialize the custom metadata detector with singleton cache manager.""" + self.cache_manager = get_custom_metadata_cache_manager() + + def detect_from_natural_language( + self, + query_text: str + ) -> Dict[str, Any]: + """ + Detect if a natural language query involves custom metadata and provide the appropriate context. + + Args: + query_text: Natural language query text to analyze + + Returns: + Dict containing: + - detected: Boolean indicating if custom metadata was detected + - context: Custom metadata context if detected + - detection_reasons: List of reasons why custom metadata was detected + - suggested_attributes: List of suggested custom metadata attributes + """ + logger.debug(f"Starting custom metadata detection analysis for query: {query_text[:100]}...") + + detection_reasons: List[str] = [] + + if not query_text or not query_text.strip(): + return { + "detected": False, + "detection_reasons": [], + "context": None + } + + # Check query text for custom metadata keywords + detected_keywords = self._detect_keywords_in_text(query_text) + if detected_keywords: + detection_reasons.append(f"Custom metadata keywords detected: {', '.join(detected_keywords)}") + + # Check for data governance and quality terms + governance_terms = self._detect_governance_terms(query_text) + if governance_terms: + detection_reasons.append(f"Data governance terms detected: {', '.join(governance_terms)}") + + # Determine if custom metadata was detected + detected = len(detection_reasons) > 0 + + result = { + "detected": detected, + "detection_reasons": detection_reasons, + "context": None + } + + # If custom metadata was detected, fetch and provide context using custom metadata cache + if detected: + logger.info(f"Custom metadata detected. Reasons: {detection_reasons}") + try: + # Use singleton cache manager to get context with persistent caching + context = self.cache_manager.get_or_fetch(get_custom_metadata_context) + result["context"] = context + logger.info(f"Provided custom metadata context with {len(context)} definitions using singleton cache") + except Exception as e: + logger.error(f"Failed to fetch custom metadata context: {e}") + result["context"] = [] + else: + logger.debug("No custom metadata usage detected") + + return result + + def _detect_keywords_in_text(self, text: str) -> List[str]: + """ + Detect custom metadata keywords in text. + + Args: + text: Text to analyze + + Returns: + List of detected keywords + """ + if not text: + return [] + + text_lower = text.lower() + detected = [] + + for keyword in self.CUSTOM_METADATA_KEYWORDS: + if keyword in text_lower: + detected.append(keyword) + + return detected + + def _detect_governance_terms(self, text: str) -> List[str]: + """ + Detect data governance and quality terms in text. + + Args: + text: Text to analyze + + Returns: + List of detected governance terms + """ + if not text: + return [] + + text_lower = text.lower() + detected = [] + + governance_terms = [ + 'pii', 'personally identifiable information', 'gdpr', 'compliance', + 'data lineage', 'data quality', 'data governance', 'data catalog', + 'master data', 'reference data', 'critical data', 'sensitive data', + 'public data', 'internal data', 'confidential data', 'restricted data', + 'data retention', 'data lifecycle', 'data archival', 'data purging' + ] + + for term in governance_terms: + if term in text_lower: + detected.append(term) + + return detected + + def get_cache_info(self) -> Dict[str, Any]: + """Get information about the singleton cache state.""" + return self.cache_manager.get_cache_info() + + def invalidate_cache(self) -> None: + """Invalidate the singleton cache, forcing fresh data on next request.""" + self.cache_manager.invalidate() + + +def detect_custom_metadata_with_singleton(query_text: str) -> Dict[str, Any]: + """ + Convenience function using singleton cache manager approach. + + Args: + query_text: Natural language query text to analyze + + Returns: + Dict containing detection results and context + """ + detector = CustomMetadataDetectorWithSingleton() + return detector.detect_from_natural_language(query_text=query_text) From d73074a3267ecef156c1562676a6028029df8ace Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:24:45 +0530 Subject: [PATCH 10/28] add: registration of detect_custom_metadata_from_query mcp tool --- modelcontextprotocol/server.py | 82 ++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index a2dea84..277c3e5 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -7,7 +7,7 @@ get_assets_by_dsl, traverse_lineage, update_assets, - get_custom_metadata_context, + detect_custom_metadata_trigger, create_glossary_category_assets, create_glossary_assets, create_glossary_term_assets, @@ -680,56 +680,60 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: @mcp.tool() -def custom_metadata_context() -> List[Dict[str, Any]]: +def detect_custom_metadata_from_query(query_text: str) -> Dict[str, Any]: """ - Get custom metadata context for business metadata definitions in Atlan. + Detect custom metadata triggers from natural language queries. - This tool provides comprehensive information about all business metadata definitions - available in the Atlan tenant, including their attributes, descriptions, and - enum values. This context is essential when users refer to custom metadata in - their queries, as it helps the LLM understand the structure and available options - for business metadata. + This tool analyzes natural language text to identify when users are referencing + custom metadata (business metadata) and automatically provides context about + available custom metadata definitions. Use this tool when you receive natural + language queries that might involve custom metadata concepts. + Args: + query_text (str): Natural language query text to analyze for custom metadata references + Returns: - List[Dict[str, Any]]: List of business metadata definitions, each containing: - - prompt: Formatted string with metadata information for LLM context - - metadata: Detailed metadata structure including: - - name: Internal name of the business metadata - - display_name: Human-readable display name - - description: Description of the business metadata - - attributes: List of attribute definitions with: - - name: Attribute internal name - - display_name: Attribute display name - - data_type: Data type of the attribute - - description: Attribute description (enhanced with enum values if applicable) - - enumEnrichment: Enum information if the attribute is an enum type - - id: GUID of the business metadata definition - + Dict[str, Any]: Dictionary containing: + - detected: Boolean indicating if custom metadata was detected + - context: Custom metadata context if detected (list of metadata definitions) + - detection_reasons: List of reasons why custom metadata was detected + - suggested_attributes: List of suggested custom metadata attributes + + Detection Triggers: + The tool detects custom metadata usage when the query contains: + - Business metadata keywords (e.g., "business metadata", "data classification") + - Data governance terms (e.g., "PII", "GDPR", "compliance", "data quality") + - Attribute patterns (e.g., "sensitivity level", "business owner", "data steward") + - Quality and classification terms (e.g., "quality score", "classification level") + Examples: - # Get all custom metadata context - context = get_custom_metadata_context_tool() + # Query mentioning data classification + result = detect_custom_metadata_from_query("Find all tables with sensitive data classification") - # The returned data helps understand available business metadata like: - # - Data Quality metadata with attributes like "Accuracy Score", "Completeness" - # - Data Classification with attributes like "Sensitivity Level", "Data Category" - # - Business Context with attributes like "Business Owner", "Data Steward" + # Query about data quality + result = detect_custom_metadata_from_query("Show me assets with poor data quality scores") + + # Query about business ownership + result = detect_custom_metadata_from_query("Which datasets have John as the business owner?") + + # Query about compliance + result = detect_custom_metadata_from_query("Find all PII data that needs GDPR compliance review") - # Each attribute may have enum values, for example: - # Sensitivity Level: 'Public', 'Internal', 'Confidential', 'Restricted' - # Data Category: 'PII', 'Financial', 'Operational', 'Marketing' - Use Cases: - - Understanding available business metadata when users ask about custom metadata - - Providing context for business metadata attributes and their possible values - - Helping users understand what business metadata can be applied to assets - - Supporting queries about data classification, quality metrics, and business context + - Analyze user queries before executing searches to provide custom metadata context + - Understand when users are asking about business metadata attributes + - Provide enriched context about available custom metadata definitions + - Help users discover relevant custom metadata attributes for their queries """ try: - return get_custom_metadata_context() + return detect_custom_metadata_trigger(query_text) except Exception as e: return { - "error": f"Failed to fetch custom metadata context: {str(e)}", - "context": [] + "detected": False, + "context": None, + "detection_reasons": [], + "suggested_attributes": [], + "error": f"Failed to detect custom metadata: {str(e)}" } From 8bfe222573a475fbca5a5694cd8e0d1d94aa848d Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:32:43 +0530 Subject: [PATCH 11/28] add: pre-commit fixes and update pre-commit versions --- .pre-commit-config.yaml | 6 +- modelcontextprotocol/settings.py | 12 +- .../tools/custom_metadata_detector.py | 36 ++--- modelcontextprotocol/utils/cache_manager.py | 79 +++++++---- .../utils/custom_metadata_context.py | 31 ++-- .../utils/custom_metadata_detector.py | 132 +++++++++++------- 6 files changed, 176 insertions(+), 120 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 507b8c9..95c41a4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -12,14 +12,14 @@ repos: - id: detect-private-key - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook - rev: v9.11.0 + rev: v9.22.0 hooks: - id: commitlint stages: [commit-msg] additional_dependencies: ['@commitlint/config-conventional'] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.0 + rev: v0.12.10 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/modelcontextprotocol/settings.py b/modelcontextprotocol/settings.py index fb081e7..e54d4a0 100644 --- a/modelcontextprotocol/settings.py +++ b/modelcontextprotocol/settings.py @@ -1,4 +1,5 @@ """Configuration settings for the application.""" + import requests from typing import Any, Dict, Optional from urllib.parse import urlencode @@ -27,7 +28,7 @@ def headers(self) -> dict: "X-Atlan-Agent-Id": self.ATLAN_AGENT_ID, "X-Atlan-Client-Origin": self.ATLAN_AGENT, } - + @staticmethod def build_api_url(path: str, query_params: Optional[Dict[str, Any]] = None) -> str: current_settings = Settings() @@ -58,7 +59,7 @@ def build_api_url(path: str, query_params: Optional[Dict[str, Any]] = None) -> s query_string = urlencode(active_query_params) return f"{full_path}?{query_string}" return full_path - + @staticmethod def get_atlan_typedef_api_endpoint(param: str) -> str: current_settings = Settings() @@ -68,9 +69,10 @@ def get_atlan_typedef_api_endpoint(param: str) -> str: ) return Settings.build_api_url( - path=current_settings.ATLAN_TYPEDEF_API_ENDPOINT, query_params={"type": param} + path=current_settings.ATLAN_TYPEDEF_API_ENDPOINT, + query_params={"type": param}, ) - + @staticmethod def make_request(url: str) -> Optional[Dict[str, Any]]: current_settings = Settings() @@ -90,8 +92,6 @@ def make_request(url: str) -> Optional[Dict[str, Any]]: return response.json() except Exception as e: raise Exception(f"Failed to make request to {url}: {e}") - - class Config: env_file = ".env" diff --git a/modelcontextprotocol/tools/custom_metadata_detector.py b/modelcontextprotocol/tools/custom_metadata_detector.py index 21a8903..2fcc1bf 100644 --- a/modelcontextprotocol/tools/custom_metadata_detector.py +++ b/modelcontextprotocol/tools/custom_metadata_detector.py @@ -8,53 +8,57 @@ def detect_custom_metadata_trigger(query_text: str) -> Dict[str, Any]: """ Detect custom metadata triggers from natural language queries. - - This function analyzes natural language text to identify when users are referencing - custom metadata (business metadata) and automatically provides context about + + This function analyzes natural language text to identify when users are referencing + custom metadata (business metadata) and automatically provides context about available custom metadata definitions. - + Args: query_text (str): Natural language query text to analyze for custom metadata references - + Returns: Dict[str, Any]: Dictionary containing: - detected: Boolean indicating if custom metadata was detected - context: Custom metadata context if detected (list of metadata definitions) - detection_reasons: List of reasons why custom metadata was detected - suggested_attributes: List of suggested custom metadata attributes - + Examples: # Query mentioning data classification result = detect_custom_metadata_trigger("Find all tables with sensitive data classification") - + # Query about data quality result = detect_custom_metadata_trigger("Show me assets with poor data quality scores") - + # Query about business ownership result = detect_custom_metadata_trigger("Which datasets have John as the business owner?") - + # Query about compliance result = detect_custom_metadata_trigger("Find all PII data that needs GDPR compliance review") """ logger.info(f"Detecting custom metadata triggers in query: {query_text[:100]}...") - + try: result = detect_custom_metadata_with_singleton(query_text) - + if result["detected"]: - logger.info(f"Custom metadata detected with reasons: {result['detection_reasons']}") + logger.info( + f"Custom metadata detected with reasons: {result['detection_reasons']}" + ) context_count = len(result.get("context", [])) - logger.info(f"Provided {context_count} custom metadata definitions for context enrichment") + logger.info( + f"Provided {context_count} custom metadata definitions for context enrichment" + ) else: logger.debug("No custom metadata triggers detected in the query") - + return result - + except Exception as e: logger.error(f"Error detecting custom metadata triggers: {str(e)}") return { "detected": False, "context": None, "detection_reasons": [], - "error": str(e) + "error": str(e), } diff --git a/modelcontextprotocol/utils/cache_manager.py b/modelcontextprotocol/utils/cache_manager.py index 451be82..4ab36d9 100644 --- a/modelcontextprotocol/utils/cache_manager.py +++ b/modelcontextprotocol/utils/cache_manager.py @@ -12,26 +12,26 @@ logger = logging.getLogger(__name__) -T = TypeVar('T') +T = TypeVar("T") class SingletonCacheManager(Generic[T]): """ Thread-safe singleton cache manager for persistent data caching across MCP tool calls. - + This class ensures that cache data persists across multiple tool calls within the same MCP server process. """ - + # Private class variables to store instances and lock - _instances: Dict[str, 'SingletonCacheManager'] = {} + _instances: Dict[str, "SingletonCacheManager"] = {} _lock = threading.Lock() - + # Create or return existing singleton instance for the given cache name def __new__(cls, cache_name: str, ttl_seconds: float = 300.0): """ Create or return existing singleton instance for the given cache name. - + Args: cache_name: Unique identifier for this cache instance ttl_seconds: Time-to-live for cached data in seconds @@ -43,45 +43,47 @@ def __new__(cls, cache_name: str, ttl_seconds: float = 300.0): instance._initialized = False cls._instances[cache_name] = instance return cls._instances[cache_name] - + def __init__(self, cache_name: str, ttl_seconds: float = 300.0): """ Initialize the cache manager (only runs once per cache_name). - + Args: cache_name: Unique identifier for this cache instance ttl_seconds: Time-to-live for cached data in seconds """ if self._initialized: return - + self.cache_name = cache_name self.ttl_seconds = ttl_seconds self._cache_data: Optional[T] = None self._cache_timestamp: Optional[float] = None self._data_lock = threading.Lock() self._initialized = True - - logger.debug(f"Initialized singleton cache manager: {cache_name} (TTL: {ttl_seconds}s)") - + + logger.debug( + f"Initialized singleton cache manager: {cache_name} (TTL: {ttl_seconds}s)" + ) + def get_or_fetch(self, fetch_function: Callable[[], T]) -> T: """ Get cached data or fetch fresh data if cache is invalid. - + Args: fetch_function: Function to call to fetch fresh data when cache is invalid - + Returns: Cached or freshly fetched data """ with self._data_lock: current_time = time.time() - + # Check if cache is valid if self._is_cache_valid(current_time): logger.debug(f"Using cached data from {self.cache_name}") return self._cache_data - + # Fetch fresh data logger.debug(f"Fetching fresh data for {self.cache_name}") try: @@ -97,31 +99,39 @@ def get_or_fetch(self, fetch_function: Callable[[], T]) -> T: logger.warning(f"Returning stale cached data for {self.cache_name}") return self._cache_data raise - + def _is_cache_valid(self, current_time: float) -> bool: """Check if the current cache is valid based on TTL.""" - return (self._cache_data is not None and - self._cache_timestamp is not None and - (current_time - self._cache_timestamp) < self.ttl_seconds) - + return ( + self._cache_data is not None + and self._cache_timestamp is not None + and (current_time - self._cache_timestamp) < self.ttl_seconds + ) + def invalidate(self) -> None: """Clear the cached data, forcing a fresh fetch on next access.""" with self._data_lock: self._cache_data = None self._cache_timestamp = None logger.info(f"Invalidated cache: {self.cache_name}") - + def get_cache_info(self) -> Dict[str, Any]: """ Get information about the current cache state. - + Returns: Dict containing cache metadata """ with self._data_lock: current_time = time.time() - cache_size = len(self._cache_data) if isinstance(self._cache_data, (list, dict, str)) else 1 if self._cache_data else 0 - + cache_size = ( + len(self._cache_data) + if isinstance(self._cache_data, (list, dict, str)) + else 1 + if self._cache_data + else 0 + ) + return { "cache_name": self.cache_name, "cache_size": cache_size, @@ -129,15 +139,20 @@ def get_cache_info(self) -> Dict[str, Any]: "current_time": current_time, "ttl_seconds": self.ttl_seconds, "is_valid": self._is_cache_valid(current_time), - "age_seconds": current_time - self._cache_timestamp if self._cache_timestamp else None + "age_seconds": current_time - self._cache_timestamp + if self._cache_timestamp + else None, } - + @classmethod def get_all_cache_info(cls) -> Dict[str, Dict[str, Any]]: """Get information about all active cache instances.""" with cls._lock: - return {name: instance.get_cache_info() for name, instance in cls._instances.items()} - + return { + name: instance.get_cache_info() + for name, instance in cls._instances.items() + } + @classmethod def invalidate_all(cls) -> None: """Invalidate all cache instances.""" @@ -151,8 +166,10 @@ def invalidate_all(cls) -> None: def get_custom_metadata_cache_manager() -> SingletonCacheManager[List[Dict[str, Any]]]: """ Get the singleton cache manager for custom metadata context. - + Returns: Singleton cache manager instance for custom metadata """ - return SingletonCacheManager[List[Dict[str, Any]]]("custom_metadata_context", ttl_seconds=900.0) + return SingletonCacheManager[List[Dict[str, Any]]]( + "custom_metadata_context", ttl_seconds=900.0 + ) diff --git a/modelcontextprotocol/utils/custom_metadata_context.py b/modelcontextprotocol/utils/custom_metadata_context.py index 70738e7..be3cdf3 100644 --- a/modelcontextprotocol/utils/custom_metadata_context.py +++ b/modelcontextprotocol/utils/custom_metadata_context.py @@ -44,7 +44,7 @@ def process_business_metadata( if attribute_defs: for attr_def_item in attribute_defs: base_description = attr_def_item.get("description", "") - + # Check for enum enrichment and enhance description enum_enrichment = attr_def_item.get("enumEnrichment") enhanced_description = base_description @@ -53,9 +53,11 @@ def process_business_metadata( if enum_values: # Create comma-separated quoted values quoted_values = ", ".join([f"'{value}'" for value in enum_values]) - enum_suffix = f" This attribute can have enum values: {quoted_values}." + enum_suffix = ( + f" This attribute can have enum values: {quoted_values}." + ) enhanced_description = f"{base_description}{enum_suffix}".strip() - + attribute_metadata = { "name": attr_def_item.get("name"), "display_name": attr_def_item.get("displayName"), @@ -113,15 +115,21 @@ def get_custom_metadata_context() -> Dict[str, Any]: } # Fetch business metadata definitions - business_metadata_endpoint: str = Settings.get_atlan_typedef_api_endpoint(param="BUSINESS_METADATA") - business_metadata_response: Optional[Dict[str, Any]] = Settings.make_request(business_metadata_endpoint) + business_metadata_endpoint: str = Settings.get_atlan_typedef_api_endpoint( + param="BUSINESS_METADATA" + ) + business_metadata_response: Optional[Dict[str, Any]] = Settings.make_request( + business_metadata_endpoint + ) if business_metadata_response is None: logger.error( f"Service: Failed to make request to {business_metadata_endpoint} for {display_name}. No data returned." ) return [] - - business_metadata_defs: List[Dict[str, Any]] = business_metadata_response.get("businessMetadataDefs", []) + + business_metadata_defs: List[Dict[str, Any]] = business_metadata_response.get( + "businessMetadataDefs", [] + ) # Enrich business metadata with enum information before processing for business_metadata_def in business_metadata_defs: @@ -147,7 +155,9 @@ def get_custom_metadata_context() -> Dict[str, Any]: } # Process the enriched business metadata - business_metadata_results.append(process_business_metadata(business_metadata_def)) + business_metadata_results.append( + process_business_metadata(business_metadata_def) + ) except Exception as e: logger.error( @@ -155,11 +165,12 @@ def get_custom_metadata_context() -> Dict[str, Any]: exc_info=True, ) return [] - + logger.info( f"Fetched {len(business_metadata_results)} {display_name} definitions with enum enrichment." ) return business_metadata_results + if __name__ == "__main__": - print(get_custom_metadata_context()) \ No newline at end of file + print(get_custom_metadata_context()) diff --git a/modelcontextprotocol/utils/custom_metadata_detector.py b/modelcontextprotocol/utils/custom_metadata_detector.py index 3070453..f821f7e 100644 --- a/modelcontextprotocol/utils/custom_metadata_detector.py +++ b/modelcontextprotocol/utils/custom_metadata_detector.py @@ -14,33 +14,38 @@ class CustomMetadataDetectorWithSingleton: """ Custom metadata detector using singleton cache manager for persistent caching. - + This version uses the SingletonCacheManager to maintain cache across multiple tool calls, providing better control and thread safety compared to module-level caching. """ - + # Common keywords that indicate custom metadata usage CUSTOM_METADATA_KEYWORDS = { - 'business metadata', 'custom metadata', 'custom metadata filters','business attributes', - 'data classification', 'data quality', - 'business context', 'metadata attributes', 'business properties', - 'custom attributes', 'business tags', 'data governance' + "business metadata", + "custom metadata", + "custom metadata filters", + "business attributes", + "data classification", + "data quality", + "business context", + "metadata attributes", + "business properties", + "custom attributes", + "business tags", + "data governance", } - + def __init__(self): """Initialize the custom metadata detector with singleton cache manager.""" self.cache_manager = get_custom_metadata_cache_manager() - - def detect_from_natural_language( - self, - query_text: str - ) -> Dict[str, Any]: + + def detect_from_natural_language(self, query_text: str) -> Dict[str, Any]: """ Detect if a natural language query involves custom metadata and provide the appropriate context. - + Args: query_text: Natural language query text to analyze - + Returns: Dict containing: - detected: Boolean indicating if custom metadata was detected @@ -48,36 +53,38 @@ def detect_from_natural_language( - detection_reasons: List of reasons why custom metadata was detected - suggested_attributes: List of suggested custom metadata attributes """ - logger.debug(f"Starting custom metadata detection analysis for query: {query_text[:100]}...") - + logger.debug( + f"Starting custom metadata detection analysis for query: {query_text[:100]}..." + ) + detection_reasons: List[str] = [] - + if not query_text or not query_text.strip(): - return { - "detected": False, - "detection_reasons": [], - "context": None - } - + return {"detected": False, "detection_reasons": [], "context": None} + # Check query text for custom metadata keywords detected_keywords = self._detect_keywords_in_text(query_text) if detected_keywords: - detection_reasons.append(f"Custom metadata keywords detected: {', '.join(detected_keywords)}") - + detection_reasons.append( + f"Custom metadata keywords detected: {', '.join(detected_keywords)}" + ) + # Check for data governance and quality terms governance_terms = self._detect_governance_terms(query_text) if governance_terms: - detection_reasons.append(f"Data governance terms detected: {', '.join(governance_terms)}") - + detection_reasons.append( + f"Data governance terms detected: {', '.join(governance_terms)}" + ) + # Determine if custom metadata was detected detected = len(detection_reasons) > 0 - + result = { "detected": detected, "detection_reasons": detection_reasons, - "context": None + "context": None, } - + # If custom metadata was detected, fetch and provide context using custom metadata cache if detected: logger.info(f"Custom metadata detected. Reasons: {detection_reasons}") @@ -85,71 +92,88 @@ def detect_from_natural_language( # Use singleton cache manager to get context with persistent caching context = self.cache_manager.get_or_fetch(get_custom_metadata_context) result["context"] = context - logger.info(f"Provided custom metadata context with {len(context)} definitions using singleton cache") + logger.info( + f"Provided custom metadata context with {len(context)} definitions using singleton cache" + ) except Exception as e: logger.error(f"Failed to fetch custom metadata context: {e}") result["context"] = [] else: logger.debug("No custom metadata usage detected") - + return result - + def _detect_keywords_in_text(self, text: str) -> List[str]: """ Detect custom metadata keywords in text. - + Args: text: Text to analyze - + Returns: List of detected keywords """ if not text: return [] - + text_lower = text.lower() detected = [] - + for keyword in self.CUSTOM_METADATA_KEYWORDS: if keyword in text_lower: detected.append(keyword) - + return detected - + def _detect_governance_terms(self, text: str) -> List[str]: """ Detect data governance and quality terms in text. - + Args: text: Text to analyze - + Returns: List of detected governance terms """ if not text: return [] - + text_lower = text.lower() detected = [] - + governance_terms = [ - 'pii', 'personally identifiable information', 'gdpr', 'compliance', - 'data lineage', 'data quality', 'data governance', 'data catalog', - 'master data', 'reference data', 'critical data', 'sensitive data', - 'public data', 'internal data', 'confidential data', 'restricted data', - 'data retention', 'data lifecycle', 'data archival', 'data purging' + "pii", + "personally identifiable information", + "gdpr", + "compliance", + "data lineage", + "data quality", + "data governance", + "data catalog", + "master data", + "reference data", + "critical data", + "sensitive data", + "public data", + "internal data", + "confidential data", + "restricted data", + "data retention", + "data lifecycle", + "data archival", + "data purging", ] - + for term in governance_terms: if term in text_lower: detected.append(term) - + return detected - + def get_cache_info(self) -> Dict[str, Any]: """Get information about the singleton cache state.""" return self.cache_manager.get_cache_info() - + def invalidate_cache(self) -> None: """Invalidate the singleton cache, forcing fresh data on next request.""" self.cache_manager.invalidate() @@ -158,10 +182,10 @@ def invalidate_cache(self) -> None: def detect_custom_metadata_with_singleton(query_text: str) -> Dict[str, Any]: """ Convenience function using singleton cache manager approach. - + Args: query_text: Natural language query text to analyze - + Returns: Dict containing detection results and context """ From fcd70a541b8abfcc36e4816df1dca2f85d9a19c5 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 26 Aug 2025 17:45:06 +0530 Subject: [PATCH 12/28] add: support for custom_metadata filterds in search_assets tool --- modelcontextprotocol/server.py | 38 +++++++++----- modelcontextprotocol/tools/search.py | 16 ++++++ modelcontextprotocol/utils/search.py | 74 +++++++++++++++++++++++++++- 3 files changed, 115 insertions(+), 13 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index 277c3e5..2b028c7 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -27,6 +27,7 @@ @mcp.tool() def search_assets_tool( conditions=None, + custom_metadata_conditions=None, negative_conditions=None, some_conditions=None, min_somes=1, @@ -50,6 +51,8 @@ def search_assets_tool( Args: conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match. Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} + custom_metadata_conditions (List[Dict[str, Any]], optional): List of custom metadata conditions to match. + Format: [{"custom_metadata": value}] or [{"custom_metadata": {"operator": operator, "value": value}}] negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude. Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match. @@ -95,6 +98,15 @@ def search_assets_tool( include_attributes=["owner_users", "owner_groups"] ) + # Search for assets with custom metadata + asset_list_1 = search_assets( + custom_metadata_conditions=[{"custom_metadata_filter": {"display_name": "test-mcp", "property_filters": [{"property_name": "mcp_allow_status", "property_value": "yes"}]}}] + ) + + asset_list_2 = search_assets( + custom_metadata_conditions=[{"custom_metadata_filter": {"display_name": "test-mcp", "property_filters": [{"property_name": "mcp_allow_status", "property_value": "yes", "operator": "eq"}]}}] + ) + # Search for columns with specific certificate status columns = search_assets( asset_type="Column", @@ -219,6 +231,7 @@ def search_assets_tool( try: # Parse JSON string parameters if needed conditions = parse_json_parameter(conditions) + custom_metadata_conditions = parse_json_parameter(custom_metadata_conditions) negative_conditions = parse_json_parameter(negative_conditions) some_conditions = parse_json_parameter(some_conditions) date_range = parse_json_parameter(date_range) @@ -229,6 +242,7 @@ def search_assets_tool( return search_assets( conditions, + custom_metadata_conditions, negative_conditions, some_conditions, min_somes, @@ -683,42 +697,42 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: def detect_custom_metadata_from_query(query_text: str) -> Dict[str, Any]: """ Detect custom metadata triggers from natural language queries. - - This tool analyzes natural language text to identify when users are referencing - custom metadata (business metadata) and automatically provides context about + + This tool analyzes natural language text to identify when users are referencing + custom metadata (business metadata) and automatically provides context about available custom metadata definitions. Use this tool when you receive natural language queries that might involve custom metadata concepts. - + Args: query_text (str): Natural language query text to analyze for custom metadata references - + Returns: Dict[str, Any]: Dictionary containing: - detected: Boolean indicating if custom metadata was detected - context: Custom metadata context if detected (list of metadata definitions) - detection_reasons: List of reasons why custom metadata was detected - suggested_attributes: List of suggested custom metadata attributes - + Detection Triggers: The tool detects custom metadata usage when the query contains: - Business metadata keywords (e.g., "business metadata", "data classification") - Data governance terms (e.g., "PII", "GDPR", "compliance", "data quality") - Attribute patterns (e.g., "sensitivity level", "business owner", "data steward") - Quality and classification terms (e.g., "quality score", "classification level") - + Examples: # Query mentioning data classification result = detect_custom_metadata_from_query("Find all tables with sensitive data classification") - + # Query about data quality result = detect_custom_metadata_from_query("Show me assets with poor data quality scores") - + # Query about business ownership result = detect_custom_metadata_from_query("Which datasets have John as the business owner?") - + # Query about compliance result = detect_custom_metadata_from_query("Find all PII data that needs GDPR compliance review") - + Use Cases: - Analyze user queries before executing searches to provide custom metadata context - Understand when users are asking about business metadata attributes @@ -733,7 +747,7 @@ def detect_custom_metadata_from_query(query_text: str) -> Dict[str, Any]: "context": None, "detection_reasons": [], "suggested_attributes": [], - "error": f"Failed to detect custom metadata: {str(e)}" + "error": f"Failed to detect custom metadata: {str(e)}", } diff --git a/modelcontextprotocol/tools/search.py b/modelcontextprotocol/tools/search.py index 3a1c399..dd84cec 100644 --- a/modelcontextprotocol/tools/search.py +++ b/modelcontextprotocol/tools/search.py @@ -14,6 +14,7 @@ def search_assets( conditions: Optional[Union[Dict[str, Any], str]] = None, + custom_metadata_conditions: Optional[List[Dict[str, Any]]] = None, negative_conditions: Optional[Dict[str, Any]] = None, some_conditions: Optional[Dict[str, Any]] = None, min_somes: int = 1, @@ -40,6 +41,8 @@ def search_assets( Args: conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match. Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} + custom_metadata_conditions (List[Dict[str, Any]], optional): List of custom metadata conditions to match. + Format: [{"custom_metadata": value}] or [{"custom_metadata": {"operator": operator, "value": value}}] negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude. Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match. @@ -187,6 +190,19 @@ def search_assets( ) search = search.min_somes(min_somes) + if custom_metadata_conditions: + logger.debug( + f"Applying custom metadata conditions: {custom_metadata_conditions}" + ) + for custom_metadata_filter_onject in custom_metadata_conditions: + if isinstance(custom_metadata_filter_onject, dict): + _, condition = next(iter(custom_metadata_filter_onject.items())) + else: + condition = custom_metadata_filter_onject + search = SearchUtils._process_custom_metadata_condition( + search, condition, "where" + ) + # Apply date range filters if date_range: logger.debug(f"Applying date range filters: {date_range}") diff --git a/modelcontextprotocol/utils/search.py b/modelcontextprotocol/utils/search.py index b69377d..d3eb706 100644 --- a/modelcontextprotocol/utils/search.py +++ b/modelcontextprotocol/utils/search.py @@ -1,6 +1,8 @@ -from typing import Dict, Any import logging +from typing import Dict, Any from pyatlan.model.assets import Asset +from pyatlan.model.fields.atlan_fields import CustomMetadataField +from pyatlan.model.fluent_search import FluentSearch logger = logging.getLogger(__name__) @@ -170,3 +172,73 @@ def _process_condition( ) search = search_method(attr.eq(condition)) return search + + @staticmethod + def _process_custom_metadata_condition( + search: FluentSearch, condition: Dict[str, Any], search_method_name: str + ): + """ + Process a single custom metadata condition and apply it to the search using the specified method. + + Args: + search: The FluentSearch object + condition: Dictionary containing display_name, property_name, property_value, and optional operator + search_method_name: The search method to use ('where', 'where_not', 'where_some') + + Returns: + FluentSearch: The updated search object + """ + if not isinstance(condition, dict): + logger.warning("Custom metadata condition must be a dictionary") + return search + + # Validate required fields + required_fields = ["display_name", "property_filters"] + if not all(field in condition for field in required_fields): + logger.warning( + f"Custom metadata condition missing required fields: {required_fields}" + ) + return search + + search_method = getattr(search, search_method_name) + + # Get operator, default to "eq" + for property_filter in condition["property_filters"]: + operator = property_filter.get("operator", "eq") + property_name = property_filter.get("property_name") + property_value = property_filter.get("property_value") + + try: + # Create the custom metadata field + custom_metadata_field = CustomMetadataField( + set_name=condition["display_name"], attribute_name=property_name + ) + + # Apply the appropriate operator + if property_value == "any": + # For "any" value, use has_any_value() method + query_condition = custom_metadata_field.has_any_value() + else: + # Get the operator method dynamically + if hasattr(custom_metadata_field, operator): + operator_method = getattr(custom_metadata_field, operator) + query_condition = operator_method(property_value) + else: + # Fallback to eq if operator not found + logger.warning( + f"Operator '{operator}' not found, falling back to 'eq'" + ) + query_condition = custom_metadata_field.eq(property_value) + + # Apply the condition to the search + search = search_method(query_condition) + logger.info(search) + logger.debug( + f"Applied custom metadata condition: {condition['display_name']}.{condition['property_name']} {operator} {condition['property_value']}" + ) + + except Exception as e: + logger.error(f"Error processing custom metadata condition: {e}") + logger.exception("Exception details:") + + return search From 150d08b5f2111de6e8d2eda080a13ae404935ce4 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 2 Sep 2025 11:11:21 +0530 Subject: [PATCH 13/28] remove: custom metadata detector from query tool --- .../tools/custom_metadata_detector.py | 64 ------ modelcontextprotocol/utils/cache_manager.py | 175 ---------------- .../utils/custom_metadata_detector.py | 193 ------------------ 3 files changed, 432 deletions(-) delete mode 100644 modelcontextprotocol/tools/custom_metadata_detector.py delete mode 100644 modelcontextprotocol/utils/cache_manager.py delete mode 100644 modelcontextprotocol/utils/custom_metadata_detector.py diff --git a/modelcontextprotocol/tools/custom_metadata_detector.py b/modelcontextprotocol/tools/custom_metadata_detector.py deleted file mode 100644 index 2fcc1bf..0000000 --- a/modelcontextprotocol/tools/custom_metadata_detector.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging -from typing import Any, Dict -from utils.custom_metadata_detector import detect_custom_metadata_with_singleton - -logger = logging.getLogger(__name__) - - -def detect_custom_metadata_trigger(query_text: str) -> Dict[str, Any]: - """ - Detect custom metadata triggers from natural language queries. - - This function analyzes natural language text to identify when users are referencing - custom metadata (business metadata) and automatically provides context about - available custom metadata definitions. - - Args: - query_text (str): Natural language query text to analyze for custom metadata references - - Returns: - Dict[str, Any]: Dictionary containing: - - detected: Boolean indicating if custom metadata was detected - - context: Custom metadata context if detected (list of metadata definitions) - - detection_reasons: List of reasons why custom metadata was detected - - suggested_attributes: List of suggested custom metadata attributes - - Examples: - # Query mentioning data classification - result = detect_custom_metadata_trigger("Find all tables with sensitive data classification") - - # Query about data quality - result = detect_custom_metadata_trigger("Show me assets with poor data quality scores") - - # Query about business ownership - result = detect_custom_metadata_trigger("Which datasets have John as the business owner?") - - # Query about compliance - result = detect_custom_metadata_trigger("Find all PII data that needs GDPR compliance review") - """ - logger.info(f"Detecting custom metadata triggers in query: {query_text[:100]}...") - - try: - result = detect_custom_metadata_with_singleton(query_text) - - if result["detected"]: - logger.info( - f"Custom metadata detected with reasons: {result['detection_reasons']}" - ) - context_count = len(result.get("context", [])) - logger.info( - f"Provided {context_count} custom metadata definitions for context enrichment" - ) - else: - logger.debug("No custom metadata triggers detected in the query") - - return result - - except Exception as e: - logger.error(f"Error detecting custom metadata triggers: {str(e)}") - return { - "detected": False, - "context": None, - "detection_reasons": [], - "error": str(e), - } diff --git a/modelcontextprotocol/utils/cache_manager.py b/modelcontextprotocol/utils/cache_manager.py deleted file mode 100644 index 4ab36d9..0000000 --- a/modelcontextprotocol/utils/cache_manager.py +++ /dev/null @@ -1,175 +0,0 @@ -""" -Advanced cache management using singleton pattern for persistent caching across MCP tool calls. - -This module provides a thread-safe singleton cache manager that can be used as an alternative -to the module-level caching approach. It offers more control and flexibility for cache management. -""" - -import logging -import threading -import time -from typing import Any, Dict, List, Optional, TypeVar, Generic, Callable - -logger = logging.getLogger(__name__) - -T = TypeVar("T") - - -class SingletonCacheManager(Generic[T]): - """ - Thread-safe singleton cache manager for persistent data caching across MCP tool calls. - - This class ensures that cache data persists across multiple tool calls within the same - MCP server process. - """ - - # Private class variables to store instances and lock - _instances: Dict[str, "SingletonCacheManager"] = {} - _lock = threading.Lock() - - # Create or return existing singleton instance for the given cache name - def __new__(cls, cache_name: str, ttl_seconds: float = 300.0): - """ - Create or return existing singleton instance for the given cache name. - - Args: - cache_name: Unique identifier for this cache instance - ttl_seconds: Time-to-live for cached data in seconds - """ - # Lock to ensure thread-safe access to instances - with cls._lock: - if cache_name not in cls._instances: - instance = super().__new__(cls) - instance._initialized = False - cls._instances[cache_name] = instance - return cls._instances[cache_name] - - def __init__(self, cache_name: str, ttl_seconds: float = 300.0): - """ - Initialize the cache manager (only runs once per cache_name). - - Args: - cache_name: Unique identifier for this cache instance - ttl_seconds: Time-to-live for cached data in seconds - """ - if self._initialized: - return - - self.cache_name = cache_name - self.ttl_seconds = ttl_seconds - self._cache_data: Optional[T] = None - self._cache_timestamp: Optional[float] = None - self._data_lock = threading.Lock() - self._initialized = True - - logger.debug( - f"Initialized singleton cache manager: {cache_name} (TTL: {ttl_seconds}s)" - ) - - def get_or_fetch(self, fetch_function: Callable[[], T]) -> T: - """ - Get cached data or fetch fresh data if cache is invalid. - - Args: - fetch_function: Function to call to fetch fresh data when cache is invalid - - Returns: - Cached or freshly fetched data - """ - with self._data_lock: - current_time = time.time() - - # Check if cache is valid - if self._is_cache_valid(current_time): - logger.debug(f"Using cached data from {self.cache_name}") - return self._cache_data - - # Fetch fresh data - logger.debug(f"Fetching fresh data for {self.cache_name}") - try: - fresh_data = fetch_function() - self._cache_data = fresh_data - self._cache_timestamp = current_time - logger.info(f"Updated cache {self.cache_name} with fresh data") - return fresh_data - except Exception as e: - logger.error(f"Failed to fetch fresh data for {self.cache_name}: {e}") - # Return stale cache if available - if self._cache_data is not None: - logger.warning(f"Returning stale cached data for {self.cache_name}") - return self._cache_data - raise - - def _is_cache_valid(self, current_time: float) -> bool: - """Check if the current cache is valid based on TTL.""" - return ( - self._cache_data is not None - and self._cache_timestamp is not None - and (current_time - self._cache_timestamp) < self.ttl_seconds - ) - - def invalidate(self) -> None: - """Clear the cached data, forcing a fresh fetch on next access.""" - with self._data_lock: - self._cache_data = None - self._cache_timestamp = None - logger.info(f"Invalidated cache: {self.cache_name}") - - def get_cache_info(self) -> Dict[str, Any]: - """ - Get information about the current cache state. - - Returns: - Dict containing cache metadata - """ - with self._data_lock: - current_time = time.time() - cache_size = ( - len(self._cache_data) - if isinstance(self._cache_data, (list, dict, str)) - else 1 - if self._cache_data - else 0 - ) - - return { - "cache_name": self.cache_name, - "cache_size": cache_size, - "cache_timestamp": self._cache_timestamp, - "current_time": current_time, - "ttl_seconds": self.ttl_seconds, - "is_valid": self._is_cache_valid(current_time), - "age_seconds": current_time - self._cache_timestamp - if self._cache_timestamp - else None, - } - - @classmethod - def get_all_cache_info(cls) -> Dict[str, Dict[str, Any]]: - """Get information about all active cache instances.""" - with cls._lock: - return { - name: instance.get_cache_info() - for name, instance in cls._instances.items() - } - - @classmethod - def invalidate_all(cls) -> None: - """Invalidate all cache instances.""" - with cls._lock: - for instance in cls._instances.values(): - instance.invalidate() - logger.info("Invalidated all cache instances") - - -# Convenience function for custom metadata caching -def get_custom_metadata_cache_manager() -> SingletonCacheManager[List[Dict[str, Any]]]: - """ - Get the singleton cache manager for custom metadata context. - - Returns: - Singleton cache manager instance for custom metadata - """ - return SingletonCacheManager[List[Dict[str, Any]]]( - "custom_metadata_context", ttl_seconds=900.0 - ) diff --git a/modelcontextprotocol/utils/custom_metadata_detector.py b/modelcontextprotocol/utils/custom_metadata_detector.py deleted file mode 100644 index f821f7e..0000000 --- a/modelcontextprotocol/utils/custom_metadata_detector.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -This demonstrates how to use the SingletonCacheManager for persistent caching -across MCP tool calls as an alternative to module-level caching. -""" - -import logging -from typing import Any, Dict, List -from utils.custom_metadata_context import get_custom_metadata_context -from utils.cache_manager import get_custom_metadata_cache_manager - -logger = logging.getLogger(__name__) - - -class CustomMetadataDetectorWithSingleton: - """ - Custom metadata detector using singleton cache manager for persistent caching. - - This version uses the SingletonCacheManager to maintain cache across multiple - tool calls, providing better control and thread safety compared to module-level caching. - """ - - # Common keywords that indicate custom metadata usage - CUSTOM_METADATA_KEYWORDS = { - "business metadata", - "custom metadata", - "custom metadata filters", - "business attributes", - "data classification", - "data quality", - "business context", - "metadata attributes", - "business properties", - "custom attributes", - "business tags", - "data governance", - } - - def __init__(self): - """Initialize the custom metadata detector with singleton cache manager.""" - self.cache_manager = get_custom_metadata_cache_manager() - - def detect_from_natural_language(self, query_text: str) -> Dict[str, Any]: - """ - Detect if a natural language query involves custom metadata and provide the appropriate context. - - Args: - query_text: Natural language query text to analyze - - Returns: - Dict containing: - - detected: Boolean indicating if custom metadata was detected - - context: Custom metadata context if detected - - detection_reasons: List of reasons why custom metadata was detected - - suggested_attributes: List of suggested custom metadata attributes - """ - logger.debug( - f"Starting custom metadata detection analysis for query: {query_text[:100]}..." - ) - - detection_reasons: List[str] = [] - - if not query_text or not query_text.strip(): - return {"detected": False, "detection_reasons": [], "context": None} - - # Check query text for custom metadata keywords - detected_keywords = self._detect_keywords_in_text(query_text) - if detected_keywords: - detection_reasons.append( - f"Custom metadata keywords detected: {', '.join(detected_keywords)}" - ) - - # Check for data governance and quality terms - governance_terms = self._detect_governance_terms(query_text) - if governance_terms: - detection_reasons.append( - f"Data governance terms detected: {', '.join(governance_terms)}" - ) - - # Determine if custom metadata was detected - detected = len(detection_reasons) > 0 - - result = { - "detected": detected, - "detection_reasons": detection_reasons, - "context": None, - } - - # If custom metadata was detected, fetch and provide context using custom metadata cache - if detected: - logger.info(f"Custom metadata detected. Reasons: {detection_reasons}") - try: - # Use singleton cache manager to get context with persistent caching - context = self.cache_manager.get_or_fetch(get_custom_metadata_context) - result["context"] = context - logger.info( - f"Provided custom metadata context with {len(context)} definitions using singleton cache" - ) - except Exception as e: - logger.error(f"Failed to fetch custom metadata context: {e}") - result["context"] = [] - else: - logger.debug("No custom metadata usage detected") - - return result - - def _detect_keywords_in_text(self, text: str) -> List[str]: - """ - Detect custom metadata keywords in text. - - Args: - text: Text to analyze - - Returns: - List of detected keywords - """ - if not text: - return [] - - text_lower = text.lower() - detected = [] - - for keyword in self.CUSTOM_METADATA_KEYWORDS: - if keyword in text_lower: - detected.append(keyword) - - return detected - - def _detect_governance_terms(self, text: str) -> List[str]: - """ - Detect data governance and quality terms in text. - - Args: - text: Text to analyze - - Returns: - List of detected governance terms - """ - if not text: - return [] - - text_lower = text.lower() - detected = [] - - governance_terms = [ - "pii", - "personally identifiable information", - "gdpr", - "compliance", - "data lineage", - "data quality", - "data governance", - "data catalog", - "master data", - "reference data", - "critical data", - "sensitive data", - "public data", - "internal data", - "confidential data", - "restricted data", - "data retention", - "data lifecycle", - "data archival", - "data purging", - ] - - for term in governance_terms: - if term in text_lower: - detected.append(term) - - return detected - - def get_cache_info(self) -> Dict[str, Any]: - """Get information about the singleton cache state.""" - return self.cache_manager.get_cache_info() - - def invalidate_cache(self) -> None: - """Invalidate the singleton cache, forcing fresh data on next request.""" - self.cache_manager.invalidate() - - -def detect_custom_metadata_with_singleton(query_text: str) -> Dict[str, Any]: - """ - Convenience function using singleton cache manager approach. - - Args: - query_text: Natural language query text to analyze - - Returns: - Dict containing detection results and context - """ - detector = CustomMetadataDetectorWithSingleton() - return detector.detect_from_natural_language(query_text=query_text) From 042babec7afac932b4faa562fbe2b4c150aded4e Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 2 Sep 2025 11:13:19 +0530 Subject: [PATCH 14/28] remove: detect_custom_metadata_from_query tool registration --- modelcontextprotocol/server.py | 59 ---------------------------------- 1 file changed, 59 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index 2b028c7..f3cd6a8 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -7,7 +7,6 @@ get_assets_by_dsl, traverse_lineage, update_assets, - detect_custom_metadata_trigger, create_glossary_category_assets, create_glossary_assets, create_glossary_term_assets, @@ -693,64 +692,6 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: return create_glossary_category_assets(categories) -@mcp.tool() -def detect_custom_metadata_from_query(query_text: str) -> Dict[str, Any]: - """ - Detect custom metadata triggers from natural language queries. - - This tool analyzes natural language text to identify when users are referencing - custom metadata (business metadata) and automatically provides context about - available custom metadata definitions. Use this tool when you receive natural - language queries that might involve custom metadata concepts. - - Args: - query_text (str): Natural language query text to analyze for custom metadata references - - Returns: - Dict[str, Any]: Dictionary containing: - - detected: Boolean indicating if custom metadata was detected - - context: Custom metadata context if detected (list of metadata definitions) - - detection_reasons: List of reasons why custom metadata was detected - - suggested_attributes: List of suggested custom metadata attributes - - Detection Triggers: - The tool detects custom metadata usage when the query contains: - - Business metadata keywords (e.g., "business metadata", "data classification") - - Data governance terms (e.g., "PII", "GDPR", "compliance", "data quality") - - Attribute patterns (e.g., "sensitivity level", "business owner", "data steward") - - Quality and classification terms (e.g., "quality score", "classification level") - - Examples: - # Query mentioning data classification - result = detect_custom_metadata_from_query("Find all tables with sensitive data classification") - - # Query about data quality - result = detect_custom_metadata_from_query("Show me assets with poor data quality scores") - - # Query about business ownership - result = detect_custom_metadata_from_query("Which datasets have John as the business owner?") - - # Query about compliance - result = detect_custom_metadata_from_query("Find all PII data that needs GDPR compliance review") - - Use Cases: - - Analyze user queries before executing searches to provide custom metadata context - - Understand when users are asking about business metadata attributes - - Provide enriched context about available custom metadata definitions - - Help users discover relevant custom metadata attributes for their queries - """ - try: - return detect_custom_metadata_trigger(query_text) - except Exception as e: - return { - "detected": False, - "context": None, - "detection_reasons": [], - "suggested_attributes": [], - "error": f"Failed to detect custom metadata: {str(e)}", - } - - def main(): mcp.run() From 0a523514ba2fabdea43740cbf6d671f2cb40a98c Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 2 Sep 2025 11:14:01 +0530 Subject: [PATCH 15/28] add: custom_metadata_context tool --- .../{utils => tools}/custom_metadata_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename modelcontextprotocol/{utils => tools}/custom_metadata_context.py (99%) diff --git a/modelcontextprotocol/utils/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py similarity index 99% rename from modelcontextprotocol/utils/custom_metadata_context.py rename to modelcontextprotocol/tools/custom_metadata_context.py index be3cdf3..b0b1d7c 100644 --- a/modelcontextprotocol/utils/custom_metadata_context.py +++ b/modelcontextprotocol/tools/custom_metadata_context.py @@ -173,4 +173,4 @@ def get_custom_metadata_context() -> Dict[str, Any]: if __name__ == "__main__": - print(get_custom_metadata_context()) + get_custom_metadata_context() From 7512a6baaf29f73111f2207c4ff311f58c6a42d8 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 2 Sep 2025 13:56:45 +0530 Subject: [PATCH 16/28] update: custom_metadata_context tool import --- modelcontextprotocol/tools/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelcontextprotocol/tools/__init__.py b/modelcontextprotocol/tools/__init__.py index 7cf6ae3..81dbf5a 100644 --- a/modelcontextprotocol/tools/__init__.py +++ b/modelcontextprotocol/tools/__init__.py @@ -2,7 +2,7 @@ from .dsl import get_assets_by_dsl from .lineage import traverse_lineage from .assets import update_assets -from .custom_metadata_detector import detect_custom_metadata_trigger +from .custom_metadata_context import get_custom_metadata_context from .glossary import ( create_glossary_category_assets, create_glossary_assets, @@ -22,7 +22,7 @@ "get_assets_by_dsl", "traverse_lineage", "update_assets", - "detect_custom_metadata_trigger", + "get_custom_metadata_context", "create_glossary_category_assets", "create_glossary_assets", "create_glossary_term_assets", From 93cc2f6ea8d19a298d823f83272387bde81ca18d Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 9 Sep 2025 01:36:56 +0530 Subject: [PATCH 17/28] add: get_custom_metadata_context_tool registration in server.py --- modelcontextprotocol/server.py | 289 ++++++++++++++++++++++++++++++++- 1 file changed, 283 insertions(+), 6 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index f3cd6a8..8a12fd9 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -7,6 +7,7 @@ get_assets_by_dsl, traverse_lineage, update_assets, + get_custom_metadata_context, create_glossary_category_assets, create_glossary_assets, create_glossary_term_assets, @@ -98,12 +99,18 @@ def search_assets_tool( ) # Search for assets with custom metadata - asset_list_1 = search_assets( - custom_metadata_conditions=[{"custom_metadata_filter": {"display_name": "test-mcp", "property_filters": [{"property_name": "mcp_allow_status", "property_value": "yes"}]}}] - ) - - asset_list_2 = search_assets( - custom_metadata_conditions=[{"custom_metadata_filter": {"display_name": "test-mcp", "property_filters": [{"property_name": "mcp_allow_status", "property_value": "yes", "operator": "eq"}]}}] + assets = search_assets( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Business Ownership", # This is the display name of the business metadata + "property_filters": [{ + "property_name": "business_owner", # This is the display name of the property + "property_value": "John", # This is the value of the property + "operator": "eq" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] ) # Search for columns with specific certificate status @@ -692,6 +699,276 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: return create_glossary_category_assets(categories) +@mcp.tool() +def get_custom_metadata_context_tool() -> Dict[str, Any]: + """ + Fetch the custom metadata context for all business metadata definitions in the Atlan instance. + + This tool is used to get the custom metadata context for all business metadata definitions + present in the Atlan instance. Whenever a user gives a query to search for assets with + filters on custom metadata, this tool will be used to get the custom metadata context + for the business metadata definitions present in the Atlan instance. + + Eventually, this tool helps to prepare the payload for search_assets tool, when users + want to search for assets with filters on custom metadata. + + Returns: + List[Dict[str, Any]]: List of business metadata definitions, each containing: + - prompt: Formatted string prompt for the business metadata definition + - metadata: Dictionary with business metadata details including: + - name: Internal name of the business metadata + - display_name: Display name of the business metadata + - description: Description of the business metadata + - attributes: List of attribute definitions with name, display_name, data_type, description, and optional enumEnrichment + - id: GUID of the business metadata definition + + Raises: + Exception: If there's an error retrieving the custom metadata context + + Examples: + # Step 1: Get custom metadata context to understand available business metadata + context = get_custom_metadata_context_tool() + + # Step 2: Use the context to prepare custom_metadata_conditions for search_assets_tool + # Example context result might show business metadata like "Data Classification" with attributes + + # Example 1: Equality operator (eq) - exact match + assets = search_assets_tool( + asset_type="Table", + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Classification", # This is the display name of the business metadata + "property_filters": [{ + "property_name": "sensitivity_level", # This is the display name of the property + "property_value": "sensitive", # This is the value of the property + "operator": "eq" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 2: Equality with case insensitive matching + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Classification", + "property_filters": [{ + "property_name": "sensitivity_level", + "property_value": "SENSITIVE", + "operator": "eq", + "case_insensitive": True + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 3: Starts with operator (startswith) - prefix matching + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Business Ownership", + "property_filters": [{ + "property_name": "business_owner", + "property_value": "John", + "operator": "startswith" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 4: Starts with operator with case insensitive matching + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Business Ownership", + "property_filters": [{ + "property_name": "business_owner", + "property_value": "john", + "operator": "startswith", + "case_insensitive": True + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 5: Less than operator (lt) - numeric/date comparison + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 50, + "operator": "lt" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 6: Less than or equal operator (lte) + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 75, + "operator": "lte" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 7: Greater than operator (gt) + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 80, + "operator": "gt" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 8: Greater than or equal operator (gte) + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 90, + "operator": "gte" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 9: Match operator (match) - full-text search + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Business Context", + "property_filters": [{ + "property_name": "description", + "property_value": "customer data analytics", + "operator": "match" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 10: Has any value operator (has_any_value) - check if field is populated + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Business Ownership", + "property_filters": [{ + "property_name": "business_owner", + "operator": "has_any_value" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 11: Between operator (between) - range queries + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": [50, 90], # [start, end] range + "operator": "between" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 12: Within operator (within) - multiple value matching + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Classification", + "property_filters": [{ + "property_name": "sensitivity_level", + "property_value": ["sensitive", "confidential", "restricted"], # list of values + "operator": "within" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 13: Multiple property filters in same business metadata + assets = search_assets_tool( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Governance", + "property_filters": [ + { + "property_name": "data_owner", + "property_value": "John Smith", + "operator": "eq" + }, + { + "property_name": "retention_period", + "property_value": 365, + "operator": "gte" + } + ] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Example 14: Multiple business metadata filters + assets = search_assets_tool( + custom_metadata_conditions=[ + { + "custom_metadata_filter": { + "display_name": "Data Classification", + "property_filters": [{ + "property_name": "sensitivity_level", + "property_value": "sensitive", + "operator": "eq" + }] + } + }, + { + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 80, + "operator": "gte" + }] + } + } + ], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + """ + try: + return get_custom_metadata_context() + except Exception as e: + return {"error": f"Error getting custom metadata context: {str(e)}"} + + def main(): mcp.run() From 017d738ab90f63c06a4b35c426efefc1d9f6885f Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 9 Sep 2025 01:41:47 +0530 Subject: [PATCH 18/28] update: procesisng logic for custom metadata filters --- modelcontextprotocol/utils/search.py | 90 +++++++++++++++++++--------- 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/modelcontextprotocol/utils/search.py b/modelcontextprotocol/utils/search.py index d3eb706..dc9ab48 100644 --- a/modelcontextprotocol/utils/search.py +++ b/modelcontextprotocol/utils/search.py @@ -1,4 +1,5 @@ import logging +from client import get_atlan_client from typing import Dict, Any from pyatlan.model.assets import Asset from pyatlan.model.fields.atlan_fields import CustomMetadataField @@ -8,6 +9,20 @@ class SearchUtils: + + CUSTOM_METADATAFIELD_OPERATOR_MAP = { + "eq": lambda custom_metadata_field_class, value, ci: custom_metadata_field_class.eq(value, case_insensitive=ci), + "startswith": lambda custom_metadata_field_class, value, ci: custom_metadata_field_class.startswith(value, case_insensitive=ci), + "lt": lambda custom_metadata_field_class, value: custom_metadata_field_class.lt(value), + "lte": lambda custom_metadata_field_class, value: custom_metadata_field_class.lte(value), + "gt": lambda custom_metadata_field_class, value: custom_metadata_field_class.gt(value), + "gte": lambda custom_metadata_field_class, value: custom_metadata_field_class.gte(value), + "match": lambda custom_metadata_field_class, value: custom_metadata_field_class.match(value), + "has_any_value": lambda attr: attr.has_any_value(), + } + + CUSTOM_METADATAFIELD_NO_CASE_INSENSITIVE_OPERATORS = {"lt", "lte", "gt", "gte", "match"} + @staticmethod def process_results(results: Any) -> Dict[str, Any]: """ @@ -182,15 +197,12 @@ def _process_custom_metadata_condition( Args: search: The FluentSearch object - condition: Dictionary containing display_name, property_name, property_value, and optional operator + condition: Dictionary containing display_name (display name of the business metadata), property_filters (list of propert or attribute filters) search_method_name: The search method to use ('where', 'where_not', 'where_some') Returns: FluentSearch: The updated search object """ - if not isinstance(condition, dict): - logger.warning("Custom metadata condition must be a dictionary") - return search # Validate required fields required_fields = ["display_name", "property_filters"] @@ -200,45 +212,65 @@ def _process_custom_metadata_condition( ) return search + # Get the search method search_method = getattr(search, search_method_name) - # Get operator, default to "eq" - for property_filter in condition["property_filters"]: - operator = property_filter.get("operator", "eq") - property_name = property_filter.get("property_name") - property_value = property_filter.get("property_value") + try: - try: - # Create the custom metadata field + # Initializes the AtlanClient class from pyatlan.client.atlan by executing the get_atlan_client function from client.py + # This registers the client in the thread-local storage (TLS) + client = get_atlan_client() + + # Process each property filter + for property_filter in condition["property_filters"]: + operator = property_filter.get("operator", "eq") + property_name = property_filter.get("property_name") + property_value = property_filter.get("property_value") + case_insensitive = property_filter.get("case_insensitive", False) + + # Create the custom metadata field for this specific property custom_metadata_field = CustomMetadataField( set_name=condition["display_name"], attribute_name=property_name ) - # Apply the appropriate operator - if property_value == "any": - # For "any" value, use has_any_value() method - query_condition = custom_metadata_field.has_any_value() - else: - # Get the operator method dynamically - if hasattr(custom_metadata_field, operator): - operator_method = getattr(custom_metadata_field, operator) - query_condition = operator_method(property_value) + # Custom handling for between and within operators + if operator == "between": + if isinstance(property_value, (list, tuple)) and len(property_value) == 2: + query_condition = custom_metadata_field.between(property_value[0], property_value[1]) + else: + raise ValueError( + f"Invalid value format for 'between' operator: {property_value}, expected [start, end]" + ) + elif operator == "within": + if isinstance(property_value, list): + query_condition = custom_metadata_field.within(property_value) else: - # Fallback to eq if operator not found - logger.warning( - f"Operator '{operator}' not found, falling back to 'eq'" + raise ValueError( + f"Invalid value format for 'within' operator: {property_value}, expected list" ) - query_condition = custom_metadata_field.eq(property_value) + elif operator in SearchUtils.CUSTOM_METADATAFIELD_OPERATOR_MAP: + # Get the operator method dynamically based on the operator from the property filter + # Supports case insensitive matching for eq and startswith operators + operator_method = SearchUtils.CUSTOM_METADATAFIELD_OPERATOR_MAP[operator] + + if operator not in SearchUtils.CUSTOM_METADATAFIELD_NO_CASE_INSENSITIVE_OPERATORS: + query_condition = operator_method(custom_metadata_field, property_value, case_insensitive) + else: + query_condition = operator_method(custom_metadata_field, property_value) + else: + # Fallback to eq if operator not found + logger.warning(f"Operator '{operator}' not found, falling back to 'eq' operator for custom metadata field") + query_condition = custom_metadata_field.eq(property_value, case_insensitive) + - # Apply the condition to the search + # Apply the condition to the search object search = search_method(query_condition) - logger.info(search) logger.debug( f"Applied custom metadata condition: {condition['display_name']}.{condition['property_name']} {operator} {condition['property_value']}" ) - except Exception as e: - logger.error(f"Error processing custom metadata condition: {e}") - logger.exception("Exception details:") + except Exception as e: + logger.error(f"Error processing custom metadata condition: {e}") + logger.exception("Exception details:") return search From e2f26544d33b2947127811fb82d0b82acd2c4261 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 9 Sep 2025 02:51:18 +0530 Subject: [PATCH 19/28] fix: return type of get_custom_metadata_context_tool --- modelcontextprotocol/server.py | 4 +++- modelcontextprotocol/tools/custom_metadata_context.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index cd83e48..0795e4e 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -716,7 +716,7 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: @mcp.tool() -def get_custom_metadata_context_tool() -> Dict[str, Any]: +def get_custom_metadata_context_tool() -> List[Dict[str, Any]]: """ Fetch the custom metadata context for all business metadata definitions in the Atlan instance. @@ -728,6 +728,8 @@ def get_custom_metadata_context_tool() -> Dict[str, Any]: Eventually, this tool helps to prepare the payload for search_assets tool, when users want to search for assets with filters on custom metadata. + This tool can only be called once in a chat conversation. + Returns: List[Dict[str, Any]]: List of business metadata definitions, each containing: - prompt: Formatted string prompt for the business metadata definition diff --git a/modelcontextprotocol/tools/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py index b0b1d7c..2c393a2 100644 --- a/modelcontextprotocol/tools/custom_metadata_context.py +++ b/modelcontextprotocol/tools/custom_metadata_context.py @@ -87,7 +87,7 @@ def process_business_metadata( return {"prompt": prompt, "metadata": metadata, "id": guid} -def get_custom_metadata_context() -> Dict[str, Any]: +def get_custom_metadata_context() -> List[Dict[str, Any]]: display_name: str = "Business Metadata" business_metadata_results: List[Dict[str, Any]] = [] From a08271cacaba8820c2825bfff61492517882e3c0 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 9 Sep 2025 02:53:22 +0530 Subject: [PATCH 20/28] fix: use active client loaded with env for custom_metadata_field search --- modelcontextprotocol/utils/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelcontextprotocol/utils/search.py b/modelcontextprotocol/utils/search.py index dc9ab48..b30d028 100644 --- a/modelcontextprotocol/utils/search.py +++ b/modelcontextprotocol/utils/search.py @@ -230,7 +230,7 @@ def _process_custom_metadata_condition( # Create the custom metadata field for this specific property custom_metadata_field = CustomMetadataField( - set_name=condition["display_name"], attribute_name=property_name + client=client, set_name=condition["display_name"], attribute_name=property_name ) # Custom handling for between and within operators From 069a2ccc6096a20541c446f7ba858ded8a9582a6 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Tue, 9 Sep 2025 17:42:41 +0530 Subject: [PATCH 21/28] fix: custom_metadata_conditions in search_assets_tool --- modelcontextprotocol/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index 0795e4e..61b411e 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -68,7 +68,7 @@ def search_assets_tool( conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match. Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} custom_metadata_conditions (List[Dict[str, Any]], optional): List of custom metadata conditions to match. - Format: [{"custom_metadata": value}] or [{"custom_metadata": {"operator": operator, "value": value}}] + Format: [{"custom_metadata_filter": {"display_name": "Business Metadata Name", "property_filters": [{"property_name": "property", "property_value": "value", "operator": "eq"}]}}] negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude. Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match. From abe14304317eb80311020c54f48ef8145fea1f13 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Thu, 11 Sep 2025 00:57:04 +0530 Subject: [PATCH 22/28] fix: back earlier versions of pre-commit hooks --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 95c41a4..507b8c9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v6.0.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -12,14 +12,14 @@ repos: - id: detect-private-key - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook - rev: v9.22.0 + rev: v9.11.0 hooks: - id: commitlint stages: [commit-msg] additional_dependencies: ['@commitlint/config-conventional'] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.10 + rev: v0.3.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] From 3fa116d0a2a587d66591eb1f9a5b738e0dad07e0 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Thu, 11 Sep 2025 01:16:11 +0530 Subject: [PATCH 23/28] remove: main guard clause from custom_metadata_context.py file --- modelcontextprotocol/tools/custom_metadata_context.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modelcontextprotocol/tools/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py index 2c393a2..f144081 100644 --- a/modelcontextprotocol/tools/custom_metadata_context.py +++ b/modelcontextprotocol/tools/custom_metadata_context.py @@ -170,7 +170,3 @@ def get_custom_metadata_context() -> List[Dict[str, Any]]: f"Fetched {len(business_metadata_results)} {display_name} definitions with enum enrichment." ) return business_metadata_results - - -if __name__ == "__main__": - get_custom_metadata_context() From 9960d503619999c818e2db55123df2f81d4892da Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Thu, 11 Sep 2025 03:39:10 +0530 Subject: [PATCH 24/28] fix: return type of get_custom_metadata_context_tool --- modelcontextprotocol/server.py | 2 +- modelcontextprotocol/tools/custom_metadata_context.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index 61b411e..8475d93 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -716,7 +716,7 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: @mcp.tool() -def get_custom_metadata_context_tool() -> List[Dict[str, Any]]: +def get_custom_metadata_context_tool() -> Dict[str, Any]: """ Fetch the custom metadata context for all business metadata definitions in the Atlan instance. diff --git a/modelcontextprotocol/tools/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py index f144081..b97cd46 100644 --- a/modelcontextprotocol/tools/custom_metadata_context.py +++ b/modelcontextprotocol/tools/custom_metadata_context.py @@ -87,7 +87,7 @@ def process_business_metadata( return {"prompt": prompt, "metadata": metadata, "id": guid} -def get_custom_metadata_context() -> List[Dict[str, Any]]: +def get_custom_metadata_context() -> Dict[str, Any]: display_name: str = "Business Metadata" business_metadata_results: List[Dict[str, Any]] = [] From b049283bce280653aab8bec79d9d2f18f8fc024e Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Thu, 11 Sep 2025 03:40:35 +0530 Subject: [PATCH 25/28] update: base prompt for the entire result set, not every bm --- modelcontextprotocol/tools/custom_metadata_context.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modelcontextprotocol/tools/custom_metadata_context.py b/modelcontextprotocol/tools/custom_metadata_context.py index b97cd46..16eff3e 100644 --- a/modelcontextprotocol/tools/custom_metadata_context.py +++ b/modelcontextprotocol/tools/custom_metadata_context.py @@ -80,9 +80,7 @@ def process_business_metadata( "attributes": parsed_attributes_for_metadata, } - prompt = f"""{bm_def_display_name}|{description_for_prompt}|{attributes_str_for_prompt} - -This is a business metadata used in the data catalog to add more information to an asset""" + prompt = f"""{bm_def_display_name}|{description_for_prompt}|{attributes_str_for_prompt}""" return {"prompt": prompt, "metadata": metadata, "id": guid} @@ -169,4 +167,7 @@ def get_custom_metadata_context() -> Dict[str, Any]: logger.info( f"Fetched {len(business_metadata_results)} {display_name} definitions with enum enrichment." ) - return business_metadata_results + return { + "context": "This is the list of business metadata definitions used in the data catalog to add more information to an asset", + "business_metadata_results": business_metadata_results, + } From 2860c2fb92d5e3ac39c68d32f7f735cc991b32b4 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Thu, 11 Sep 2025 03:42:33 +0530 Subject: [PATCH 26/28] update: remove extra examples from get_custom_metadata_context_tool and merge generic ones into search_assets_tool --- modelcontextprotocol/server.py | 240 ++++++++++----------------------- 1 file changed, 68 insertions(+), 172 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index 8475d93..b483f5d 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -114,7 +114,7 @@ def search_assets_tool( include_attributes=["owner_users", "owner_groups"] ) - # Search for assets with custom metadata + # Search for assets with custom metadata having a specific property filter (eq) assets = search_assets( custom_metadata_conditions=[{ "custom_metadata_filter": { @@ -129,6 +129,71 @@ def search_assets_tool( include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] ) + # Search for assets with custom metadata having a specific property filter (gt) + assets = search_assets( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 80, + "operator": "gt" + }] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Search for assets with custom metadata having multiple property filters (eq and gte) + assets = search_assets( + custom_metadata_conditions=[{ + "custom_metadata_filter": { + "display_name": "Data Governance", + "property_filters": [ + { + "property_name": "data_owner", + "property_value": "John Smith", + "operator": "eq" + }, + { + "property_name": "retention_period", + "property_value": 365, + "operator": "gte" + } + ] + } + }], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Search for assets with custom metadata having multiple business metadata filters (eq and gte) + assets = search_assets( + custom_metadata_conditions=[ + { + "custom_metadata_filter": { + "display_name": "Data Classification", + "property_filters": [{ + "property_name": "sensitivity_level", + "property_value": "sensitive", + "operator": "eq" + }] + } + }, + { + "custom_metadata_filter": { + "display_name": "Data Quality", + "property_filters": [{ + "property_name": "quality_score", + "property_value": 80, + "operator": "gte" + }] + } + } + ], + include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] + ) + + # Search for columns with specific certificate status columns = search_assets( asset_type="Column", @@ -782,22 +847,7 @@ def get_custom_metadata_context_tool() -> Dict[str, Any]: include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] ) - # Example 3: Starts with operator (startswith) - prefix matching - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Business Ownership", - "property_filters": [{ - "property_name": "business_owner", - "property_value": "John", - "operator": "startswith" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 4: Starts with operator with case insensitive matching + # Example 3: Starts with operator with case insensitive matching assets = search_assets_tool( custom_metadata_conditions=[{ "custom_metadata_filter": { @@ -813,82 +863,7 @@ def get_custom_metadata_context_tool() -> Dict[str, Any]: include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] ) - # Example 5: Less than operator (lt) - numeric/date comparison - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Quality", - "property_filters": [{ - "property_name": "quality_score", - "property_value": 50, - "operator": "lt" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 6: Less than or equal operator (lte) - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Quality", - "property_filters": [{ - "property_name": "quality_score", - "property_value": 75, - "operator": "lte" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 7: Greater than operator (gt) - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Quality", - "property_filters": [{ - "property_name": "quality_score", - "property_value": 80, - "operator": "gt" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 8: Greater than or equal operator (gte) - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Quality", - "property_filters": [{ - "property_name": "quality_score", - "property_value": 90, - "operator": "gte" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 9: Match operator (match) - full-text search - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Business Context", - "property_filters": [{ - "property_name": "description", - "property_value": "customer data analytics", - "operator": "match" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 10: Has any value operator (has_any_value) - check if field is populated + # Example 4: Has any value operator (has_any_value) - check if field is populated assets = search_assets_tool( custom_metadata_conditions=[{ "custom_metadata_filter": { @@ -901,85 +876,6 @@ def get_custom_metadata_context_tool() -> Dict[str, Any]: }], include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] ) - - # Example 11: Between operator (between) - range queries - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Quality", - "property_filters": [{ - "property_name": "quality_score", - "property_value": [50, 90], # [start, end] range - "operator": "between" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 12: Within operator (within) - multiple value matching - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Classification", - "property_filters": [{ - "property_name": "sensitivity_level", - "property_value": ["sensitive", "confidential", "restricted"], # list of values - "operator": "within" - }] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 13: Multiple property filters in same business metadata - assets = search_assets_tool( - custom_metadata_conditions=[{ - "custom_metadata_filter": { - "display_name": "Data Governance", - "property_filters": [ - { - "property_name": "data_owner", - "property_value": "John Smith", - "operator": "eq" - }, - { - "property_name": "retention_period", - "property_value": 365, - "operator": "gte" - } - ] - } - }], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) - - # Example 14: Multiple business metadata filters - assets = search_assets_tool( - custom_metadata_conditions=[ - { - "custom_metadata_filter": { - "display_name": "Data Classification", - "property_filters": [{ - "property_name": "sensitivity_level", - "property_value": "sensitive", - "operator": "eq" - }] - } - }, - { - "custom_metadata_filter": { - "display_name": "Data Quality", - "property_filters": [{ - "property_name": "quality_score", - "property_value": 80, - "operator": "gte" - }] - } - } - ], - include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] - ) """ try: return get_custom_metadata_context() From 37142c512c4644b81e176adfb1f608e48c15765d Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Fri, 12 Sep 2025 19:51:30 +0530 Subject: [PATCH 27/28] fix: repitive description --- modelcontextprotocol/server.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py index b483f5d..44816e0 100644 --- a/modelcontextprotocol/server.py +++ b/modelcontextprotocol/server.py @@ -786,9 +786,7 @@ def get_custom_metadata_context_tool() -> Dict[str, Any]: Fetch the custom metadata context for all business metadata definitions in the Atlan instance. This tool is used to get the custom metadata context for all business metadata definitions - present in the Atlan instance. Whenever a user gives a query to search for assets with - filters on custom metadata, this tool will be used to get the custom metadata context - for the business metadata definitions present in the Atlan instance. + present in the Atlan instance. Eventually, this tool helps to prepare the payload for search_assets tool, when users want to search for assets with filters on custom metadata. From 582a1250da886b8ff70e196cfea68b4c3592afa7 Mon Sep 17 00:00:00 2001 From: Satabrata Paul Date: Mon, 15 Sep 2025 09:34:13 +0530 Subject: [PATCH 28/28] fix: variable name --- modelcontextprotocol/tools/search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modelcontextprotocol/tools/search.py b/modelcontextprotocol/tools/search.py index dd84cec..b679b6c 100644 --- a/modelcontextprotocol/tools/search.py +++ b/modelcontextprotocol/tools/search.py @@ -194,11 +194,11 @@ def search_assets( logger.debug( f"Applying custom metadata conditions: {custom_metadata_conditions}" ) - for custom_metadata_filter_onject in custom_metadata_conditions: - if isinstance(custom_metadata_filter_onject, dict): - _, condition = next(iter(custom_metadata_filter_onject.items())) + for custom_metadata_filter_object in custom_metadata_conditions: + if isinstance(custom_metadata_filter_object, dict): + _, condition = next(iter(custom_metadata_filter_object.items())) else: - condition = custom_metadata_filter_onject + condition = custom_metadata_filter_object search = SearchUtils._process_custom_metadata_condition( search, condition, "where" )