Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
22c024b
add: standard library imports
SatabrataPaul-GitAc Aug 20, 2025
18c4b04
add: static methods for atlan api requests
SatabrataPaul-GitAc Aug 20, 2025
ff5f808
export: get_custom_metadata_context tool
SatabrataPaul-GitAc Aug 20, 2025
56d4e6f
add: tool to fetch custom metadata context
SatabrataPaul-GitAc Aug 20, 2025
99b8d55
add: custom_metadata_context tool registration
SatabrataPaul-GitAc Aug 20, 2025
8652d58
remove: get_custom_metadata_context tool
SatabrataPaul-GitAc Aug 26, 2025
bf9ccb9
add: cache manager for perisisting custom metadata context between mu…
SatabrataPaul-GitAc Aug 26, 2025
8f0eaf4
add: utility function to fetch all custom metadata context from a tenant
SatabrataPaul-GitAc Aug 26, 2025
7eb389f
add: detect_custom_metadata_trigger tool
SatabrataPaul-GitAc Aug 26, 2025
d73074a
add: registration of detect_custom_metadata_from_query mcp tool
SatabrataPaul-GitAc Aug 26, 2025
8bfe222
add: pre-commit fixes and update pre-commit versions
SatabrataPaul-GitAc Aug 26, 2025
fcd70a5
add: support for custom_metadata filterds in search_assets tool
SatabrataPaul-GitAc Aug 26, 2025
150d08b
remove: custom metadata detector from query tool
SatabrataPaul-GitAc Sep 2, 2025
042babe
remove: detect_custom_metadata_from_query tool registration
SatabrataPaul-GitAc Sep 2, 2025
0a52351
add: custom_metadata_context tool
SatabrataPaul-GitAc Sep 2, 2025
7512a6b
update: custom_metadata_context tool import
SatabrataPaul-GitAc Sep 2, 2025
93cc2f6
add: get_custom_metadata_context_tool registration in server.py
SatabrataPaul-GitAc Sep 8, 2025
017d738
update: procesisng logic for custom metadata filters
SatabrataPaul-GitAc Sep 8, 2025
dafac19
Merge branch 'main' into MCP-8
SatabrataPaul-GitAc Sep 8, 2025
e2f2654
fix: return type of get_custom_metadata_context_tool
SatabrataPaul-GitAc Sep 8, 2025
a08271c
fix: use active client loaded with env for custom_metadata_field search
SatabrataPaul-GitAc Sep 8, 2025
069a2cc
fix: custom_metadata_conditions in search_assets_tool
SatabrataPaul-GitAc Sep 9, 2025
abe1430
fix: back earlier versions of pre-commit hooks
SatabrataPaul-GitAc Sep 10, 2025
3fa116d
remove: main guard clause from custom_metadata_context.py file
SatabrataPaul-GitAc Sep 10, 2025
9960d50
fix: return type of get_custom_metadata_context_tool
SatabrataPaul-GitAc Sep 10, 2025
b049283
update: base prompt for the entire result set, not every bm
SatabrataPaul-GitAc Sep 10, 2025
2860c2f
update: remove extra examples from get_custom_metadata_context_tool a…
SatabrataPaul-GitAc Sep 10, 2025
37142c5
fix: repitive description
SatabrataPaul-GitAc Sep 12, 2025
582a125
fix: variable name
SatabrataPaul-GitAc Sep 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand All @@ -12,14 +12,14 @@ repos:
- id: detect-private-key

- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.11.0
rev: v9.22.0
hooks:
- id: commitlint
stages: [commit-msg]
additional_dependencies: ['@commitlint/config-conventional']

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.0
rev: v0.12.10
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
73 changes: 73 additions & 0 deletions modelcontextprotocol/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
get_assets_by_dsl,
traverse_lineage,
update_assets,
detect_custom_metadata_trigger,
create_glossary_category_assets,
create_glossary_assets,
create_glossary_term_assets,
Expand All @@ -26,6 +27,7 @@
@mcp.tool()
def search_assets_tool(
conditions=None,
custom_metadata_conditions=None,
negative_conditions=None,
some_conditions=None,
min_somes=1,
Expand All @@ -49,6 +51,8 @@ def search_assets_tool(
Args:
conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match.
Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}}
custom_metadata_conditions (List[Dict[str, Any]], optional): List of custom metadata conditions to match.
Format: [{"custom_metadata": value}] or [{"custom_metadata": {"operator": operator, "value": value}}]
negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude.
Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}}
some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match.
Expand Down Expand Up @@ -94,6 +98,15 @@ def search_assets_tool(
include_attributes=["owner_users", "owner_groups"]
)

# Search for assets with custom metadata
asset_list_1 = search_assets(
custom_metadata_conditions=[{"custom_metadata_filter": {"display_name": "test-mcp", "property_filters": [{"property_name": "mcp_allow_status", "property_value": "yes"}]}}]
)

asset_list_2 = search_assets(
custom_metadata_conditions=[{"custom_metadata_filter": {"display_name": "test-mcp", "property_filters": [{"property_name": "mcp_allow_status", "property_value": "yes", "operator": "eq"}]}}]
)

# Search for columns with specific certificate status
columns = search_assets(
asset_type="Column",
Expand Down Expand Up @@ -218,6 +231,7 @@ def search_assets_tool(
try:
# Parse JSON string parameters if needed
conditions = parse_json_parameter(conditions)
custom_metadata_conditions = parse_json_parameter(custom_metadata_conditions)
negative_conditions = parse_json_parameter(negative_conditions)
some_conditions = parse_json_parameter(some_conditions)
date_range = parse_json_parameter(date_range)
Expand All @@ -228,6 +242,7 @@ def search_assets_tool(

return search_assets(
conditions,
custom_metadata_conditions,
negative_conditions,
some_conditions,
min_somes,
Expand Down Expand Up @@ -678,6 +693,64 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]:
return create_glossary_category_assets(categories)


@mcp.tool()
def detect_custom_metadata_from_query(query_text: str) -> Dict[str, Any]:
    """
    Detect custom metadata triggers from natural language queries.

    This tool analyzes natural language text to identify when users are referencing
    custom metadata (business metadata) and automatically provides context about
    available custom metadata definitions. Use this tool when you receive natural
    language queries that might involve custom metadata concepts.

    Args:
        query_text (str): Natural language query text to analyze for custom metadata references

    Returns:
        Dict[str, Any]: Dictionary containing:
            - detected: Boolean indicating if custom metadata was detected
            - context: Custom metadata context if detected (list of metadata definitions)
            - detection_reasons: List of reasons why custom metadata was detected
            - suggested_attributes: List of suggested custom metadata attributes

    Detection Triggers:
        The tool detects custom metadata usage when the query contains:
        - Business metadata keywords (e.g., "business metadata", "data classification")
        - Data governance terms (e.g., "PII", "GDPR", "compliance", "data quality")
        - Attribute patterns (e.g., "sensitivity level", "business owner", "data steward")
        - Quality and classification terms (e.g., "quality score", "classification level")

    Examples:
        # Query mentioning data classification
        result = detect_custom_metadata_from_query("Find all tables with sensitive data classification")

        # Query about data quality
        result = detect_custom_metadata_from_query("Show me assets with poor data quality scores")

        # Query about business ownership
        result = detect_custom_metadata_from_query("Which datasets have John as the business owner?")

        # Query about compliance
        result = detect_custom_metadata_from_query("Find all PII data that needs GDPR compliance review")

    Use Cases:
        - Analyze user queries before executing searches to provide custom metadata context
        - Understand when users are asking about business metadata attributes
        - Provide enriched context about available custom metadata definitions
        - Help users discover relevant custom metadata attributes for their queries
    """
    try:
        detection = detect_custom_metadata_trigger(query_text)
    except Exception as exc:
        # Any failure is reported as a "nothing detected" payload carrying the
        # error text, so the tool always hands back a dictionary.
        failure_message = f"Failed to detect custom metadata: {str(exc)}"
        return dict(
            detected=False,
            context=None,
            detection_reasons=[],
            suggested_attributes=[],
            error=failure_message,
        )
    return detection


def main():
mcp.run()

Expand Down
70 changes: 70 additions & 0 deletions modelcontextprotocol/settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""Configuration settings for the application."""

import requests
from typing import Any, Dict, Optional
from urllib.parse import urlencode

from pydantic_settings import BaseSettings

from version import __version__ as MCP_VERSION


Expand All @@ -12,6 +17,7 @@ class Settings(BaseSettings):
ATLAN_AGENT_ID: str = "NA"
ATLAN_AGENT: str = "atlan-mcp"
ATLAN_MCP_USER_AGENT: str = f"Atlan MCP Server {MCP_VERSION}"
ATLAN_TYPEDEF_API_ENDPOINT: Optional[str] = "/api/meta/types/typedefs/"

@property
def headers(self) -> dict:
Expand All @@ -23,6 +29,70 @@ def headers(self) -> dict:
"X-Atlan-Client-Origin": self.ATLAN_AGENT,
}

@staticmethod
def build_api_url(path: str, query_params: Optional[Dict[str, Any]] = None) -> str:
    """Build a full API URL from a path and optional query parameters.

    Args:
        path: Either an absolute ``http://`` / ``https://`` URL, which is used
            as-is, or a path that is joined onto ``ATLAN_BASE_URL`` with
            exactly one ``/`` between them. An empty path yields the base URL.
        query_params: Optional mapping of query parameters. Entries whose
            value is ``None`` are dropped before encoding.

    Returns:
        The resulting URL, with the encoded query string appended when at
        least one non-``None`` parameter remains.

    Raises:
        ValueError: If a relative path is given and ``ATLAN_BASE_URL`` is empty.
    """
    current_settings = Settings()
    path = path or ""

    if path.startswith(("http://", "https://")):
        # Already absolute: the configured base URL is not needed.
        full_path = path
    else:
        # Validate the value that is actually used. A Settings instance is
        # always truthy, so testing the instance itself can never fail.
        base_url = (current_settings.ATLAN_BASE_URL or "").rstrip("/")
        if not base_url:
            raise ValueError(
                "Atlan base URL (ATLAN_BASE_URL) is not configured in settings."
            )
        if path and not path.startswith("/"):
            full_path = f"{base_url}/{path}"
        else:
            full_path = f"{base_url}{path}"

    active_query_params = {
        k: v for k, v in (query_params or {}).items() if v is not None
    }
    if not active_query_params:
        return full_path

    # Extend an existing query string instead of emitting a second "?".
    separator = "&" if "?" in full_path else "?"
    return f"{full_path}{separator}{urlencode(active_query_params)}"

@staticmethod
def get_atlan_typedef_api_endpoint(param: str) -> str:
    """Return the typedef API URL with ``param`` sent as the ``type`` query value.

    Args:
        param: Value placed in the ``type`` query parameter.

    Returns:
        The URL produced by ``Settings.build_api_url`` for the configured
        ``ATLAN_TYPEDEF_API_ENDPOINT``.

    Raises:
        ValueError: If ``ATLAN_TYPEDEF_API_ENDPOINT`` is unset or empty.
    """
    typedef_path = Settings().ATLAN_TYPEDEF_API_ENDPOINT
    if typedef_path:
        return Settings.build_api_url(
            path=typedef_path,
            query_params={"type": param},
        )
    raise ValueError(
        "Default API endpoint for typedefs (api_endpoint) is not configured in settings."
    )

@staticmethod
def make_request(url: str) -> Optional[Dict[str, Any]]:
current_settings = Settings()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this initialization required?

Copy link
Contributor Author

@SatabrataPaul-GitAc SatabrataPaul-GitAc Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following variables are defined as class variables :

ATLAN_BASE_URL: str
ATLAN_API_KEY: str
ATLAN_AGENT_ID: str = "NA"
ATLAN_AGENT: str = "atlan-mcp"
ATLAN_MCP_USER_AGENT: str = f"Atlan MCP Server {MCP_VERSION}"
ATLAN_TYPEDEF_API_ENDPOINT: Optional[str] = "/api/meta/types/typedefs/"

The values for ATLAN_BASE_URL and ATLAN_API_KEY are loaded from env variables
Since Settings inherits from BaseSettings (Pydantic), the environment variables (ATLAN_BASE_URL, ATLAN_API_KEY) are only loaded when we create an instance, because that's when Pydantic reads the environment/.env file.

In the following @staticmethods :

  • build_api_url () -> ATLAN_BASE_URL is required
  • make_request () -> ATLAN_API_KEY is required

Hence, the initialization ( instance creation ) is necessary

headers = {
"Authorization": f"Bearer {current_settings.ATLAN_API_KEY}",
"x-atlan-client-origin": "atlan-search-app",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add these and not just leverage the CustomMetadataCache pyatlan class to fetch them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The API call mechanism was added to get additional context for custom metadata attributes that are of Enum type (i.e., Options), which have a fixed set of values.

The CustomMetadataCache does have a method to get information on all custom metadata definitions, including attribute definitions, but it cannot retrieve any context about enum defs.

Hence, the API call mechanism covers both the custom metadata definitions (with attribute defs) and provides additional context for attribute defs that are of Enum type.

@firecast

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
try:
response = requests.get(
url,
headers=headers,
)
if response.status_code != 200:
raise Exception(
f"Failed to make request to {url}: {response.status_code} {response.text}"
)
return response.json()
except Exception as e:
raise Exception(f"Failed to make request to {url}: {e}")

class Config:
env_file = ".env"
env_file_encoding = "utf-8"
Expand Down
2 changes: 2 additions & 0 deletions modelcontextprotocol/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .dsl import get_assets_by_dsl
from .lineage import traverse_lineage
from .assets import update_assets
from .custom_metadata_detector import detect_custom_metadata_trigger
from .glossary import (
create_glossary_category_assets,
create_glossary_assets,
Expand All @@ -21,6 +22,7 @@
"get_assets_by_dsl",
"traverse_lineage",
"update_assets",
"detect_custom_metadata_trigger",
"create_glossary_category_assets",
"create_glossary_assets",
"create_glossary_term_assets",
Expand Down
64 changes: 64 additions & 0 deletions modelcontextprotocol/tools/custom_metadata_detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import logging
from typing import Any, Dict
from utils.custom_metadata_detector import detect_custom_metadata_with_singleton

logger = logging.getLogger(__name__)


def detect_custom_metadata_trigger(query_text: str) -> Dict[str, Any]:
    """
    Detect custom metadata triggers from natural language queries.

    Delegates the analysis to ``detect_custom_metadata_with_singleton`` and
    logs a summary of the outcome. Failures are never propagated; they are
    reported through an ``error`` entry in the returned dictionary.

    Args:
        query_text (str): Natural language query text to analyze for custom metadata references

    Returns:
        Dict[str, Any]: Dictionary containing:
            - detected: Boolean indicating if custom metadata was detected
            - context: Custom metadata context if detected (list of metadata definitions)
            - detection_reasons: List of reasons why custom metadata was detected
            - suggested_attributes: List of suggested custom metadata attributes
            - error: Error message, present only when detection failed

    Examples:
        # Query mentioning data classification
        result = detect_custom_metadata_trigger("Find all tables with sensitive data classification")
        # Query about data quality
        result = detect_custom_metadata_trigger("Show me assets with poor data quality scores")
        # Query about business ownership
        result = detect_custom_metadata_trigger("Which datasets have John as the business owner?")
        # Query about compliance
        result = detect_custom_metadata_trigger("Find all PII data that needs GDPR compliance review")
    """
    try:
        # The preview slice lives inside the try so that a non-string query is
        # reported through the error payload instead of escaping the function.
        logger.info(
            "Detecting custom metadata triggers in query: %s...", query_text[:100]
        )

        result = detect_custom_metadata_with_singleton(query_text)

        if result["detected"]:
            logger.info(
                "Custom metadata detected with reasons: %s",
                result.get("detection_reasons"),
            )
            # "context" may be present but None; `or []` keeps the log-only
            # count from raising and discarding a valid detection result.
            context_count = len(result.get("context") or [])
            logger.info(
                "Provided %d custom metadata definitions for context enrichment",
                context_count,
            )
        else:
            logger.debug("No custom metadata triggers detected in the query")

        return result

    except Exception as e:
        logger.exception("Error detecting custom metadata triggers: %s", e)
        # Same keys as the documented success shape, plus the error text.
        return {
            "detected": False,
            "context": None,
            "detection_reasons": [],
            "suggested_attributes": [],
            "error": str(e),
        }
16 changes: 16 additions & 0 deletions modelcontextprotocol/tools/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

def search_assets(
conditions: Optional[Union[Dict[str, Any], str]] = None,
custom_metadata_conditions: Optional[List[Dict[str, Any]]] = None,
negative_conditions: Optional[Dict[str, Any]] = None,
some_conditions: Optional[Dict[str, Any]] = None,
min_somes: int = 1,
Expand All @@ -40,6 +41,8 @@ def search_assets(
Args:
conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match.
Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}}
custom_metadata_conditions (List[Dict[str, Any]], optional): List of custom metadata conditions to match.
Format: [{"custom_metadata": value}] or [{"custom_metadata": {"operator": operator, "value": value}}]
negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude.
Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}}
some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match.
Expand Down Expand Up @@ -187,6 +190,19 @@ def search_assets(
)
search = search.min_somes(min_somes)

if custom_metadata_conditions:
logger.debug(
f"Applying custom metadata conditions: {custom_metadata_conditions}"
)
for custom_metadata_filter_onject in custom_metadata_conditions:
if isinstance(custom_metadata_filter_onject, dict):
_, condition = next(iter(custom_metadata_filter_onject.items()))
else:
condition = custom_metadata_filter_onject
search = SearchUtils._process_custom_metadata_condition(
search, condition, "where"
)

# Apply date range filters
if date_range:
logger.debug(f"Applying date range filters: {date_range}")
Expand Down
Loading