diff --git a/modelcontextprotocol/README.md b/modelcontextprotocol/README.md index 8869c8d..a653e6b 100644 --- a/modelcontextprotocol/README.md +++ b/modelcontextprotocol/README.md @@ -23,12 +23,14 @@ The Atlan [Model Context Protocol](https://modelcontextprotocol.io/introduction) ## Available Tools -| Tool | Description | -| ------------------------- | ----------------------------------------------------------------- | -| `search_assets` | Search for assets based on conditions | -| `get_assets_by_dsl` | Retrieve assets using a DSL query | -| `traverse_lineage` | Retrieve lineage for an asset | -| `update_assets` | Update asset attributes (user description and certificate status) | +| Tool | Description | +| ------------------------- | --------------------------------------------------------------------------- | +| `search_assets` | Search for assets based on conditions | +| `get_assets_by_dsl` | Retrieve assets using a DSL query | +| `traverse_lineage` | Retrieve lineage for an asset | +| `update_assets` | Update asset attributes (user description and certificate status) | +| `create_unstructured_asset` | Create a File asset (PDF, Excel, etc.) and optionally set custom metadata | +| `read_custom_metadata` | Retrieve one or more custom metadata sets for any asset | ## Running the MCP server - There are 2 different ways to run the Atlan MCP server locally @@ -209,6 +211,62 @@ Want to develop locally? Check out our [Local Build](./docs/LOCAL_BUILD.md) Guid - You can also directly create a [GitHub issue](https://github.com/atlanhq/agent-toolkit/issues) and we will answer it for you ## Troubleshooting +## Working with unstructured assets + +The new `create_unstructured_asset` tool lets your MCP client register documents like PDFs or Excel workbooks as Atlan `File` assets. 
+
+Minimum inputs:
+
+```json
+{
+  "name": "Quarterly Financials",
+  "connection_qualified_name": "default/s3",
+  "file_type": "excel"
+}
+```
+
+Optional fields:
+
+- `file_path`: Provide a local path or object storage URI so teammates can locate the document.
+- `description`: Human-friendly summary.
+- `custom_metadata`: Map of custom metadata set names to attribute/value pairs. Example:
+
+```json
+{
+  "custom_metadata": {
+    "Document Details": {
+      "Owner": "finance@acme.com",
+      "Quarter": "Q1 FY26"
+    }
+  }
+}
+```
+
+The response contains the Atlan GUID and qualified name so you can link follow-up actions (add terms, tag, etc.).
+
+## Reading custom metadata
+
+Use the `read_custom_metadata` tool to retrieve custom metadata values for any asset:
+
+```json
+{
+  "guid": "12345678-90ab-cdef-1234-567890abcdef",
+  "custom_metadata_sets": [
+    "Document Details",
+    "Data Sensitivity"
+  ],
+  "include_unset": true
+}
+```
+
+Parameters:
+
+- Supply either `guid` **or** `qualified_name`.
+- `asset_type` defaults to `Asset`, but you can pass a concrete type like `"File"` or `"Table"` for stricter validation.
+- When `custom_metadata_sets` is omitted, all custom metadata on the asset is returned.
+- `include_unset` also returns attributes that are defined on the custom metadata template but currently have no value, so you can see every expected attribute.
+
+These additions make it easy for MCP clients to create unstructured documents and inspect their governance metadata without leaving your IDE or chat workflow.
 1. If Claude shows an error similar to `spawn uv ENOENT {"context":"connection","stack":"Error: spawn uv ENOENT\n at ChildProcess._handle.onexit`, it is most likely [this](https://github.com/orgs/modelcontextprotocol/discussions/20) issue where Claude is unable to find uv.
To fix it:
    - Make sure uv is installed and available in your PATH
    - Run `which uv` to verify the installation path
diff --git a/modelcontextprotocol/server.py b/modelcontextprotocol/server.py
index ca5f4d9..7090de8 100644
--- a/modelcontextprotocol/server.py
+++ b/modelcontextprotocol/server.py
@@ -5,6 +5,8 @@
     get_assets_by_dsl,
     traverse_lineage,
     update_assets,
+    create_unstructured_asset,
+    read_custom_metadata,
     UpdatableAttribute,
     CertificateStatus,
     UpdatableAsset,
@@ -390,6 +392,90 @@ def update_assets_tool(
         return {"updated_count": 0, "errors": [str(e)]}
 
 
+@mcp.tool()
+def create_unstructured_asset_tool(
+    name,
+    connection_qualified_name,
+    file_type,
+    file_path=None,
+    description=None,
+    custom_metadata=None,
+):
+    """
+    Create a File asset for an unstructured document such as a PDF or Excel workbook.
+
+    Args:
+        name (str): Display name for the file asset (for example, "Customer Playbook.pdf").
+        connection_qualified_name (str): Qualified name of the connection that should own the asset.
+        file_type (str): Supported values include "pdf", "excel", "xls", "xlsx", "xlsm".
+        file_path (str, optional): Optional file system path or object storage URI for reference.
+        description (str, optional): Optional human-readable description.
+        custom_metadata (dict, optional): Mapping of custom metadata set names to attribute/value pairs.
+
+    Returns:
+        Dict[str, Any]: Information about the created asset including the assigned GUID.
+
+    Example:
+        create_unstructured_asset_tool(
+            name="Quarterly Financials",
+            connection_qualified_name="default/s3",
+            file_type="excel",
+            file_path="s3://corp-data/finance/q1.xlsx",
+            custom_metadata={
+                "Document Details": {"Owner": "finance@acme.com", "Quarter": "Q1"}
+            }
+        )
+    """
+
+    return create_unstructured_asset(
+        name=name,
+        connection_qualified_name=connection_qualified_name,
+        file_type=file_type,
+        file_path=file_path,
+        description=description,
+        custom_metadata=custom_metadata,
+    )
+
+
+@mcp.tool()
+def read_custom_metadata_tool(
+    guid=None,
+    qualified_name=None,
+    asset_type="Asset",
+    custom_metadata_sets=None,
+    include_unset=False,
+):
+    """
+    Retrieve custom metadata values for an asset.
+
+    Args:
+        guid (str, optional): GUID of the asset.
+        qualified_name (str, optional): Qualified name of the asset (required if GUID is not provided).
+        asset_type (str): Atlan asset type name (e.g., "File", "Table"). Defaults to "Asset".
+        custom_metadata_sets (Union[str, List[str]], optional): Specific custom metadata set names to read.
+            When omitted, all custom metadata sets on the asset are returned.
+        include_unset (bool): Include attributes that exist on the template but do not yet have values.
+
+    Returns:
+        Dict[str, Any]: Asset identifiers and the requested custom metadata.
+
+    Example:
+        read_custom_metadata_tool(
+            guid="12345678-90ab-cdef-1234-567890abcdef",
+            custom_metadata_sets=["Document Details", "Data Sensitivity"],
+            include_unset=True,
+        )
+    """
+
+    return read_custom_metadata(
+        guid=guid,
+        qualified_name=qualified_name,
+        asset_type=asset_type,
+        custom_metadata_sets=custom_metadata_sets,
+        include_unset=include_unset,
+    )
+
+
 def main():
     mcp.run()
 
diff --git a/modelcontextprotocol/tools/__init__.py b/modelcontextprotocol/tools/__init__.py
index 05deb29..b670fc3 100644
--- a/modelcontextprotocol/tools/__init__.py
+++ b/modelcontextprotocol/tools/__init__.py
@@ -1,7 +1,11 @@
 from .search import search_assets
 from .dsl import get_assets_by_dsl
 from .lineage import traverse_lineage
-from .assets import update_assets
+from .assets import (
+    create_unstructured_asset,
+    read_custom_metadata,
+    update_assets,
+)
 from .models import CertificateStatus, UpdatableAttribute, UpdatableAsset
 
 __all__ = [
@@ -9,6 +13,8 @@
     "get_assets_by_dsl",
     "traverse_lineage",
     "update_assets",
+    "create_unstructured_asset",
+    "read_custom_metadata",
     "CertificateStatus",
     "UpdatableAttribute",
     "UpdatableAsset",
diff --git a/modelcontextprotocol/tools/assets.py b/modelcontextprotocol/tools/assets.py
index 60c85fb..6707fc1 100644
--- a/modelcontextprotocol/tools/assets.py
+++ b/modelcontextprotocol/tools/assets.py
@@ -1,12 +1,27 @@
 import logging
-from typing import List, Union, Dict, Any
+from collections.abc import Mapping
+from typing import Any, Dict, List, Optional, Union
 
 from client import get_atlan_client
-from .models import UpdatableAsset, UpdatableAttribute, CertificateStatus
+from pyatlan.model.assets import Asset as AssetModel, File
+from pyatlan.model.custom_metadata import CustomMetadataDict
+from pyatlan.model.enums import FileType
+from .models import CertificateStatus, UpdatableAsset, UpdatableAttribute
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
+FILE_TYPE_ALIASES = {
+    "pdf": FileType.PDF,
+    "doc": FileType.DOC,
+    "docx": FileType.DOC,
+    "xls": FileType.XLS,
+    "xlsx": FileType.XLS,
+    "xlsm": FileType.XLSM,
+    "excel": FileType.XLS,
+    "csv": FileType.CSV,
+}
+
 
 def update_assets(
     updatable_assets: Union[UpdatableAsset, List[UpdatableAsset]],
@@ -84,3 +99,236 @@ def update_assets(
         error_msg = f"Error updating assets: {str(e)}"
         logger.error(error_msg)
         return {"updated_count": 0, "errors": [error_msg]}
+
+
+def _resolve_file_type(file_type: Union[str, FileType]) -> FileType:
+    """
+    Normalize a user-supplied file type into a FileType enum member.
+
+    FileType members are returned unchanged. Strings are stripped and
+    lower-cased, looked up in FILE_TYPE_ALIASES, and finally tried as a
+    FileType member name.
+
+    Raises:
+        ValueError: If file_type is not a string or FileType, or cannot be resolved.
+    """
+    if isinstance(file_type, FileType):
+        return file_type
+    if not isinstance(file_type, str):
+        raise ValueError("file_type must be a string or FileType enum member.")
+    normalized = file_type.strip().lower()
+    if normalized in FILE_TYPE_ALIASES:
+        return FILE_TYPE_ALIASES[normalized]
+    try:
+        return FileType[normalized.upper()]
+    except KeyError as err:
+        raise ValueError(
+            f"Unsupported file_type '{file_type}'. "
+            "Supported values include 'pdf', 'excel', 'xls', 'xlsx', 'xlsm'."
+        ) from err
+
+
+def _apply_custom_metadata_to_asset(
+    asset: File, custom_metadata: Mapping[str, Mapping[str, Any]]
+) -> List[str]:
+    """
+    Apply custom metadata values to an asset.
+
+    Args:
+        asset: File asset on which the custom metadata is set.
+        custom_metadata: Mapping of custom metadata set names to attribute/value pairs.
+
+    Returns:
+        List[str]: Names of custom metadata sets that were modified.
+
+    Raises:
+        ValueError: If custom_metadata or one of its values is not a mapping,
+            or an attribute name is not accepted by its custom metadata set.
+    """
+    # A non-mapping value (for example a JSON string that was never decoded)
+    # would otherwise fail below with an AttributeError on .items().
+    if not isinstance(custom_metadata, Mapping):
+        raise ValueError(
+            "custom_metadata must be a mapping of custom metadata set names to attribute/value pairs."
+        )
+
+    client = get_atlan_client()
+    applied_sets: List[str] = []
+
+    for cm_name, attributes in custom_metadata.items():
+        if not isinstance(attributes, Mapping):
+            raise ValueError(
+                f"Custom metadata for '{cm_name}' must be a mapping of attribute names to values."
+            )
+        cm_dict: CustomMetadataDict = asset.get_custom_metadata(client, cm_name)
+        for attr_name, attr_value in attributes.items():
+            try:
+                cm_dict[attr_name] = attr_value
+            except KeyError as exc:
+                raise ValueError(
+                    f"'{attr_name}' is not a valid attribute for custom metadata set '{cm_name}'."
+                ) from exc
+        asset.set_custom_metadata(client, cm_dict)
+        applied_sets.append(cm_name)
+
+    return applied_sets
+
+
+def create_unstructured_asset(
+    *,
+    name: str,
+    connection_qualified_name: str,
+    file_type: Union[str, FileType],
+    file_path: Optional[str] = None,
+    description: Optional[str] = None,
+    custom_metadata: Optional[Mapping[str, Mapping[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """
+    Create a File asset in Atlan to represent an unstructured object (PDF, Excel, etc.).
+
+    Args:
+        name: Display name of the file asset.
+        connection_qualified_name: Qualified name of the Atlan connection where the asset should live.
+        file_type: FileType enum member or string alias (pdf, excel, xls, xlsx, xlsm, etc.).
+        file_path: Optional path or URI to the underlying document.
+        description: Optional human-readable description.
+        custom_metadata: Optional mapping of custom metadata sets to attribute/value pairs.
+
+    Returns:
+        Dict[str, Any]: Details about the created asset including the assigned GUID and qualified name.
+
+    Raises:
+        ValueError: If name or connection_qualified_name is empty, file_type cannot be
+            resolved, or custom_metadata is malformed.
+    """
+
+    if not name or not connection_qualified_name:
+        raise ValueError("Both 'name' and 'connection_qualified_name' are required.")
+
+    file_type_enum = _resolve_file_type(file_type)
+    client = get_atlan_client()
+
+    file_asset = File.creator(
+        name=name, connection_qualified_name=connection_qualified_name, file_type=file_type_enum
+    )
+
+    if file_path:
+        file_asset.file_path = file_path
+    if description:
+        file_asset.description = description
+
+    applied_cm: List[str] = []
+    if custom_metadata:
+        applied_cm = _apply_custom_metadata_to_asset(file_asset, custom_metadata)
+
+    response = client.asset.save(file_asset)
+    created_assets = response.assets_created(File)
+
+    assigned_guid = None
+    if response.guid_assignments:
+        assigned_guid = response.guid_assignments.get(file_asset.guid)
+    # Fall back to the GUID of the returned asset so the result does not carry
+    # "guid": None when the placeholder GUID is absent from guid_assignments.
+    if assigned_guid is None and created_assets:
+        assigned_guid = created_assets[0].guid
+
+    qualified_name = created_assets[0].qualified_name if created_assets else None
+
+    return {
+        "guid": assigned_guid,
+        "qualified_name": qualified_name or file_asset.qualified_name,
+        "name": name,
+        "type_name": file_asset.type_name,
+        "file_type": file_type_enum.value,
+        "file_path": file_path,
+        "custom_metadata_applied": applied_cm,
+    }
+
+
+def read_custom_metadata(
+    *,
+    guid: Optional[str] = None,
+    qualified_name: Optional[str] = None,
+    asset_type: str = "Asset",
+    custom_metadata_sets: Optional[Union[str, List[str]]] = None,
+    include_unset: bool = False,
+) -> Dict[str, Any]:
+    """
+    Retrieve custom metadata values for an asset.
+
+    Args:
+        guid: GUID of the target asset.
+        qualified_name: Qualified name of the target asset (required if guid is not provided).
+        asset_type: The Atlan asset type name (for example, File, Table). Defaults to Asset.
+        custom_metadata_sets: Single set name or list of set names to fetch.
+            If omitted, all custom metadata sets will be returned.
+        include_unset: Whether to include attributes that are defined on the template but currently unset.
+
+    Returns:
+        Dict[str, Any]: Asset identifiers and a mapping of custom metadata values.
+
+    Raises:
+        ValueError: If neither guid nor qualified_name is given, or asset_type does not
+            name an asset class in pyatlan.model.assets.
+    """
+    # Function-scope import so the module-level import block stays untouched.
+    from importlib import import_module
+
+    if not guid and not qualified_name:
+        raise ValueError("Either 'guid' or 'qualified_name' must be provided.")
+
+    asset_cls = None
+    if isinstance(asset_type, str):
+        asset_cls = getattr(import_module("pyatlan.model.assets"), asset_type, None)
+    # getattr on the module also resolves non-class attributes (submodules,
+    # dunder attributes such as __name__); only Asset subclasses are valid here.
+    if not (isinstance(asset_cls, type) and issubclass(asset_cls, AssetModel)):
+        raise ValueError(f"Unknown asset_type '{asset_type}'.")
+
+    client = get_atlan_client()
+    # NOTE(review): CUSTOM_ATTRIBUTES is the only attribute requested, while the
+    # values below are read through get_custom_metadata/business_attributes -
+    # confirm this attribute list makes the lookup return custom metadata.
+    attributes = [AssetModel.CUSTOM_ATTRIBUTES]
+
+    if guid:
+        asset = client.asset.get_by_guid(
+            guid=guid,
+            asset_type=asset_cls,
+            attributes=attributes,
+        )
+    else:
+        asset = client.asset.get_by_qualified_name(
+            qualified_name=qualified_name, asset_type=asset_cls, attributes=attributes
+        )
+
+    if isinstance(custom_metadata_sets, str):
+        cm_sets = [custom_metadata_sets]
+    else:
+        cm_sets = custom_metadata_sets
+
+    metadata: Dict[str, Dict[str, Any]] = {}
+
+    def _serialize_custom_metadata(cm_dict: CustomMetadataDict) -> Dict[str, Any]:
+        values = dict(cm_dict)
+        if include_unset:
+            for attr_name in cm_dict.attribute_names:
+                values.setdefault(attr_name, None)
+        return values
+
+    if cm_sets:
+        for cm_name in cm_sets:
+            cm_dict = asset.get_custom_metadata(client, cm_name)
+            metadata[cm_name] = _serialize_custom_metadata(cm_dict)
+    else:
+        business_attributes = asset.business_attributes or {}
+        for cm_id in business_attributes:
+            cm_name = client.custom_metadata_cache.get_name_for_id(cm_id)
+            cm_dict = asset.get_custom_metadata(client, cm_name)
+            metadata[cm_name] = _serialize_custom_metadata(cm_dict)
+
+    return {
+        "guid": asset.guid,
+        "qualified_name": asset.qualified_name,
+        "custom_metadata": metadata,
+    }