diff --git a/bases/lif/semantic_search_mcp_server/core.py b/bases/lif/semantic_search_mcp_server/core.py
index c743cc4..762d1e7 100644
--- a/bases/lif/semantic_search_mcp_server/core.py
+++ b/bases/lif/semantic_search_mcp_server/core.py
@@ -1,24 +1,32 @@
-import json
+"""
+LIF Semantic Search MCP Server.
+
+This module provides an MCP (Model Context Protocol) server for semantic search
+over LIF data fields. It dynamically loads the schema from MDR at startup
+(falling back to file if MDR is unavailable).
+
+Features:
+    - Schema loading from MDR with file fallback
+    - Dynamic filter and mutation model generation
+    - Semantic search over schema leaves
+    - Health and status endpoints
+    - Schema refresh capability
+"""
+
 import os
-import sys
 from typing import Annotated, List
 
 from fastmcp import FastMCP
 from pydantic import Field
-from sentence_transformers import SentenceTransformer
 from starlette.requests import Request
-from starlette.responses import PlainTextResponse
+from starlette.responses import JSONResponse, PlainTextResponse
 
-from lif.lif_schema_config import LIFSchemaConfig, DEFAULT_ATTRIBUTE_KEYS
+from lif.lif_schema_config import DEFAULT_ATTRIBUTE_KEYS, LIFSchemaConfig
 from lif.logging import get_logger
-from lif.mdr_client import get_openapi_lif_data_model_from_file
-from lif.openapi_schema_parser import load_schema_leaves
+from lif.schema_state_manager import SchemaStateManager
 from lif.semantic_search_service.core import (
-    build_embeddings,
-    run_semantic_search,
-    build_dynamic_filter_model,
-    build_dynamic_mutation_model,
     run_mutation,
+    run_semantic_search,
 )
 
 logger = get_logger(__name__)
@@ -30,126 +38,127 @@
 CONFIG = LIFSchemaConfig.from_environment()
 
 # Extract values for convenience
-ROOT_NODES = CONFIG.all_root_types
 DEFAULT_ROOT_NODE = CONFIG.root_type_name
 LIF_GRAPHQL_API_URL = os.getenv("LIF_GRAPHQL_API_URL", "http://localhost:8002/graphql")
 TOP_K = CONFIG.semantic_search_top_k
-MODEL_NAME = CONFIG.semantic_search_model_name
 
 ATTRIBUTE_KEYS = DEFAULT_ATTRIBUTE_KEYS
 
 
-# --------- LOAD VECTOR STORE & EMBEDDINGS ---------
-
-# get the OpenAPI specification for the LIF data model from the MDR
-openapi = get_openapi_lif_data_model_from_file()
-
-# Load schema leaves for all root nodes
-ALL_LEAVES: List = []
-LEAVES_BY_ROOT: dict = {}
-
-for root_node in ROOT_NODES:
-    try:
-        root_leaves = load_schema_leaves(openapi, root_node, attribute_keys=ATTRIBUTE_KEYS)
-        LEAVES_BY_ROOT[root_node] = root_leaves
-        ALL_LEAVES.extend(root_leaves)
-        logger.info(f"Loaded {len(root_leaves)} schema leaves for root '{root_node}'")
-    except Exception as e:
-        # Primary root (Person) is required; additional roots are optional
-        if root_node == DEFAULT_ROOT_NODE:
-            logger.critical(f"Failed to load schema leaves for required root '{root_node}': {e}")
-            sys.exit(1)
-        else:
-            logger.warning(f"Failed to load schema leaves for optional root '{root_node}': {e}")
-
-logger.info(f"Total schema leaves loaded: {len(ALL_LEAVES)}")
-
-# Build filter and mutation models for each root
-FILTER_MODELS: dict = {}
-MUTATION_MODELS: dict = {}
-
-for root_node, root_leaves in LEAVES_BY_ROOT.items():
-    try:
-        filter_model = build_dynamic_filter_model(root_leaves)
-        if root_node in filter_model:
-            FILTER_MODELS[root_node] = filter_model[root_node]
-            logger.info(f"Dynamic Filter Schema for '{root_node}':\n" + json.dumps(filter_model[root_node].model_json_schema(), indent=2))
-    except Exception as e:
-        logger.warning(f"Failed to build dynamic filter model for '{root_node}': {e}")
-
-    try:
-        mutation_model = build_dynamic_mutation_model(root_leaves)
-        if root_node in mutation_model:
-            MUTATION_MODELS[root_node] = mutation_model[root_node]
-            logger.info(f"Dynamic Mutation Schema for '{root_node}':\n" + json.dumps(mutation_model[root_node].model_json_schema(), indent=2))
-    except Exception as e:
-        logger.warning(f"Failed to build dynamic mutation model for '{root_node}': {e}")
-
-# Use default root for backwards compatibility
-Filter = FILTER_MODELS.get(DEFAULT_ROOT_NODE)
-MutationModel = MUTATION_MODELS.get(DEFAULT_ROOT_NODE)
+# --------- SCHEMA STATE MANAGER ---------
 
-if not Filter:
-    logger.critical(f"Failed to build filter model for default root '{DEFAULT_ROOT_NODE}'")
-    sys.exit(1)
+# Initialize the state manager synchronously at module load time
+# This is required because FastMCP tool decorators need the Pydantic models
+# to be available at decoration time
+_state_manager = SchemaStateManager(CONFIG, attribute_keys=ATTRIBUTE_KEYS)
+_state_manager.initialize_sync(force_file=CONFIG.use_openapi_from_file)
 
-try:
-    MODEL = SentenceTransformer(MODEL_NAME)
-except Exception as e:
-    logger.critical(f"Failed to load SentenceTransformer model: {e}")
-    sys.exit(1)
+# Get the state for use in tool definitions
+_state = _state_manager.state
 
-# ------ ALWAYS embed only the descriptions for ALL leaves (all root entities) ------
-EMBEDDING_TEXTS = [leaf.description for leaf in ALL_LEAVES]
+logger.info(
+    f"Schema loaded from {_state.source}. "
+    f"Total leaves: {len(_state.leaves)}, "
+    f"Filter models: {list(_state.filter_models.keys())}"
+)
+
+# Get filter and mutation models for the default root type
+Filter = _state.filter_models.get(DEFAULT_ROOT_NODE)
+MutationModel = _state.mutation_models.get(DEFAULT_ROOT_NODE)
 
-try:
-    EMBEDDINGS = build_embeddings(EMBEDDING_TEXTS, MODEL)
-except Exception as e:
-    logger.critical(f"EMBEDDINGS failed: {e}")
-    sys.exit(1)
+if not Filter:
+    raise RuntimeError(f"Filter model not available for root '{DEFAULT_ROOT_NODE}'")
 
-# Keep a reference to all leaves for search
-LEAVES = ALL_LEAVES
 
+# --------- MCP SERVER SETUP ---------
 
 mcp = FastMCP(name="LIF-Query-Server")
 
 
+# --------- HTTP ENDPOINTS ---------
+
+
 @mcp.custom_route("/health", methods=["GET"])
 async def health_check(request: Request) -> PlainTextResponse:
+    """Health check endpoint for readiness probes."""
+    if not _state_manager.is_initialized:
+        return PlainTextResponse("NOT_READY", status_code=503)
     return PlainTextResponse("OK")
 
 
+@mcp.custom_route("/schema/status", methods=["GET"])
+async def schema_status(request: Request) -> JSONResponse:
+    """Get current schema status and metadata."""
+    status = _state_manager.get_status()
+    return JSONResponse(status)
+
+
+@mcp.custom_route("/schema/refresh", methods=["POST"])
+async def schema_refresh(request: Request) -> JSONResponse:
+    """
+    Trigger a schema refresh from MDR.
+
+    Note: This updates the internal state but does NOT update the MCP tool
+    definitions, as those are set at module load time. To use new schema
+    definitions for tools, the server must be restarted.
+    """
+    result = await _state_manager.refresh()
+    status_code = 200 if result.get("success") else 500
+
+    # Add a note about tool definitions
+    if result.get("success"):
+        result["note"] = (
+            "Schema state updated. Note: MCP tool definitions use the schema "
+            "from server startup. Restart the server to use new schema in tools."
+        )
+
+    return JSONResponse(result, status_code=status_code)
+
+
+# --------- MCP TOOLS ---------
+
+
 @mcp.tool(
-    name="lif_query", description="Use this tool to run a LIF data query", annotations={"title": "Execute LIF Query"}
+    name="lif_query",
+    description="Use this tool to run a LIF data query",
+    annotations={"title": "Execute LIF Query"},
 )
 async def lif_query(
     filter: Annotated[Filter, Field(description="Parameters for LIF query")],
     query: Annotated[str, Field(description="Natural language query used to determine LIF data to retrieve")],
 ) -> List[dict]:
+    """Perform a semantic search and retrieve LIF data."""
+    # Get current state (may have been refreshed)
+    state = _state_manager.state
     return await run_semantic_search(
         filter=filter,
         query=query,
-        model=MODEL,
-        embeddings=EMBEDDINGS,
-        leaves=LEAVES,
+        model=state.model,
+        embeddings=state.embeddings,
+        leaves=state.leaves,
         top_k=TOP_K,
         graphql_url=LIF_GRAPHQL_API_URL,
         config=CONFIG,
     )
 
 
-@mcp.tool(
-    name="lif_mutation",
-    description="Use this tool to mutate/update LIF data fields. You must specify a filter to select records.",
-    annotations={"title": "Execute LIF Mutation"},
-)
-async def lif_mutation(
-    filter: Annotated[Filter, Field(description="Filter to select records for mutation")],
-    input: Annotated[MutationModel, Field(description="Fields to update")],
-) -> dict:
-    """Run a mutation on LIF data fields."""
-    return await run_mutation(filter=filter, input=input, graphql_url=LIF_GRAPHQL_API_URL)
+# Only register mutation tool if mutation model is available
+if MutationModel:
+
+    @mcp.tool(
+        name="lif_mutation",
+        description="Use this tool to mutate/update LIF data fields. You must specify a filter to select records.",
+        annotations={"title": "Execute LIF Mutation"},
+    )
+    async def lif_mutation(
+        filter: Annotated[Filter, Field(description="Filter to select records for mutation")],
+        input: Annotated[MutationModel, Field(description="Fields to update")],
+    ) -> dict:
+        """Run a mutation on LIF data fields."""
+        return await run_mutation(filter=filter, input=input, graphql_url=LIF_GRAPHQL_API_URL)
+
+else:
+    logger.warning(f"Mutation model not available for root '{DEFAULT_ROOT_NODE}', lif_mutation tool not registered")
 
 
 http_app = mcp.http_app()
diff --git a/components/lif/lif_schema_config/core.py b/components/lif/lif_schema_config/core.py
index 6bd2d62..46ea8b3 100644
--- a/components/lif/lif_schema_config/core.py
+++ b/components/lif/lif_schema_config/core.py
@@ -100,6 +100,7 @@ class LIFSchemaConfig:
     # MDR Configuration
     mdr_api_url: str = "http://localhost:8012"
     mdr_api_auth_token: str = "no_auth_token_set"
+    mdr_timeout_seconds: int = 30
     openapi_data_model_id: Optional[str] = None
     openapi_json_filename: str = "openapi_constrained_with_interactions.json"
     use_openapi_from_file: bool = False
@@ -129,6 +130,8 @@ def validate(self) -> None:
         # Validate timeouts are positive
         if self.query_timeout_seconds <= 0:
             errors.append(f"query_timeout_seconds must be positive, got {self.query_timeout_seconds}")
+        if self.mdr_timeout_seconds <= 0:
+            errors.append(f"mdr_timeout_seconds must be positive, got {self.mdr_timeout_seconds}")
         if self.semantic_search_timeout <= 0:
             errors.append(f"semantic_search_timeout must be positive, got {self.semantic_search_timeout}")
         if self.semantic_search_top_k <= 0:
@@ -155,6 +158,7 @@ def from_environment(cls) -> "LIFSchemaConfig":
             LIF_QUERY_TIMEOUT_SECONDS: Query timeout in seconds
             LIF_MDR_API_URL: MDR API URL
             LIF_MDR_API_AUTH_TOKEN: MDR authentication token
+            MDR_TIMEOUT_SECONDS: Timeout for MDR API calls (default: 30)
             OPENAPI_DATA_MODEL_ID: MDR data model ID
             OPENAPI_JSON_FILENAME: Local OpenAPI filename
             USE_OPENAPI_DATA_MODEL_FROM_FILE: Use local file instead of MDR
@@ -192,6 +196,7 @@ def from_environment(cls) -> "LIFSchemaConfig":
             # MDR
             mdr_api_url=os.getenv("LIF_MDR_API_URL", "http://localhost:8012"),
             mdr_api_auth_token=os.getenv("LIF_MDR_API_AUTH_TOKEN", "no_auth_token_set"),
+            mdr_timeout_seconds=int(os.getenv("MDR_TIMEOUT_SECONDS", "30")),
             openapi_data_model_id=os.getenv("OPENAPI_DATA_MODEL_ID"),
             openapi_json_filename=os.getenv(
                 "OPENAPI_JSON_FILENAME",
@@ -217,7 +222,10 @@ def from_environment(cls) -> "LIFSchemaConfig":
     @property
     def graphql_query_name(self) -> str:
         """GraphQL query field name (e.g., 'person' for root 'Person')."""
-        return to_graphql_query_name(self.root_type_name)
+        # root_type_name is validated to be non-empty, so result will never be None
+        result = to_graphql_query_name(self.root_type_name)
+        assert result is not None  # Validated in __post_init__
+        return result
 
     @property
     def mutation_name(self) -> str:
diff --git a/components/lif/mdr_client/__init__.py b/components/lif/mdr_client/__init__.py
index 2d48610..944d318 100644
--- a/components/lif/mdr_client/__init__.py
+++ b/components/lif/mdr_client/__init__.py
@@ -1,13 +1,33 @@
 from lif.mdr_client.core import (
-    get_openapi_lif_data_model,
+    # Config-based functions (preferred)
+    load_openapi_schema,
+    fetch_schema_from_mdr,
+    # File loading
     get_openapi_lif_data_model_from_file,
+    # Legacy env-var based functions
+    get_openapi_lif_data_model,
+    get_openapi_lif_data_model_sync,
     get_data_model_schema,
+    get_data_model_schema_sync,
     get_data_model_transformation,
+    # Exceptions
+    MDRClientException,
+    MDRConfigurationError,
 )
 
 __all__ = [
-    "get_openapi_lif_data_model",
+    # Config-based functions (preferred)
+    "load_openapi_schema",
+    "fetch_schema_from_mdr",
+    # File loading
     "get_openapi_lif_data_model_from_file",
+    # Legacy env-var based functions
+    "get_openapi_lif_data_model",
+    "get_openapi_lif_data_model_sync",
     "get_data_model_schema",
+    "get_data_model_schema_sync",
     "get_data_model_transformation",
+    # Exceptions
+    "MDRClientException",
+    "MDRConfigurationError",
 ]
diff --git a/components/lif/mdr_client/core.py b/components/lif/mdr_client/core.py
index a00ace6..21af273 100644
--- a/components/lif/mdr_client/core.py
+++ b/components/lif/mdr_client/core.py
@@ -1,14 +1,25 @@
 import json
 import os
 from importlib.resources import files
-from typing import AsyncGenerator
+from typing import TYPE_CHECKING, AsyncGenerator, Optional
 
 import httpx
 from lif.exceptions.core import LIFException, ResourceNotFoundException
 from lif.logging import get_logger
 
+if TYPE_CHECKING:
+    from lif.lif_schema_config import LIFSchemaConfig
+
 logger = get_logger(__name__)
 
+# Default timeout for MDR API calls (in seconds)
+DEFAULT_MDR_TIMEOUT_SECONDS = 30
+
+
+# =============================================================================
+# Legacy env-var based helpers (kept for backward compatibility)
+# =============================================================================
+
 
 def _get_mdr_api_url() -> str:
     return os.getenv("LIF_MDR_API_URL", "http://localhost:8012")
@@ -18,14 +29,8 @@ def _get_mdr_api_auth_token() -> str:
     return os.getenv("LIF_MDR_API_AUTH_TOKEN", "no_auth_token_set")
 
 
-async def _get_mdr_client() -> AsyncGenerator[httpx.AsyncClient]:
-    """
-    Generator that yields an httpx AsyncClient.
-
-    Allows a test harness to override this method to connect to an in-memory MDR instance.
-    """
-    async with httpx.AsyncClient() as client:
-        yield client
+def _get_mdr_timeout_seconds() -> int:
+    return int(os.getenv("MDR_TIMEOUT_SECONDS", str(DEFAULT_MDR_TIMEOUT_SECONDS)))
 
 
 def _get_openapi_json_filename() -> str:
@@ -40,21 +45,278 @@ def _get_use_openapi_from_file() -> bool:
     return os.getenv("USE_OPENAPI_DATA_MODEL_FROM_FILE", "false").lower() == "true"
 
 
-def _build_mdr_headers() -> dict:
-    auth_token = _get_mdr_api_auth_token()
+def _build_mdr_headers(auth_token: Optional[str] = None) -> dict:
+    if auth_token is None:
+        auth_token = _get_mdr_api_auth_token()
     return {"X-API-Key": auth_token}
 
 
-def get_openapi_lif_data_model_from_file() -> dict:
-    openapi_json_filename: str = _get_openapi_json_filename()
-    logger.info(f"Fetching OpenAPI data model from file {openapi_json_filename}")
-    resource_path = files("lif.mdr_client.resources") / openapi_json_filename
+# =============================================================================
+# HTTP Client factories
+# =============================================================================
+
+
+async def _get_mdr_client() -> AsyncGenerator[httpx.AsyncClient]:
+    """
+    Generator that yields an httpx AsyncClient.
+
+    Allows a test harness to override this method to connect to an in-memory MDR instance.
+    """
+    timeout = _get_mdr_timeout_seconds()
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        yield client
+
+
+def _create_sync_client(timeout: int) -> httpx.Client:
+    """Create a synchronous httpx Client with the specified timeout."""
+    return httpx.Client(timeout=timeout)
+
+
+# =============================================================================
+# File-based schema loading
+# =============================================================================
+
+
+def get_openapi_lif_data_model_from_file(filename: Optional[str] = None) -> dict:
+    """
+    Load OpenAPI data model from bundled file.
+
+    Args:
+        filename: Optional filename to load. Defaults to env var or standard file.
+
+    Returns:
+        The OpenAPI schema dictionary
+    """
+    if filename is None:
+        filename = _get_openapi_json_filename()
+    logger.info(f"Loading OpenAPI data model from file: {filename}")
+    resource_path = files("lif.mdr_client.resources") / filename
 
     with resource_path.open("r", encoding="utf-8") as f:
         return json.load(f)
 
 
+# =============================================================================
+# Config-based MDR functions (preferred for new code)
+# =============================================================================
+
+
+def fetch_schema_from_mdr(
+    config: "LIFSchemaConfig",
+    include_attr_md: bool = True,
+    include_entity_md: bool = False,
+) -> dict:
+    """
+    Fetch OpenAPI schema from MDR using configuration.
+
+    This is the preferred method for fetching schemas - it uses centralized
+    configuration and does NOT fall back to file on failure.
+
+    Args:
+        config: LIFSchemaConfig instance with MDR settings
+        include_attr_md: Include attribute metadata in response
+        include_entity_md: Include entity metadata in response
+
+    Returns:
+        The OpenAPI schema dictionary
+
+    Raises:
+        MDRClientException: If MDR is unavailable or returns an error
+        MDRConfigurationError: If OPENAPI_DATA_MODEL_ID is not configured
+        ResourceNotFoundException: If the data model is not found
+    """
+    if not config.openapi_data_model_id:
+        raise MDRConfigurationError(
+            "OPENAPI_DATA_MODEL_ID must be set when USE_OPENAPI_DATA_MODEL_FROM_FILE is false. "
+            "Either set OPENAPI_DATA_MODEL_ID or set USE_OPENAPI_DATA_MODEL_FROM_FILE=true."
+        )
+
+    url = (
+        f"{config.mdr_api_url}/datamodels/open_api_schema/{config.openapi_data_model_id}"
+        f"?include_attr_md={str(include_attr_md).lower()}"
+        f"&include_entity_md={str(include_entity_md).lower()}"
+    )
+
+    headers = _build_mdr_headers(config.mdr_api_auth_token)
+
+    logger.info(f"Fetching OpenAPI schema from MDR: {config.openapi_data_model_id}")
+
+    try:
+        with _create_sync_client(config.mdr_timeout_seconds) as client:
+            response = client.get(url, headers=headers)
+        response.raise_for_status()
+        logger.info("Successfully loaded OpenAPI schema from MDR")
+        return response.json()
+
+    except httpx.TimeoutException as e:
+        msg = f"MDR request timed out after {config.mdr_timeout_seconds}s: {e}"
+        logger.error(msg)
+        raise MDRClientException(msg)
+
+    except httpx.ConnectError as e:
+        msg = f"Failed to connect to MDR at {config.mdr_api_url}: {e}"
+        logger.error(msg)
+        raise MDRClientException(msg)
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"MDR HTTP error: {e.response.status_code} - {e.response.text}")
+        if e.response.status_code == 404:
+            raise ResourceNotFoundException(
+                resource_id=config.openapi_data_model_id,
+                message=f"Data model '{config.openapi_data_model_id}' not found in MDR.",
+            )
+        raise MDRClientException(f"MDR returned HTTP {e.response.status_code}: {e.response.text}")
+
+    except Exception as e:
+        msg = f"Unexpected error fetching from MDR: {e}"
+        logger.error(msg)
+        raise MDRClientException(msg)
+
+
+def load_openapi_schema(config: "LIFSchemaConfig") -> tuple[dict, str]:
+    """
+    Load OpenAPI schema based on configuration.
+
+    Uses file if USE_OPENAPI_DATA_MODEL_FROM_FILE is true, otherwise fetches from MDR.
+    Does NOT fall back to file if MDR fails - this prevents silent use of stale data.
+
+    Args:
+        config: LIFSchemaConfig instance
+
+    Returns:
+        Tuple of (openapi_dict, source) where source is "mdr" or "file"
+
+    Raises:
+        MDRClientException: If MDR is configured but unavailable
+        MDRConfigurationError: If MDR is expected but not properly configured
+    """
+    if config.use_openapi_from_file:
+        logger.info("USE_OPENAPI_DATA_MODEL_FROM_FILE=true, loading from bundled file")
+        return get_openapi_lif_data_model_from_file(config.openapi_json_filename), "file"
+
+    # MDR is expected - fetch it (will raise on failure, no fallback)
+    return fetch_schema_from_mdr(config), "mdr"
+
+
+# =============================================================================
+# Legacy sync functions (kept for backward compatibility)
+# =============================================================================
+
+
+def get_data_model_schema_sync(
+    data_model_id: str,
+    include_attr_md: bool = False,
+    include_entity_md: bool = False,
+    timeout: Optional[int] = None,
+) -> dict:
+    """
+    Synchronous version of get_data_model_schema using env vars.
+
+    DEPRECATED: Prefer fetch_schema_from_mdr() with LIFSchemaConfig.
+
+    Args:
+        data_model_id: The MDR data model ID to fetch
+        include_attr_md: Include attribute metadata in response
+        include_entity_md: Include entity metadata in response
+        timeout: Optional timeout in seconds
+
+    Returns:
+        The OpenAPI schema dictionary
+
+    Raises:
+        ResourceNotFoundException: If the data model is not found
+        MDRClientException: For other MDR errors
+    """
+    mdr_api_url = _get_mdr_api_url()
+    if timeout is None:
+        timeout = _get_mdr_timeout_seconds()
+
+    url = (
+        f"{mdr_api_url}/datamodels/open_api_schema/{data_model_id}"
+        f"?include_attr_md={str(include_attr_md).lower()}"
+        f"&include_entity_md={str(include_entity_md).lower()}"
+    )
+    try:
+        with _create_sync_client(timeout) as client:
+            response = client.get(url, headers=_build_mdr_headers())
+        response.raise_for_status()
+        return response.json()
+    except httpx.TimeoutException as e:
+        msg = f"MDR Client timeout after {timeout}s: {e}"
+        logger.error(msg)
+        raise MDRClientException(msg)
+    except httpx.HTTPStatusError as e:
+        logger.error(f"MDR Client HTTP error: {e.response.status_code} - {e.response.text}")
+        if e.response.status_code == 404:
+            raise ResourceNotFoundException(
+                resource_id=data_model_id,
+                message=f"Data model with ID {data_model_id} not found in MDR.",
+            )
+        else:
+            raise e
+    except Exception as e:
+        msg = f"MDR Client error: {e}"
+        logger.error(msg)
+        raise MDRClientException(msg)
+
+
+def get_openapi_lif_data_model_sync(timeout: Optional[int] = None) -> tuple[dict, str]:
+    """
+    Synchronous schema loading using env vars.
+
+    DEPRECATED: Prefer load_openapi_schema() with LIFSchemaConfig.
+
+    This function falls back to file if MDR fails - use load_openapi_schema()
+    for stricter behavior that fails instead of using potentially stale data.
+
+    Args:
+        timeout: Optional timeout in seconds for MDR calls
+
+    Returns:
+        Tuple of (openapi_dict, source) where source is "mdr" or "file"
+    """
+    use_openapi_from_file = _get_use_openapi_from_file()
+    openapi_data_model_id = _get_openapi_data_model_id()
+    openapi_json_filename = _get_openapi_json_filename()
+
+    if use_openapi_from_file:
+        logger.info("USE_OPENAPI_DATA_MODEL_FROM_FILE is set, loading from file")
+        return get_openapi_lif_data_model_from_file(), "file"
+
+    if openapi_data_model_id is None:
+        logger.warning(
+            f"OPENAPI_DATA_MODEL_ID not set. Falling back to file {openapi_json_filename}"
+        )
+        return get_openapi_lif_data_model_from_file(), "file"
+
+    # Try MDR - this legacy function falls back to file on failure
+    logger.info(f"Fetching OpenAPI data model {openapi_data_model_id} from MDR")
+    try:
+        openapi = get_data_model_schema_sync(
+            openapi_data_model_id,
+            include_attr_md=True,
+            include_entity_md=False,
+            timeout=timeout,
+        )
+        logger.info("Successfully loaded OpenAPI data model from MDR")
+        return openapi, "mdr"
+    except Exception as e:
+        logger.error(f"Failed to fetch OpenAPI data model from MDR: {e}")
+        logger.warning(f"Falling back to file {openapi_json_filename}")
+        return get_openapi_lif_data_model_from_file(), "file"
+
+
+# =============================================================================
+# Async functions (existing API preserved)
+# =============================================================================
+
+
 async def get_openapi_lif_data_model() -> dict | None:
+    """
+    Async schema loading using env vars.
+
+    Note: This function falls back to file if MDR fails.
+    """
     use_openapi_from_file = _get_use_openapi_from_file()
     openapi_data_model_id = _get_openapi_data_model_id()
     openapi_json_filename: str = _get_openapi_json_filename()
@@ -133,8 +395,20 @@ async def get_data_model_transformation(source_data_model_id: str, target_data_m
         raise MDRClientException(msg)
 
 
+# =============================================================================
+# Exceptions
+# =============================================================================
+
+
 class MDRClientException(LIFException):
     """Base exception for MDR Client errors."""
 
     def __init__(self, message="MDR Client error occurred"):
         super().__init__(message)
+
+
+class MDRConfigurationError(MDRClientException):
+    """Raised when MDR is not properly configured."""
+
+    def __init__(self, message="MDR configuration error"):
+        super().__init__(message)
diff --git a/components/lif/schema_state_manager/__init__.py b/components/lif/schema_state_manager/__init__.py
new file mode 100644
index 0000000..3b3861c
--- /dev/null
+++ b/components/lif/schema_state_manager/__init__.py
@@ -0,0 +1,3 @@
+from lif.schema_state_manager.core import SchemaState, SchemaStateManager
+
+__all__ = ["SchemaState", "SchemaStateManager"]
diff --git a/components/lif/schema_state_manager/core.py b/components/lif/schema_state_manager/core.py
new file mode 100644
index 0000000..f88a994
--- /dev/null
+++ b/components/lif/schema_state_manager/core.py
@@ -0,0 +1,440 @@
+"""
+Schema State Manager Component.
+
+This component encapsulates schema state management for services that need to load
+and maintain LIF schema data from the MDR or from bundled files.
+
+Features:
+    - Sync and async initialization supporting MDR or file-based schema loading
+    - Thread-safe state access via lock
+    - Fallback to bundled file when MDR is unavailable
+    - Source tracking ("mdr" or "file")
+    - Schema refresh capability (async only)
+
+Usage:
+    from lif.schema_state_manager import SchemaStateManager
+
+    # Sync initialization (for module-level tool registration)
+    manager = SchemaStateManager(config)
+    manager.initialize_sync()
+
+    # Or async initialization
+    await manager.initialize()
+
+    # Access state
+    state = manager.state
+    filter_model = state.filter_models.get("Person")
+"""
+
+import sys
+import threading
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Type
+
+import numpy as np
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+
+from lif.lif_schema_config import LIFSchemaConfig
+from lif.logging import get_logger
+from lif.mdr_client import (
+    load_openapi_schema,
+    get_openapi_lif_data_model_from_file,
+    MDRClientException,
+    MDRConfigurationError,
+)
+from lif.openapi_schema_parser import load_schema_leaves
+from lif.openapi_schema_parser.core import SchemaLeaf
+from lif.semantic_search_service.core import (
+    build_embeddings,
+    build_dynamic_filter_model,
+    build_dynamic_mutation_model,
+)
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class SchemaState:
+    """
+    Immutable state container for schema-derived data.
+
+    Attributes:
+        openapi: The raw OpenAPI specification dictionary
+        leaves: All schema leaves across all root types
+        leaves_by_root: Schema leaves organized by root type name
+        filter_models: Dynamic Pydantic filter models keyed by root type
+        mutation_models: Dynamic Pydantic mutation models keyed by root type
+        embeddings: Pre-computed numpy embeddings for semantic search
+        model: The loaded SentenceTransformer model
+        source: Source of the schema ("mdr" or "file")
+    """
+
+    openapi: dict
+    leaves: List[SchemaLeaf]
+    leaves_by_root: Dict[str, List[SchemaLeaf]]
+    filter_models: Dict[str, Type[BaseModel]]
+    mutation_models: Dict[str, Type[BaseModel]]
+    embeddings: np.ndarray
+    model: SentenceTransformer
+    source: str
+
+
+class SchemaStateManager:
+    """
+    Manages schema state lifecycle with sync/async initialization and thread-safe access.
+
+    This class handles:
+    - Loading schema from MDR (with fallback to file)
+    - Building filter and mutation models for each root type
+    - Computing embeddings for semantic search
+    - Thread-safe state access
+
+    Args:
+        config: LIFSchemaConfig instance with schema configuration
+        attribute_keys: Attribute keys to extract from schema leaves
+    """
+
+    def __init__(
+        self,
+        config: LIFSchemaConfig,
+        attribute_keys: Optional[List[str]] = None,
+    ):
+        self._config = config
+        self._attribute_keys = attribute_keys or [
+            "dataType",
+            "xQueryable",
+            "xMutable",
+            "x-mutable",
+            "enum",
+            "x-queryable",
+        ]
+        self._state: Optional[SchemaState] = None
+        self._lock = threading.Lock()
+        self._initialized = False
+
+    @property
+    def state(self) -> SchemaState:
+        """Get the current schema state. Raises if not initialized."""
+        with self._lock:
+            if self._state is None:
+                raise RuntimeError("SchemaStateManager not initialized. Call initialize() first.")
+            return self._state
+
+    @property
+    def is_initialized(self) -> bool:
+        """Check if the manager has been initialized."""
+        with self._lock:
+            return self._initialized
+
+    def initialize_sync(self, force_file: bool = False) -> None:
+        """
+        Synchronously initialize the schema state by loading from MDR or file.
+
+        This is the preferred method for module-level initialization where async
+        is not available (e.g., FastMCP tool registration at import time).
+
+        Args:
+            force_file: If True, skip MDR and load directly from file
+
+        Raises:
+            SystemExit: If critical initialization fails (required root not found)
+        """
+        logger.info("Initializing SchemaStateManager (sync)...")
+
+        # Load OpenAPI schema synchronously
+        openapi, source = self._load_openapi_schema_sync(force_file)
+        if openapi is None:
+            logger.critical("Failed to load OpenAPI schema from both MDR and file")
+            sys.exit(1)
+
+        # Type narrowing: openapi is guaranteed to be dict after the check above
+        assert openapi is not None
+
+        # Complete the initialization with the loaded schema
+        self._complete_initialization(openapi, source)
+
+    async def initialize(self, force_file: bool = False) -> None:
+        """
+        Asynchronously initialize the schema state by loading from MDR or file.
+
+        Args:
+            force_file: If True, skip MDR and load directly from file
+
+        Raises:
+            SystemExit: If critical initialization fails (required root not found)
+        """
+        logger.info("Initializing SchemaStateManager (async)...")
+
+        # Load OpenAPI schema asynchronously
+        openapi, source = await self._load_openapi_schema_async(force_file)
+        if openapi is None:
+            logger.critical("Failed to load OpenAPI schema from both MDR and file")
+            sys.exit(1)
+
+        # Type narrowing: openapi is guaranteed to be dict after the check above
+        assert openapi is not None
+
+        # Complete the initialization with the loaded schema
+        self._complete_initialization(openapi, source)
+
+    def _complete_initialization(self, openapi: dict, source: str) -> None:
+        """Complete initialization with the loaded OpenAPI schema."""
+        # Load schema leaves for all root nodes
+        leaves, leaves_by_root = self._load_schema_leaves(openapi)
+
+        # Build filter and mutation models
+        filter_models, mutation_models = self._build_models(leaves_by_root)
+
+        # Load SentenceTransformer model
+        model = self._load_sentence_transformer()
+
+        # Build embeddings
+        embeddings = self._build_embeddings(leaves, model)
+
+        # Create and store state
+        state = SchemaState(
+            openapi=openapi,
+            leaves=leaves,
+            leaves_by_root=leaves_by_root,
+            filter_models=filter_models,
+            mutation_models=mutation_models,
+            embeddings=embeddings,
+            model=model,
+            source=source,
+        )
+
+        with self._lock:
+            self._state = state
+            self._initialized = True
+
+        logger.info(
+            f"SchemaStateManager initialized successfully. "
+            f"Source: {source}, Leaves: {len(leaves)}, "
+            f"Filter models: {list(filter_models.keys())}"
+        )
+
+    async def refresh(self) -> Dict[str, Any]:
+        """
+        Refresh the schema by reloading from MDR.
+
+        Returns:
+            Dict with refresh status and metadata
+
+        Note:
+            If refresh fails, the existing state is preserved.
+        """
+        if not self._initialized:
+            return {"success": False, "error": "Manager not initialized"}
+
+        logger.info("Refreshing schema from MDR...")
+
+        try:
+            openapi, source = await self._load_openapi_schema_async(force_file=False)
+            if openapi is None:
+                return {
+                    "success": False,
+                    "error": "Failed to load schema from MDR",
+                    "current_source": self._state.source if self._state else None,
+                }
+
+            leaves, leaves_by_root = self._load_schema_leaves(openapi)
+            filter_models, mutation_models = self._build_models(leaves_by_root)
+
+            # Reuse existing model for performance
+            model = self._state.model if self._state else self._load_sentence_transformer()
+            embeddings = self._build_embeddings(leaves, model)
+
+            new_state = SchemaState(
+                openapi=openapi,
+                leaves=leaves,
+                leaves_by_root=leaves_by_root,
+                filter_models=filter_models,
+                mutation_models=mutation_models,
+                embeddings=embeddings,
+                model=model,
+                source=source,
+            )
+
+            with self._lock:
+                old_leaf_count = len(self._state.leaves) if self._state else 0
+                self._state = new_state
+
+            return {
+                "success": True,
+                "source": source,
+                "leaf_count": len(leaves),
+                "previous_leaf_count": old_leaf_count,
+                "filter_models": list(filter_models.keys()),
+            }
+
+        except Exception as e:
+            logger.error(f"Schema refresh failed: {e}")
+            return {
+                "success": False,
+                "error": str(e),
+                "current_source": self._state.source if self._state else None,
+            }
+
+    def get_status(self) -> Dict[str, Any]:
+        """Get current schema status and metadata."""
+        with self._lock:
+            if not self._initialized or self._state is None:
+                return {
+                    "initialized": False,
+                    "source": None,
+                    "leaf_count": 0,
+                    "roots": [],
+                }
+
+            return {
+                "initialized": True,
+                "source": self._state.source,
+                "leaf_count": len(self._state.leaves),
+                "roots": list(self._state.leaves_by_root.keys()),
+                "filter_models": list(self._state.filter_models.keys()),
+                "mutation_models": list(self._state.mutation_models.keys()),
+            }
+
+    def _load_openapi_schema_sync(self, force_file: bool) -> tuple[Optional[dict], str]:
+        """
+        Load OpenAPI schema synchronously.
+
+        Uses the config-based load_openapi_schema() which does NOT fall back
+        to file if MDR is configured but fails. This prevents silent use of
+        stale data.
+
+        Args:
+            force_file: If True, load from file regardless of config
+
+        Returns:
+            Tuple of (openapi_dict, source) or (None, source) on failure
+        """
+        if force_file:
+            logger.info("Loading OpenAPI schema from bundled file (force_file=True)")
+            try:
+                openapi = get_openapi_lif_data_model_from_file(
+                    self._config.openapi_json_filename
+                )
+                return openapi, "file"
+            except Exception as e:
+                logger.error(f"Failed to load OpenAPI schema from file: {e}")
+                return None, "file"
+
+        # Use config-based loading (no silent fallback to file)
+        try:
+            openapi, source = load_openapi_schema(self._config)
+            return openapi, source
+        except MDRConfigurationError as e:
+            logger.critical(f"MDR configuration error: {e}")
+            return None, "mdr"
+        except MDRClientException as e:
+            logger.critical(f"Failed to load schema from MDR: {e}")
+            return None, "mdr"
+        except Exception as e:
+            logger.critical(f"Unexpected error loading OpenAPI schema: {e}")
+            return None, "unknown"
+
+    async def _load_openapi_schema_async(self, force_file: bool) -> tuple[Optional[dict], str]:
+        """
+        Load OpenAPI schema asynchronously.
+
+        Note: Currently uses sync loading since the config-based function is sync.
+        For true async, would need to add async versions of the MDR functions.
+
+        Args:
+            force_file: If True, load from file regardless of config
+
+        Returns:
+            Tuple of (openapi_dict, source) or (None, source) on failure
+        """
+        # For now, delegate to sync version since load_openapi_schema is sync
+        # A future enhancement could add async MDR fetch
+        return self._load_openapi_schema_sync(force_file)
+
+    def _load_schema_leaves(
+        self, openapi: dict
+    ) -> tuple[List[SchemaLeaf], Dict[str, List[SchemaLeaf]]]:
+        """Load schema leaves for all configured root types."""
+        all_leaves: List[SchemaLeaf] = []
+        leaves_by_root: Dict[str, List[SchemaLeaf]] = {}
+
+        for root_node in self._config.all_root_types:
+            try:
+                root_leaves = load_schema_leaves(
+                    openapi, root_node, attribute_keys=self._attribute_keys
+                )
+                leaves_by_root[root_node] = root_leaves
+                all_leaves.extend(root_leaves)
+                logger.info(f"Loaded {len(root_leaves)} schema leaves for root '{root_node}'")
+            except Exception as e:
+                # Primary root is required; additional roots are optional
+                if root_node == self._config.root_type_name:
+                    logger.critical(
+                        f"Failed to load schema leaves for required root '{root_node}': {e}"
+                    )
+                    sys.exit(1)
+                else:
+                    logger.warning(
+                        f"Failed to load schema leaves for optional root '{root_node}': {e}"
+                    )
+
+        logger.info(f"Total schema leaves loaded: {len(all_leaves)}")
+        return all_leaves, leaves_by_root
+
+    def _build_models(
+        self, leaves_by_root: Dict[str, List[SchemaLeaf]]
+    ) -> tuple[Dict[str, Type[BaseModel]], Dict[str, Type[BaseModel]]]:
+        """Build filter and mutation models for each root type."""
+        filter_models: Dict[str, Type[BaseModel]] = {}
+        mutation_models: Dict[str, Type[BaseModel]] = {}
+
+        for root_node, root_leaves in leaves_by_root.items():
+            # Build filter model
+            try:
+                filter_model = build_dynamic_filter_model(root_leaves)
+                if root_node in filter_model:
+                    filter_models[root_node] = filter_model[root_node]
+                    logger.info(f"Built dynamic filter model for '{root_node}'")
+            except Exception as e:
+                logger.warning(f"Failed to build dynamic filter model for '{root_node}': {e}")
+
+            # Build mutation model
+            try:
+                mutation_model = build_dynamic_mutation_model(root_leaves)
+                if root_node in mutation_model:
+                    mutation_models[root_node] = mutation_model[root_node]
+                    logger.info(f"Built dynamic mutation model for '{root_node}'")
+            except Exception as e:
+                logger.warning(f"Failed to build dynamic mutation model for '{root_node}': {e}")
+
+        # Verify required root has filter model
+        if self._config.root_type_name not in filter_models:
+            logger.critical(
+                f"Failed to build filter model for required root '{self._config.root_type_name}'"
+            )
+            sys.exit(1)
+
+        return filter_models, mutation_models
+
+    def _load_sentence_transformer(self) -> SentenceTransformer:
+        """Load the SentenceTransformer model."""
+        model_name = self._config.semantic_search_model_name
+        logger.info(f"Loading SentenceTransformer model: {model_name}")
+        try:
+            return SentenceTransformer(model_name)
+        except Exception as e:
+            logger.critical(f"Failed to load SentenceTransformer model: {e}")
+            sys.exit(1)
+
+    def _build_embeddings(
+        self, leaves: List[SchemaLeaf], model: SentenceTransformer
+    ) -> np.ndarray:
+        """Build embeddings for all schema leaves."""
+        logger.info(f"Building embeddings for {len(leaves)} schema leaves")
+        embedding_texts = [leaf.description for leaf in leaves]
+        try:
+            return build_embeddings(embedding_texts, model)
+        except Exception as e:
+            logger.critical(f"Failed to build embeddings: {e}")
+            sys.exit(1)
diff --git a/deployments/advisor-demo-docker/docker-compose.yml b/deployments/advisor-demo-docker/docker-compose.yml
index a70a7a1..c59da1d 100644
--- a/deployments/advisor-demo-docker/docker-compose.yml
+++ b/deployments/advisor-demo-docker/docker-compose.yml
@@ -353,6 +353,8 @@ services:
       OPENAPI_JSON_FILENAME: ${OPENAPI_JSON_FILENAME:-openapi_constrained_with_interactions.json}
       SEMANTIC_SEARCH_SERVICE__GRAPHQL_TIMEOUT__READ: ${SEMANTIC_SEARCH_SERVICE__GRAPHQL_TIMEOUT__READ:-300}
       USE_OPENAPI_DATA_MODEL_FROM_FILE: ${USE_OPENAPI_DATA_MODEL_FROM_FILE:-false}
+    ports:
+      - "8003:8003"
     networks:
       - lif-net-org1
     depends_on:
diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py
index 15fe61b..620bf66 100644
--- a/integration_tests/conftest.py
+++ b/integration_tests/conftest.py
@@ -151,3 +151,11 @@ def require_query_planner(org_ports: OrgPorts, skip_unavailable: bool) -> None:
     if not org_ports.query_planner_url:
         pytest.skip(f"Query Planner not exposed for {org_ports.org_id}")
     check_service_available(org_ports.query_planner_url, skip_unavailable)
+
+
+@pytest.fixture
+def require_semantic_search(skip_unavailable: bool) -> None:
+    """Ensure Semantic Search MCP server is available."""
+    from utils.ports import SEMANTIC_SEARCH_HEALTH_URL
+
+    check_service_available(SEMANTIC_SEARCH_HEALTH_URL, skip_unavailable)
diff --git a/integration_tests/test_06_semantic_search.py b/integration_tests/test_06_semantic_search.py
new file mode 100644
index 0000000..9a8afb1
--- /dev/null
+++ b/integration_tests/test_06_semantic_search.py
@@ -0,0 +1,565 @@
+"""Integration tests for Semantic Search MCP Server.
+
+Verifies that the Semantic Search MCP server is operational and returns
+relevant results based on well-known sample data.
+
+Test users:
+- Core org1 users (native to org1): Matt, Renee, Sarah, Tracy
+- Async-ingested users (from org2, ingested via orchestration): Alan, Jenna
+
+The async users are checked via GraphQL queries to the actual service,
+not just sample data files, to verify orchestration has completed.
+"""
+
+import pytest
+from typing import Any
+
+import httpx
+
+from utils.ports import (
+    SEMANTIC_SEARCH_HEALTH_URL,
+    SEMANTIC_SEARCH_STATUS_URL,
+    SEMANTIC_SEARCH_MCP_URL,
+    get_org_ports,
+)
+from utils.sample_data import SampleDataLoader
+
+
+# GraphQL URL for org1 (semantic search is connected to org1)
+ORG1_GRAPHQL_URL = get_org_ports("org1").graphql_url
+
+
+@pytest.fixture
+def require_graphql_org1(skip_unavailable: bool) -> None:
+    """Ensure org1's GraphQL API is available."""
+    from conftest import check_service_available
+    check_service_available(ORG1_GRAPHQL_URL, skip_unavailable)
+
+
+def query_graphql_by_identifier(
+    identifier: str,
+    identifier_type: str = "SCHOOL_ASSIGNED_NUMBER",
+) -> dict[str, Any] | None:
+    """Query GraphQL for a person by their identifier.
+
+    Args:
+        identifier: The identifier value (e.g., school assigned number)
+        identifier_type: The type of identifier
+
+    Returns:
+        The person data dict if found, None if not found
+    """
+    query = """
+    query GetPerson($filter: PersonInput!) {
+        person(filter: $filter) {
+            Name {
+                firstName
+                lastName
+                informationSourceId
+            }
+            Identifier {
+                identifier
+                identifierType
+            }
+            Proficiency {
+                name
+                description
+                identifier
+            }
+            CredentialAward {
+                identifier
+            }
+            CourseLearningExperience {
+                identifier
+            }
+            EmploymentLearningExperience {
+                identifier
+            }
+        }
+    }
+    """
+
+    variables = {
+        "filter": {
+            "Identifier": [
+                {
+                    "identifier": identifier,
+                    "identifierType": identifier_type,
+                }
+            ]
+        }
+    }
+
+    try:
+        with httpx.Client(timeout=60.0) as client:
+            response = client.post(
+                ORG1_GRAPHQL_URL,
+                json={"query": query, "variables": variables},
+            )
+
+        if response.status_code != 200:
+            return None
+
+        data = response.json()
+        if "errors" in data and data["errors"]:
+            return None
+
+        persons = data.get("data", {}).get("person", [])
+        if persons:
+            return persons[0]
+
+        return None
+
+    except Exception:
+        return None
+
+
+@pytest.mark.layer("semantic_search")
+class TestSemanticSearchHealth:
+    """Tests for Semantic Search MCP Server health and status endpoints."""
+
+    def test_health_endpoint(self, require_semantic_search: None) -> None:
+        """Verify the health endpoint returns OK."""
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(SEMANTIC_SEARCH_HEALTH_URL)
+
+        assert response.status_code == 200
+        assert response.text == "OK"
+
+    def test_schema_status_endpoint(self, require_semantic_search: None) -> None:
+        """Verify the schema status endpoint returns expected metadata."""
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(SEMANTIC_SEARCH_STATUS_URL)
+
+        assert response.status_code == 200
+
+        status = response.json()
+        assert status["initialized"] is True
+        assert status["source"] in ("mdr", "file")
+        assert status["leaf_count"] > 0
+        assert "Person" in status["roots"]
+        assert "Person" in status["filter_models"]
+
+    def test_schema_status_has_embeddings_ready(
+        self, require_semantic_search: None
+    ) -> None:
+        """Verify schema has been loaded with embeddings (leaf_count > 0)."""
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(SEMANTIC_SEARCH_STATUS_URL)
+
+        assert response.status_code == 200
+        status = response.json()
+
+        # Schema should have a reasonable number of leaves for LIF
+        assert status["leaf_count"] >= 50, (
+            f"Expected at least 50 schema leaves, got {status['leaf_count']}"
+        )
+
+
+@pytest.mark.layer("semantic_search")
+class TestSemanticSearchMCPTools:
+    """Tests for Semantic Search MCP tool functionality.
+
+    These tests call the MCP server's tools via the HTTP interface
+    to verify semantic search returns relevant results.
+    """
+
+    def _call_mcp_tool(
+        self,
+        tool_name: str,
+        arguments: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        Call an MCP tool via the HTTP interface.
+
+        The MCP protocol uses JSON-RPC style messaging. For FastMCP servers,
+        tools can be invoked via POST to the /mcp endpoint.
+        """
+        # MCP uses JSON-RPC 2.0 style requests
+        request_body = {
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "tools/call",
+            "params": {
+                "name": tool_name,
+                "arguments": arguments,
+            },
+        }
+
+        with httpx.Client(timeout=120.0) as client:
+            response = client.post(
+                SEMANTIC_SEARCH_MCP_URL,
+                json=request_body,
+                headers={"Content-Type": "application/json"},
+            )
+
+        if response.status_code != 200:
+            return {"error": f"HTTP {response.status_code}: {response.text}"}
+
+        return response.json()
+
+    def _make_person_filter(
+        self,
+        identifier: str,
+        identifier_type: str = "SCHOOL_ASSIGNED_NUMBER",
+    ) -> dict[str, Any]:
+        """Build a person filter for the lif_query tool."""
+        return {
+            "Identifier": [
+                {
+                    "identifier": identifier,
+                    "identifierType": identifier_type,
+                }
+            ]
+        }
+
+
+@pytest.mark.layer("semantic_search")
+class TestSemanticSearchHTTPInterface:
+    """Tests for Semantic Search via direct HTTP calls.
+
+    Since MCP protocol may not be directly testable via simple HTTP,
+    these tests verify the HTTP endpoints work correctly.
+    """
+
+    def test_mcp_endpoint_responds(self, require_semantic_search: None) -> None:
+        """Verify the MCP endpoint is accessible (even if it rejects invalid requests)."""
+        with httpx.Client(timeout=30.0) as client:
+            # Send an empty request to verify the endpoint exists
+            response = client.post(
+                SEMANTIC_SEARCH_MCP_URL,
+                json={},
+                headers={"Content-Type": "application/json"},
+            )
+
+        # MCP endpoint should respond (may return error for invalid request)
+        # but should not be 404 or 503
+        assert response.status_code != 404, "MCP endpoint not found"
+        assert response.status_code != 503, "MCP server not ready"
+
+
+@pytest.mark.layer("semantic_search")
+class TestSemanticSearchDataConsistency:
+    """Tests verifying semantic search has access to all expected test users.
+
+    The semantic search MCP server is connected to org1's GraphQL API.
+    These tests query GraphQL by identifier (from sample data) to verify users.
+
+    Test users:
+    - Core org1 users (native to org1): Matt, Renee, Sarah, Tracy
+    - Async-ingested users (from org2, via orchestration): Alan, Jenna
+
+    Core users must be present; async users warn if missing.
+    """
+
+    # Core users are native to org1
+    CORE_USERS = ["Matt", "Renee", "Sarah", "Tracy"]
+
+    # Async users are ingested from org2 via orchestration
+    ASYNC_USERS = ["Alan", "Jenna"]
+
+    ALL_USERS = CORE_USERS + ASYNC_USERS
+
+    # Source org for async users (where their sample data lives)
+    ASYNC_USER_SOURCE_ORG = "advisor-demo-org2"
+
+    @pytest.fixture
+    def org1_sample_data(self) -> SampleDataLoader:
+        """Load sample data for org1 (core users)."""
+        return SampleDataLoader(
+            org_id="org1",
+            sample_data_key="advisor-demo-org1",
+        )
+
+    @pytest.fixture
+    def org2_sample_data(self) -> SampleDataLoader:
+        """Load sample data for org2 (source of async users Alan, Jenna)."""
+        return SampleDataLoader(
+            org_id="org2",
+            sample_data_key=self.ASYNC_USER_SOURCE_ORG,
+        )
+
+    def _is_async_user(self, user_name: str) -> bool:
+        """Check if user is async-ingested."""
+        return user_name in self.ASYNC_USERS
+
+    def _get_sample_data_for_user(
+        self,
+        user_name: str,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+    ):
+        """Get sample data for a user from appropriate org."""
+        if self._is_async_user(user_name):
+            return org2_sample_data.get_person_by_name(user_name)
+        return org1_sample_data.get_person_by_name(user_name)
+
+    def _get_user_identifier(
+        self,
+        user_name: str,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+    ) -> str | None:
+        """Get SCHOOL_ASSIGNED_NUMBER for a user from sample data."""
+        sample_person = self._get_sample_data_for_user(
+            user_name, org1_sample_data, org2_sample_data
+        )
+        if sample_person:
+            return sample_person.school_assigned_number
+        return None
+
+    def _query_user_in_graphql(
+        self,
+        user_name: str,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+    ) -> dict[str, Any] | None:
+        """Query GraphQL for a user by their identifier from sample data."""
+        identifier = self._get_user_identifier(
+            user_name, org1_sample_data, org2_sample_data
+        )
+        if not identifier:
+            return None
+        return query_graphql_by_identifier(identifier)
+
+    def _handle_missing_user_graphql(self, user_name: str, graphql_data: Any) -> None:
+        """Handle user not found in GraphQL - fail for core, skip for async."""
+        if graphql_data is None:
+            if self._is_async_user(user_name):
+                pytest.skip(f"{user_name} not yet available in GraphQL (async user)")
+            else:
+                pytest.fail(f"{user_name} not found in GraphQL (core user should exist)")
+
+    def test_all_users_in_graphql(
+        self,
+        require_semantic_search: None,
+        require_graphql_org1: None,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+    ) -> None:
+        """Verify all test users are queryable via GraphQL.
+
+        Core users must be present; async users warn if missing.
+        """
+        found_users = set()
+        missing_core = []
+        missing_async = []
+
+        for user_name in self.ALL_USERS:
+            graphql_data = self._query_user_in_graphql(
+                user_name, org1_sample_data, org2_sample_data
+            )
+            if graphql_data:
+                found_users.add(user_name)
+            elif self._is_async_user(user_name):
+                missing_async.append(user_name)
+            else:
+                missing_core.append(user_name)
+
+        # Core users must be present
+        assert not missing_core, f"Core users missing from GraphQL: {missing_core}"
+
+        # Log summary
+        print(f"\n--- GraphQL User Availability ---")
+        print(f"Users found: {sorted(found_users)}")
+        if missing_async:
+            print(f"Async users pending: {missing_async}")
+
+    @pytest.mark.parametrize("user_name", ["Matt", "Renee", "Sarah", "Tracy", "Alan", "Jenna"])
+    def test_user_queryable_in_graphql(
+        self,
+        require_semantic_search: None,
+        require_graphql_org1: None,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+        user_name: str,
+    ) -> None:
+        """Verify each test user is queryable via GraphQL."""
+        graphql_data = self._query_user_in_graphql(
+            user_name, org1_sample_data, org2_sample_data
+        )
+        self._handle_missing_user_graphql(user_name, graphql_data)
+
+        # User found - verify they have a name
+        names = graphql_data.get("Name", [])
+        assert len(names) > 0, f"{user_name} has no Name records in GraphQL"
+
+    @pytest.mark.parametrize("user_name", ["Matt", "Renee", "Sarah", "Tracy", "Alan", "Jenna"])
+    def test_user_has_proficiencies_in_graphql(
+        self,
+        require_semantic_search: None,
+        require_graphql_org1: None,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+        user_name: str,
+    ) -> None:
+        """Verify each user has proficiency data in GraphQL for semantic search."""
+        graphql_data = self._query_user_in_graphql(
+            user_name, org1_sample_data, org2_sample_data
+        )
+        self._handle_missing_user_graphql(user_name, graphql_data)
+
+        proficiencies = graphql_data.get("Proficiency", [])
+        assert len(proficiencies) > 0, (
+            f"{user_name} has no proficiencies in GraphQL - semantic search needs this data"
+        )
+
+    @pytest.mark.parametrize("user_name", ["Matt", "Renee", "Sarah", "Tracy", "Alan", "Jenna"])
+    def test_user_has_identifier_in_graphql(
+        self,
+        require_semantic_search: None,
+        require_graphql_org1: None,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+        user_name: str,
+    ) -> None:
+        """Verify each user has identifier data in GraphQL."""
+        graphql_data = self._query_user_in_graphql(
+            user_name, org1_sample_data, org2_sample_data
+        )
+        self._handle_missing_user_graphql(user_name, graphql_data)
+
+        identifiers = graphql_data.get("Identifier", [])
+        assert len(identifiers) > 0, f"{user_name} has no identifiers in GraphQL"
+
+    @pytest.mark.parametrize("user_name", ["Matt", "Renee", "Sarah", "Tracy", "Alan", "Jenna"])
+    def test_user_proficiencies_have_descriptions(
+        self,
+        require_semantic_search: None,
+        require_graphql_org1: None,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+        user_name: str,
+    ) -> None:
+        """Verify user proficiencies have descriptions (from sample data).
+
+        Users with zero proficiencies in sample data pass - this is valid.
+        Users with proficiencies should have at least some with descriptions.
+        """
+        # Get expected data from sample files
+        sample_person = self._get_sample_data_for_user(
+            user_name, org1_sample_data, org2_sample_data
+        )
+        if sample_person is None:
+            # No sample data for this user - valid for async users
+            return
+
+        proficiencies = sample_person.person.get("Proficiency", [])
+        if not proficiencies:
+            # Zero proficiencies is valid - some users don't have proficiency data
+            return
+
+        # If user has proficiencies, at least some should have descriptions
+        with_descriptions = [p for p in proficiencies if p.get("description")]
+        assert len(with_descriptions) > 0, (
+            f"{user_name} has {len(proficiencies)} proficiencies but none have descriptions"
+        )
+
+    def test_all_users_summary(
+        self,
+        require_semantic_search: None,
+        require_graphql_org1: None,
+        org1_sample_data: SampleDataLoader,
+        org2_sample_data: SampleDataLoader,
+    ) -> None:
+        """Summary showing all 6 test users and their GraphQL availability."""
+        summary = []
+        missing_async = []
+
+        for user_name in self.ALL_USERS:
+            graphql_data = self._query_user_in_graphql(
+                user_name, org1_sample_data, org2_sample_data
+            )
+
+            if graphql_data is None:
+                status = "ASYNC PENDING" if self._is_async_user(user_name) else "MISSING"
+                summary.append(f"{user_name}: {status}")
+                if self._is_async_user(user_name):
+                    missing_async.append(user_name)
+                continue
+
+            proficiencies = graphql_data.get("Proficiency") or []
+            credentials = graphql_data.get("CredentialAward") or []
+            courses = graphql_data.get("CourseLearningExperience") or []
+            employment = graphql_data.get("EmploymentLearningExperience") or []
+
+            names = graphql_data.get("Name") or []
+            full_name = user_name
+            if names:
+                full_name = f"{names[0].get('firstName', '')} {names[0].get('lastName', '')}".strip()
+
+            user_type = "async" if self._is_async_user(user_name) else "core"
+            summary.append(
+                f"{full_name} ({user_type}): "
+                f"{len(proficiencies)} proficiencies, "
+                f"{len(credentials)} credentials, "
+                f"{len(courses)} courses, "
+                f"{len(employment)} employment"
+            )
+
+        # Print summary
+        print("\n--- GraphQL Test User Summary (All 6 Users) ---")
+        for line in summary:
+            print(f"  {line}")
+        if missing_async:
+            print(f"  Async users pending ingestion: {missing_async}")
+
+
+@pytest.mark.layer("semantic_search")
+class TestSchemaSourceIntegrity:
+    """Tests verifying the schema source and integrity."""
+
+    def test_schema_loaded_from_expected_source(
+        self, require_semantic_search: None
+    ) -> None:
+        """Verify schema is loaded from MDR when MDR is available."""
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(SEMANTIC_SEARCH_STATUS_URL)
+
+        assert response.status_code == 200
+        status = response.json()
+
+        # In docker-compose setup, should load from MDR
+        # USE_OPENAPI_DATA_MODEL_FROM_FILE defaults to false
+        if status["source"] == "file":
+            pytest.xfail(
+                "Schema loaded from file instead of MDR. "
+                "This may indicate MDR was not available at startup."
+            )
+
+        assert status["source"] == "mdr", (
+            f"Expected schema from 'mdr', got '{status['source']}'"
+        )
+
+    def test_schema_has_required_filter_models(
+        self, require_semantic_search: None
+    ) -> None:
+        """Verify schema includes required filter models for querying."""
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(SEMANTIC_SEARCH_STATUS_URL)
+
+        assert response.status_code == 200
+        status = response.json()
+
+        filter_models = status.get("filter_models", [])
+        assert "Person" in filter_models, "Person filter model required"
+
+    def test_schema_has_mutation_models(
+        self, require_semantic_search: None
+    ) -> None:
+        """Verify schema includes mutation models if mutations are supported."""
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(SEMANTIC_SEARCH_STATUS_URL)
+
+        assert response.status_code == 200
+        status = response.json()
+
+        # Mutation models may or may not be present depending on schema
+        mutation_models = status.get("mutation_models", [])
+        # Just log for now - presence is optional
+        if not mutation_models:
+            pytest.skip("No mutation models available (this may be expected)")
+
+        assert "Person" in mutation_models, (
+            "If mutation models exist, Person should be included"
+        )
diff --git a/integration_tests/utils/ports.py b/integration_tests/utils/ports.py
index c3eb6a4..ce68a93 100644
--- a/integration_tests/utils/ports.py
+++ b/integration_tests/utils/ports.py
@@ -75,15 +75,26 @@ def query_planner_url(self) -> Optional[str]:
 
 # Ports that are in use by other services (avoid these for testing)
 RESERVED_PORTS = {
+    8003,  # lif-semantic-search-mcp-server
     8004,  # lif-advisor-api
     8005,  # lif-orchestrator-api
     8007,  # lif-translator
     8011,  # lif-example-data-source-rest-api
-    8012,  # Reserved/in use
+    8012,  # lif-mdr-api
     3000,  # dagster-webserver
     5174,  # lif-advisor-app
 }
 
+# Global service URLs (not per-org)
+SEMANTIC_SEARCH_PORT = 8003
+SEMANTIC_SEARCH_BASE_URL = f"http://localhost:{SEMANTIC_SEARCH_PORT}"
+SEMANTIC_SEARCH_HEALTH_URL = f"{SEMANTIC_SEARCH_BASE_URL}/health"
+SEMANTIC_SEARCH_STATUS_URL = f"{SEMANTIC_SEARCH_BASE_URL}/schema/status"
+SEMANTIC_SEARCH_MCP_URL = f"{SEMANTIC_SEARCH_BASE_URL}/mcp"
+
+MDR_PORT = 8012
+MDR_BASE_URL = f"http://localhost:{MDR_PORT}"
+
 
 def get_org_ports(org_id: str) -> OrgPorts:
     """Get port configuration for a specific organization."""
diff --git a/projects/lif_semantic_search_mcp_server/pyproject.toml b/projects/lif_semantic_search_mcp_server/pyproject.toml
index bc55cca..39f68e1 100644
--- a/projects/lif_semantic_search_mcp_server/pyproject.toml
+++ b/projects/lif_semantic_search_mcp_server/pyproject.toml
@@ -30,6 +30,7 @@ packages = ["lif"]
 "../../components/lif/datatypes" = "lif/datatypes"
 "../../components/lif/exceptions" = "lif/exceptions"
 "../../components/lif/openapi_schema_parser" = "lif/openapi_schema_parser"
+"../../components/lif/schema_state_manager" = "lif/schema_state_manager"
 "../../components/lif/semantic_search_service" = "lif/semantic_search_service"
 "../../components/lif/string_utils" = "lif/string_utils"
 "../../components/lif/mdr_client" = "lif/mdr_client"
diff --git a/test/bases/lif/semantic_search_mcp_server/conftest.py b/test/bases/lif/semantic_search_mcp_server/conftest.py
new file mode 100644
index 0000000..a91a2b0
--- /dev/null
+++ b/test/bases/lif/semantic_search_mcp_server/conftest.py
@@ -0,0 +1,23 @@
+"""Pytest configuration for semantic search MCP server tests.
+
+The semantic search MCP server initializes schema at module import time,
+so we need to set environment variables before the test module is imported.
+"""
+
+import os
+import pytest
+
+# Store original value and set env var before any test imports
+_original_value = os.environ.get("USE_OPENAPI_DATA_MODEL_FROM_FILE")
+os.environ["USE_OPENAPI_DATA_MODEL_FROM_FILE"] = "true"
+
+
+@pytest.fixture(scope="module", autouse=True)
+def _cleanup_env_after_module():
+    """Clean up environment variable after this test module completes."""
+    yield
+    # Restore original value after module tests complete
+    if _original_value is None:
+        os.environ.pop("USE_OPENAPI_DATA_MODEL_FROM_FILE", None)
+    else:
+        os.environ["USE_OPENAPI_DATA_MODEL_FROM_FILE"] = _original_value
diff --git a/test/bases/lif/semantic_search_mcp_server/test_core.py b/test/bases/lif/semantic_search_mcp_server/test_core.py
index 70e89a6..325e3ae 100644
--- a/test/bases/lif/semantic_search_mcp_server/test_core.py
+++ b/test/bases/lif/semantic_search_mcp_server/test_core.py
@@ -1,5 +1,82 @@
+"""Tests for the Semantic Search MCP Server module.
+
+Note on test strategy:
+- The MCP server module initializes schema at import time (required for FastMCP
+  tool registration which needs Pydantic models at decoration time).
+- For unit tests, we use USE_OPENAPI_DATA_MODEL_FROM_FILE=true (set in conftest.py)
+  to avoid requiring MDR to be running.
+- MDR-based schema loading is tested at the component level in:
+  test/components/lif/schema_state_manager/test_core.py
+  which mocks the HTTP client and verifies MDR path works correctly.
+- Integration tests (integration_tests/test_06_semantic_search.py) verify the full
+  MDR-based flow with real services.
+"""
+
+# conftest.py sets USE_OPENAPI_DATA_MODEL_FROM_FILE=true before this import
 from lif.semantic_search_mcp_server import core
 
 
-def test_sample():
-    assert core is not None
+class TestMCPServerModule:
+    """Tests verifying the MCP server module loads and initializes correctly."""
+
+    def test_module_loads(self):
+        """Verify the semantic search MCP server module loads successfully."""
+        assert core is not None
+
+    def test_mcp_server_exists(self):
+        """Verify the MCP server instance is created."""
+        assert core.mcp is not None
+        assert core.mcp.name == "LIF-Query-Server"
+
+    def test_filter_model_exists(self):
+        """Verify filter model is generated for the default root type."""
+        assert core.Filter is not None
+
+    def test_state_manager_initialized(self):
+        """Verify the state manager is initialized with schema data."""
+        state = core._state_manager.state
+        assert state is not None
+        assert len(state.leaves) > 0
+        assert len(state.filter_models) > 0
+        assert "Person" in state.filter_models
+
+    def test_state_source_is_file_in_unit_tests(self):
+        """Verify unit tests use file-based schema (MDR tested at component level)."""
+        # This test documents that unit tests use file-based schema.
+        # MDR-based loading is tested in test/components/lif/schema_state_manager/
+        state = core._state_manager.state
+        assert state.source == "file"
+
+
+class TestMCPServerConfiguration:
+    """Tests verifying MCP server configuration."""
+
+    def test_default_root_node_is_person(self):
+        """Verify default root node is Person."""
+        assert core.DEFAULT_ROOT_NODE == "Person"
+
+    def test_config_loaded_from_environment(self):
+        """Verify configuration is loaded."""
+        assert core.CONFIG is not None
+        assert core.CONFIG.root_type_name == "Person"
+
+    def test_lif_query_tool_registered(self):
+        """Verify the lif_query tool is available."""
+        # The tool should be registered with the MCP server
+        assert hasattr(core, "lif_query")
+
+
+class TestMCPServerEndpoints:
+    """Tests for HTTP endpoint handlers (without running server)."""
+
+    def test_health_check_handler_exists(self):
+        """Verify health check endpoint handler is defined."""
+        assert hasattr(core, "health_check")
+
+    def test_schema_status_handler_exists(self):
+        """Verify schema status endpoint handler is defined."""
+        assert hasattr(core, "schema_status")
+
+    def test_schema_refresh_handler_exists(self):
+        """Verify schema refresh endpoint handler is defined."""
+        assert hasattr(core, "schema_refresh")
diff --git a/test/components/lif/schema_state_manager/__init__.py b/test/components/lif/schema_state_manager/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/components/lif/schema_state_manager/test_core.py b/test/components/lif/schema_state_manager/test_core.py
new file mode 100644
index 0000000..7891e07
--- /dev/null
+++ b/test/components/lif/schema_state_manager/test_core.py
@@ -0,0 +1,302 @@
+"""Tests for the SchemaStateManager component."""
+
+import os
+from unittest import mock
+from unittest.mock import MagicMock, patch
+
+import httpx
+import pytest
+
+from lif.lif_schema_config import LIFSchemaConfig
+from lif.mdr_client import MDRClientException, MDRConfigurationError
+from lif.schema_state_manager import SchemaState, SchemaStateManager
+
+
+@pytest.fixture
+def mock_config_file():
+    """Create a test configuration that uses file."""
+    return LIFSchemaConfig(
+        root_type_name="Person",
+        additional_root_types=["Course", "Organization"],
+        mdr_api_url="http://localhost:8012",
+        mdr_timeout_seconds=5,
+        use_openapi_from_file=True,  # Force file
+        semantic_search_model_name="all-MiniLM-L6-v2",
+    )
+
+
+@pytest.fixture
+def mock_config_mdr():
+    """Create a test configuration that uses MDR."""
+    return LIFSchemaConfig(
+        root_type_name="Person",
+        additional_root_types=["Course", "Organization"],
+        mdr_api_url="http://localhost:8012",
+        mdr_timeout_seconds=5,
+        openapi_data_model_id="test-model-123",
+        use_openapi_from_file=False,  # Use MDR
+        semantic_search_model_name="all-MiniLM-L6-v2",
+    )
+
+
+class TestSchemaStateManager:
+    """Tests for SchemaStateManager class."""
+
+    def test_init(self, mock_config_file):
+        """Test SchemaStateManager initialization."""
+        manager = SchemaStateManager(mock_config_file)
+        assert manager._config == mock_config_file
+        assert manager._initialized is False
+        assert manager._state is None
+
+    def test_is_initialized_false_before_init(self, mock_config_file):
+        """Test is_initialized returns False before initialization."""
+        manager = SchemaStateManager(mock_config_file)
+        assert manager.is_initialized is False
+
+    def test_state_raises_before_init(self, mock_config_file):
+        """Test accessing state before initialization raises error."""
+        manager = SchemaStateManager(mock_config_file)
+        with pytest.raises(RuntimeError, match="not initialized"):
+            _ = manager.state
+
+    def test_initialize_sync_from_file(self, mock_config_file):
+        """Test synchronous initialization from file."""
+        manager = SchemaStateManager(mock_config_file)
+        manager.initialize_sync()
+
+        assert manager.is_initialized is True
+        state = manager.state
+        assert state.source == "file"
+        assert len(state.leaves) > 0
+        assert "Person" in state.filter_models
+        assert state.model is not None
+        assert state.embeddings is not None
+
+    def test_initialize_sync_force_file(self, mock_config_mdr):
+        """Test synchronous initialization with force_file=True ignores MDR config."""
+        manager = SchemaStateManager(mock_config_mdr)
+        manager.initialize_sync(force_file=True)
+
+        assert manager.is_initialized is True
+        state = manager.state
+        assert state.source == "file"
+
+    def test_initialize_sync_loads_leaves_by_root(self, mock_config_file):
+        """Test that initialize_sync correctly organizes leaves by root."""
+        manager = SchemaStateManager(mock_config_file)
+        manager.initialize_sync()
+
+        state = manager.state
+        assert "Person" in state.leaves_by_root
+        # Additional roots may or may not load depending on schema
+        assert len(state.leaves_by_root) >= 1
+
+    def test_get_status_before_init(self, mock_config_file):
+        """Test get_status before initialization."""
+        manager = SchemaStateManager(mock_config_file)
+        status = manager.get_status()
+
+        assert status["initialized"] is False
+        assert status["source"] is None
+        assert status["leaf_count"] == 0
+        assert status["roots"] == []
+
+    def test_get_status_after_init(self, mock_config_file):
+        """Test get_status after initialization."""
+        manager = SchemaStateManager(mock_config_file)
+        manager.initialize_sync()
+
+        status = manager.get_status()
+        assert status["initialized"] is True
+        assert status["source"] == "file"
+        assert status["leaf_count"] > 0
+        assert "Person" in status["roots"]
+        assert "Person" in status["filter_models"]
+
+    @pytest.mark.asyncio
+    async def test_initialize_async_from_file(self, mock_config_file):
+        """Test asynchronous initialization from file."""
+        manager = SchemaStateManager(mock_config_file)
+        await manager.initialize()
+
+        assert manager.is_initialized is True
+        state = manager.state
+        assert state.source == "file"
+        assert len(state.leaves) > 0
+
+    @pytest.mark.asyncio
+    async def test_refresh_before_init_fails(self, mock_config_file):
+        """Test refresh before initialization returns error."""
+        manager = SchemaStateManager(mock_config_file)
+        result = await manager.refresh()
+
+        assert result["success"] is False
+        assert "not initialized" in result["error"]
+
+
+class TestSchemaStateManagerMDR:
+    """Tests for SchemaStateManager MDR integration."""
+
+    @patch("lif.mdr_client.core._create_sync_client")
+    def test_initialize_sync_from_mdr_success(self, mock_client_class, mock_config_mdr):
+        """Test successful sync initialization from MDR."""
+        # Load actual file content to return as MDR response
+        from lif.mdr_client import get_openapi_lif_data_model_from_file
+
+        openapi_data = get_openapi_lif_data_model_from_file()
+
+        # Setup mock
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = openapi_data
+        mock_response.raise_for_status.return_value = None
+
+        mock_client = MagicMock()
+        mock_client.__enter__ = MagicMock(return_value=mock_client)
+        mock_client.__exit__ = MagicMock(return_value=False)
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        manager = SchemaStateManager(mock_config_mdr)
+        manager.initialize_sync()
+
+        assert manager.is_initialized is True
+        state = manager.state
+        assert state.source == "mdr"
+        assert len(state.leaves) > 0
+
+    @patch("lif.mdr_client.core._create_sync_client")
+    def test_initialize_sync_mdr_failure_no_fallback(self, mock_client_class, mock_config_mdr):
+        """Test that MDR failure does NOT fall back to file - exits instead."""
+        # Setup mock to raise connection error
+        mock_client = MagicMock()
+        mock_client.__enter__ = MagicMock(return_value=mock_client)
+        mock_client.__exit__ = MagicMock(return_value=False)
+        mock_client.get.side_effect = httpx.ConnectError("Connection refused")
+        mock_client_class.return_value = mock_client
+
+        manager = SchemaStateManager(mock_config_mdr)
+
+        # Should exit because MDR is configured but unavailable (no fallback)
+        with pytest.raises(SystemExit):
+            manager.initialize_sync()
+
+    def test_initialize_sync_mdr_not_configured_fails(self):
+        """Test that missing OPENAPI_DATA_MODEL_ID causes failure when MDR expected."""
+        config = LIFSchemaConfig(
+            root_type_name="Person",
+            use_openapi_from_file=False,  # Expects MDR
+            openapi_data_model_id=None,  # But no model ID!
+            semantic_search_model_name="all-MiniLM-L6-v2",
+        )
+
+        manager = SchemaStateManager(config)
+
+        # Should exit because MDR is expected but not configured
+        with pytest.raises(SystemExit):
+            manager.initialize_sync()
+
+
+class TestSchemaStateManagerRefresh:
+    """Tests for SchemaStateManager refresh functionality."""
+
+    @pytest.mark.asyncio
+    async def test_refresh_updates_state_from_file(self, mock_config_file):
+        """Test that refresh updates the state when using file config."""
+        manager = SchemaStateManager(mock_config_file)
+        manager.initialize_sync()
+
+        initial_leaf_count = len(manager.state.leaves)
+
+        result = await manager.refresh()
+
+        assert result["success"] is True
+        assert result["previous_leaf_count"] == initial_leaf_count
+
+    @pytest.mark.asyncio
+    async def test_refresh_mdr_failure_preserves_state(self, mock_config_mdr):
+        """Test that refresh failure preserves existing state."""
+        # First initialize with mocked successful MDR
+        with patch("lif.mdr_client.core._create_sync_client") as mock_client_class:
+            from lif.mdr_client import get_openapi_lif_data_model_from_file
+
+            openapi_data = get_openapi_lif_data_model_from_file()
+
+            mock_response = MagicMock()
+            mock_response.status_code = 200
+            mock_response.json.return_value = openapi_data
+            mock_response.raise_for_status.return_value = None
+
+            mock_client = MagicMock()
+            mock_client.__enter__ = MagicMock(return_value=mock_client)
+            mock_client.__exit__ = MagicMock(return_value=False)
+            mock_client.get.return_value = mock_response
+            mock_client_class.return_value = mock_client
+
+            manager = SchemaStateManager(mock_config_mdr)
+            manager.initialize_sync()
+
+        initial_leaf_count = len(manager.state.leaves)
+        initial_source = manager.state.source
+
+        # Now make refresh fail
+        with patch("lif.mdr_client.core._create_sync_client") as mock_client_class:
+            mock_client = MagicMock()
+            mock_client.__enter__ = MagicMock(return_value=mock_client)
+            mock_client.__exit__ = MagicMock(return_value=False)
+            mock_client.get.side_effect = httpx.ConnectError("Connection refused")
+            mock_client_class.return_value = mock_client
+
+            result = await manager.refresh()
+
+            # Refresh should fail
+            assert result["success"] is False
+            assert "error" in result
+
+            # But state should be preserved
+            assert len(manager.state.leaves) == initial_leaf_count
+            assert manager.state.source == initial_source
+
+
+class TestSchemaState:
+    """Tests for SchemaState dataclass."""
+
+    def test_schema_state_creation(self, mock_config_file):
+        """Test SchemaState can be created with required fields."""
+        import numpy as np
+        from sentence_transformers import SentenceTransformer
+
+        state = SchemaState(
+            openapi={"openapi": "3.0.0"},
+            leaves=[],
+            leaves_by_root={},
+            filter_models={},
+            mutation_models={},
+            embeddings=np.array([]),
+            model=MagicMock(spec=SentenceTransformer),
+            source="file",
+        )
+
+        assert state.openapi == {"openapi": "3.0.0"}
+        assert state.source == "file"
+        assert state.leaves == []
+
+
+class TestSchemaStateManagerThreadSafety:
+    """Tests for thread safety of SchemaStateManager."""
+
+    def test_state_access_is_thread_safe(self, mock_config_file):
+        """Test that state access uses locking."""
+        manager = SchemaStateManager(mock_config_file)
+        manager.initialize_sync()
+
+        # Access state from multiple "threads" (simulated)
+        # This test just verifies the lock exists and is used
+        assert manager._lock is not None
+
+        # Multiple accesses should work without issues
+        for _ in range(10):
+            _ = manager.state
+            _ = manager.is_initialized
+            _ = manager.get_status()