From e1d9724f50fb6d8f9e02c739c855e0b3d4ca383f Mon Sep 17 00:00:00 2001
From: "pr-test1[bot]" <226697212+pr-test1[bot]@users.noreply.github.com>
Date: Tue, 16 Sep 2025 14:51:10 +0000
Subject: [PATCH 1/2] docs: sync mint.json with latest code

---
 mint.json | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/mint.json b/mint.json
index ccb1843..2747043 100644
--- a/mint.json
+++ b/mint.json
@@ -38,9 +38,13 @@
     "mode": "auto"
   },
   "navigation": [
-    { 
+    {
      "group": "Getting Started",
-      "pages": ["about", "installation", "quick-start"]
+      "pages": [
+        "about",
+        "installation",
+        "quick-start"
+      ]
     },
     {
       "group": "Tutorials",
@@ -64,7 +68,8 @@
       "pages": [
         "concepts/deployment",
         "concepts/models",
-        "concepts/contributing"
+        "concepts/contributing",
+        "concepts/openai-proxy"
       ]
     }
   ],
@@ -77,17 +82,29 @@
       {
         "title": "Documentation",
         "links": [
-          { "label": "Getting Started", "url": "/" },
-          { "label": "Contributing", "url": "/contributing" }
+          {
+            "label": "Getting Started",
+            "url": "/"
+          },
+          {
+            "label": "Contributing",
+            "url": "/contributing"
+          }
         ]
       },
       {
         "title": "Resources",
         "links": [
-          { "label": "GitHub", "url": "https://github.com/slashml/magemaker" },
-          { "label": "Support", "url": "mailto:support@slashml.com" }
+          {
+            "label": "GitHub",
+            "url": "https://github.com/slashml/magemaker"
+          },
+          {
+            "label": "Support",
+            "url": "mailto:support@slashml.com"
+          }
         ]
       }
     ]
   }
 }
-}
\ No newline at end of file
+}

From a53a22df4999709eb984cc3f1fba014802815437 Mon Sep 17 00:00:00 2001
From: "pr-test1[bot]" <226697212+pr-test1[bot]@users.noreply.github.com>
Date: Tue, 16 Sep 2025 14:51:11 +0000
Subject: [PATCH 2/2] docs: create concepts/openai-proxy.mdx

---
 concepts/openai-proxy.mdx | 139 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 concepts/openai-proxy.mdx

diff --git a/concepts/openai-proxy.mdx b/concepts/openai-proxy.mdx
new file mode 100644
index 0000000..5212764
--- /dev/null
+++ b/concepts/openai-proxy.mdx
@@ -0,0 +1,139 @@
---
title: OpenAI-Compatible Proxy API
description: Use Magemaker endpoints through an OpenAI-style REST interface
---

## Overview

Magemaker ships with a lightweight FastAPI server (`server.py`) that lets you interact with any SageMaker endpoint you have deployed **as if it were an OpenAI model**.
This is useful when you want to:

- Use your own hosted models as a drop-in replacement for `openai` SDK calls
- Integrate Magemaker deployments with existing tooling that expects an OpenAI endpoint (e.g. LangChain, LlamaIndex, ChatGPT-style UIs)

The server exposes two kinds of routes:

1. **Utility routes** to inspect and query individual endpoints
2. An **OpenAI-compatible `/chat/completions` route**

> **Note**
> The proxy currently supports **chat-completion style models** only. Text-completion and other modalities will be added in future releases.

---

## Quick Start

1. Ensure you have already **deployed at least one SageMaker endpoint** with Magemaker.
2. Set the required AWS region variable (Magemaker does this automatically when you call the CLI):

```bash
export AWS_REGION_NAME=<your-region>   # e.g. us-east-1
```

3. Run the server:

```bash
python -m magemaker.server   # or `python server.py` from the repo root
```

The server will start on `http://localhost:8000` by default.

---

## REST Endpoints

### 1. `GET /endpoint/{endpoint_name}`
Returns metadata for a specific SageMaker endpoint.
```bash
curl http://localhost:8000/endpoint/my-bert-endpoint
```

Response example:
```json
{
  "EndpointName": "my-bert-endpoint",
  "EndpointStatus": "InService",
  "CreationTime": "2024-05-28T12:34:56Z",
  ...
}
```

### 2. `POST /endpoint/{endpoint_name}/query`
Runs an **arbitrary payload** against the specified endpoint.

Request body (JSON):
```json
{
  "inputs": "Hello, how are you?",
  "context": ""
}
```

The `context` field is optional and defaults to an empty string.

Example cURL:
```bash
curl -X POST \
  -H "Content-Type: application/json" \
  -d '{"inputs":"Hello there!"}' \
  http://localhost:8000/endpoint/my-bert-endpoint/query
```

### 3. `POST /chat/completions`
OpenAI-compatible route that dispatches the request to the **first endpoint that was deployed with the requested model id**.

Minimal request example:
```json
{
  "model": "meta-llama/Meta-Llama-3-8B-Instruct",
  "messages": [{"role": "user", "content": "Tell me a joke."}]
}
```

Using the `openai` Python SDK (v1+), just point the client at the proxy's base URL:
```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000",
    api_key="sk-ignored",  # not used by the proxy, but the client requires a value
)

resp = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(resp.choices[0].message.content)
```

---

## Environment Variables

The server relies on the same `.env` file generated by Magemaker, plus one extra variable:

| Variable | Description | Required | Default |
| -------- | ----------- | -------- | ------- |
| `AWS_REGION_NAME` | AWS region where your SageMaker endpoints live | ✅ | – |

All other variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, etc.) are read automatically through `dotenv`, just as they are for the CLI.

---

## Error Handling

- The server returns a `404 Not Deployed` error if the requested model has **no active endpoints** (see the example at the end of this page).
- Querying an endpoint returns the raw error from SageMaker if the invocation fails.

---

## Production Deployment Tips

1. **Behind a Reverse Proxy** – Run FastAPI with Uvicorn or Gunicorn behind Nginx for TLS termination.
2. **Authentication** – Add an API key or OAuth middleware before exposing the service publicly (see the sketch at the end of this page).
3. **Autoscaling** – Combine with Magemaker's upcoming autoscaling feature to scale SageMaker instances based on traffic.

---

## Roadmap

- Support for non-chat completion routes (`/completions`, `/embeddings`)
- Multi-model routing strategies (e.g. round-robin, latency-based)
- Streaming responses
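
---

## Example: Handling a Model That Is Not Deployed

As a quick illustration of the error behaviour described above, here is a minimal sketch that calls `/chat/completions` with plain `requests` instead of the `openai` SDK. The exact shape of the success and error bodies is an assumption; check `server.py` for what your version actually returns.

```python
import requests

payload = {
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "messages": [{"role": "user", "content": "Hello!"}],
}

# POST to the OpenAI-compatible route exposed by the proxy
resp = requests.post(
    "http://localhost:8000/chat/completions",
    json=payload,
    timeout=60,
)

if resp.status_code == 404:
    # No active endpoint was deployed with the requested model id
    print("Model not deployed - deploy it with Magemaker first.")
else:
    resp.raise_for_status()
    # Assumes an OpenAI-style response body
    print(resp.json()["choices"][0]["message"]["content"])
```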
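
---

## Example: Minimal API-Key Middleware

Production tip 2 above recommends adding authentication before exposing the proxy publicly. The sketch below shows one way to do that with standard FastAPI middleware. It assumes `server.py` exposes a FastAPI instance named `app` and introduces a hypothetical `PROXY_API_KEY` environment variable; neither is part of Magemaker itself.

```python
import os

from fastapi import Request
from fastapi.responses import JSONResponse

from server import app  # assumption: server.py defines `app = FastAPI()`

EXPECTED_KEY = os.environ.get("PROXY_API_KEY", "")

@app.middleware("http")
async def require_api_key(request: Request, call_next):
    # Reject any request that does not carry the expected bearer token
    if request.headers.get("Authorization") != f"Bearer {EXPECTED_KEY}":
        return JSONResponse(
            status_code=401,
            content={"error": "invalid or missing API key"},
        )
    return await call_next(request)
```

Save this as a separate module (for example `proxy_auth.py`, a name chosen here for illustration) and point Uvicorn at it with `uvicorn proxy_auth:app` instead of launching `server.py` directly, so the middleware is registered before the server starts.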