Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
7d09b57
fix: ai gateway mutating webhook should default failurePolicy to Fail…
googs1025 Nov 5, 2025
73aa427
docs: fix example in examples/inference-pool (#1488)
googs1025 Nov 5, 2025
bb8ad26
fix: finish reason should be tool calls when the model responded with…
hustxiayang Nov 5, 2025
a0e4c0e
refactor: decouples backendauth & headermutator from extproc (#1491)
mathetake Nov 5, 2025
bfb8b54
fix: use the right namespace for DNS names in the default webhook cer…
nacx Nov 6, 2025
c092761
cli: align on apigroup for backendtlspolicy (#1500)
nacx Nov 6, 2025
9fe1fd7
fix: mcp route to gateway cross namespace reference (#1489)
Hritik003 Nov 6, 2025
8d406a1
chore(deps): bump github.com/containerd/containerd from 1.7.27 to 1.7…
dependabot[bot] Nov 6, 2025
a427796
test: deflake TestRerank_RecordRequestCompletion (#1505)
mathetake Nov 6, 2025
98b62c5
feat: support JSON schema dereferencing in tool-param for GCP Anthrop…
sukumargaonkar Nov 7, 2025
1d23373
revert: "refactor: scaffolds anthropic messages structs for tracing" …
mathetake Nov 7, 2025
ad5f75e
feat: enhance openAIToolsToGeminiTools to handle response JSON schema…
sukumargaonkar Nov 7, 2025
908fae3
site: v0.4 release notes (#1456)
missBerg Nov 8, 2025
e584dff
chore: align otel embeddings with final openinference spec (#1519)
codefromthecrypt Nov 10, 2025
5cd5f1b
fix: updates image generation request tracing to match latest OpenInf…
codefromthecrypt Nov 10, 2025
4822f8a
fix: fix tool call index of stream chunks for gemini models when the …
hustxiayang Nov 10, 2025
5b583ad
docs: add the versioned documentation for v0.4.x (#1517)
mathetake Nov 10, 2025
bdbf263
docs: fix links to external types (#1525)
nacx Nov 11, 2025
fb7d6f0
docs: update envoy AI Gateway helm chart version to v1.5.0 (#1528)
deepakdeore2004 Nov 11, 2025
21cc0e4
fix: handle GCP global region (#1480)
xiaolin593 Nov 11, 2025
99dc630
chore: remove nip.io now that EG supports localhost backends in cli (…
nacx Nov 11, 2025
3586a6a
docs: parameterize common variables (#1531)
nacx Nov 11, 2025
63943f0
chore: use a meaningful version in the compiled binaries (#1532)
nacx Nov 11, 2025
9e6597b
fix: encode backend name in MCP resource URIs (#1535)
nacx Nov 11, 2025
d049ce0
feat: modify the manifest to support multi-instance deployment. (#1511)
Hyzhou Nov 12, 2025
cc1ecad
mcp: proxy stdio servers in standalone mode (#1479)
nacx Nov 12, 2025
a09da27
feat: implement otel tracing for cohere v2 rerank endpoint (#1539)
AyushSawant18588 Nov 13, 2025
6a67c3c
fix: fix stream parallel tool call index tests (#1537)
hustxiayang Nov 13, 2025
05acc5f
feat: route-level body mutation for top-level field (#1492)
xiaolin593 Nov 13, 2025
3898b06
refactor: decouples translators from extproc (#1507)
mathetake Nov 13, 2025
0fb4728
refactor: move the non-extproc packages out of internal/extproc (#1540)
mathetake Nov 13, 2025
6f0c41b
ci: use EG v1.6.0 (#1541)
mathetake Nov 13, 2025
104ee25
chore: support hostname in aigatewayroute
xianml Nov 14, 2025
19f014b
fix: ext_proc insert bug
xianml Nov 17, 2025
2c3da50
chore(deps): bump js-yaml
dependabot[bot] Nov 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions .github/workflows/build_and_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,8 @@ jobs:
include:
- name: latest
envoy_gateway_version: v0.0.0-latest
- name: v1.6.0-rc.1
envoy_gateway_version: v1.6.0-rc.1
- name: v1.5.0
envoy_gateway_version: v1.5.0
- name: v1.6.0
envoy_gateway_version: v1.6.0
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
Expand Down Expand Up @@ -246,7 +244,7 @@ jobs:
- run: make test-e2e-upgrade
env:
# We only need to test the upgrade from the latest stable version of EG.
EG_VERSION: v1.6.0-rc.1
EG_VERSION: v1.6.0
K8S_VERSION: ${{ matrix.k8s-version }}

test_e2e_inference_extension:
Expand Down Expand Up @@ -275,7 +273,7 @@ jobs:
- uses: docker/setup-buildx-action@v3
- run: make test-e2e-inference-extension
env:
EG_VERSION: v1.6.0-rc.1
EG_VERSION: v1.6.0

test_e2e_namespaced:
needs: changes
Expand Down Expand Up @@ -305,7 +303,7 @@ jobs:
env:
# We only need to test with the latest stable version of EG, since these e2e tests
# do not depend on the EG version.
EG_VERSION: v1.6.0-rc.1
EG_VERSION: v1.6.0

test_e2e_aigw:
needs: changes
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ GO_TOOL := go tool -modfile=tools/go.mod
COMMANDS := controller extproc

# This is the package that contains the version information for the build.
GIT_COMMIT:=$(shell git rev-parse HEAD)
VERSION_STRING:=$(shell git describe --tags --long)
VERSION_PACKAGE := github.com/envoyproxy/ai-gateway/internal/version
GO_LDFLAGS += -X $(VERSION_PACKAGE).Version=$(GIT_COMMIT)
GO_LDFLAGS += -X $(VERSION_PACKAGE).version=$(VERSION_STRING)

# This is the directory where the built artifacts will be placed.
OUTPUT_DIR ?= out
Expand Down
86 changes: 86 additions & 0 deletions api/v1alpha1/ai_gateway_route.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,15 @@ type AIGatewayRouteRuleBackendRef struct {
// +optional
HeaderMutation *HTTPHeaderMutation `json:"headerMutation,omitempty"`

// BodyMutation defines the request body mutation to be applied to this backend.
// This allows modification of JSON fields in the request body before sending to the backend.
// When both route-level and backend-level BodyMutation are defined,
// route-level takes precedence over backend-level for conflicting operations.
// This field is ignored when referencing InferencePool resources.
//
// +optional
BodyMutation *HTTPBodyMutation `json:"bodyMutation,omitempty"`

// Weight is the weight of the backend. This is exactly the same as the weight in
// the BackendRef in the Gateway API. See for the details:
// https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef
Expand Down Expand Up @@ -398,3 +407,80 @@ type AIGatewayFilterConfigExternalProcessor struct {
// +optional
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
}

// HTTPBodyMutation defines the mutations applied to the JSON fields of an HTTP request body
// before the request is sent to the backend. Only top-level fields are currently supported.
type HTTPBodyMutation struct {
// Set overwrites/adds the request body with the given JSON field (name, value)
// before sending to the backend. Only top-level fields are currently supported.
// Entries are keyed by their path, and at most 16 entries are allowed.
//
// Input:
// {
// "model": "gpt-4",
// "service_tier": "default"
// }
//
// Config:
// set:
// - path: "service_tier"
// value: "scale"
//
// Output:
// {
// "model": "gpt-4",
// "service_tier": "scale"
// }
//
// +optional
// +listType=map
// +listMapKey=path
// +kubebuilder:validation:MaxItems=16
Set []HTTPBodyField `json:"set,omitempty"`

// Remove the given JSON field(s) from the HTTP request body before sending to the backend.
// The value of Remove is a list of top-level field names to remove.
// The list is treated as a set, and at most 16 names are allowed.
//
// Input:
// {
// "model": "gpt-4",
// "service_tier": "default",
// "internal_flag": true
// }
//
// Config:
// remove: ["service_tier", "internal_flag"]
//
// Output:
// {
// "model": "gpt-4"
// }
//
// +optional
// +listType=set
// +kubebuilder:validation:MaxItems=16
Remove []string `json:"remove,omitempty"`
}

// HTTPBodyField represents a single top-level JSON field name and the JSON-encoded value
// to set for it during body mutation.
type HTTPBodyField struct {
// Path is the top-level field name to set in the request body. It must not be empty.
// Examples: "service_tier", "max_tokens", "temperature"
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
Path string `json:"path"`

// Value is the JSON value to set at the specified field, written as JSON-encoded text.
// This can be any valid JSON value: string, number, boolean, object, array, or null.
// The value will be parsed as JSON and inserted at the specified field, so a JSON string
// must carry its own escaped double quotes, as in the first example below.
//
// Examples:
// - "\"scale\"" (string)
// - "42" (number)
// - "true" (boolean)
// - "{\"key\": \"value\"}" (object)
// - "[1, 2, 3]" (array)
// - "null" (null)
//
// +kubebuilder:validation:Required
Value string `json:"value"`
}
5 changes: 5 additions & 0 deletions api/v1alpha1/ai_service_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ type AIServiceBackendSpec struct {
// +optional
HeaderMutation *HTTPHeaderMutation `json:"headerMutation,omitempty"`

// BodyMutation defines the mutation of HTTP request body JSON fields that will be applied to the request
// before sending it to the backend.
// +optional
BodyMutation *HTTPBodyMutation `json:"bodyMutation,omitempty"`

// TODO: maybe add backend-level LLMRequestCost configuration that overrides the AIGatewayRoute-level LLMRequestCost.
// That may be useful for the backend that has a different cost calculation logic.
}
Expand Down
50 changes: 50 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cmd/aigw/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Quick Start

[docker-compose.yml](docker-compose.yaml) builds and runs `aigw`, targeting
[docker-compose.yaml](docker-compose.yaml) builds and runs `aigw`, targeting
[Ollama][ollama] for OpenAI chat completion requests on port 1975.

- **aigw** (port 1975): Envoy AI Gateway CLI (standalone mode)
Expand Down
11 changes: 1 addition & 10 deletions cmd/aigw/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,7 @@ func TestReadConfig(t *testing.T) {
"OPENAI_BASE_URL": "http://localhost:11434/v1",
},
mcpServers: testMcpServers,
expectHostnames: []string{"127.0.0.1.nip.io", "dreamtap.xyz"},
expectPort: "11434",
},
{
name: "generates config from OpenAI env vars for localhost",
envVars: map[string]string{
"OPENAI_API_KEY": "test-key",
"OPENAI_BASE_URL": "http://localhost:11434/v1",
},
expectHostnames: []string{"127.0.0.1.nip.io"},
expectHostnames: []string{"localhost", "dreamtap.xyz"},
expectPort: "11434",
},
{
Expand Down
2 changes: 1 addition & 1 deletion cmd/aigw/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ func doMain(ctx context.Context, stdout, stderr io.Writer, args []string, exitFn

switch parsed.Command() {
case "version":
_, _ = fmt.Fprintf(stdout, "Envoy AI Gateway CLI: %s\n", version.Version)
_, _ = fmt.Fprintf(stdout, "Envoy AI Gateway CLI: %s\n", version.Parse())
case "run", "run <path>":
err = rf(ctx, c.Run, c.Run.runOpts, stdout, stderr)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions cmd/aigw/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ func run(ctx context.Context, c cmdRun, o *runOpts, stdout, stderr io.Writer) er
adminPort: c.AdminPort,
extProcLauncher: o.extProcLauncher,
}
// If any of the configured MCP servers is using stdio, set up the streamable HTTP proxies for them
if err = proxyStdioMCPServers(ctx, debugLogger, c.mcpConfig); err != nil {
return fmt.Errorf("failed to proxy stdio for MCP servers: %w", err)
}
aiGatewayResourcesYaml, err := readConfig(o.configPath, c.mcpConfig, c.Debug)
if err != nil {
return err
Expand Down
Loading