Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/llm-d-routing-sidecar/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func main() {
if *connector == proxy.ConnectorNIXLV1 {
logger.Info("Warning: nixl connector is deprecated and will be removed in a future release in favor of --connector=nixlv2")
}
logger.Info("p/d connector validated", "connector", connector)
logger.Info("p/d connector validated", "connector", *connector)

// Determine namespace and pool name for SSRF protection
if *enableSSRFProtection {
Expand Down
12 changes: 8 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ require (
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0
go.opentelemetry.io/otel v1.36.0
k8s.io/apimachinery v0.31.3
k8s.io/client-go v0.31.3
k8s.io/klog/v2 v2.130.1
Expand All @@ -17,7 +19,9 @@ require (
require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
Expand All @@ -29,19 +33,19 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.10.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/otel/metric v1.36.0 // indirect
go.opentelemetry.io/otel/trace v1.36.0 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.31.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
22 changes: 17 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emicklei/go-restful/v3 v3.12.0 h1:y2DdzBAURM29NFF94q6RaY4vjIH1rtwDapwQtU84iWk=
github.com/emicklei/go-restful/v3 v3.12.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
Expand Down Expand Up @@ -42,11 +47,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
Expand Down Expand Up @@ -79,6 +81,16 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 h1:DheMAlT6POBP+gh8RUH19EOTnQIor5QE0uSRPtzCpSw=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0/go.mod h1:wZcGmeVO9nzP67aYSLDqXNWK87EZWhi7JWj1v7ZXf94=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E=
go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE=
go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs=
go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w=
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
Expand All @@ -100,8 +112,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
16 changes: 15 additions & 1 deletion internal/proxy/chat_completions.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ package proxy

import (
"net/http"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
)

var (
Expand All @@ -29,15 +32,24 @@ var (
)

func (s *Server) chatCompletionsHandler(w http.ResponseWriter, r *http.Request) {
prefillPodHostPort := r.Header.Get(requestHeaderPrefillHostPort)
tracer := otel.GetTracerProvider().Tracer("llm-d-routing-sidecar")
ctx, span := tracer.Start(r.Context(), "routing_proxy.request")
defer span.End()

span.SetAttributes(
attribute.String("llm_d.proxy.connector", s.config.Connector),
)

prefillPodHostPort := r.Header.Get(requestHeaderPrefillHostPort)
if prefillPodHostPort == "" {
// backward compatible behavior: to remove in next release
prefillPodHostPort = r.Header.Get(requestHeaderPrefillURL)
}

if prefillPodHostPort == "" {
s.logger.V(4).Info("skip disaggregated prefill")
// Update the request context for downstream handlers
r = r.WithContext(ctx)
s.decoderProxy.ServeHTTP(w, r)
return
}
Expand All @@ -54,5 +66,7 @@ func (s *Server) chatCompletionsHandler(w http.ResponseWriter, r *http.Request)
}

s.logger.V(4).Info("SSRF protection: prefill target allowed", "target", prefillPodHostPort)

r = r.WithContext(ctx)
s.runConnectorProtocol(w, r, prefillPodHostPort)
}
21 changes: 17 additions & 4 deletions internal/proxy/connector_nixlv2.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,20 @@
"strings"

"github.com/google/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
)

func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefillPodHostPort string) {
tracer := otel.GetTracerProvider().Tracer("llm-d-routing-sidecar")
ctx, span := tracer.Start(r.Context(), "routing_proxy.nixl_v2_protocol")
defer span.End()

span.SetAttributes(
attribute.String("llm_d.proxy.connector", "nixlv2"),
attribute.String("llm_d.prefill.target_host", prefillPodHostPort),
)

Check failure on line 39 in internal/proxy/connector_nixlv2.go

View workflow job for this annotation

GitHub Actions / lint-and-test

File is not properly formatted (gofmt)
s.logger.V(4).Info("running NIXL protocol V2", "url", prefillPodHostPort)

// Read request body
Expand Down Expand Up @@ -57,10 +68,13 @@
uuidStr := uuid.String()

// Prefill Stage
_, prefillSpan := tracer.Start(ctx, "routing_proxy.nixl_v2_prefill")
prefillSpan.SetAttributes(
attribute.String("llm_d.nixl.stage", "prefill"),
)

// 1. Prepare prefill request
ctx := r.Context()
preq := r.Clone(ctx)
preq := r.Clone(r.Context())

preq.Header.Add(requestHeaderRequestID, uuidStr)

Expand Down Expand Up @@ -131,7 +145,7 @@
// Decode Stage

// 1. Prepare decode request
dreq := r.Clone(ctx)
dreq := r.Clone(r.Context())

dreq.Header.Add(requestHeaderRequestID, uuidStr)

Expand Down Expand Up @@ -159,7 +173,6 @@
dreq.ContentLength = int64(len(dbody))

// 2. Forward to local decoder.

s.logger.V(5).Info("sending request to decoder", "body", string(dbody))
s.decoderProxy.ServeHTTP(w, dreq)
}
6 changes: 5 additions & 1 deletion internal/proxy/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

"github.com/go-logr/logr"
lru "github.com/hashicorp/golang-lru/v2"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"k8s.io/klog/v2"
)

Expand Down Expand Up @@ -165,8 +166,11 @@ func (s *Server) Start(ctx context.Context) error {
// Configure handlers
mux := s.createRoutes()

// Wrap the server with OpenTelemetry HTTP instrumentation
handler := otelhttp.NewHandler(mux, "routing-proxy-server")

server := &http.Server{
Handler: mux,
Handler: handler,
// No ReadTimeout/WriteTimeout for LLM inference - can take hours for large contexts
IdleTimeout: 300 * time.Second, // 5 minutes for keep-alive connections
ReadHeaderTimeout: 30 * time.Second, // Reasonable for headers only
Expand Down
Loading