Skip to content

Commit c1ce80f

Browse files
committed
Merge master and, for the time being, remove Kubvernor from the guides
2 parents 2847818 + 32970c0 commit c1ce80f

File tree

207 files changed

+8582
-6632
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

207 files changed

+8582
-6632
lines changed

.github/ISSUE_TEMPLATE/new-release.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ This document defines the process for releasing Gateway API Inference Extension.
4141
tag has been published, set the appropriate environment variable or update the script. For example:
4242

4343
```shell
44-
export VLLM_GPU=0.9.2
44+
export VLLM_GPU=0.8.5
4545
export VLLM_CPU=0.9.3
4646
export VLLM_SIM=0.3.0
4747
```

.github/workflows/kal.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,5 @@ jobs:
2020
persist-credentials: false
2121
- name: Set up Go
2222
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # tag=v5.5.0
23-
- name: Install Golang CI Lint
24-
run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@<version>  # pinned version lost in extraction; "[email protected]" was an e-mail-obfuscation artifact
25-
- name: Build KAL
26-
run: golangci-lint custom
27-
- name: run api linter
28-
run: ./bin/golangci-kube-api-linter run -c ./.golangci-kal.yml ./...
23+
- name: Run API Linter
24+
run: make api-lint
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name: Label non-main PRs
2+
3+
on:
4+
pull_request:
5+
types: [opened, edited, synchronize, reopened]
6+
7+
jobs:
8+
add-label:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- name: Add labels when base branch is not main
12+
if: github.event.pull_request.base.ref != 'main'
13+
uses: actions-ecosystem/action-add-labels@v1
14+
with:
15+
github_token: ${{ secrets.GITHUB_TOKEN }}
16+
labels: |
17+
do-not-merge/hold
18+
do-not-merge/cherry-pick-not-approved

Makefile

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,9 @@ test: generate fmt vet envtest image-build verify-crds ## Run tests.
138138

139139
.PHONY: test-unit
140140
test-unit: ## Run unit tests.
141-
CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -race -coverprofile cover.out
141+
CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -race -coverprofile cover.out; \
142+
go tool cover -func=cover.out; \
143+
rm cover.out
142144

143145
.PHONY: test-integration
144146
test-integration: envtest ## Run integration tests.
@@ -160,8 +162,12 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
160162
ci-lint: golangci-lint
161163
$(GOLANGCI_LINT) run --timeout 15m0s
162164

165+
.PHONY: api-lint
166+
api-lint: golangci-api-lint
167+
$(GOLANGCI_API_LINT) run -c .golangci-kal.yml --timeout 15m0s ./...
168+
163169
.PHONY: verify
164-
verify: vet fmt-verify generate ci-lint verify-all
170+
verify: vet fmt-verify generate ci-lint api-lint verify-all
165171
git --no-pager diff --exit-code config api client-go
166172

167173
.PHONY: verify-crds
@@ -366,6 +372,7 @@ CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
366372
ENVTEST ?= $(LOCALBIN)/setup-envtest
367373
CRD_REF_DOCS ?= $(LOCALBIN)/crd-ref-docs
368374
GOLANGCI_LINT = $(LOCALBIN)/golangci-lint
375+
GOLANGCI_API_LINT = $(LOCALBIN)/golangci-kube-api-linter
369376
HELM = $(PROJECT_DIR)/bin/helm
370377
YQ = $(PROJECT_DIR)/bin/yq
371378
KUBECTL_VALIDATE = $(PROJECT_DIR)/bin/kubectl-validate
@@ -407,6 +414,11 @@ golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
407414
$(GOLANGCI_LINT): $(LOCALBIN)
408415
$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))
409416

417+
.PHONY: golangci-api-lint
418+
golangci-api-lint: golangci-lint $(GOLANGCI_API_LINT) ## Download golangci-lint locally if necessary before building KAL
419+
$(GOLANGCI_API_LINT):
420+
$(GOLANGCI_LINT) custom
421+
410422
.PHONY: yq
411423
yq: ## Download yq locally if necessary.
412424
GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install github.com/mikefarah/yq/v4@$(YQ_VERSION)

OWNERS_ALIASES

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ aliases:
99
- kfswain
1010

1111
gateway-api-inference-extension-reviewers:
12+
- elevran
1213
- liu-cong
1314
- robscott
14-
- shaneutt

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ The following specific terms to this project:
2929
performance, availability and capabilities to optimize routing. Includes
3030
things like [Prefix Cache] status or [LoRA Adapters] availability.
3131
- **Endpoint Picker(EPP)**: An implementation of an `Inference Scheduler` with additional Routing, Flow, and Request Control layers to allow for sophisticated routing strategies. Additional info on the architecture of the EPP [here](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/docs/proposals/0683-epp-architecture-proposal).
32-
32+
- **Body Based Router(BBR)**: An optional additional [ext-proc](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_proc_filter) server that parses the HTTP body of the inference prompt message and extracts information (currently the model name for OpenAI API-style messages) into a format that the gateway can then use for routing purposes. Additional info is available [here](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/bbr/README.md) and in the documentation [user guides](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
33+
3334

3435
The following are key industry terms that are important to understand for
3536
this project:

api/v1/inferencepool_types.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ type Port struct {
9696

9797
// EndpointPickerRef specifies a reference to an Endpoint Picker extension and its
9898
// associated configuration.
99+
// +kubebuilder:validation:XValidation:rule="self.kind != 'Service' || has(self.port)",message="port is required when kind is 'Service' or unspecified (defaults to 'Service')"
99100
type EndpointPickerRef struct {
100101
// Group is the group of the referent API object. When unspecified, the default value
101102
// is "", representing the Core API group.
@@ -118,29 +119,29 @@ type EndpointPickerRef struct {
118119
//
119120
// +optional
120121
// +kubebuilder:default=Service
121-
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct.
122-
Kind *Kind `json:"kind,omitempty"`
122+
Kind Kind `json:"kind,omitempty"`
123123

124124
// Name is the name of the referent API object.
125125
//
126126
// +required
127127
Name ObjectName `json:"name,omitempty"`
128128

129-
// PortNumber is the port number of the Endpoint Picker extension service. When unspecified,
130-
// implementations SHOULD infer a default value of 9002 when the kind field is "Service" or
131-
// unspecified (defaults to "Service").
129+
// Port is the port of the Endpoint Picker extension service.
130+
//
131+
// Port is required when the referent is a Kubernetes Service. In this
132+
// case, the port number is the service port number, not the target port.
133+
// For other resources, destination port might be derived from the referent
134+
// resource or this field.
132135
//
133136
// +optional
134-
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct.
135-
PortNumber *PortNumber `json:"portNumber,omitempty"`
137+
Port *Port `json:"port,omitempty"`
136138

137139
// FailureMode configures how the parent handles the case when the Endpoint Picker extension
138140
// is non-responsive. When unspecified, defaults to "FailClose".
139141
//
140142
// +optional
141143
// +kubebuilder:default="FailClose"
142-
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct.
143-
FailureMode *EndpointPickerFailureMode `json:"failureMode,omitempty"`
144+
FailureMode EndpointPickerFailureMode `json:"failureMode,omitempty"`
144145
}
145146

146147
// EndpointPickerFailureMode defines the options for how the parent handles the case when the
@@ -286,8 +287,7 @@ type ParentReference struct {
286287
//
287288
// +optional
288289
// +kubebuilder:default=Gateway
289-
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct.
290-
Kind *Kind `json:"kind,omitempty"`
290+
Kind Kind `json:"kind,omitempty"`
291291

292292
// Name is the name of the referent API object.
293293
//
@@ -303,6 +303,5 @@ type ParentReference struct {
303303
// documentation for details: https://gateway-api.sigs.k8s.io/api-types/referencegrant/
304304
//
305305
// +optional
306-
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct.
307-
Namespace *Namespace `json:"namespace,omitempty"`
306+
Namespace Namespace `json:"namespace,omitempty"`
308307
}

api/v1/zz_generated.deepcopy.go

Lines changed: 3 additions & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apix/v1alpha2/inferenceobjective_types.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
// +kubebuilder:object:root=true
2626
// +kubebuilder:subresource:status
2727
// +kubebuilder:storageversion
28-
// +kubebuilder:printcolumn:name="Model Name",type=string,JSONPath=`.spec.modelName`
2928
// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name`
3029
// +kubebuilder:printcolumn:name="Priority",type=string,JSONPath=`.spec.priority`
3130
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
@@ -56,12 +55,6 @@ type InferenceObjectiveList struct {
5655
// performance and latency goals for the model. These workloads are
5756
// expected to operate within an InferencePool sharing compute capacity with other
5857
// InferenceObjectives, defined by the Inference Platform Admin.
59-
//
60-
// InferenceObjective's modelName (not the ObjectMeta name) is unique for a given InferencePool,
61-
// if the name is reused, an error will be shown on the status of a
62-
// InferenceObjective that attempted to reuse. The oldest InferenceObjective, based on
63-
// creation timestamp, will be selected to remain valid. In the event of a race
64-
// condition, one will be selected at random.
6558
type InferenceObjectiveSpec struct {
6659

6760
// Priority defines how important it is to serve the request compared to other requests in the same pool.
@@ -135,10 +128,6 @@ const (
135128
//
136129
// * "Accepted"
137130
//
138-
// Possible reasons for this condition to be False are:
139-
//
140-
// * "ModelNameInUse"
141-
//
142131
// Possible reasons for this condition to be Unknown are:
143132
//
144133
// * "Pending"
@@ -148,10 +137,6 @@ const (
148137
// ObjectiveReasonAccepted is the desired state. Model conforms to the state of the pool.
149138
ObjectiveReasonAccepted InferenceObjectiveConditionReason = "Accepted"
150139

151-
// ObjectiveReasonNameInUse is used when a given ModelName already exists within the pool.
152-
// Details about naming conflict resolution are on the ModelName field itself.
153-
ObjectiveReasonNameInUse InferenceObjectiveConditionReason = "ModelNameInUse"
154-
155140
// ObjectiveReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceObjective.
156141
ObjectiveReasonPending InferenceObjectiveConditionReason = "Pending"
157142
)

0 commit comments

Comments
 (0)