diff --git a/.gitignore b/.gitignore index 619755a38..153a56984 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,7 @@ images/agent/dev images/agent/Makefile local_build.sh /base_images.yml + +images/sds-common-scheduler-extender/REVIEW.md + +# trigger for build diff --git a/api/v1alpha1/const.go b/api/v1alpha1/const.go index ee313e2e6..bfab9e60d 100644 --- a/api/v1alpha1/const.go +++ b/api/v1alpha1/const.go @@ -43,4 +43,22 @@ const ( BlockDeviceRotaLabelKey = blockDeviceLabelPrefix + "/rota" BlockDeviceHotPlugLabelKey = blockDeviceLabelPrefix + "/hotplug" BlockDeviceMachineIDLabelKey = blockDeviceLabelPrefix + "/machineid" + + // ReplicatedStorageClass VolumeAccess modes + VolumeAccessLocal = "Local" + VolumeAccessEventuallyLocal = "EventuallyLocal" + VolumeAccessPreferablyLocal = "PreferablyLocal" + VolumeAccessAny = "Any" + + // ReplicatedStorageClass Topology modes + TopologyTransZonal = "TransZonal" + TopologyZonal = "Zonal" + TopologyIgnored = "Ignored" + + // ReplicatedStoragePool Types + RSPTypeLVM = "LVM" // Thick volumes + RSPTypeLVMThin = "LVMThin" // Thin volumes + + // Labels for replicated volumes + LabelReplicatedNode = "storage.deckhouse.io/sds-replicated-volume-node" ) diff --git a/api/v1alpha1/register.go b/api/v1alpha1/register.go index f8db9e0ce..222c9ad83 100644 --- a/api/v1alpha1/register.go +++ b/api/v1alpha1/register.go @@ -52,6 +52,10 @@ var knownTypes = []runtime.Object{ &LVMVolumeGroupSetList{}, &LVMLogicalVolumeSnapshot{}, &LVMLogicalVolumeSnapshotList{}, + &ReplicatedStorageClass{}, + &ReplicatedStorageClassList{}, + &ReplicatedStoragePool{}, + &ReplicatedStoragePoolList{}, } // Adds the list of known types to Scheme. diff --git a/api/v1alpha1/replicated_storage_class.go b/api/v1alpha1/replicated_storage_class.go new file mode 100644 index 000000000..af5f7b440 --- /dev/null +++ b/api/v1alpha1/replicated_storage_class.go @@ -0,0 +1,158 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type ReplicatedStorageClassList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata"` + + Items []ReplicatedStorageClass `json:"items"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type ReplicatedStorageClass struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ReplicatedStorageClassSpec `json:"spec"` + Status ReplicatedStorageClassStatus `json:"status,omitempty"` +} + +// +k8s:deepcopy-gen=true +type ReplicatedStorageClassSpec struct { + // StoragePool is the name of the ReplicatedStoragePool resource + StoragePool string `json:"storagePool"` + // ReclaimPolicy defines what happens to the volume when the PVC is deleted (Delete or Retain) + ReclaimPolicy string `json:"reclaimPolicy"` + // Replication mode: None, Availability, ConsistencyAndAvailability + Replication string `json:"replication,omitempty"` + // VolumeAccess mode: Local, EventuallyLocal, PreferablyLocal, Any + VolumeAccess string `json:"volumeAccess,omitempty"` + // Topology mode: TransZonal, Zonal, Ignored + Topology string `json:"topology"` + // Zones is the list of zones where volumes should be replicated + Zones []string `json:"zones,omitempty"` +} + +// +k8s:deepcopy-gen=true +type ReplicatedStorageClassStatus struct { + // Phase is the current state: Failed, Created + Phase string `json:"phase,omitempty"` + // Reason provides additional information about the current state + Reason string `json:"reason,omitempty"` +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStorageClass) DeepCopyInto(out *ReplicatedStorageClass) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + out.Status = in.Status +} + +// DeepCopy creates a deep copy of ReplicatedStorageClass +func (in *ReplicatedStorageClass) DeepCopy() *ReplicatedStorageClass { + if in == nil { + return nil + } + out := new(ReplicatedStorageClass) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject creates a deep copy as runtime.Object +func (in *ReplicatedStorageClass) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStorageClassList) DeepCopyInto(out *ReplicatedStorageClassList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ReplicatedStorageClass, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy creates a deep copy of ReplicatedStorageClassList +func (in *ReplicatedStorageClassList) DeepCopy() *ReplicatedStorageClassList { + if in == nil { + return nil + } + out := new(ReplicatedStorageClassList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject creates a deep copy as runtime.Object +func (in *ReplicatedStorageClassList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStorageClassSpec) DeepCopyInto(out *ReplicatedStorageClassSpec) { + *out = *in + if in.Zones != nil { + in, out := &in.Zones, &out.Zones + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// 
DeepCopy creates a deep copy of ReplicatedStorageClassSpec +func (in *ReplicatedStorageClassSpec) DeepCopy() *ReplicatedStorageClassSpec { + if in == nil { + return nil + } + out := new(ReplicatedStorageClassSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStorageClassStatus) DeepCopyInto(out *ReplicatedStorageClassStatus) { + *out = *in +} + +// DeepCopy creates a deep copy of ReplicatedStorageClassStatus +func (in *ReplicatedStorageClassStatus) DeepCopy() *ReplicatedStorageClassStatus { + if in == nil { + return nil + } + out := new(ReplicatedStorageClassStatus) + in.DeepCopyInto(out) + return out +} + diff --git a/api/v1alpha1/replicated_storage_pool.go b/api/v1alpha1/replicated_storage_pool.go new file mode 100644 index 000000000..c09ba4959 --- /dev/null +++ b/api/v1alpha1/replicated_storage_pool.go @@ -0,0 +1,173 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type ReplicatedStoragePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata"` + + Items []ReplicatedStoragePool `json:"items"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type ReplicatedStoragePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ReplicatedStoragePoolSpec `json:"spec"` + Status ReplicatedStoragePoolStatus `json:"status,omitempty"` +} + +// +k8s:deepcopy-gen=true +type ReplicatedStoragePoolSpec struct { + // Type defines the volumes type: LVM (for Thick) or LVMThin (for Thin) + Type string `json:"type"` + // LvmVolumeGroups is the list of LVMVolumeGroup resources used for storage + LvmVolumeGroups []ReplicatedStoragePoolLVG `json:"lvmVolumeGroups"` +} + +// +k8s:deepcopy-gen=true +type ReplicatedStoragePoolLVG struct { + // Name is the LVMVolumeGroup resource name + Name string `json:"name"` + // ThinPoolName is the thin pool name (required for LVMThin type) + ThinPoolName string `json:"thinPoolName,omitempty"` +} + +// +k8s:deepcopy-gen=true +type ReplicatedStoragePoolStatus struct { + // Phase is the current state: Updating, Failed, Completed + Phase string `json:"phase,omitempty"` + // Reason provides additional information about the current state + Reason string `json:"reason,omitempty"` +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStoragePool) DeepCopyInto(out *ReplicatedStoragePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + out.Status = in.Status +} + +// DeepCopy creates a deep copy of ReplicatedStoragePool +func (in *ReplicatedStoragePool) DeepCopy() *ReplicatedStoragePool { + if in == nil { + return nil + } + out := new(ReplicatedStoragePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject creates a 
deep copy as runtime.Object +func (in *ReplicatedStoragePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStoragePoolList) DeepCopyInto(out *ReplicatedStoragePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ReplicatedStoragePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy creates a deep copy of ReplicatedStoragePoolList +func (in *ReplicatedStoragePoolList) DeepCopy() *ReplicatedStoragePoolList { + if in == nil { + return nil + } + out := new(ReplicatedStoragePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject creates a deep copy as runtime.Object +func (in *ReplicatedStoragePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStoragePoolSpec) DeepCopyInto(out *ReplicatedStoragePoolSpec) { + *out = *in + if in.LvmVolumeGroups != nil { + in, out := &in.LvmVolumeGroups, &out.LvmVolumeGroups + *out = make([]ReplicatedStoragePoolLVG, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy creates a deep copy of ReplicatedStoragePoolSpec +func (in *ReplicatedStoragePoolSpec) DeepCopy() *ReplicatedStoragePoolSpec { + if in == nil { + return nil + } + out := new(ReplicatedStoragePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStoragePoolLVG) DeepCopyInto(out *ReplicatedStoragePoolLVG) { + *out = *in +} + +// DeepCopy creates a deep copy of ReplicatedStoragePoolLVG +func (in *ReplicatedStoragePoolLVG) DeepCopy() *ReplicatedStoragePoolLVG { + if in == nil { + return nil + } + out := new(ReplicatedStoragePoolLVG) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto copies the receiver into out +func (in *ReplicatedStoragePoolStatus) DeepCopyInto(out *ReplicatedStoragePoolStatus) { + *out = *in +} + +// DeepCopy creates a deep copy of ReplicatedStoragePoolStatus +func (in *ReplicatedStoragePoolStatus) DeepCopy() *ReplicatedStoragePoolStatus { + if in == nil { + return nil + } + out := new(ReplicatedStoragePoolStatus) + in.DeepCopyInto(out) + return out +} + diff --git a/crds/lvmlogicalvolume.yaml b/crds/lvmlogicalvolume.yaml index d5302c2dc..328d021f7 100644 --- a/crds/lvmlogicalvolume.yaml +++ b/crds/lvmlogicalvolume.yaml @@ -61,7 +61,7 @@ spec: - rule: self == oldSelf message: Value is immutable. minLength: 1 - pattern: '^[a-z0-9]([a-z0-9-.]{0,251}[a-z0-9])?$' + pattern: '^[a-z0-9]([a-z0-9-._]{0,251}[a-z0-9])?$' type: type: string description: | diff --git a/docs/README.md b/docs/README.md index b528db58b..15a115fb0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,4 +30,4 @@ The `sds-node-configurator` module manages block devices and LVM on Kubernetes c - [Custom Resources](./cr.html): Module CRD reference. - [Configuration](./configuration.html): Module parameter configuration. - [Configuration scenarios](./layouts.html): Typical disk subsystem configuration scenarios for various storage configurations. -- [FAQ](./faq.html): Frequently asked questions and answers. \ No newline at end of file +- [FAQ](./faq.html): Frequently asked questions and answers. 
diff --git a/hooks/go/020-common-scheduler-extender-certs/common-scheduler-extender-certs.go b/hooks/go/020-common-scheduler-extender-certs/common-scheduler-extender-certs.go new file mode 100644 index 000000000..d8f11948b --- /dev/null +++ b/hooks/go/020-common-scheduler-extender-certs/common-scheduler-extender-certs.go @@ -0,0 +1,41 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package hooks_common + +import ( + "fmt" + + tlscertificate "github.com/deckhouse/module-sdk/common-hooks/tls-certificate" + consts "github.com/deckhouse/sds-node-configurator/hooks/go/consts" +) + +var _ = tlscertificate.RegisterInternalTLSHookEM(tlscertificate.GenSelfSignedTLSHookConf{ + CommonCACanonicalName: fmt.Sprintf("%s-%s", consts.ModulePluralName, consts.SdsCommonSchedulerExtenderCertCn), + CN: consts.SdsCommonSchedulerExtenderCertCn, + TLSSecretName: consts.CommonSchedulerExtenderSecretName, + Namespace: consts.ModuleNamespace, + SANs: tlscertificate.DefaultSANs([]string{ + "localhost", + "127.0.0.1", + consts.SdsCommonSchedulerExtenderCertCn, + fmt.Sprintf("%s.%s", consts.SdsCommonSchedulerExtenderCertCn, consts.ModuleNamespace), + fmt.Sprintf("%s.%s.svc", consts.SdsCommonSchedulerExtenderCertCn, consts.ModuleNamespace), + // %CLUSTER_DOMAIN%:// is a special value to generate SAN like 'svc_name.svc_namespace.svc.cluster.local' + fmt.Sprintf("%%CLUSTER_DOMAIN%%://%s.%s.svc", consts.SdsCommonSchedulerExtenderCertCn, consts.ModuleNamespace), + }), + FullValuesPathPrefix: fmt.Sprintf("%s.internal.customSchedulerExtenderCert", consts.ModuleName), +}) diff --git a/hooks/go/consts/consts.go b/hooks/go/consts/consts.go index 72fd66783..05909a979 100644 --- a/hooks/go/consts/consts.go +++ b/hooks/go/consts/consts.go @@ -17,8 +17,10 @@ limitations under the License. 
package consts const ( - ModuleName string = "sdsNodeConfigurator" - ModuleNamespace string = "d8-sds-node-configurator" - ModulePluralName string = "sds-node-configurator" - WebhookCertCn string = "webhooks" + ModuleName string = "sdsNodeConfigurator" + ModuleNamespace string = "d8-sds-node-configurator" + ModulePluralName string = "sds-node-configurator" + WebhookCertCn string = "webhooks" + SdsCommonSchedulerExtenderCertCn string = "sds-common-scheduler-extender" + CommonSchedulerExtenderSecretName string = "common-scheduler-extender-https-certs" ) diff --git a/hooks/go/main.go b/hooks/go/main.go index e029551ab..6fb811fb2 100644 --- a/hooks/go/main.go +++ b/hooks/go/main.go @@ -18,6 +18,7 @@ package main import ( "github.com/deckhouse/module-sdk/pkg/app" + _ "github.com/deckhouse/sds-node-configurator/hooks/go/020-common-scheduler-extender-certs" _ "github.com/deckhouse/sds-node-configurator/hooks/go/020-webhook-certs" ) diff --git a/images/sds-common-scheduler-extender/LICENSE b/images/sds-common-scheduler-extender/LICENSE new file mode 100644 index 000000000..b77c0c92a --- /dev/null +++ b/images/sds-common-scheduler-extender/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/images/sds-common-scheduler-extender/cmd/access_log.go b/images/sds-common-scheduler-extender/cmd/access_log.go new file mode 100644 index 000000000..ac6ee75e6 --- /dev/null +++ b/images/sds-common-scheduler-extender/cmd/access_log.go @@ -0,0 +1,81 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "net" + "net/http" + "time" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +type accessLogResponseWriter struct { + http.ResponseWriter + statusCode int + size int +} + +func (w *accessLogResponseWriter) Write(data []byte) (int, error) { + n, err := w.ResponseWriter.Write(data) + w.size += n + return n, err +} + +func (w *accessLogResponseWriter) WriteHeader(statusCode int) { + w.statusCode = statusCode + w.ResponseWriter.WriteHeader(statusCode) +} + +func accessLogHandler(log logger.Logger, schedulerHandler http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + startTime := time.Now() + + // Generate trace ID for this request + traceID := logger.GenerateTraceID() + ctx := logger.WithTraceID(r.Context(), traceID) + r = r.WithContext(ctx) + + // Create logger with trace ID + requestLog := logger.WithTraceIDLogger(ctx, log) + + accessLogRW := &accessLogResponseWriter{ResponseWriter: w, statusCode: http.StatusOK} + + schedulerHandler.ServeHTTP(accessLogRW, r) + + fields := []interface{}{ + "traceid", traceID, + "response_time", time.Since(startTime).Seconds(), + "protocol", r.Proto, + "http_status_code", accessLogRW.statusCode, + "http_method", r.Method, + "url", r.RequestURI, + "http_host", r.Host, + "request_size", r.ContentLength, + "response_size", accessLogRW.size, + } + ip, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + fields = append(fields, "remote_ipaddr", ip) + } + ua := r.Header.Get("User-Agent") + if len(ua) > 0 { + fields = append(fields, "http_user_agent", ua) + } + requestLog.Info("[accessLogHandler]", fields...) + }) +} diff --git a/images/sds-common-scheduler-extender/cmd/access_log_test.go b/images/sds-common-scheduler-extender/cmd/access_log_test.go new file mode 100644 index 000000000..691f18aec --- /dev/null +++ b/images/sds-common-scheduler-extender/cmd/access_log_test.go @@ -0,0 +1,145 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "net" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/go-logr/logr" + "github.com/go-logr/zapr" + "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func getInt64(t *testing.T, m map[string]interface{}, key string) int64 { + t.Helper() + i, ok := m[key].(int64) + if !ok { + t.Errorf(`i, ok := m[%q].(int64); !ok`, key) + } + return i +} +func getString(t *testing.T, m map[string]interface{}, key string) string { + t.Helper() + s, ok := m[key].(string) + if !ok { + t.Errorf(`s, ok := m[%q].(string); !ok`, key) + } + return s +} + +// testLoggerWrapper wraps logger.Logger and overrides Info to use direct Info +type testLoggerWrapper struct { + logger.Logger + baseLogr logr.Logger +} + +// Info overrides the default Info to use direct Info instead of V(2).Info +func (t *testLoggerWrapper) Info(message string, keysAndValues ...interface{}) { + t.baseLogr.WithValues("level", "INFO").Info(message, keysAndValues...) +} + +func TestAccessLogHandler(t *testing.T) { + obs, logs := observer.New(zap.InfoLevel) + zapLogger := zap.New(obs) + logrLogger := zapr.NewLogger(zapLogger) + baseLogger := logger.NewLoggerWrap(logrLogger) + + // Create a wrapper that overrides Info to use direct Info + testLogWrapper := &testLoggerWrapper{ + Logger: baseLogger, + baseLogr: logrLogger, + } + + mux := http.NewServeMux() + + // Create a custom handler that uses our test logger + customHandler := func(w http.ResponseWriter, r *http.Request) { + startTime := time.Now() + accessLogRW := &accessLogResponseWriter{ResponseWriter: w, statusCode: http.StatusOK} + mux.ServeHTTP(accessLogRW, r) + + fields := []interface{}{ + "response_time", time.Since(startTime).Seconds(), + "protocol", r.Proto, + "http_status_code", accessLogRW.statusCode, + "http_method", r.Method, + "url", r.RequestURI, + "http_host", r.Host, + "request_size", r.ContentLength, + "response_size", accessLogRW.size, + } + ip, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + fields = append(fields, "remote_ipaddr", ip) + } + ua := r.Header.Get("User-Agent") + if len(ua) > 0 { + fields = append(fields, "http_user_agent", ua) + } + testLogWrapper.Info("[accessLogHandler] access", fields...) 
+ } + mux.HandleFunc("/hello", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + // w.Write([]byte("hello")) + _, err := w.Write([]byte("hello")) + if err != nil { + t.Fatal(err) + } + }) + serv := httptest.NewServer(http.HandlerFunc(customHandler)) + defer serv.Close() + + cli := serv.Client() + _, err := cli.Get(serv.URL + "/hello") + if err != nil { + t.Fatal(err) + } + _, err = cli.Get(serv.URL + "/notfound") + if err != nil { + t.Fatal(err) + } + + if logs.Len() != 2 { + t.Fatal(`len(accessLogs) != 2`) + } + + helloLog := logs.All()[0].ContextMap() + notfoundLog := logs.All()[1].ContextMap() + + if getInt64(t, helloLog, "http_status_code") != http.StatusOK { + t.Error(`getInt(t, helloLog, "http_status_code") != http.StatusOK`) + } + if getString(t, helloLog, "http_method") != "GET" { + t.Error(`getString(t, helloLog, "http_method") != "GET"`) + } + if getString(t, helloLog, "url") != "/hello" { + t.Error(`getString(t, helloLog, "url") != "/hello"`) + } + if getString(t, notfoundLog, "url") != "/notfound" { + t.Error(`getString(t, notfoundLog, "url") != "/notfound"`) + } + if getInt64(t, helloLog, "response_size") != 5 { + t.Error(`getInt(t, helloLog, "response_size") != helloLength`) + } +} diff --git a/images/sds-common-scheduler-extender/cmd/main.go b/images/sds-common-scheduler-extender/cmd/main.go new file mode 100644 index 000000000..f6f34a640 --- /dev/null +++ b/images/sds-common-scheduler-extender/cmd/main.go @@ -0,0 +1,249 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "context" + "errors" + "fmt" + "net/http" + "os" + "os/signal" + "sync" + "syscall" + "time" + + d8commonapi "github.com/deckhouse/sds-common-lib/api/v1alpha1" + "github.com/spf13/cobra" + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/yaml" + + slv "github.com/deckhouse/sds-local-volume/api/v1alpha1" + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/controller" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/kubutils" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/scheduler" +) + +const ( + defaultDivisor = 1 + defaultListenAddr = ":8000" + defaultCacheSize = 10 + defaultcertFile = "/etc/sds-common-scheduler-extender/certs/tls.crt" + defaultkeyFile = "/etc/sds-common-scheduler-extender/certs/tls.key" +) + +type Config struct { + ListenAddr string `json:"listen"` + DefaultDivisor float64 `json:"default-divisor"` + LogLevel string `json:"log-level"` + CacheSize int `json:"cache-size"` + HealthProbeBindAddress string `json:"health-probe-bind-address"` + CertFile string `json:"cert-file"` + KeyFile string `json:"key-file"` + PVCExpiredDurationSec int `json:"pvc-expired-duration-sec"` +} + +var cfgFilePath string + +var resourcesSchemeFuncs = []func(*runtime.Scheme) error{ + slv.AddToScheme, + snc.AddToScheme, + corev1.AddToScheme, + storagev1.AddToScheme, + d8commonapi.AddToScheme, +} + +var config = &Config{ + ListenAddr: defaultListenAddr, + DefaultDivisor: defaultDivisor, + LogLevel: "2", + CacheSize: defaultCacheSize, + CertFile: defaultcertFile, + KeyFile: defaultkeyFile, + PVCExpiredDurationSec: cache.DefaultPVCExpiredDurationSec, +} + +var rootCmd = &cobra.Command{ + Use: "sds-common-scheduler-extender", + Version: "development", + Short: "a scheduler-extender for sds modules", + RunE: func(cmd *cobra.Command, _ []string) error { + // to avoid printing usage information when error is returned + cmd.SilenceUsage = true + // to avoid printing errors (we log it closer to the place where it has happened) + cmd.SilenceErrors = true + return subMain(cmd.Context()) + }, +} + +func init() { + rootCmd.PersistentFlags().StringVar(&cfgFilePath, "config", "", "config file") +} + +func main() { + ctx, _ := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + + if err := rootCmd.ExecuteContext(ctx); err != nil { + // we expect err to be logged already + os.Exit(1) + } +} + +func subMain(ctx context.Context) error { + if len(cfgFilePath) != 0 { + b, err := os.ReadFile(cfgFilePath) + if err != nil { + print(err) + return err + } + + if err = yaml.Unmarshal(b, config); err != nil { + print(err) + return err + } + } + + // Override log level from environment variable if set + if envLogLevel := os.Getenv("LOG_LEVEL"); envLogLevel != "" { + config.LogLevel = envLogLevel + } + + log, err := logger.NewLogger(logger.Verbosity(config.LogLevel)) + if err != nil { + print(fmt.Sprintf("unable to initialize logger, err: %s", err)) + return err + } + mainLog := log.WithName("main") + 
mainLog.Info(fmt.Sprintf("logger has been initialized, log level: %s", config.LogLevel)) + + // Set the logger for the controller-runtime + ctrl.SetLogger(log.GetLogger()) + + kConfig, err := kubutils.KubernetesDefaultConfigCreate() + if err != nil { + mainLog.Error(err, "unable to KubernetesDefaultConfigCreate") + return err + } + mainLog.Info("kubernetes config has been successfully created.") + + scheme := runtime.NewScheme() + for _, f := range resourcesSchemeFuncs { + if err := f(scheme); err != nil { + mainLog.Error(err, "unable to add scheme to func") + return err + } + } + mainLog.Info("successfully read scheme CR") + + managerOpts := manager.Options{ + Scheme: scheme, + Logger: log.GetLogger(), + HealthProbeBindAddress: config.HealthProbeBindAddress, + BaseContext: func() context.Context { return ctx }, + } + + mgr, err := manager.New(kConfig, managerOpts) + if err != nil { + mainLog.Error(err, "unable to create manager for creating controllers") + return err + } + + schedulerCache := cache.NewCache(log, config.PVCExpiredDurationSec) + mainLog.Info("scheduler cache was initialized") + + schedulerHandler, err := scheduler.NewHandler(ctx, mgr.GetClient(), log, schedulerCache, config.DefaultDivisor) + if err != nil { + mainLog.Error(err, "unable to create http.Handler of the scheduler extender") + return err + } + mainLog.Info("scheduler handler initialized") + + if _, err = controller.RunLVGWatcherCacheController(mgr, log, schedulerCache); err != nil { + mainLog.Error(err, fmt.Sprintf("unable to run %s controller", controller.LVGWatcherCacheCtrlName)) + return err + } + mainLog.Info(fmt.Sprintf("successfully ran %s controller", controller.LVGWatcherCacheCtrlName)) + + if err = controller.RunPVCWatcherCacheController(mgr, log, schedulerCache); err != nil { + mainLog.Error(err, fmt.Sprintf("unable to run %s controller", controller.PVCWatcherCacheCtrlName)) + return err + } + mainLog.Info(fmt.Sprintf("successfully ran %s controller", controller.PVCWatcherCacheCtrlName)) + + if err = mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + mainLog.Error(err, "unable to mgr.AddHealthzCheck") + return err + } + mainLog.Info("successfully AddHealthzCheck") + + if err = mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + mainLog.Error(err, "unable to mgr.AddReadyzCheck") + return err + } + mainLog.Info("successfully AddReadyzCheck") + + serv := &http.Server{ + Addr: config.ListenAddr, + Handler: accessLogHandler(log, schedulerHandler), + ReadTimeout: 30 * time.Second, + WriteTimeout: 30 * time.Second, + } + mainLog.Info("server was initialized") + + return runServer(ctx, serv, mgr, log) +} + +func runServer(ctx context.Context, serv *http.Server, mgr manager.Manager, log logger.Logger) error { + ctx, stop := context.WithCancel(ctx) + + var wg sync.WaitGroup + defer wg.Wait() + defer stop() // stop() should be called before wg.Wait() to stop the goroutine correctly. 
+ wg.Add(1) + + go func() { + defer wg.Done() + <-ctx.Done() + if err := serv.Shutdown(ctx); err != nil { + log.Error(err, "[runServer] failed to shutdown gracefully") + } + }() + + go func() { + log.Info("[runServer] kube manager will start now") + if err := mgr.Start(ctx); err != nil { + log.Error(err, "[runServer] unable to mgr.Start") + } + }() + + log.Info(fmt.Sprintf("[runServer] starts serving on: %s", config.ListenAddr)) + + if err := serv.ListenAndServeTLS(config.CertFile, config.KeyFile); !errors.Is(err, http.ErrServerClosed) { + log.Error(err, "[runServer] unable to run the server") + return err + } + + return nil +} diff --git a/images/sds-common-scheduler-extender/go.mod b/images/sds-common-scheduler-extender/go.mod new file mode 100644 index 000000000..42c3a5d8d --- /dev/null +++ b/images/sds-common-scheduler-extender/go.mod @@ -0,0 +1,74 @@ +module github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender + +go 1.24.10 + +require ( + github.com/deckhouse/sds-common-lib v0.6.3 + github.com/deckhouse/sds-local-volume/api v0.0.0-20250114155747-5d75d401a787 + github.com/deckhouse/sds-node-configurator/api v0.0.0-20250424082358-e271071c2a57 + github.com/go-logr/logr v1.4.2 + github.com/go-logr/zapr v1.3.0 + github.com/spf13/cobra v1.9.1 + github.com/stretchr/testify v1.10.0 + go.uber.org/zap v1.27.0 + k8s.io/api v0.33.0 + k8s.io/apimachinery v0.33.0 + k8s.io/client-go v0.33.0 + k8s.io/klog/v2 v2.130.1 + k8s.io/utils v0.0.0-20241210054802-24370beab758 + sigs.k8s.io/controller-runtime v0.20.4 + sigs.k8s.io/yaml v1.4.0 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/fsnotify/fsnotify v1.8.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.9.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.22.0 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.62.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/net v0.41.0 // indirect + golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/sync v0.15.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/term v0.32.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/time v0.9.0 // indirect + 
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.33.0 // indirect + k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect +) + +replace github.com/deckhouse/sds-node-configurator/api => ../../api diff --git a/images/sds-common-scheduler-extender/go.sum b/images/sds-common-scheduler-extender/go.sum new file mode 100644 index 000000000..cb3fdc7b9 --- /dev/null +++ b/images/sds-common-scheduler-extender/go.sum @@ -0,0 +1,197 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deckhouse/sds-common-lib v0.6.3 h1:k0OotLuQaKuZt8iyph9IusDixjAE0MQRKyuTe2wZP3I= +github.com/deckhouse/sds-common-lib v0.6.3/go.mod h1:UHZMKkqEh6RAO+vtA7dFTwn/2m5lzfPn0kfULBmDf2o= +github.com/deckhouse/sds-local-volume/api v0.0.0-20250114155747-5d75d401a787 h1:YYeoWACJsEOqNcQ/RWDsF82hihNYZKlYZAJopvdeKrQ= +github.com/deckhouse/sds-local-volume/api v0.0.0-20250114155747-5d75d401a787/go.mod h1:LBLI26oEmeAMYTSRFFFljP8AOk4kqJEwHcf4fYnyzME= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= 
+github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= +github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= 
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= +github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= +github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= +github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 
h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= +golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= 
+golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= +golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU= +k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM= +k8s.io/apiextensions-apiserver v0.33.0 h1:d2qpYL7Mngbsc1taA4IjJPRJ9ilnsXIrndH+r9IimOs= +k8s.io/apiextensions-apiserver v0.33.0/go.mod h1:VeJ8u9dEEN+tbETo+lFkwaaZPg6uFKLGj5vyNEwwSzc= +k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ= +k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98= +k8s.io/client-go v0.33.0/go.mod h1:kGkd+l/gNGg8GYWAPr0xF1rRKvVWvzh9vmZAMXtaKOg= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= 
+k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU= +sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/images/sds-common-scheduler-extender/mount-points.yaml b/images/sds-common-scheduler-extender/mount-points.yaml new file mode 100644 index 000000000..1e2c62038 --- /dev/null +++ b/images/sds-common-scheduler-extender/mount-points.yaml @@ -0,0 +1,3 @@ +dirs: + - /etc/sds-common-scheduler-extender + - /etc/sds-common-scheduler-extender/certs diff --git a/images/sds-common-scheduler-extender/pkg/cache/cache.go b/images/sds-common-scheduler-extender/pkg/cache/cache.go new file mode 100644 index 000000000..f12147fda --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/cache/cache.go @@ -0,0 +1,1022 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cache + +import ( + "errors" + "fmt" + "strings" + "sync" + "time" + + corev1 "k8s.io/api/core/v1" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +const ( + DefaultPVCExpiredDurationSec = 30 + + pvcPerLVGCount = 150 + lvgsPerPVCDefaultCount = 5 + lvgsPerNodeCount = 5 + SelectedNodeAnnotation = "volume.kubernetes.io/selected-node" +) + +type Cache struct { + mtx sync.RWMutex + lvgByName map[string]*lvgEntry // map[lvgName]*lvgEntry + log logger.Logger + expiredDuration time.Duration +} + +type lvgEntry struct { + lvg *snc.LVMVolumeGroup + thickByPVC map[string]*pvcEntry // map[pvcKey]*pvcEntry + thickByVolume map[string]*volumeEntry // map[volumeName]*volumeEntry - for thick volumes + thinByPool map[string]*thinPoolEntry // map[thinPoolName]*thinPoolEntry +} + +type thinPoolEntry struct { + pvcs map[string]*pvcEntry // map[pvcKey]*pvcEntry + volumes map[string]*volumeEntry // map[volumeName]*volumeEntry - for thin volumes +} + +type pvcEntry struct { + pvc *corev1.PersistentVolumeClaim + selectedNode string +} + +type volumeEntry struct { + size int64 + createdAt time.Time +} + +// NewCache initialize new cache. +func NewCache(logger logger.Logger, pvcExpDurSec int) *Cache { + ch := &Cache{ + lvgByName: make(map[string]*lvgEntry), + log: logger, + expiredDuration: time.Duration(pvcExpDurSec) * time.Second, + } + + go func() { + timer := time.NewTimer(ch.expiredDuration) + + for range timer.C { + ch.clearBoundExpiredPVC() + ch.clearExpiredVolumes() + timer.Reset(ch.expiredDuration) + } + }() + + return ch +} + +func (c *Cache) clearBoundExpiredPVC() { + c.log.Debug("[clearBoundExpiredPVC] starts to clear expired PVC") + // Take a snapshot of all PVCs under read lock to avoid holding write lock during removals + var snapshot []*corev1.PersistentVolumeClaim + c.mtx.RLock() + for _, lvg := range c.lvgByName { + for _, pe := range lvg.thickByPVC { + snapshot = append(snapshot, pe.pvc) + } + for _, tp := range lvg.thinByPool { + for _, pe := range tp.pvcs { + snapshot = append(snapshot, pe.pvc) + } + } + } + c.mtx.RUnlock() + + for _, pvc := range snapshot { + if pvc.Status.Phase != corev1.ClaimBound { + c.log.Trace(fmt.Sprintf("[clearBoundExpiredPVC] PVC %s is not in a Bound state", pvc.Name)) + continue + } + if time.Since(pvc.CreationTimestamp.Time) > c.expiredDuration { + c.log.Warning(fmt.Sprintf("[clearBoundExpiredPVC] PVC %s is in a Bound state and expired, remove it from the cache", pvc.Name)) + + // TODO: RemovePVFromTheCache(pvc.Spec.VolumeName) - since we are adding our API to the scheduler-extender, we need to remove the PV from the cache. 
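+			// Note: RemovePVCFromTheCache acquires the write lock itself. The snapshot above
+			// is collected under a read lock that is released before this point, so no lock is
+			// held here; holding c.mtx across this call would deadlock.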
+ c.RemovePVCFromTheCache(pvc) + } else { + c.log.Trace(fmt.Sprintf("[clearBoundExpiredPVC] PVC %s is in a Bound state but not expired yet.", pvc.Name)) + } + } + c.log.Debug("[clearBoundExpiredPVC] finished the expired PVC clearing") +} + +func (c *Cache) clearExpiredVolumes() { + c.log.Debug("[clearExpiredVolumes] starts to clear expired volumes") + c.mtx.Lock() + defer c.mtx.Unlock() + + for lvgName, entry := range c.lvgByName { + // Clear expired thick volumes + for volumeName, ve := range entry.thickByVolume { + if time.Since(ve.createdAt) > c.expiredDuration { + c.log.Warning(fmt.Sprintf("[clearExpiredVolumes] thick volume %s in LVG %s expired, remove it from the cache", volumeName, lvgName)) + delete(entry.thickByVolume, volumeName) + } + } + + // Clear expired thin volumes + for thinPoolName, tp := range entry.thinByPool { + for volumeName, ve := range tp.volumes { + if time.Since(ve.createdAt) > c.expiredDuration { + c.log.Warning(fmt.Sprintf("[clearExpiredVolumes] thin volume %s in LVG %s Thin Pool %s expired, remove it from the cache", volumeName, lvgName, thinPoolName)) + delete(tp.volumes, volumeName) + } + } + } + } + c.log.Debug("[clearExpiredVolumes] finished the expired volumes clearing") +} + +// ClearBoundPVCsFromLVG removes all Bound PVCs from the cache for the specified LVG without waiting for expiration. +// Unlike clearBoundExpiredPVC, this function removes Bound PVCs immediately. +func (c *Cache) ClearBoundPVCsFromLVG(lvgName string) error { + c.log.Debug(fmt.Sprintf("[ClearBoundPVCsFromLVG] starts to clear the cache for the LVMVolumeGroup %s", lvgName)) + pvcs, err := c.GetAllPVCForLVG(lvgName) + if err != nil { + c.log.Error(err, fmt.Sprintf("[ClearBoundPVCsFromLVG] unable to get all PVC for the LVMVolumeGroup %s", lvgName)) + return err + } + + for _, pvc := range pvcs { + c.log.Trace(fmt.Sprintf("[ClearBoundPVCsFromLVG] cached PVC %s/%s belongs to LVMVolumeGroup %s", pvc.Namespace, pvc.Name, lvgName)) + c.log.Trace(fmt.Sprintf("[ClearBoundPVCsFromLVG] PVC %s/%s has status phase %s", pvc.Namespace, pvc.Name, pvc.Status.Phase)) + if pvc.Status.Phase == corev1.ClaimBound { + c.log.Trace(fmt.Sprintf("[ClearBoundPVCsFromLVG] cached PVC %s/%s has Status.Phase Bound. It will be removed from the cache for LVMVolumeGroup %s", pvc.Namespace, pvc.Name, lvgName)) + c.RemovePVCFromTheCache(pvc) + c.log.Debug(fmt.Sprintf("[ClearBoundPVCsFromLVG] PVC %s/%s was removed from the cache for LVMVolumeGroup %s", pvc.Namespace, pvc.Name, lvgName)) + } + } + return nil +} + +// AddLVG adds selected LVMVolumeGroup resource to the cache. If it is already stored, does nothing. +func (c *Cache) AddLVG(lvg *snc.LVMVolumeGroup) { + c.mtx.Lock() + defer c.mtx.Unlock() + + if _, exists := c.lvgByName[lvg.Name]; exists { + c.log.Debug(fmt.Sprintf("[AddLVG] the LVMVolumeGroup %s has been already added to the cache", lvg.Name)) + return + } + + c.lvgByName[lvg.Name] = &lvgEntry{ + lvg: lvg, + thickByPVC: make(map[string]*pvcEntry), + thickByVolume: make(map[string]*volumeEntry), + thinByPool: make(map[string]*thinPoolEntry), + } + + c.log.Trace(fmt.Sprintf("[AddLVG] the LVMVolumeGroup %s nodes: %v", lvg.Name, lvg.Status.Nodes)) +} + +// UpdateLVG updated selected LVMVolumeGroup resource in the cache. If such LVMVolumeGroup is not stored, returns an error. 
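+// Only the stored LVMVolumeGroup pointer is replaced; PVC and volume reservations
+// already tracked for this LVG are left untouched.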
+func (c *Cache) UpdateLVG(lvg *snc.LVMVolumeGroup) error { + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvg.Name] + if !found { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvg.Name) + } + entry.lvg = lvg + c.log.Trace(fmt.Sprintf("[UpdateLVG] the LVMVolumeGroup %s nodes: %v", lvg.Name, lvg.Status.Nodes)) + return nil +} + +// TryGetLVG returns selected LVMVolumeGroup resource if it is stored in the cache, otherwise returns nil. +func (c *Cache) TryGetLVG(name string) *snc.LVMVolumeGroup { + c.mtx.RLock() + defer c.mtx.RUnlock() + entry, found := c.lvgByName[name] + if !found || entry == nil { + c.log.Debug(fmt.Sprintf("[TryGetLVG] the LVMVolumeGroup %s was not found in the cache. Return nil", name)) + return nil + } + + return entry.lvg +} + +// GetLVGNamesByNodeName returns LVMVolumeGroups resources names stored in the cache for the selected node. If none of them exist, returns empty slice. +func (c *Cache) GetLVGNamesByNodeName(nodeName string) []string { + c.mtx.RLock() + defer c.mtx.RUnlock() + + var result []string + for name, entry := range c.lvgByName { + if lvgHasNode(entry.lvg, nodeName) { + result = append(result, name) + } + } + if len(result) == 0 { + c.log.Debug(fmt.Sprintf("[GetLVGNamesByNodeName] no LVMVolumeGroup was found in the cache for the node %s. Return empty slice", nodeName)) + } + return result +} + +// GetAllLVG returns all the LVMVolumeGroups resources stored in the cache. +// +// Return: map[lvgName]*snc.LVMVolumeGroup +func (c *Cache) GetAllLVG() map[string]*snc.LVMVolumeGroup { + c.mtx.RLock() + defer c.mtx.RUnlock() + result := make(map[string]*snc.LVMVolumeGroup, len(c.lvgByName)) + for name, entry := range c.lvgByName { + if entry.lvg == nil { + c.log.Error(fmt.Errorf("LVMVolumeGroup %s is not initialized", name), "[GetAllLVG] an error occurs while iterating the LVMVolumeGroups") + continue + } + result[name] = entry.lvg + } + return result +} + +// GetLVGThickReservedSpace returns a sum of reserved space by every thick PVC in the selected LVMVolumeGroup resource. If such LVMVolumeGroup resource is not stored, returns an error. +func (c *Cache) GetLVGThickReservedSpace(lvgName string) (int64, error) { + c.mtx.RLock() + defer c.mtx.RUnlock() + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + c.log.Debug(fmt.Sprintf("[GetLVGThickReservedSpace] the LVMVolumeGroup %s was not found in the cache. Returns 0", lvgName)) + return 0, nil + } + + var space int64 + // Consider PVCs + for _, pe := range entry.thickByPVC { + space += pe.pvc.Spec.Resources.Requests.Storage().Value() + } + // Consider volumes + for _, ve := range entry.thickByVolume { + space += ve.size + } + + return space, nil +} + +// GetLVGThinReservedSpace returns a sum of reserved space by every thin PVC in the selected LVMVolumeGroup resource. If such LVMVolumeGroup resource is not stored, returns an error. +func (c *Cache) GetLVGThinReservedSpace(lvgName string, thinPoolName string) (int64, error) { + c.mtx.RLock() + defer c.mtx.RUnlock() + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + c.log.Debug(fmt.Sprintf("[GetLVGThinReservedSpace] the LVMVolumeGroup %s was not found in the cache. Returns 0", lvgName)) + return 0, nil + } + + tp, found := entry.thinByPool[thinPoolName] + if !found || tp == nil { + c.log.Debug(fmt.Sprintf("[GetLVGThinReservedSpace] the Thin pool %s of the LVMVolumeGroup %s was not found in the cache. 
Returns 0", thinPoolName, lvgName)) + return 0, nil + } + + var space int64 + // Consider PVCs + for _, pe := range tp.pvcs { + space += pe.pvc.Spec.Resources.Requests.Storage().Value() + } + // Consider volumes + for _, ve := range tp.volumes { + space += ve.size + } + + return space, nil +} + +// DeleteLVG deletes selected LVMVolumeGroup resource from the cache. +func (c *Cache) DeleteLVG(lvgName string) { + c.mtx.Lock() + defer c.mtx.Unlock() + delete(c.lvgByName, lvgName) +} + +// AddThickPVC adds selected PVC to selected LVMVolumeGroup resource. If the LVMVolumeGroup resource is not stored, returns an error. +// If selected PVC is already stored in the cache, does nothing. +func (c *Cache) AddThickPVC(lvgName string, pvc *corev1.PersistentVolumeClaim) error { + if pvc.Status.Phase == corev1.ClaimBound { + c.log.Warning(fmt.Sprintf("[AddThickPVC] PVC %s/%s has status phase BOUND. It will not be added to the cache", pvc.Namespace, pvc.Name)) + return nil + } + + pvcKey := configurePVCKey(pvc) + + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + err := fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + c.log.Error(err, fmt.Sprintf("[AddThickPVC] an error occurred while trying to add PVC %s to the cache", pvcKey)) + return err + } + + // this case might be triggered if the extender recovers after fail and finds some pending thickPVCs with selected nodes + c.log.Trace(fmt.Sprintf("[AddThickPVC] PVC %s/%s annotations: %v", pvc.Namespace, pvc.Name, pvc.Annotations)) + + shouldAdd, err := c.shouldAddPVC(pvc, entry, pvcKey, lvgName, "") + if err != nil { + return err + } + + if !shouldAdd { + c.log.Debug(fmt.Sprintf("[AddThickPVC] PVC %s should not be added", pvcKey)) + return nil + } + + c.log.Debug(fmt.Sprintf("[AddThickPVC] new PVC %s cache will be added to the LVMVolumeGroup %s", pvcKey, lvgName)) + c.addNewThickPVC(entry, pvc) + + return nil +} + +func (c *Cache) shouldAddPVC(pvc *corev1.PersistentVolumeClaim, entry *lvgEntry, pvcKey, lvgName, thinPoolName string) (bool, error) { + if pvc.Annotations[SelectedNodeAnnotation] != "" { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] PVC %s/%s has selected node anotation, selected node: %s", pvc.Namespace, pvc.Name, pvc.Annotations[SelectedNodeAnnotation])) + + if !lvgHasNode(entry.lvg, pvc.Annotations[SelectedNodeAnnotation]) { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] LVMVolumeGroup %s does not belong to PVC %s/%s selected node %s. It will be skipped", lvgName, pvc.Namespace, pvc.Name, pvc.Annotations[SelectedNodeAnnotation])) + return false, nil + } + + c.log.Debug(fmt.Sprintf("[shouldAddPVC] LVMVolumeGroup %s belongs to PVC %s/%s selected node %s", lvgName, pvc.Namespace, pvc.Name, pvc.Annotations[SelectedNodeAnnotation])) + + // if pvc is thick + if _, found := entry.thickByPVC[pvcKey]; found && thinPoolName == "" { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] PVC %s was found in the cache of the LVMVolumeGroup %s", pvcKey, lvgName)) + return false, nil + } + + // if pvc is thin + if thinPoolName != "" { + tp, found := entry.thinByPool[thinPoolName] + if !found || tp == nil { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] Thin pool %s was not found in the cache, PVC %s should be added", thinPoolName, pvcKey)) + return true, nil + } + + if _, found = tp.pvcs[pvcKey]; found { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] PVC %s was found in the Thin pool %s cache of the LVMVolumeGroup %s. 
No need to add", pvcKey, thinPoolName, lvgName)) + return false, nil + } + } + } + + return true, nil +} + +func (c *Cache) AddThinPVC(lvgName, thinPoolName string, pvc *corev1.PersistentVolumeClaim) error { + if pvc.Status.Phase == corev1.ClaimBound { + c.log.Warning(fmt.Sprintf("[AddThinPVC] PVC %s/%s has status phase BOUND. It will not be added to the cache", pvc.Namespace, pvc.Name)) + return nil + } + + pvcKey := configurePVCKey(pvc) + + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + err := fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + c.log.Error(err, fmt.Sprintf("[AddThinPVC] an error occurred while trying to add PVC %s to the cache", pvcKey)) + return err + } + + thinPoolBelongs := lvgHasThinPool(entry.lvg, thinPoolName) + if !thinPoolBelongs { + err := fmt.Errorf("thin pool %s was not found in the LVMVolumeGroup %s", thinPoolName, lvgName) + c.log.Error(err, fmt.Sprintf("[AddThinPVC] unable to add Thin pool %s of the LVMVolumeGroup %s for the PVC %s", thinPoolName, lvgName, pvcKey)) + return err + } + + // this case might be triggered if the extender recovers after fail and finds some pending thin PVCs with selected nodes + c.log.Trace(fmt.Sprintf("[AddThinPVC] PVC %s/%s annotations: %v", pvc.Namespace, pvc.Name, pvc.Annotations)) + shouldAdd, err := c.shouldAddPVC(pvc, entry, pvcKey, lvgName, thinPoolName) + if err != nil { + return err + } + + if !shouldAdd { + c.log.Debug(fmt.Sprintf("[AddThinPVC] PVC %s should not be added", pvcKey)) + return nil + } + + c.log.Debug(fmt.Sprintf("[AddThinPVC] new PVC %s cache will be added to the LVMVolumeGroup %s", pvcKey, lvgName)) + err = c.addNewThinPVC(entry, pvc, thinPoolName) + if err != nil { + c.log.Error(err, fmt.Sprintf("[AddThinPVC] unable to add PVC %s to Thin Pool %s of the LVMVolumeGroup %s", pvcKey, thinPoolName, lvgName)) + return err + } + + return nil +} + +func (c *Cache) addNewThickPVC(lvgCh *lvgEntry, pvc *corev1.PersistentVolumeClaim) { + pvcKey := configurePVCKey(pvc) + lvgCh.thickByPVC[pvcKey] = &pvcEntry{pvc: pvc, selectedNode: pvc.Annotations[SelectedNodeAnnotation]} +} + +func (c *Cache) addNewThinPVC(lvgCh *lvgEntry, pvc *corev1.PersistentVolumeClaim, thinPoolName string) error { + pvcKey := configurePVCKey(pvc) + + err := c.addThinPoolIfNotExists(lvgCh, thinPoolName) + if err != nil { + c.log.Error(err, fmt.Sprintf("[addNewThinPVC] unable to add Thin pool %s in the LVMVolumeGroup %s cache for PVC %s", thinPoolName, lvgCh.lvg.Name, pvc.Name)) + return err + } + + thinPoolCh := lvgCh.thinByPool[thinPoolName] + if thinPoolCh == nil { + err = fmt.Errorf("thin pool %s not found", thinPoolName) + c.log.Error(err, fmt.Sprintf("[addNewThinPVC] unable to add Thin PVC %s to the cache", pvcKey)) + return err + } + + thinPoolCh.pvcs[pvcKey] = &pvcEntry{pvc: pvc, selectedNode: pvc.Annotations[SelectedNodeAnnotation]} + c.log.Debug(fmt.Sprintf("[addNewThinPVC] THIN PVC %s was added to the cache to Thin Pool %s", pvcKey, thinPoolName)) + return nil +} + +// UpdateThickPVC updates selected PVC in selected LVMVolumeGroup resource. If no such PVC is stored in the cache, adds it. 
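+// The cached selected node is refreshed from the PVC's
+// "volume.kubernetes.io/selected-node" annotation on every call.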
+func (c *Cache) UpdateThickPVC(lvgName string, pvc *corev1.PersistentVolumeClaim) error { + pvcKey := configurePVCKey(pvc) + + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + } + + pvcCh, found := entry.thickByPVC[pvcKey] + if !found || pvcCh == nil { + c.log.Warning(fmt.Sprintf("[UpdateThickPVC] PVC %s was not found in the cache for the LVMVolumeGroup %s. It will be added", pvcKey, lvgName)) + c.addNewThickPVC(entry, pvc) + return nil + } + + pvcCh.pvc = pvc + pvcCh.selectedNode = pvc.Annotations[SelectedNodeAnnotation] + c.log.Debug(fmt.Sprintf("[UpdateThickPVC] successfully updated PVC %s with selected node %s in the cache for LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName)) + + return nil +} + +func (c *Cache) UpdateThinPVC(lvgName, thinPoolName string, pvc *corev1.PersistentVolumeClaim) error { + pvcKey := configurePVCKey(pvc) + + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + } + + thinPoolCh, found := entry.thinByPool[thinPoolName] + if !found || thinPoolCh == nil { + c.log.Debug(fmt.Sprintf("[UpdateThinPVC] Thin Pool %s was not found in the LVMVolumeGroup %s, add it.", thinPoolName, lvgName)) + err := c.addThinPoolIfNotExists(entry, thinPoolName) + if err != nil { + return err + } + thinPoolCh = entry.thinByPool[thinPoolName] + } + + pvcCh, found := thinPoolCh.pvcs[pvcKey] + if !found || pvcCh == nil { + c.log.Warning(fmt.Sprintf("[UpdateThinPVC] Thin PVC %s was not found in Thin pool %s in the cache for the LVMVolumeGroup %s. It will be added", pvcKey, thinPoolName, lvgName)) + err := c.addNewThinPVC(entry, pvc, thinPoolName) + if err != nil { + c.log.Error(err, fmt.Sprintf("[UpdateThinPVC] an error occurred while trying to update the PVC %s", pvcKey)) + return err + } + return nil + } + + pvcCh.pvc = pvc + pvcCh.selectedNode = pvc.Annotations[SelectedNodeAnnotation] + c.log.Debug(fmt.Sprintf("[UpdateThinPVC] successfully updated THIN PVC %s with selected node %s in the cache for LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName)) + + return nil +} + +func (c *Cache) addThinPoolIfNotExists(lvgCh *lvgEntry, thinPoolName string) error { + if len(thinPoolName) == 0 { + err := errors.New("no thin pool name specified") + c.log.Error(err, fmt.Sprintf("[addThinPoolIfNotExists] unable to add thin pool in the LVMVolumeGroup %s", lvgCh.lvg.Name)) + return err + } + + if _, found := lvgCh.thinByPool[thinPoolName]; found { + c.log.Debug(fmt.Sprintf("[addThinPoolIfNotExists] Thin pool %s is already created in the LVMVolumeGroup %s. No need to add a new one", thinPoolName, lvgCh.lvg.Name)) + return nil + } + + lvgCh.thinByPool[thinPoolName] = &thinPoolEntry{ + pvcs: make(map[string]*pvcEntry), + volumes: make(map[string]*volumeEntry), + } + return nil +} + +// GetAllPVCForLVG returns slice of PVC belonging to selected LVMVolumeGroup resource. If such LVMVolumeGroup is not stored in the cache, returns an error. 
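+// Both Thick PVCs and Thin PVCs (from every thin pool of the LVG) are returned in one slice.
+//
+// A minimal usage sketch (the LVG name is illustrative):
+//
+//	pvcs, err := c.GetAllPVCForLVG("lvg-on-node-1")
+//	if err != nil {
+//		return err // the LVMVolumeGroup is not cached
+//	}
+//	for _, pvc := range pvcs {
+//		fmt.Printf("reserved: %s/%s\n", pvc.Namespace, pvc.Name)
+//	}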
+func (c *Cache) GetAllPVCForLVG(lvgName string) ([]*corev1.PersistentVolumeClaim, error) { + c.mtx.RLock() + defer c.mtx.RUnlock() + + lvgCh, found := c.lvgByName[lvgName] + if !found || lvgCh == nil { + err := fmt.Errorf("cache was not found for the LVMVolumeGroup %s", lvgName) + c.log.Error(err, fmt.Sprintf("[GetAllPVCForLVG] an error occurred while trying to get all PVC for the LVMVolumeGroup %s", lvgName)) + return nil, err + } + + result := make([]*corev1.PersistentVolumeClaim, 0, pvcPerLVGCount) + // collect Thick PVC for the LVG + for _, pe := range lvgCh.thickByPVC { + result = append(result, pe.pvc) + } + + // collect Thin PVC for the LVG + for _, tp := range lvgCh.thinByPool { + for _, pe := range tp.pvcs { + result = append(result, pe.pvc) + } + } + + return result, nil +} + +// GetAllThickPVCLVG returns slice of PVC belonging to selected LVMVolumeGroup resource. If such LVMVolumeGroup is not stored in the cache, returns an error. +func (c *Cache) GetAllThickPVCLVG(lvgName string) ([]*corev1.PersistentVolumeClaim, error) { + c.mtx.RLock() + defer c.mtx.RUnlock() + + lvgCh, found := c.lvgByName[lvgName] + if !found || lvgCh == nil { + err := fmt.Errorf("cache was not found for the LVMVolumeGroup %s", lvgName) + c.log.Error(err, fmt.Sprintf("[GetAllPVCForLVG] an error occurred while trying to get all PVC for the LVMVolumeGroup %s", lvgName)) + return nil, err + } + + result := make([]*corev1.PersistentVolumeClaim, 0, pvcPerLVGCount) + // collect Thick PVC for the LVG + for _, pe := range lvgCh.thickByPVC { + result = append(result, pe.pvc) + } + + return result, nil +} + +// GetAllPVCFromLVGThinPool returns slice of PVC belonging to selected LVMVolumeGroup resource. If such LVMVolumeGroup is not stored in the cache, returns an error. +func (c *Cache) GetAllPVCFromLVGThinPool(lvgName, thinPoolName string) ([]*corev1.PersistentVolumeClaim, error) { + c.mtx.RLock() + defer c.mtx.RUnlock() + + lvgCh, found := c.lvgByName[lvgName] + if !found || lvgCh == nil { + err := fmt.Errorf("cache was not found for the LVMVolumeGroup %s", lvgName) + c.log.Error(err, fmt.Sprintf("[GetAllPVCFromLVGThinPool] an error occurred while trying to get all PVC for the LVMVolumeGroup %s", lvgName)) + return nil, err + } + + thinPoolCh, found := lvgCh.thinByPool[thinPoolName] + if !found || thinPoolCh == nil { + c.log.Debug(fmt.Sprintf("[GetAllPVCFromLVGThinPool] no Thin pool %s in the LVMVolumeGroup %s was found. Returns nil slice", thinPoolName, lvgName)) + return nil, nil + } + + result := make([]*corev1.PersistentVolumeClaim, 0, pvcPerLVGCount) + for _, pe := range thinPoolCh.pvcs { + result = append(result, pe.pvc) + } + + return result, nil +} + +// GetLVGNamesForPVC returns a slice of LVMVolumeGroup resources names, where selected PVC has been stored in. If no such LVMVolumeGroup found, returns empty slice. +func (c *Cache) GetLVGNamesForPVC(pvc *corev1.PersistentVolumeClaim) []string { + pvcKey := configurePVCKey(pvc) + c.mtx.RLock() + defer c.mtx.RUnlock() + var result []string + for lvgName, entry := range c.lvgByName { + if _, ok := entry.thickByPVC[pvcKey]; ok { + result = append(result, lvgName) + continue + } + for _, tp := range entry.thinByPool { + if _, ok := tp.pvcs[pvcKey]; ok { + result = append(result, lvgName) + break + } + } + } + if len(result) == 0 { + c.log.Warning(fmt.Sprintf("[GetLVGNamesForPVC] no cached LVMVolumeGroups were found for PVC %s", pvcKey)) + return nil + } + return result +} + +// CheckIsPVCStored checks if selected PVC has been already stored in the cache. 
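+// The lookup key is "<namespace>/<name>", so PVCs that share a name across
+// namespaces are tracked independently.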
+func (c *Cache) CheckIsPVCStored(pvc *corev1.PersistentVolumeClaim) bool {
+	pvcKey := configurePVCKey(pvc)
+	c.mtx.RLock()
+	defer c.mtx.RUnlock()
+	for _, entry := range c.lvgByName {
+		if _, ok := entry.thickByPVC[pvcKey]; ok {
+			return true
+		}
+		for _, tp := range entry.thinByPool {
+			if _, ok := tp.pvcs[pvcKey]; ok {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// RemoveSpaceReservationForPVCWithSelectedNode removes the space reservation for the selected PVC from every LVMVolumeGroup resource that is not bound to the PVC's selected node.
+func (c *Cache) RemoveSpaceReservationForPVCWithSelectedNode(pvc *corev1.PersistentVolumeClaim, deviceType string) error {
+	pvcKey := configurePVCKey(pvc)
+	// the LVG which is used to store PVC
+	selectedLVGName := ""
+
+	c.mtx.Lock()
+	defer c.mtx.Unlock()
+
+	// Build list of LVG names for this PVC on the fly
+	lvgNamesForPVC := make([]string, 0, lvgsPerPVCDefaultCount)
+	for lvgName, entry := range c.lvgByName {
+		switch deviceType {
+		case consts.Thin:
+			for _, tp := range entry.thinByPool {
+				if _, ok := tp.pvcs[pvcKey]; ok {
+					lvgNamesForPVC = append(lvgNamesForPVC, lvgName)
+					break
+				}
+			}
+		case consts.Thick:
+			if _, ok := entry.thickByPVC[pvcKey]; ok {
+				lvgNamesForPVC = append(lvgNamesForPVC, lvgName)
+			}
+		}
+	}
+	if len(lvgNamesForPVC) == 0 {
+		c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] cache for PVC %s has already been removed", pvcKey))
+		return nil
+	}
+
+	for _, lvgName := range lvgNamesForPVC {
+		entry, found := c.lvgByName[lvgName]
+		if !found || entry == nil {
+			err := fmt.Errorf("no cache found for the LVMVolumeGroup %s", lvgName)
+			c.log.Error(err, fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] an error occurred while trying to remove space reservation for PVC %s", pvcKey))
+			return err
+		}
+
+		switch deviceType {
+		case consts.Thin:
+			for thinPoolName, thinPoolCh := range entry.thinByPool {
+				pvcCh, found := thinPoolCh.pvcs[pvcKey]
+				if !found {
+					c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s space reservation in the LVMVolumeGroup %s has already been removed", pvcKey, lvgName))
+					continue
+				}
+
+				selectedNode := pvcCh.selectedNode
+				if selectedNode == "" {
+					delete(thinPoolCh.pvcs, pvcKey)
+					c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] removed space reservation for PVC %s in the Thin pool %s of the LVMVolumeGroup %s because the PVC got selected to the node %s", pvcKey, thinPoolName, lvgName, pvc.Annotations[SelectedNodeAnnotation]))
+				} else {
+					selectedLVGName = lvgName
+					c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s got selected to the node %s. It should not be removed from the LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName))
+				}
+			}
+		case consts.Thick:
+			pvcCh, found := entry.thickByPVC[pvcKey]
+			if !found {
+				c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s space reservation in the LVMVolumeGroup %s has already been removed", pvcKey, lvgName))
+				continue
+			}
+
+			selectedNode := pvcCh.selectedNode
+			if selectedNode == "" {
+				delete(entry.thickByPVC, pvcKey)
+				c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] removed space reservation for PVC %s in the LVMVolumeGroup %s because the PVC got selected to the node %s", pvcKey, lvgName, pvc.Annotations[SelectedNodeAnnotation]))
+			} else {
+				selectedLVGName = lvgName
+				c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s got selected to the node %s. It should not be removed from the LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName))
+			}
+		}
+	}
+	c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s space reservation has been removed from LVMVolumeGroup cache", pvcKey))
+
+	c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] cache for PVC %s will be wiped from unused LVMVolumeGroups", pvcKey))
+	cleared := make([]string, 0, len(lvgNamesForPVC))
+	for _, lvgName := range lvgNamesForPVC {
+		if lvgName == selectedLVGName {
+			c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] the LVMVolumeGroup %s will be saved for PVC %s cache as used", lvgName, pvcKey))
+		} else {
+			c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] the LVMVolumeGroup %s will be removed from PVC %s cache as not used", lvgName, pvcKey))
+			cleared = append(cleared, lvgName)
+		}
+	}
+	c.log.Trace(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] cleared LVMVolumeGroups for PVC %s: %v", pvcKey, cleared))
+
+	return nil
+}
+
+// RemovePVCFromTheCache completely removes the selected PVC from the cache.
+func (c *Cache) RemovePVCFromTheCache(pvc *corev1.PersistentVolumeClaim) {
+	pvcKey := configurePVCKey(pvc)
+
+	c.log.Debug(fmt.Sprintf("[RemovePVCFromTheCache] run full cache wipe for PVC %s", pvcKey))
+	c.mtx.Lock()
+	defer c.mtx.Unlock()
+	for _, entry := range c.lvgByName {
+		// Remove PVC entry
+		delete(entry.thickByPVC, pvcKey)
+		for _, tp := range entry.thinByPool {
+			delete(tp.pvcs, pvcKey)
+		}
+	}
+}
+
+// AddThickVolume reserves space for a thick volume in the specified LVG.
+// If volume already exists, updates its size and creation time.
+func (c *Cache) AddThickVolume(lvgName string, volumeName string, size int64) error {
+	c.mtx.Lock()
+	defer c.mtx.Unlock()
+
+	entry, found := c.lvgByName[lvgName]
+	if !found || entry == nil {
+		return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName)
+	}
+
+	if existingVolume, exists := entry.thickByVolume[volumeName]; exists {
+		existingVolume.size = size
+		existingVolume.createdAt = time.Now()
+		c.log.Debug(fmt.Sprintf("[AddThickVolume] volume %s already exists in LVG %s, updated size to %d and reset creation time", volumeName, lvgName, size))
+	} else {
+		entry.thickByVolume[volumeName] = &volumeEntry{
+			size:      size,
+			createdAt: time.Now(),
+		}
+		c.log.Debug(fmt.Sprintf("[AddThickVolume] volume %s with size %d added to LVG %s", volumeName, size, lvgName))
+	}
+	return nil
+}
+
+// AddThinVolume reserves space for a thin volume in the specified LVG and thin pool.
+// If volume already exists, updates its size and creation time.
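+// The thin pool entry is created on demand if it is not cached yet.
+//
+// A minimal usage sketch (names and size are illustrative):
+//
+//	if err := c.AddThinVolume("lvg-1", "thin-pool-1", "pvc-123-volume", 10<<30); err != nil {
+//		return err // the LVMVolumeGroup is not cached
+//	}
+//	// release the reservation once it is no longer needed:
+//	c.RemoveThinVolume("lvg-1", "thin-pool-1", "pvc-123-volume")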
+func (c *Cache) AddThinVolume(lvgName string, thinPoolName string, volumeName string, size int64) error { + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + } + + err := c.addThinPoolIfNotExists(entry, thinPoolName) + if err != nil { + return err + } + + thinPoolCh := entry.thinByPool[thinPoolName] + if thinPoolCh == nil { + return fmt.Errorf("thin pool %s not found", thinPoolName) + } + + if existingVolume, exists := thinPoolCh.volumes[volumeName]; exists { + existingVolume.size = size + existingVolume.createdAt = time.Now() + c.log.Debug(fmt.Sprintf("[AddThinVolume] volume %s already exists in LVG %s Thin Pool %s, updated size to %d and reset creation time", volumeName, lvgName, thinPoolName, size)) + } else { + thinPoolCh.volumes[volumeName] = &volumeEntry{ + size: size, + createdAt: time.Now(), + } + c.log.Debug(fmt.Sprintf("[AddThinVolume] volume %s with size %d added to LVG %s Thin Pool %s", volumeName, size, lvgName, thinPoolName)) + } + return nil +} + +// RemoveThickVolume removes volume reservation for a thick volume +func (c *Cache) RemoveThickVolume(lvgName string, volumeName string) { + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + c.log.Debug(fmt.Sprintf("[RemoveThickVolume] LVG %s not found in cache", lvgName)) + return + } + + if _, exists := entry.thickByVolume[volumeName]; exists { + delete(entry.thickByVolume, volumeName) + c.log.Debug(fmt.Sprintf("[RemoveThickVolume] volume %s removed from LVG %s", volumeName, lvgName)) + } +} + +// RemoveThinVolume removes volume reservation for a thin volume +func (c *Cache) RemoveThinVolume(lvgName string, thinPoolName string, volumeName string) { + c.mtx.Lock() + defer c.mtx.Unlock() + + entry, found := c.lvgByName[lvgName] + if !found || entry == nil { + c.log.Debug(fmt.Sprintf("[RemoveThinVolume] LVG %s not found in cache", lvgName)) + return + } + + tp, found := entry.thinByPool[thinPoolName] + if !found || tp == nil { + c.log.Debug(fmt.Sprintf("[RemoveThinVolume] Thin pool %s not found in LVG %s", thinPoolName, lvgName)) + return + } + + if _, exists := tp.volumes[volumeName]; exists { + delete(tp.volumes, volumeName) + c.log.Debug(fmt.Sprintf("[RemoveThinVolume] volume %s removed from LVG %s Thin Pool %s", volumeName, lvgName, thinPoolName)) + } +} + +// FindLVGForPVCBySelectedNode finds a suitable LVMVolumeGroup resource's name for selected PVC based on selected node. If no such LVMVolumeGroup found, returns empty string. +func (c *Cache) FindLVGForPVCBySelectedNode(pvc *corev1.PersistentVolumeClaim, nodeName string) string { + pvcKey := configurePVCKey(pvc) + + c.mtx.RLock() + defer c.mtx.RUnlock() + + // Build a list of LVGs that contain the PVC + var lvgsForPVC []string + for lvgName, entry := range c.lvgByName { + if _, ok := entry.thickByPVC[pvcKey]; ok { + lvgsForPVC = append(lvgsForPVC, lvgName) + continue + } + for _, tp := range entry.thinByPool { + if _, ok := tp.pvcs[pvcKey]; ok { + lvgsForPVC = append(lvgsForPVC, lvgName) + break + } + } + } + if len(lvgsForPVC) == 0 { + c.log.Debug(fmt.Sprintf("[FindLVGForPVCBySelectedNode] no LVMVolumeGroups were found in the cache for PVC %s. 
Returns empty string", pvcKey)) + return "" + } + + // Build a set of LVGs that belong to node + nodeLVGs := make(map[string]struct{}, lvgsPerNodeCount) + for lvgName, entry := range c.lvgByName { + if lvgHasNode(entry.lvg, nodeName) { + nodeLVGs[lvgName] = struct{}{} + } + } + if len(nodeLVGs) == 0 { + c.log.Debug(fmt.Sprintf("[FindLVGForPVCBySelectedNode] no LVMVolumeGroups were found in the cache for the node %s. Returns empty string", nodeName)) + return "" + } + + var targetLVG string + for _, lvgName := range lvgsForPVC { + if _, ok := nodeLVGs[lvgName]; ok { + targetLVG = lvgName + break + } + } + + if targetLVG == "" { + c.log.Debug(fmt.Sprintf("[FindLVGForPVCBySelectedNode] no LVMVolumeGroup was found for PVC %s. Returns empty string", pvcKey)) + } + + return targetLVG +} + +// PrintTheCacheLog prints the logs with cache state. +func (c *Cache) PrintTheCacheLog() { + c.log.Cache("*******************CACHE BEGIN*******************") + c.log.Cache("[LVMVolumeGroups BEGIN]") + c.mtx.RLock() + for lvgName, lvgCh := range c.lvgByName { + c.log.Cache(fmt.Sprintf("[%s]", lvgName)) + + for pvcName, pvcCh := range lvgCh.thickByPVC { + c.log.Cache(fmt.Sprintf(" THICK PVC %s, selected node: %s", pvcName, pvcCh.selectedNode)) + } + + for thinPoolName, thinPoolCh := range lvgCh.thinByPool { + for pvcName, pvcCh := range thinPoolCh.pvcs { + c.log.Cache(fmt.Sprintf(" THIN POOL %s PVC %s, selected node: %s", thinPoolName, pvcName, pvcCh.selectedNode)) + } + } + } + c.mtx.RUnlock() + c.log.Cache("[LVMVolumeGroups ENDS]") + + c.log.Cache("[PVC and LVG BEGINS]") + // Build pvc -> lvgs mapping for printing purposes + c.mtx.RLock() + pvcToLvgs := make(map[string][]string) + for lvgName, entry := range c.lvgByName { + for pvcKey := range entry.thickByPVC { + pvcToLvgs[pvcKey] = append(pvcToLvgs[pvcKey], lvgName) + } + for _, tp := range entry.thinByPool { + for pvcKey := range tp.pvcs { + // deduplicate on append + if !containsString(pvcToLvgs[pvcKey], lvgName) { + pvcToLvgs[pvcKey] = append(pvcToLvgs[pvcKey], lvgName) + } + } + } + } + for pvcName, lvgs := range pvcToLvgs { + c.log.Cache(fmt.Sprintf("[PVC: %s]", pvcName)) + for _, lvgName := range lvgs { + c.log.Cache(fmt.Sprintf(" LVMVolumeGroup: %s", lvgName)) + } + } + c.mtx.RUnlock() + c.log.Cache("[PVC and LVG ENDS]") + + c.log.Cache("[Node and LVG BEGINS]") + // Build node -> lvgs mapping + c.mtx.RLock() + nodeToLvgs := make(map[string][]string) + for lvgName, entry := range c.lvgByName { + for _, n := range entry.lvg.Status.Nodes { + if !containsString(nodeToLvgs[n.Name], lvgName) { + nodeToLvgs[n.Name] = append(nodeToLvgs[n.Name], lvgName) + } + } + } + for nodeName, lvgs := range nodeToLvgs { + c.log.Cache(fmt.Sprintf("[Node: %s]", nodeName)) + for _, lvgName := range lvgs { + c.log.Cache(fmt.Sprintf(" LVMVolumeGroup name: %s", lvgName)) + } + } + c.mtx.RUnlock() + c.log.Cache("[Node and LVG ENDS]") + c.log.Cache("*******************CACHE END*******************") +} + +func configurePVCKey(pvc *corev1.PersistentVolumeClaim) string { + return fmt.Sprintf("%s/%s", pvc.Namespace, pvc.Name) +} + +func lvgHasNode(lvg *snc.LVMVolumeGroup, nodeName string) bool { + for _, n := range lvg.Status.Nodes { + if n.Name == nodeName { + return true + } + } + return false +} + +func lvgHasThinPool(lvg *snc.LVMVolumeGroup, thinPoolName string) bool { + for _, tp := range lvg.Status.ThinPools { + if tp.Name == thinPoolName { + return true + } + } + return false +} + +func containsString(slice []string, val string) bool { + for _, s := range slice { + if 
strings.EqualFold(s, val) { + return true + } + } + return false +} diff --git a/images/sds-common-scheduler-extender/pkg/cache/cache_test.go b/images/sds-common-scheduler-extender/pkg/cache/cache_test.go new file mode 100644 index 000000000..686c5b0e3 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/cache/cache_test.go @@ -0,0 +1,314 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "fmt" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func TestCache_ClearBoundExpiredPVC(t *testing.T) { + log := logger.Logger{} + const ( + lvgName = "lvg-name" + tpName = "thin-pool" + thickBoundExpiredPVC = "thick-bound-expired-pvc" + thickPendingExpiredPVC = "thick-pending-expired-pvc" + thickBoundFreshPVC = "thick-bound-not-expired-pvc" + thinBoundExpiredPVC = "thin-bound-expired-pvc" + thinPendingExpiredPVC = "thin-pending-expired-pvc" + thinBoundFreshPVC = "thin-bound-not-expired-pvc" + ) + + ch := NewCache(log, DefaultPVCExpiredDurationSec) + expiredTime := time.Now().Add((-DefaultPVCExpiredDurationSec - 1) * time.Second) + + // Seed internals directly to simulate already-reserved entries with various phases. 
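+	// Namespaces are left empty on purpose, so configurePVCKey yields keys of the
+	// form "/<name>", which is why the map keys below are built as "/"+name.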
+ ch.mtx.Lock() + ch.lvgByName[lvgName] = &lvgEntry{ + lvg: &snc.LVMVolumeGroup{ + ObjectMeta: metav1.ObjectMeta{Name: lvgName}, + }, + thickByPVC: make(map[string]*pvcEntry), + thinByPool: map[string]*thinPoolEntry{ + tpName: {pvcs: make(map[string]*pvcEntry)}, + }, + } + // Thick + ch.lvgByName[lvgName].thickByPVC["/"+thickBoundExpiredPVC] = &pvcEntry{ + pvc: &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: thickBoundExpiredPVC, CreationTimestamp: metav1.NewTime(expiredTime)}, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimBound}, + }, + } + ch.lvgByName[lvgName].thickByPVC["/"+thickPendingExpiredPVC] = &pvcEntry{ + pvc: &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: thickPendingExpiredPVC, CreationTimestamp: metav1.NewTime(expiredTime)}, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimPending}, + }, + } + ch.lvgByName[lvgName].thickByPVC["/"+thickBoundFreshPVC] = &pvcEntry{ + pvc: &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: thickBoundFreshPVC, CreationTimestamp: metav1.NewTime(time.Now())}, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimBound}, + }, + } + // Thin + ch.lvgByName[lvgName].thinByPool[tpName].pvcs["/"+thinBoundExpiredPVC] = &pvcEntry{ + pvc: &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: thinBoundExpiredPVC, CreationTimestamp: metav1.NewTime(expiredTime)}, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimBound}, + }, + } + ch.lvgByName[lvgName].thinByPool[tpName].pvcs["/"+thinPendingExpiredPVC] = &pvcEntry{ + pvc: &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: thinPendingExpiredPVC, CreationTimestamp: metav1.NewTime(expiredTime)}, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimPending}, + }, + } + ch.lvgByName[lvgName].thinByPool[tpName].pvcs["/"+thinBoundFreshPVC] = &pvcEntry{ + pvc: &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: thinBoundFreshPVC, CreationTimestamp: metav1.NewTime(time.Now())}, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimBound}, + }, + } + ch.mtx.Unlock() + + ch.clearBoundExpiredPVC() + + ch.mtx.RLock() + defer ch.mtx.RUnlock() + // Thick assertions + _, found := ch.lvgByName[lvgName].thickByPVC["/"+thickBoundExpiredPVC] + assert.False(t, found) + _, found = ch.lvgByName[lvgName].thickByPVC["/"+thickPendingExpiredPVC] + assert.True(t, found) + _, found = ch.lvgByName[lvgName].thickByPVC["/"+thickBoundFreshPVC] + assert.True(t, found) + // Thin assertions + _, found = ch.lvgByName[lvgName].thinByPool[tpName].pvcs["/"+thinBoundExpiredPVC] + assert.False(t, found) + _, found = ch.lvgByName[lvgName].thinByPool[tpName].pvcs["/"+thinPendingExpiredPVC] + assert.True(t, found) + _, found = ch.lvgByName[lvgName].thinByPool[tpName].pvcs["/"+thinBoundFreshPVC] + assert.True(t, found) +} + +func TestCache_ReservedSpace_PublicAPI(t *testing.T) { + log := logger.Logger{} + ch := NewCache(log, DefaultPVCExpiredDurationSec) + lvg := &snc.LVMVolumeGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "lvg-1"}, + Status: snc.LVMVolumeGroupStatus{ + ThinPools: []snc.LVMVolumeGroupThinPoolStatus{{Name: "tp-1"}}, + }, + } + ch.AddLVG(lvg) + + pvc1 := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-1", Namespace: "ns"}, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: *resource.NewQuantity(1<<20, resource.BinarySI)}, + }, + }, + 
Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimPending}, + } + pvc2 := pvc1.DeepCopy() + pvc2.Name = "pvc-2" + pvc2.Spec.Resources.Requests[corev1.ResourceStorage] = *resource.NewQuantity(2<<20, resource.BinarySI) + + assert.NoError(t, ch.AddThickPVC(lvg.Name, pvc1)) + assert.NoError(t, ch.AddThickPVC(lvg.Name, pvc2)) + sum, err := ch.GetLVGThickReservedSpace(lvg.Name) + assert.NoError(t, err) + assert.Equal(t, int64((1<<20)+(2<<20)), sum) + + // Thin + pvc3 := pvc1.DeepCopy() + pvc3.Name = "pvc-3" + pvc3.Spec.Resources.Requests[corev1.ResourceStorage] = *resource.NewQuantity(3<<20, resource.BinarySI) + assert.NoError(t, ch.AddThinPVC(lvg.Name, "tp-1", pvc1)) + assert.NoError(t, ch.AddThinPVC(lvg.Name, "tp-1", pvc3)) + thinSum, err := ch.GetLVGThinReservedSpace(lvg.Name, "tp-1") + assert.NoError(t, err) + assert.Equal(t, int64((1<<20)+(3<<20)), thinSum) +} + +func TestCache_UpdateLVG(t *testing.T) { + cache := NewCache(logger.Logger{}, DefaultPVCExpiredDurationSec) + name := "test-lvg" + lvg := &snc.LVMVolumeGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: resource.MustParse("1Gi"), + }, + } + cache.AddLVG(lvg) + + newLVG := &snc.LVMVolumeGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: resource.MustParse("2Gi"), + }, + } + + err := cache.UpdateLVG(newLVG) + if err != nil { + t.Error(err) + } + + updatedLvg := cache.TryGetLVG(name) + assert.Equal(t, newLVG.Status.AllocatedSize, updatedLvg.Status.AllocatedSize) +} + +// Concurrency/race-oriented tests per Go race detector guidance: +// https://go.dev/doc/articles/race_detector +func TestCache_Race_AddUpdateRead(t *testing.T) { + log := logger.Logger{} + ch := NewCache(log, DefaultPVCExpiredDurationSec) + + lvg := &snc.LVMVolumeGroup{ + ObjectMeta: metav1.ObjectMeta{Name: "lvg-race"}, + Status: snc.LVMVolumeGroupStatus{ + Nodes: []snc.LVMVolumeGroupNode{{Name: "node-1"}}, + ThinPools: []snc.LVMVolumeGroupThinPoolStatus{{Name: "tp-1"}}, + }, + } + ch.AddLVG(lvg) + + var wg sync.WaitGroup + start := make(chan struct{}) + + // Writers: add/update PVCs + for i := 0; i < 20; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + <-start + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("pvc-%d", i), + Namespace: "ns", + }, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: *resource.NewQuantity(int64(1<<20+i), resource.BinarySI)}, + }, + }, + Status: corev1.PersistentVolumeClaimStatus{Phase: corev1.ClaimPending}, + } + _ = ch.AddThickPVC(lvg.Name, pvc) + _ = ch.AddThinPVC(lvg.Name, "tp-1", pvc) + + // Update with selected node annotation + pvc.Annotations = map[string]string{SelectedNodeAnnotation: "node-1"} + _ = ch.UpdateThickPVC(lvg.Name, pvc) + _ = ch.UpdateThinPVC(lvg.Name, "tp-1", pvc) + }(i) + } + + // Readers: query functions + for i := 0; i < 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + <-start + _ = ch.GetAllLVG() + _ = ch.GetLVGNamesByNodeName("node-1") + _, _ = ch.GetAllPVCForLVG(lvg.Name) + _, _ = ch.GetLVGThickReservedSpace(lvg.Name) + _, _ = ch.GetLVGThinReservedSpace(lvg.Name, "tp-1") + }() + } + + // Removers + for i := 0; i < 10; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + <-start + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("pvc-%d", i), Namespace: "ns"}, + } + 
ch.RemovePVCFromTheCache(pvc) + }(i) + } + + close(start) + wg.Wait() +} + +func TestCache_Race_AddDeleteLVG_GetAll(t *testing.T) { + log := logger.Logger{} + ch := NewCache(log, DefaultPVCExpiredDurationSec) + + var wg sync.WaitGroup + start := make(chan struct{}) + + // Add/Update LVGs + for i := 0; i < 20; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + <-start + lvg := &snc.LVMVolumeGroup{ + ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("lvg-%d", i)}, + } + ch.AddLVG(lvg) + lvg.Status.AllocatedSize = resource.MustParse(fmt.Sprintf("%dGi", 1+i)) + _ = ch.UpdateLVG(lvg) + }(i) + } + + // Readers + for i := 0; i < 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + <-start + _ = ch.GetAllLVG() + }() + } + + // Deleters + for i := 0; i < 10; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + <-start + ch.DeleteLVG(fmt.Sprintf("lvg-%d", i)) + }(i) + } + + close(start) + wg.Wait() +} diff --git a/images/sds-common-scheduler-extender/pkg/consts/consts.go b/images/sds-common-scheduler-extender/pkg/consts/consts.go new file mode 100644 index 000000000..8af1a990d --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/consts/consts.go @@ -0,0 +1,46 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package consts + +const ( + SdsLocalVolumeProvisioner = "local.csi.storage.deckhouse.io" + SdsReplicatedVolumeProvisioner = "replicated.csi.storage.deckhouse.io" + + LvmTypeParamKey = "local.csi.storage.deckhouse.io/lvm-type" + LVMVolumeGroupsParamKey = "local.csi.storage.deckhouse.io/lvm-volume-groups" + + Thick = "Thick" + Thin = "Thin" + + // ReplicatedStorageClass VolumeAccess modes + VolumeAccessLocal = "Local" + VolumeAccessEventuallyLocal = "EventuallyLocal" + VolumeAccessPreferablyLocal = "PreferablyLocal" + VolumeAccessAny = "Any" + + // ReplicatedStorageClass Topology modes + TopologyTransZonal = "TransZonal" + TopologyZonal = "Zonal" + TopologyIgnored = "Ignored" + + // ReplicatedStoragePool Types + RSPTypeLVM = "LVM" // Thick volumes + RSPTypeLVMThin = "LVMThin" // Thin volumes + + // Labels for replicated volumes + LabelReplicatedNode = "storage.deckhouse.io/sds-replicated-volume-node" +) diff --git a/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache.go b/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache.go new file mode 100644 index 000000000..71785f2d4 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache.go @@ -0,0 +1,147 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "reflect" + + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +const ( + LVGWatcherCacheCtrlName = "lvg-watcher-cache-controller" +) + +func RunLVGWatcherCacheController( + mgr manager.Manager, + log logger.Logger, + cache *cache.Cache, +) (controller.Controller, error) { + log.Info("[RunLVGWatcherCacheController] starts the work") + + c, err := controller.New(LVGWatcherCacheCtrlName, mgr, controller.Options{ + Reconciler: reconcile.Func(func(_ context.Context, _ reconcile.Request) (reconcile.Result, error) { + return reconcile.Result{}, nil + }), + }) + if err != nil { + log.Error(err, "[RunCacheWatcherController] unable to create a controller") + return nil, err + } + + err = c.Watch(source.Kind(mgr.GetCache(), &snc.LVMVolumeGroup{}, handler.TypedFuncs[*snc.LVMVolumeGroup, reconcile.Request]{ + CreateFunc: func(_ context.Context, e event.TypedCreateEvent[*snc.LVMVolumeGroup], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] CreateFunc starts the cache reconciliation for the LVMVolumeGroup %s", e.Object.GetName())) + + lvg := e.Object + if lvg.DeletionTimestamp != nil { + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s should not be reconciled", lvg.Name)) + return + } + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] tries to get the LVMVolumeGroup %s from the cache", lvg.Name)) + existedLVG := cache.TryGetLVG(lvg.Name) + if existedLVG != nil { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s was found in the cache. It will be updated", lvg.Name)) + err := cache.UpdateLVG(lvg) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to update the LVMVolumeGroup %s in the cache", lvg.Name)) + } else { + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] cache was updated for the LVMVolumeGroup %s", lvg.Name)) + } + } else { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s was not found. 
It will be added to the cache", lvg.Name)) + cache.AddLVG(lvg) + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] cache was added for the LVMVolumeGroup %s", lvg.Name)) + } + + err = cache.ClearBoundPVCsFromLVG(lvg.Name) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to clear bound PVCs for the LVMVolumeGroup %s", lvg.Name)) + } + + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] cache for the LVMVolumeGroup %s was reconciled by CreateFunc", lvg.Name)) + }, + UpdateFunc: func(_ context.Context, e event.TypedUpdateEvent[*snc.LVMVolumeGroup], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunCacheWatcherController] UpdateFunc starts the cache reconciliation for the LVMVolumeGroup %s", e.ObjectNew.GetName())) + oldLvg := e.ObjectOld + newLvg := e.ObjectNew + err := cache.UpdateLVG(newLvg) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to update the LVMVolumeGroup %s cache", newLvg.Name)) + return + } + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] successfully updated the LVMVolumeGroup %s in the cache", newLvg.Name)) + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] starts to calculate the size difference for LVMVolumeGroup %s", newLvg.Name)) + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] old state LVMVolumeGroup %s has size %s", oldLvg.Name, oldLvg.Status.AllocatedSize.String())) + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] new state LVMVolumeGroup %s has size %s", newLvg.Name, newLvg.Status.AllocatedSize.String())) + + if !shouldReconcileLVG(oldLvg, newLvg) { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s should not be reconciled", newLvg.Name)) + return + } + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s should be reconciled by Update Func", newLvg.Name)) + + err = cache.ClearBoundPVCsFromLVG(newLvg.Name) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to clear bound PVCs for the LVMVolumeGroup %s", newLvg.Name)) + } + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] Update Func ends reconciliation the LVMVolumeGroup %s cache", newLvg.Name)) + }, + DeleteFunc: func(_ context.Context, e event.TypedDeleteEvent[*snc.LVMVolumeGroup], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunCacheWatcherController] DeleteFunc starts the cache reconciliation for the LVMVolumeGroup %s", e.Object.GetName())) + lvg := e.Object + cache.DeleteLVG(lvg.Name) + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] LVMVolumeGroup %s was deleted from the cache", lvg.Name)) + }, + }, + ), + ) + if err != nil { + log.Error(err, "[RunCacheWatcherController] unable to watch the events") + return nil, err + } + + return c, nil +} + +func shouldReconcileLVG(oldLVG, newLVG *snc.LVMVolumeGroup) bool { + if newLVG.DeletionTimestamp != nil { + return false + } + + if oldLVG.Status.AllocatedSize.Value() == newLVG.Status.AllocatedSize.Value() && + reflect.DeepEqual(oldLVG.Status.ThinPools, newLVG.Status.ThinPools) { + return false + } + + return true +} diff --git a/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache_test.go b/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache_test.go new file mode 100644 index 000000000..c1558b3b9 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache_test.go @@ -0,0 +1,131 @@ +/* +Copyright 2025 Flant JSC + +Licensed 
under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" +) + +func TestLVGWatcherCache(t *testing.T) { + t.Run("shouldReconcileLVG", func(t *testing.T) { + t.Run("deletion_timestamp_not_nil_returns_false", func(t *testing.T) { + lvg := &snc.LVMVolumeGroup{} + lvg.DeletionTimestamp = &v1.Time{} + + assert.False(t, shouldReconcileLVG(&snc.LVMVolumeGroup{}, lvg)) + }) + + t.Run("allocated_size_and_status_thin_pools_equal_returns_false", func(t *testing.T) { + size := resource.MustParse("1G") + thinPools := []snc.LVMVolumeGroupThinPoolStatus{ + { + Name: "thin", + ActualSize: resource.MustParse("1G"), + }, + } + oldLvg := &snc.LVMVolumeGroup{ + ObjectMeta: v1.ObjectMeta{ + Name: "first", + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: size, + ThinPools: thinPools, + }, + } + newLvg := &snc.LVMVolumeGroup{ + ObjectMeta: v1.ObjectMeta{ + Name: "first", + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: size, + ThinPools: thinPools, + }, + } + + assert.False(t, shouldReconcileLVG(oldLvg, newLvg)) + }) + + t.Run("allocated_size_not_equal_returns_true", func(t *testing.T) { + thinPools := []snc.LVMVolumeGroupThinPoolStatus{ + { + Name: "thin", + ActualSize: resource.MustParse("1G"), + }, + } + oldLvg := &snc.LVMVolumeGroup{ + ObjectMeta: v1.ObjectMeta{ + Name: "first", + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: resource.MustParse("1G"), + ThinPools: thinPools, + }, + } + newLvg := &snc.LVMVolumeGroup{ + ObjectMeta: v1.ObjectMeta{ + Name: "first", + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: resource.MustParse("2G"), + ThinPools: thinPools, + }, + } + + assert.True(t, shouldReconcileLVG(oldLvg, newLvg)) + }) + + t.Run("status_thin_pools_not_equal_returns_false", func(t *testing.T) { + size := resource.MustParse("1G") + oldLvg := &snc.LVMVolumeGroup{ + ObjectMeta: v1.ObjectMeta{ + Name: "first", + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: size, + ThinPools: []snc.LVMVolumeGroupThinPoolStatus{ + { + Name: "thin", + ActualSize: resource.MustParse("1G"), + }, + }, + }, + } + newLvg := &snc.LVMVolumeGroup{ + ObjectMeta: v1.ObjectMeta{ + Name: "first", + }, + Status: snc.LVMVolumeGroupStatus{ + AllocatedSize: size, + ThinPools: []snc.LVMVolumeGroupThinPoolStatus{ + { + Name: "thin", + ActualSize: resource.MustParse("2G"), + }, + }, + }, + } + + assert.True(t, shouldReconcileLVG(oldLvg, newLvg)) + }) + }) +} diff --git a/images/sds-common-scheduler-extender/pkg/controller/pvc_watcher_cache.go b/images/sds-common-scheduler-extender/pkg/controller/pvc_watcher_cache.go new file mode 100644 index 000000000..3fe6e7be7 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/controller/pvc_watcher_cache.go @@ -0,0 +1,215 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in 
compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "errors" + "fmt" + + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/client-go/util/workqueue" + "k8s.io/utils/strings/slices" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/scheduler" +) + +const ( + PVCWatcherCacheCtrlName = "pvc-watcher-cache-controller" +) + +func RunPVCWatcherCacheController( + mgr manager.Manager, + log logger.Logger, + schedulerCache *cache.Cache, +) error { + log.Info("[RunPVCWatcherCacheController] starts the work") + + c, err := controller.New(PVCWatcherCacheCtrlName, mgr, controller.Options{ + Reconciler: reconcile.Func(func(_ context.Context, _ reconcile.Request) (reconcile.Result, error) { + return reconcile.Result{}, nil + }), + }) + if err != nil { + log.Error(err, "[RunPVCWatcherCacheController] unable to create controller") + return err + } + + err = c.Watch(source.Kind(mgr.GetCache(), &corev1.PersistentVolumeClaim{}, handler.TypedFuncs[*corev1.PersistentVolumeClaim, reconcile.Request]{ + CreateFunc: func(ctx context.Context, e event.TypedCreateEvent[*corev1.PersistentVolumeClaim], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info("[RunPVCWatcherCacheController] CreateFunc reconciliation starts") + pvc := e.Object + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] CreateFunc starts the reconciliation for the PVC %s/%s", pvc.Namespace, pvc.Name)) + + if pvc.Annotations == nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by CreateFunc due to annotations is nil", pvc.Namespace, pvc.Name)) + return + } + + selectedNodeName, wasSelected := pvc.Annotations[cache.SelectedNodeAnnotation] + if !wasSelected || pvc.Status.Phase == corev1.ClaimBound || pvc.DeletionTimestamp != nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by CreateFunc due to no selected node annotation found or deletion timestamp is not nil or status phase is bound", pvc.Namespace, pvc.Name)) + return + } + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has selected node annotation, it will be reconciled in CreateFunc", pvc.Namespace, pvc.Name)) + log.Trace(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has been selected to the node %s", pvc.Namespace, pvc.Name, selectedNodeName)) + + reconcilePVC(ctx, mgr, log, schedulerCache, pvc, selectedNodeName) + log.Info("[RunPVCWatcherCacheController] CreateFunc 
reconciliation ends") + }, + UpdateFunc: func(ctx context.Context, e event.TypedUpdateEvent[*corev1.PersistentVolumeClaim], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info("[RunPVCWatcherCacheController] Update Func reconciliation starts") + pvc := e.ObjectNew + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] UpdateFunc starts the reconciliation for the PVC %s/%s", pvc.Namespace, pvc.Name)) + + if pvc.Annotations == nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by UpdateFunc due to annotations is nil", pvc.Namespace, pvc.Name)) + return + } + + selectedNodeName, wasSelected := pvc.Annotations[cache.SelectedNodeAnnotation] + if !wasSelected || pvc.DeletionTimestamp != nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by UpdateFunc due to no selected node annotation found or deletion timestamp is not nil", pvc.Namespace, pvc.Name)) + return + } + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has selected node annotation, it will be reconciled in UpdateFunc", pvc.Namespace, pvc.Name)) + log.Trace(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has been selected to the node %s", pvc.Namespace, pvc.Name, selectedNodeName)) + + reconcilePVC(ctx, mgr, log, schedulerCache, pvc, selectedNodeName) + log.Info("[RunPVCWatcherCacheController] Update Func reconciliation ends") + }, + DeleteFunc: func(_ context.Context, e event.TypedDeleteEvent[*corev1.PersistentVolumeClaim], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info("[RunPVCWatcherCacheController] Delete Func reconciliation starts") + pvc := e.Object + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] DeleteFunc starts the reconciliation for the PVC %s/%s", pvc.Namespace, pvc.Name)) + + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s was removed from the cluster. It will be fully removed from the cache", pvc.Namespace, pvc.Name)) + schedulerCache.RemovePVCFromTheCache(pvc) + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] successfully fully removed PVC %s/%s from the cache", pvc.Namespace, pvc.Name)) + }, + }, + ), + ) + if err != nil { + log.Error(err, "[RunPVCWatcherCacheController] unable to controller Watch") + return err + } + + return nil +} + +func reconcilePVC(ctx context.Context, mgr manager.Manager, log logger.Logger, schedulerCache *cache.Cache, pvc *corev1.PersistentVolumeClaim, selectedNodeName string) { + sc := &storagev1.StorageClass{} + err := mgr.GetClient().Get(ctx, client.ObjectKey{ + Name: *pvc.Spec.StorageClassName, + }, sc) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to get Storage Class %s for PVC %s/%s", *pvc.Spec.StorageClassName, pvc.Namespace, pvc.Name)) + return + } + + if sc.Provisioner != consts.SdsLocalVolumeProvisioner { + log.Debug(fmt.Sprintf("[reconcilePVC] Storage Class %s for PVC %s/%s is not managed by sds-local-volume-provisioner. 
Ends the reconciliation", sc.Name, pvc.Namespace, pvc.Name)) + return + } + + log.Debug(fmt.Sprintf("[reconcilePVC] tries to extract LVGs from the Storage Class %s for PVC %s/%s", sc.Name, pvc.Namespace, pvc.Name)) + lvgsFromSc, err := scheduler.ExtractLVGsFromSC(sc) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to extract LVMVolumeGroups from the Storage Class %s", sc.Name)) + } + log.Debug(fmt.Sprintf("[reconcilePVC] successfully extracted LVGs from the Storage Class %s for PVC %s/%s", sc.Name, pvc.Namespace, pvc.Name)) + + lvgsForPVC := schedulerCache.GetLVGNamesForPVC(pvc) + if len(lvgsForPVC) == 0 { + log.Debug(fmt.Sprintf("[reconcilePVC] no LVMVolumeGroups were found in the cache for PVC %s/%s. Use Storage Class %s instead", pvc.Namespace, pvc.Name, *pvc.Spec.StorageClassName)) + + for _, lvg := range lvgsFromSc { + lvgsForPVC = append(lvgsForPVC, lvg.Name) + } + } + for _, lvgName := range lvgsForPVC { + log.Trace(fmt.Sprintf("[reconcilePVC] LVMVolumeGroup %s belongs to PVC %s/%s", lvgName, pvc.Namespace, pvc.Name)) + } + + log.Debug(fmt.Sprintf("[reconcilePVC] starts to find common LVMVolumeGroup for the selected node %s and PVC %s/%s", selectedNodeName, pvc.Namespace, pvc.Name)) + lvgsOnTheNode := schedulerCache.GetLVGNamesByNodeName(selectedNodeName) + for _, lvgName := range lvgsOnTheNode { + log.Trace(fmt.Sprintf("[reconcilePVC] LVMVolumeGroup %s belongs to the node %s", lvgName, selectedNodeName)) + } + + var commonLVGName string + for _, pvcLvg := range lvgsForPVC { + if slices.Contains(lvgsOnTheNode, pvcLvg) { + commonLVGName = pvcLvg + break + } + } + if commonLVGName == "" { + log.Error(errors.New("common LVMVolumeGroup was not found"), fmt.Sprintf("[reconcilePVC] unable to identify a LVMVolumeGroup for PVC %s/%s", pvc.Namespace, pvc.Name)) + return + } + + log.Debug(fmt.Sprintf("[reconcilePVC] successfully found common LVMVolumeGroup %s for the selected node %s and PVC %s/%s", commonLVGName, selectedNodeName, pvc.Namespace, pvc.Name)) + log.Debug(fmt.Sprintf("[reconcilePVC] starts to update PVC %s/%s in the cache", pvc.Namespace, pvc.Name)) + + log.Trace(fmt.Sprintf("[reconcilePVC] %s PVC %s/%s has status phase: %s", sc.Parameters[consts.LvmTypeParamKey], pvc.Namespace, pvc.Name, pvc.Status.Phase)) + switch sc.Parameters[consts.LvmTypeParamKey] { + case consts.Thick: + err = schedulerCache.UpdateThickPVC(commonLVGName, pvc) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to update Thick PVC %s/%s in the cache", pvc.Namespace, pvc.Name)) + return + } + case consts.Thin: + for _, lvg := range lvgsFromSc { + if lvg.Name == commonLVGName { + err = schedulerCache.UpdateThinPVC(commonLVGName, lvg.Thin.PoolName, pvc) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to update Thin PVC %s/%s in the cache", pvc.Namespace, pvc.Name)) + return + } + break + } + } + } + log.Debug(fmt.Sprintf("[reconcilePVC] successfully updated %s PVC %s/%s in the cache", sc.Parameters[consts.LvmTypeParamKey], pvc.Namespace, pvc.Name)) + + log.Cache(fmt.Sprintf("[reconcilePVC] cache state BEFORE the removal space reservation for PVC %s/%s", pvc.Namespace, pvc.Name)) + schedulerCache.PrintTheCacheLog() + log.Debug(fmt.Sprintf("[reconcilePVC] starts to remove space reservation for PVC %s/%s with selected node from the cache", pvc.Namespace, pvc.Name)) + err = schedulerCache.RemoveSpaceReservationForPVCWithSelectedNode(pvc, sc.Parameters[consts.LvmTypeParamKey]) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to 
remove PVC %s/%s space reservation in the cache", pvc.Namespace, pvc.Name)) + return + } + log.Debug(fmt.Sprintf("[reconcilePVC] successfully removed space reservation for PVC %s/%s with selected node", pvc.Namespace, pvc.Name)) + + log.Cache(fmt.Sprintf("[reconcilePVC] cache state AFTER the removal space reservation for PVC %s/%s", pvc.Namespace, pvc.Name)) + schedulerCache.PrintTheCacheLog() +} diff --git a/images/sds-common-scheduler-extender/pkg/kubutils/kubernetes.go b/images/sds-common-scheduler-extender/pkg/kubutils/kubernetes.go new file mode 100644 index 000000000..f5bba2747 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/kubutils/kubernetes.go @@ -0,0 +1,38 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubutils + +import ( + "fmt" + + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +func KubernetesDefaultConfigCreate() (*rest.Config, error) { + clientConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( + clientcmd.NewDefaultClientConfigLoadingRules(), + &clientcmd.ConfigOverrides{}, + ) + + // Get a config to talk to API server + config, err := clientConfig.ClientConfig() + if err != nil { + return nil, fmt.Errorf("config kubernetes error %w", err) + } + return config, nil +} diff --git a/images/sds-common-scheduler-extender/pkg/logger/logger.go b/images/sds-common-scheduler-extender/pkg/logger/logger.go new file mode 100644 index 000000000..158367906 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/logger/logger.go @@ -0,0 +1,104 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package logger + +import ( + "strconv" + + "github.com/go-logr/logr" + "k8s.io/klog/v2/textlogger" +) + +const ( + ErrorLevel Verbosity = "0" + WarningLevel Verbosity = "1" + InfoLevel Verbosity = "2" + DebugLevel Verbosity = "3" + TraceLevel Verbosity = "4" + CacheLevel Verbosity = "5" +) + +const ( + warnLvl = iota + 1 + infoLvl + debugLvl + traceLvl + cacheLvl +) + +type ( + Verbosity string +) + +type Logger struct { + log logr.Logger +} + +func NewLogger(level Verbosity) (Logger, error) { + v, err := strconv.Atoi(string(level)) + if err != nil { + return Logger{}, err + } + + log := textlogger.NewLogger(textlogger.NewConfig(textlogger.Verbosity(v))).WithCallDepth(1) + + return Logger{log: log}, nil +} + +func NewLoggerWrap(log logr.Logger) Logger { + return Logger{log: log} +} + +// WithName creates a new Logger instance with an additional name component. 
+// The name is used to identify the source of log messages. +func (l Logger) WithName(name string) Logger { + return NewLoggerWrap(l.GetLogger().WithName(name)) +} + +// WithValues creates a new Logger instance with additional key-value pairs. +// These key-value pairs will be included in all subsequent log messages from this logger. +func (l Logger) WithValues(keysAndValues ...any) Logger { + return NewLoggerWrap(l.GetLogger().WithValues(keysAndValues...)) +} + +func (l Logger) GetLogger() logr.Logger { + return l.log +} + +func (l Logger) Error(err error, message string, keysAndValues ...interface{}) { + l.log.WithValues("level", "ERROR").Error(err, message, keysAndValues...) +} + +func (l Logger) Warning(message string, keysAndValues ...interface{}) { + l.log.V(warnLvl).WithValues("level", "WARNING").Info(message, keysAndValues...) +} + +func (l Logger) Info(message string, keysAndValues ...interface{}) { + l.log.V(infoLvl).WithValues("level", "INFO").Info(message, keysAndValues...) +} + +func (l Logger) Debug(message string, keysAndValues ...interface{}) { + l.log.V(debugLvl).WithValues("level", "DEBUG").Info(message, keysAndValues...) +} + +func (l Logger) Trace(message string, keysAndValues ...interface{}) { + l.log.V(traceLvl).WithValues("level", "TRACE").Info(message, keysAndValues...) +} + +func (l Logger) Cache(message string, keysAndValues ...interface{}) { + l.log.V(cacheLvl).WithValues("level", "CACHE").Info(message, keysAndValues...) +} diff --git a/images/sds-common-scheduler-extender/pkg/logger/traceid.go b/images/sds-common-scheduler-extender/pkg/logger/traceid.go new file mode 100644 index 000000000..074562c02 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/logger/traceid.go @@ -0,0 +1,68 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package logger + +import ( + "context" + "crypto/rand" + "encoding/hex" + "fmt" + "time" +) + +type traceIDKey struct{} + +const traceIDKeyName = "traceid" + +// GenerateTraceID generates a unique trace ID. +func GenerateTraceID() string { + b := make([]byte, 8) + if _, err := rand.Read(b); err != nil { + // Fallback: use current time in nanoseconds as ID if random generation fails + return fmt.Sprintf("%d", time.Now().UnixNano()) + } + return hex.EncodeToString(b) +} + +// WithTraceID adds a trace ID to the context. +func WithTraceID(ctx context.Context, traceID string) context.Context { + return context.WithValue(ctx, traceIDKey{}, traceID) +} + +// TraceIDFromContext extracts the trace ID from the context. +// Returns empty string if trace ID is not found. +func TraceIDFromContext(ctx context.Context) string { + if ctx == nil { + return "" + } + traceID, ok := ctx.Value(traceIDKey{}).(string) + if !ok { + return "" + } + return traceID +} + +// WithTraceIDLogger returns a logger with trace ID from context added as a value. +// If trace ID is not found in context, returns the original logger. 
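+//
+// A minimal usage sketch (the incoming *http.Request r and the base logger
+// baseLog are assumed to exist in the caller; they are not part of this package):
+//
+//	traceID := GenerateTraceID()
+//	ctx := WithTraceID(r.Context(), traceID)
+//	log := WithTraceIDLogger(ctx, baseLog)
+//	log.Info("serving request")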
+func WithTraceIDLogger(ctx context.Context, log Logger) Logger { + traceID := TraceIDFromContext(ctx) + if traceID == "" { + return log + } + return log.WithValues(traceIDKeyName, traceID) +} + diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/api.go b/images/sds-common-scheduler-extender/pkg/scheduler/api.go new file mode 100644 index 000000000..5e16be2f9 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/api.go @@ -0,0 +1,93 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + apiv1 "k8s.io/api/core/v1" +) + +// ExtenderArgs is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#ExtenderArgs +type ExtenderArgs struct { + // Pod being scheduled + Pod *apiv1.Pod `json:"pod"` + // List of candidate nodes where the pod can be scheduled; to be populated + // only if ExtenderConfig.NodeCacheCapable == false + Nodes *apiv1.NodeList `json:"nodes,omitempty"` + // List of candidate node names where the pod can be scheduled; to be + // populated only if ExtenderConfig.NodeCacheCapable == true + NodeNames *[]string `json:"nodenames,omitempty"` +} + +// HostPriority is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#HostPriority +type HostPriority struct { + // Name of the host + Host string `json:"host"` + // Score associated with the host + Score int `json:"score"` +} + +// HostPriorityList is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#HostPriorityList +type HostPriorityList []HostPriority + +// ExtenderFilterResult is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#ExtenderFilterResult +type ExtenderFilterResult struct { + // Filtered set of nodes where the pod can be scheduled; to be populated + // only if ExtenderConfig.NodeCacheCapable == false + Nodes *apiv1.NodeList `json:"nodes,omitempty"` + // Filtered set of nodes where the pod can be scheduled; to be populated + // only if ExtenderConfig.NodeCacheCapable == true + NodeNames *[]string `json:"nodenames,omitempty"` + // Filtered out nodes where the pod can't be scheduled and the failure messages + FailedNodes FailedNodesMap `json:"failedNodes,omitempty"` + // Error message indicating failure + Error string `json:"error,omitempty"` +} + +// FailedNodesMap is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#FailedNodesMap +type FailedNodesMap map[string]string + +// FilterPrioritizeRequest is the request structure for the filter-prioritize endpoint +type FilterPrioritizeRequest struct { + LVGs []LVGInput `json:"lvgs"` + Volume VolumeInput `json:"volume"` +} + +// LVGInput represents an LVG input in the filter-prioritize request +type LVGInput struct { + Name string `json:"name"` + ThinPoolName string `json:"thinPoolName,omitempty"` // required for thin volumes, can be empty for thick +} + +// VolumeInput represents volume information in the filter-prioritize request +type VolumeInput struct { + Name string `json:"name"` // volume name (used for reservation) + Size 
int64 `json:"size"` // size in bytes + Type string `json:"type"` // "thin" or "thick" +} + +// FilterPrioritizeResponse is the response structure for the filter-prioritize endpoint +type FilterPrioritizeResponse struct { + LVGs []LVGScore `json:"lvgs"` + Error string `json:"error,omitempty"` +} + +// LVGScore represents a scored LVG in the filter-prioritize response +type LVGScore struct { + Name string `json:"name"` + Score int `json:"score"` + // thinPoolName is not needed in response, as client knows it from request +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/filter.go b/images/sds-common-scheduler-extender/pkg/scheduler/filter.go new file mode 100644 index 000000000..b7cc0227c --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/filter.go @@ -0,0 +1,521 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "strings" + "sync" + + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/strings/slices" + "sigs.k8s.io/controller-runtime/pkg/client" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func (s *scheduler) filter(w http.ResponseWriter, r *http.Request) { + servingLog := logger.WithTraceIDLogger(r.Context(), s.log).WithName("filter") + + servingLog.Debug("starts the serving the request") + + var inputData ExtenderArgs + reader := http.MaxBytesReader(w, r.Body, 10<<20) // 10MB + err := json.NewDecoder(reader).Decode(&inputData) + if err != nil { + servingLog.Error(err, "unable to decode a request") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Trace(fmt.Sprintf("input data: %+v", inputData)) + + if inputData.Pod == nil { + servingLog.Error(errors.New("no pod in the request"), "unable to get a Pod from the request") + http.Error(w, "bad request", http.StatusBadRequest) + return + } + + servingLog = servingLog.WithValues("Pod", fmt.Sprintf("%s/%s", inputData.Pod.Namespace, inputData.Pod.Name)) + + nodeNames, err := getNodeNames(inputData) + if err != nil { + servingLog.Error(err, "unable to get node names from the request") + http.Error(w, "bad request", http.StatusBadRequest) + return + } + servingLog.Trace(fmt.Sprintf("NodeNames from the request: %+v", nodeNames)) + + managedPVCs, err := getManagedPVCsFromPod(s.ctx, s.client, servingLog, inputData.Pod, s.targetProvisioners) + if err != nil { + servingLog.Error(err, "unable to get managed PVCs from the Pod") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + if len(managedPVCs) == 0 { + servingLog.Debug("Pod uses PVCs which are not 
managed by our modules. Return the same nodes") + if err := writeNodeNamesResponse(w, servingLog, nodeNames); err != nil { + servingLog.Error(err, "unable to write node names response") + http.Error(w, "internal server error", http.StatusInternalServerError) + } + return + } + for _, pvc := range managedPVCs { + servingLog.Trace(fmt.Sprintf("managed PVC: %s", pvc.Name)) + + // this might happen when the extender-scheduler recovers after failure, populates the cache with PVC-watcher controller and then + // the kube scheduler post a request to schedule the pod with the PVC. + if s.cache.CheckIsPVCStored(pvc) { + servingLog.Debug(fmt.Sprintf("PVC %s/%s has been already stored in the cache. Old state will be removed from the cache", pvc.Namespace, pvc.Name)) + s.cache.RemovePVCFromTheCache(pvc) + } else { + servingLog.Debug(fmt.Sprintf("PVC %s/%s was not found in the scheduler cache", pvc.Namespace, pvc.Name)) + } + } + + scUsedByPVCs, err := getStorageClassesUsedByPVCs(s.ctx, s.client, managedPVCs) + if err != nil { + servingLog.Error(err, "unable to get StorageClasses from the PVC") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + for _, sc := range scUsedByPVCs { + servingLog.Trace(fmt.Sprintf("Pod uses StorageClass: %s", sc.Name)) + } + if len(scUsedByPVCs) != len(managedPVCs) { + servingLog.Error(errors.New("number of StorageClasses does not match the number of PVCs"), "unable to get StorageClasses from the PVC") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + + servingLog.Debug("starts to extract PVC requested sizes") + pvcRequests, err := extractRequestedSize(s.ctx, s.client, servingLog, managedPVCs, scUsedByPVCs) + if err != nil { + servingLog.Error(err, "unable to extract request size") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + if len(pvcRequests) == 0 { + servingLog.Debug("No PVC requests found. 
Return the same nodes") + if err := writeNodeNamesResponse(w, servingLog, nodeNames); err != nil { + servingLog.Error(err, "unable to write node names response") + http.Error(w, "internal server error", http.StatusInternalServerError) + } + return + } + servingLog.Trace(fmt.Sprintf("PVC requests: %+v", pvcRequests)) + servingLog.Debug("successfully extracted the PVC requested sizes") + + // Check if there are replicated PVCs that require node information + var nodes map[string]*corev1.Node + if hasReplicatedPVCs(managedPVCs, scUsedByPVCs) { + servingLog.Debug("Pod has replicated PVCs, fetching node information") + nodes, err = getNodes(s.ctx, s.client, nodeNames) + if err != nil { + servingLog.Error(err, "unable to get nodes") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + } + + servingLog.Debug("starts to filter the nodes from the request") + filteredNodes, err := filterNodes(servingLog, s.ctx, s.client, s.cache, &nodeNames, nodes, inputData.Pod, managedPVCs, scUsedByPVCs, pvcRequests) + if err != nil { + servingLog.Error(err, "unable to filter the nodes") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Debug("successfully filtered the nodes from the request") + + servingLog.Debug("starts to populate the cache") + servingLog.Cache("cache before the PVC reservation") + s.cache.PrintTheCacheLog() + err = populateCache(servingLog, filteredNodes.NodeNames, inputData.Pod, s.cache, managedPVCs, scUsedByPVCs) + if err != nil { + servingLog.Error(err, "unable to populate cache") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Debug("successfully populated the cache") + servingLog.Cache("cache after the PVC reservation") + s.cache.PrintTheCacheLog() + + // Log response body at DEBUG level + responseJSON, err := json.Marshal(filteredNodes) + if err != nil { + servingLog.Error(err, "unable to marshal response") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Debug(fmt.Sprintf("response: %s", string(responseJSON))) + + w.Header().Set("content-type", "application/json") + _, err = w.Write(responseJSON) + if err != nil { + servingLog.Error(err, "unable to write response") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + + servingLog.Debug("ends the serving the request") +} + +func writeNodeNamesResponse(w http.ResponseWriter, log logger.Logger, nodeNames []string) error { + filteredNodes := &ExtenderFilterResult{ + NodeNames: &nodeNames, + } + log.Trace(fmt.Sprintf("filtered nodes: %+v", filteredNodes)) + + // Log response body at DEBUG level + responseJSON, err := json.Marshal(filteredNodes) + if err != nil { + return err + } + log.Debug(fmt.Sprintf("response: %s", string(responseJSON))) + + w.Header().Set("content-type", "application/json") + if _, err := w.Write(responseJSON); err != nil { + return err + } + return nil +} + +func filterNodes( + log logger.Logger, + ctx context.Context, + cl client.Client, + schedulerCache *cache.Cache, + nodeNames *[]string, + nodes map[string]*corev1.Node, + pod *corev1.Pod, + managedPVCs map[string]*corev1.PersistentVolumeClaim, + scUsedByPVCs map[string]*storagev1.StorageClass, + pvcRequests map[string]PVCRequest, +) (*ExtenderFilterResult, error) { + allLVGs := schedulerCache.GetAllLVG() + for _, lvg := range allLVGs { + log.Trace(fmt.Sprintf("[filterNodes] LVMVolumeGroup %s in the cache", lvg.Name)) + } + + // Separate PVCs by 
provisioner + localPVCs := filterPVCsByProvisioner(managedPVCs, scUsedByPVCs, consts.SdsLocalVolumeProvisioner) + replicatedPVCs := filterPVCsByProvisioner(managedPVCs, scUsedByPVCs, consts.SdsReplicatedVolumeProvisioner) + + log.Debug(fmt.Sprintf("[filterNodes] local PVCs count: %d, replicated PVCs count: %d", len(localPVCs), len(replicatedPVCs))) + + // Get LVGs from StorageClasses only for local PVCs + var scLVGs map[string]LVMVolumeGroups + var usedLVGs map[string]*snc.LVMVolumeGroup + var nodeLVGs map[string][]*snc.LVMVolumeGroup + var err error + + if len(localPVCs) > 0 { + log.Debug("[filterNodes] starts to get LVMVolumeGroups for local StorageClasses") + localSCs := make(map[string]*storagev1.StorageClass) + for _, pvc := range localPVCs { + if pvc.Spec.StorageClassName != nil { + if sc, exists := scUsedByPVCs[*pvc.Spec.StorageClassName]; exists { + localSCs[sc.Name] = sc + } + } + } + + scLVGs, err = GetLVGsFromStorageClasses(localSCs) + if err != nil { + return nil, err + } + log.Debug("[filterNodes] successfully got LVMVolumeGroups for local StorageClasses") + for scName, lvmVolumeGroups := range scLVGs { + for _, lvg := range lvmVolumeGroups { + log.Trace(fmt.Sprintf("[filterNodes] LVMVolumeGroup %s belongs to StorageClass %s", lvg.Name, scName)) + } + } + + // list of LVMVolumeGroups which are used by the local PVCs + usedLVGs = RemoveUnusedLVGs(allLVGs, scLVGs) + for _, lvg := range usedLVGs { + log.Trace(fmt.Sprintf("[filterNodes] the LVMVolumeGroup %s is actually used. VG size: %s, allocatedSize: %s", lvg.Name, lvg.Status.VGSize.String(), lvg.Status.AllocatedSize.String())) + } + + // get the LVMVolumeGroups by node name + nodeLVGs = LVMVolumeGroupsByNodeName(usedLVGs) + for n, ls := range nodeLVGs { + for _, l := range ls { + log.Trace(fmt.Sprintf("[filterNodes] the LVMVolumeGroup %s belongs to node %s", l.Name, n)) + } + } + } + + result := &ExtenderFilterResult{ + NodeNames: &[]string{}, + FailedNodes: FailedNodesMap{}, + } + + failedNodesMapMtx := &sync.Mutex{} + resultNodesMtx := &sync.Mutex{} + + wg := &sync.WaitGroup{} + wg.Add(len(*nodeNames)) + errs := make(chan error, len(*nodeNames)*len(managedPVCs)) + + for i, nodeName := range *nodeNames { + go func(i int, nodeName string) { + log.Trace(fmt.Sprintf("[filterNodes] goroutine %d starts the work with node %s", i, nodeName)) + defer func() { + log.Trace(fmt.Sprintf("[filterNodes] goroutine %d ends the work with node %s", i, nodeName)) + wg.Done() + }() + + var failReasons []string + + // === Filter for LOCAL PVCs === + if len(localPVCs) > 0 { + ok, reason := filterNodeForLocalPVCs(log, schedulerCache, allLVGs, nodeName, nodeLVGs, localPVCs, scLVGs, pvcRequests) + if !ok && reason != "" { + failReasons = append(failReasons, fmt.Sprintf("[local] %s", reason)) + } + } + + // === Filter for REPLICATED PVCs === + if len(replicatedPVCs) > 0 { + node := nodes[nodeName] + if node == nil { + failReasons = append(failReasons, fmt.Sprintf("[replicated] node %s not found", nodeName)) + } else { + ok, reason := filterNodeForReplicatedPVCs(log, ctx, cl, schedulerCache, nodeName, node, replicatedPVCs, scUsedByPVCs, pvcRequests) + if !ok && reason != "" { + failReasons = append(failReasons, fmt.Sprintf("[replicated] %s", reason)) + } + } + } + + if len(failReasons) > 0 { + failedNodesMapMtx.Lock() + result.FailedNodes[nodeName] = strings.Join(failReasons, "; ") + failedNodesMapMtx.Unlock() + return + } + + resultNodesMtx.Lock() + *result.NodeNames = append(*result.NodeNames, nodeName) + resultNodesMtx.Unlock() + }(i, nodeName) + } 
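+	// Each goroutine above records its verdict under resultNodesMtx or
+	// failedNodesMapMtx, so once wg.Wait() below returns, result can be read
+	// without further locking.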
+ wg.Wait() + log.Debug("[filterNodes] goroutines work is done") + if len(errs) != 0 { + for err = range errs { + log.Error(err, "[filterNodes] an error occurs while filtering the nodes") + } + } + close(errs) + if err != nil { + log.Error(err, fmt.Sprintf("[filterNodes] unable to filter nodes for the Pod, last error: %s", err.Error())) + return nil, err + } + + for _, nodeName := range *result.NodeNames { + log.Trace(fmt.Sprintf("[filterNodes] for a Pod there is a suitable node: %s", nodeName)) + } + + for node, reason := range result.FailedNodes { + log.Trace(fmt.Sprintf("[filterNodes] for a Pod there is a failed node: %s, reason: %s", node, reason)) + } + + return result, nil +} + +// filterNodeForLocalPVCs filters node for local PVCs +func filterNodeForLocalPVCs( + log logger.Logger, + schedulerCache *cache.Cache, + allLVGs map[string]*snc.LVMVolumeGroup, + nodeName string, + nodeLVGs map[string][]*snc.LVMVolumeGroup, + localPVCs map[string]*corev1.PersistentVolumeClaim, + scLVGs map[string]LVMVolumeGroups, + pvcRequests map[string]PVCRequest, +) (bool, string) { + // if the node does not have any LVMVolumeGroups from used StorageClasses, then this node is not suitable + lvgsFromNode, exists := nodeLVGs[nodeName] + if !exists { + log.Debug(fmt.Sprintf("[filterNodeForLocalPVCs] node %s does not have any LVMVolumeGroups from used StorageClasses", nodeName)) + return false, fmt.Sprintf("node %s does not have any LVMVolumeGroups from used StorageClasses", nodeName) + } + + // now we iterate all over the PVCs to see if we can place all of them on the node + for _, pvc := range localPVCs { + pvcReq := pvcRequests[pvc.Name] + + // we get LVGs which might be used by the PVC + lvgsFromSC := scLVGs[*pvc.Spec.StorageClassName] + + // we get the specific LVG which the PVC can use on the node + commonLVG := findMatchedLVG(lvgsFromNode, lvgsFromSC) + if commonLVG == nil { + return false, fmt.Sprintf("unable to match Storage Class's LVMVolumeGroup with the node's one, Storage Class: %s, node: %s", *pvc.Spec.StorageClassName, nodeName) + } + log.Trace(fmt.Sprintf("[filterNodeForLocalPVCs] LVMVolumeGroup %s is common for storage class %s and node %s", commonLVG.Name, *pvc.Spec.StorageClassName, nodeName)) + + // Use common function to check available space in LVG + lvg := allLVGs[commonLVG.Name] + hasSpace, err := checkLVGHasSpace(schedulerCache, lvg, pvcReq.DeviceType, commonLVG.Thin.PoolName, pvcReq.RequestedSize) + if err != nil { + log.Error(err, fmt.Sprintf("[filterNodeForLocalPVCs] unable to check space for LVG %s", commonLVG.Name)) + return false, fmt.Sprintf("error checking space for LVG %s: %v", commonLVG.Name, err) + } + + if !hasSpace { + log.Trace(fmt.Sprintf("[filterNodeForLocalPVCs] LVMVolumeGroup %s does not have enough space for PVC %s (requested: %s)", commonLVG.Name, pvc.Name, resource.NewQuantity(pvcReq.RequestedSize, resource.BinarySI))) + return false, fmt.Sprintf("LVMVolumeGroup %s does not have enough space for PVC %s", commonLVG.Name, pvc.Name) + } + } + + return true, "" +} + +func populateCache( + log logger.Logger, + filteredNodeNames *[]string, + pod *corev1.Pod, + schedulerCache *cache.Cache, + managedPVCS map[string]*corev1.PersistentVolumeClaim, + scUsedByPVCs map[string]*storagev1.StorageClass, +) error { + for _, nodeName := range *filteredNodeNames { + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + pvc := managedPVCS[volume.PersistentVolumeClaim.ClaimName] + if pvc == nil { + continue + } + + sc := 
scUsedByPVCs[*pvc.Spec.StorageClassName] + + // Only cache local PVCs, replicated PVCs use different mechanism + if sc.Provisioner != consts.SdsLocalVolumeProvisioner { + log.Debug(fmt.Sprintf("[populateCache] PVC %s uses provisioner %s, skipping cache population", pvc.Name, sc.Provisioner)) + continue + } + + log.Debug(fmt.Sprintf("[populateCache] reconcile the PVC %s on node %s", volume.PersistentVolumeClaim.ClaimName, nodeName)) + lvgNamesForTheNode := schedulerCache.GetLVGNamesByNodeName(nodeName) + log.Trace(fmt.Sprintf("[populateCache] LVMVolumeGroups from cache for the node %s: %v", nodeName, lvgNamesForTheNode)) + + lvgsForPVC, err := ExtractLVGsFromSC(sc) + if err != nil { + return err + } + + switch sc.Parameters[consts.LvmTypeParamKey] { + case consts.Thick: + log.Debug(fmt.Sprintf("[populateCache] Storage Class %s has device type Thick, so the cache will be populated by PVC space requests", sc.Name)) + log.Trace(fmt.Sprintf("[populateCache] LVMVolumeGroups from Storage Class %s for PVC %s/%s: %+v", sc.Name, pvc.Namespace, pvc.Name, lvgsForPVC)) + for _, lvg := range lvgsForPVC { + if slices.Contains(lvgNamesForTheNode, lvg.Name) { + log.Trace(fmt.Sprintf("[populateCache] PVC %s/%s will reserve space in LVMVolumeGroup %s cache", pvc.Namespace, pvc.Name, lvg.Name)) + err = schedulerCache.AddThickPVC(lvg.Name, pvc) + if err != nil { + return err + } + } + } + case consts.Thin: + log.Debug(fmt.Sprintf("[populateCache] Storage Class %s has device type Thin, so the cache will be populated by PVC space requests", sc.Name)) + log.Trace(fmt.Sprintf("[populateCache] LVMVolumeGroups from Storage Class %s for PVC %s/%s: %+v", sc.Name, pvc.Namespace, pvc.Name, lvgsForPVC)) + for _, lvg := range lvgsForPVC { + if slices.Contains(lvgNamesForTheNode, lvg.Name) { + log.Trace(fmt.Sprintf("[populateCache] PVC %s/%s will reserve space in LVMVolumeGroup %s Thin Pool %s cache", pvc.Namespace, pvc.Name, lvg.Name, lvg.Thin.PoolName)) + err = schedulerCache.AddThinPVC(lvg.Name, lvg.Thin.PoolName, pvc) + if err != nil { + return err + } + } + } + } + } + } + } + + return nil +} + +// Params: +// lvgs - all LVMVolumeGroups in the cache; +// +// Return: map[lvgName]map[string]int64 +// Example: +// +// { +// "vg0": { +// "tp0": 100, +// "tp1": 200, +// }, +// } +// +// Description: +// This function returns a map of ThinPools free spaces for each LVMVolumeGroup. +// +// .status.thinPools[].availableSpace is the free space of the ThinPool. +func getLVGThinFreeSpaces(lvgs map[string]*snc.LVMVolumeGroup) map[string]map[string]int64 { + result := make(map[string]map[string]int64, len(lvgs)) + + for _, lvg := range lvgs { + if result[lvg.Name] == nil { + result[lvg.Name] = make(map[string]int64, len(lvg.Status.ThinPools)) + } + + for _, tp := range lvg.Status.ThinPools { + result[lvg.Name][tp.Name] = tp.AvailableSpace.Value() + } + } + + return result +} + +// Params: +// lvgs - all LVMVolumeGroups in the cache; +// +// Return: map[lvgName]int64 +// Example: +// +// { +// "vg0": 100, +// "vg1": 200, +// } +// +// Description: +// This function returns a map of Thick free spaces for each LVMVolumeGroup. +// +// .status.VGFree is the free space of the LVMVolumeGroup. 
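+//
+// A short sketch of how it can be fed from the scheduler cache (schedulerCache and
+// log are assumed to be the cache and logger already in scope; "vg0" is just the
+// example key from above, not a real resource):
+//
+//	free := getLVGThickFreeSpaces(schedulerCache.GetAllLVG())
+//	log.Trace(fmt.Sprintf("vg0 has %d bytes of thick free space", free["vg0"]))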
+func getLVGThickFreeSpaces(lvgs map[string]*snc.LVMVolumeGroup) map[string]int64 { + result := make(map[string]int64, len(lvgs)) + + for _, lvg := range lvgs { + result[lvg.Name] = lvg.Status.VGFree.Value() + } + + return result +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/filter_prioritize.go b/images/sds-common-scheduler-extender/pkg/scheduler/filter_prioritize.go new file mode 100644 index 000000000..d8a55a682 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/filter_prioritize.go @@ -0,0 +1,246 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "encoding/json" + "fmt" + "net/http" + "sort" + "strings" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func (s *scheduler) filterAndPrioritize(w http.ResponseWriter, r *http.Request) { + servingLog := logger.WithTraceIDLogger(r.Context(), s.log).WithName("filter-and-prioritize") + + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req FilterPrioritizeRequest + reader := http.MaxBytesReader(w, r.Body, 10<<20) // 10MB + err := json.NewDecoder(reader).Decode(&req) + if err != nil { + servingLog.Error(err, "unable to decode request") + http.Error(w, "bad request", http.StatusBadRequest) + return + } + + // Normalize volume type (accept both "thick"/"Thick" and "thin"/"Thin") + volumeTypeLower := strings.ToLower(req.Volume.Type) + if volumeTypeLower == "thick" { + req.Volume.Type = consts.Thick + } else if volumeTypeLower == "thin" { + req.Volume.Type = consts.Thin + } + + // Validation + if len(req.LVGs) == 0 { + http.Error(w, "lvgs list is empty", http.StatusBadRequest) + return + } + if req.Volume.Name == "" || req.Volume.Size <= 0 { + http.Error(w, "invalid volume data", http.StatusBadRequest) + return + } + if req.Volume.Type != consts.Thick && req.Volume.Type != consts.Thin { + http.Error(w, fmt.Sprintf("invalid volume type: %s (expected 'thick' or 'thin')", req.Volume.Type), http.StatusBadRequest) + return + } + + // Validate thinPoolName for thin volumes + if req.Volume.Type == consts.Thin { + for _, lvg := range req.LVGs { + if lvg.ThinPoolName == "" { + http.Error(w, "thinPoolName is required for thin volumes", http.StatusBadRequest) + return + } + } + } + + // Log request details for debugging + servingLog.Debug(fmt.Sprintf("request: volume=%s, size=%d bytes (%.2f Gi), type=%s, lvgs count=%d", + req.Volume.Name, req.Volume.Size, float64(req.Volume.Size)/(1024*1024*1024), req.Volume.Type, len(req.LVGs))) + for i, lvg := range req.LVGs { + servingLog.Debug(fmt.Sprintf("request: lvg[%d]=%s, thinPoolName=%s", i, lvg.Name, lvg.ThinPoolName)) + } + + // Filter LVGs by available space + // Uses common function checkLVGHasSpace + filteredLVGs, err := s.filterLVGs(servingLog, req.LVGs, req.Volume) + if err != nil { + servingLog.Error(err, 
"unable to filter LVGs") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + + if len(filteredLVGs) == 0 { + response := FilterPrioritizeResponse{ + LVGs: []LVGScore{}, + } + responseJSON, _ := json.Marshal(response) + servingLog.Debug(fmt.Sprintf("response: %s", string(responseJSON))) + w.Header().Set("content-type", "application/json") + json.NewEncoder(w).Encode(response) + return + } + + // Score filtered LVGs + // Uses common function calculateLVGScore + scoredLVGs := s.scoreLVGs(servingLog, filteredLVGs, req.Volume) + + // Reserve space for all filtered LVGs + err = s.reserveSpaceForVolumes(servingLog, filteredLVGs, req.Volume) + if err != nil { + servingLog.Error(err, "unable to reserve space") + // Don't return error, as filtering and scoring are already done + // Reservation can be retried later + } + + // Build response + response := FilterPrioritizeResponse{ + LVGs: scoredLVGs, + } + + // Log response body at DEBUG level + responseJSON, err := json.Marshal(response) + if err != nil { + servingLog.Error(err, "unable to marshal response") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Debug(fmt.Sprintf("response: %s", string(responseJSON))) + + w.Header().Set("content-type", "application/json") + _, err = w.Write(responseJSON) + if err != nil { + servingLog.Error(err, "unable to write response") + http.Error(w, "internal server error", http.StatusInternalServerError) + } +} + +// filterLVGs filters LVGs by available space +// Uses common function checkLVGHasSpace +func (s *scheduler) filterLVGs(log logger.Logger, lvgs []LVGInput, volume VolumeInput) ([]LVGInput, error) { + var filtered []LVGInput + + log.Debug(fmt.Sprintf("[filterLVGs] starting to filter %d LVGs for volume type %s, size %d bytes", len(lvgs), volume.Type, volume.Size)) + + for _, lvgInput := range lvgs { + lvg := s.cache.TryGetLVG(lvgInput.Name) + if lvg == nil { + log.Debug(fmt.Sprintf("[filterLVGs] LVG %s not found in cache, skipping", lvgInput.Name)) + continue + } + + log.Debug(fmt.Sprintf("[filterLVGs] LVG %s found in cache, checking available space", lvgInput.Name)) + + // Get detailed space information for logging + spaceInfo, err := getLVGAvailableSpace(s.cache, lvg, volume.Type, lvgInput.ThinPoolName) + if err != nil { + log.Error(err, fmt.Sprintf("[filterLVGs] unable to get available space for LVG %s", lvgInput.Name)) + continue + } + + // Log detailed space information + requestedSizeGi := float64(volume.Size) / (1024 * 1024 * 1024) + availableSizeGi := float64(spaceInfo.AvailableSpace) / (1024 * 1024 * 1024) + totalSizeGi := float64(spaceInfo.TotalSize) / (1024 * 1024 * 1024) + log.Debug(fmt.Sprintf("[filterLVGs] LVG %s: requested=%.2f Gi, available=%.2f Gi, total=%.2f Gi", + lvgInput.Name, requestedSizeGi, availableSizeGi, totalSizeGi)) + + // Check if LVG has enough space + hasSpace := spaceInfo.AvailableSpace >= volume.Size + if hasSpace { + log.Debug(fmt.Sprintf("[filterLVGs] LVG %s has enough space (available: %d bytes >= requested: %d bytes), adding to filtered list", + lvgInput.Name, spaceInfo.AvailableSpace, volume.Size)) + filtered = append(filtered, lvgInput) + } else { + log.Debug(fmt.Sprintf("[filterLVGs] LVG %s does not have enough space (available: %d bytes < requested: %d bytes), skipping", + lvgInput.Name, spaceInfo.AvailableSpace, volume.Size)) + } + } + + log.Debug(fmt.Sprintf("[filterLVGs] filtered %d LVGs out of %d requested", len(filtered), len(lvgs))) + return filtered, nil +} + +// scoreLVGs 
scores LVGs +// Uses common function calculateLVGScore +func (s *scheduler) scoreLVGs(log logger.Logger, lvgs []LVGInput, volume VolumeInput) []LVGScore { + var scored []LVGScore + + for _, lvgInput := range lvgs { + lvg := s.cache.TryGetLVG(lvgInput.Name) + if lvg == nil { + log.Debug(fmt.Sprintf("[scoreLVGs] LVG %s not found in cache, skipping", lvgInput.Name)) + continue + } + + // Use common function to calculate score + score, err := calculateLVGScore(s.cache, lvg, volume.Type, lvgInput.ThinPoolName, volume.Size, s.defaultDivisor) + if err != nil { + log.Error(err, fmt.Sprintf("[scoreLVGs] unable to calculate score for LVG %s", lvgInput.Name)) + continue + } + + lvgScore := LVGScore{ + Name: lvgInput.Name, + Score: score, + } + + scored = append(scored, lvgScore) + } + + // Sort by score (from highest to lowest) + sort.Slice(scored, func(i, j int) bool { + return scored[i].Score > scored[j].Score + }) + + return scored +} + +func (s *scheduler) reserveSpaceForVolumes(log logger.Logger, lvgs []LVGInput, volume VolumeInput) error { + for _, lvgInput := range lvgs { + switch volume.Type { + case consts.Thick: + err := s.cache.AddThickVolume(lvgInput.Name, volume.Name, volume.Size) + if err != nil { + log.Error(err, fmt.Sprintf("[reserveSpaceForVolumes] unable to reserve space for volume %s in LVG %s", volume.Name, lvgInput.Name)) + return err + } + log.Debug(fmt.Sprintf("[reserveSpaceForVolumes] reserved %d bytes for volume %s in LVG %s", volume.Size, volume.Name, lvgInput.Name)) + + case consts.Thin: + if lvgInput.ThinPoolName == "" { + continue + } + err := s.cache.AddThinVolume(lvgInput.Name, lvgInput.ThinPoolName, volume.Name, volume.Size) + if err != nil { + log.Error(err, fmt.Sprintf("[reserveSpaceForVolumes] unable to reserve space for volume %s in LVG %s Thin Pool %s", volume.Name, lvgInput.Name, lvgInput.ThinPoolName)) + return err + } + log.Debug(fmt.Sprintf("[reserveSpaceForVolumes] reserved %d bytes for volume %s in LVG %s Thin Pool %s", volume.Size, volume.Name, lvgInput.Name, lvgInput.ThinPoolName)) + } + } + + return nil +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/filter_test.go b/images/sds-common-scheduler-extender/pkg/scheduler/filter_test.go new file mode 100644 index 000000000..9b73f058a --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/filter_test.go @@ -0,0 +1,162 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package scheduler + +import ( + "context" + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + v12 "k8s.io/api/storage/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func TestFilter(t *testing.T) { + log := logger.Logger{} + ctx := context.Background() + t.Run("getManagedPVCsFromPod filters PVCs by provisioner", func(t *testing.T) { + sc1 := "sc1" + sc2 := "sc2" + sc3 := "sc3" + + objects := []runtime.Object{ + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "first", + Namespace: "default", + }, + Spec: v1.PersistentVolumeClaimSpec{ + StorageClassName: &sc1, + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "second", + Namespace: "default", + }, + Spec: v1.PersistentVolumeClaimSpec{ + StorageClassName: &sc2, + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "third", + Namespace: "default", + }, + Spec: v1.PersistentVolumeClaimSpec{ + StorageClassName: &sc3, + }, + }, + &v12.StorageClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: sc1, + }, + Provisioner: consts.SdsLocalVolumeProvisioner, + }, + &v12.StorageClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: sc2, + }, + Provisioner: consts.SdsLocalVolumeProvisioner, + }, + &v12.StorageClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: sc3, + }, + }, + } + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + Name: "volume1", + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "first", + }, + }, + }, + { + Name: "volume2", + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "second", + }, + }, + }, + { + Name: "volume3", + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "third", + }, + }, + }, + }, + }, + } + + s := scheme.Scheme + _ = v1.AddToScheme(s) + _ = v12.AddToScheme(s) + + cl := fake.NewFakeClient(objects...) + targetProvisioners := []string{consts.SdsLocalVolumeProvisioner} + filtered, err := getManagedPVCsFromPod(ctx, cl, log, pod, targetProvisioners) + + assert.NoError(t, err) + if assert.Equal(t, 2, len(filtered)) { + _, ok := filtered["first"] + assert.True(t, ok) + _, ok = filtered["second"] + assert.True(t, ok) + _, ok = filtered["third"] + assert.False(t, ok) + } + }) +} + +func Test_scheduler_filter(t *testing.T) { + tests := []struct { + name string // description of this test case + // Named input parameters for target function. + w http.ResponseWriter + r *http.Request + }{ + // TODO: Add test cases. + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // TODO: construct the receiver type. 
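+ // A fuller construction would populate the fields the filter path relies on
+ // (ctx, client, cache, targetProvisioners, log), as NewHandler in route.go does;
+ // the zero-value receiver below is only a placeholder until test cases are added.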
+ var s scheduler + s.filter(tt.w, tt.r) + }) + } +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/func.go b/images/sds-common-scheduler-extender/pkg/scheduler/func.go new file mode 100644 index 000000000..b32ab6a93 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/func.go @@ -0,0 +1,621 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "context" + "fmt" + "math" + + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/utils/strings/slices" + "sigs.k8s.io/controller-runtime/pkg/client" + + d8commonapi "github.com/deckhouse/sds-common-lib/api/v1alpha1" + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + "github.com/stretchr/testify/assert/yaml" +) + +const ( + annotationBetaStorageProvisioner = "volume.beta.kubernetes.io/storage-provisioner" + annotationStorageProvisioner = "volume.kubernetes.io/storage-provisioner" +) + +// PVCRequest is a request for a PVC +type PVCRequest struct { + DeviceType string + RequestedSize int64 +} + +type LVMVolumeGroup struct { + Name string `yaml:"name"` + Thin struct { + PoolName string `yaml:"poolName"` + } `yaml:"thin"` +} +type LVMVolumeGroups []LVMVolumeGroup + +// discoverProvisionerForPVC tries to detect a provisioner for the given PVC using: +// 1) PVC annotations +// 2) StorageClass referenced by the PVC +// 3) PV bound to the PVC +func discoverProvisionerForPVC( + ctx context.Context, + cl client.Client, + log logger.Logger, + pvc *corev1.PersistentVolumeClaim, +) (string, error) { + var discoveredProvisioner string + + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] get pvc: %+v", pvc)) + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] check provisioner in pvc annotations: %+v", pvc.Annotations)) + + // Get provisioner from PVC annotations + discoveredProvisioner = pvc.Annotations[annotationStorageProvisioner] + if discoveredProvisioner != "" { + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] discovered provisioner in pvc annotations: %s", discoveredProvisioner)) + return discoveredProvisioner, nil + } + + discoveredProvisioner = pvc.Annotations[annotationBetaStorageProvisioner] + if discoveredProvisioner != "" { + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] discovered provisioner in beta pvc annotations: %s", discoveredProvisioner)) + return discoveredProvisioner, nil + } + + // Get provisioner from StorageClass + if pvc.Spec.StorageClassName != nil && *pvc.Spec.StorageClassName != "" { + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] can't find provisioner in pvc annotations, check in storageClass with name: %s", *pvc.Spec.StorageClassName)) + storageClass := &storagev1.StorageClass{} + if err := cl.Get(ctx, 
client.ObjectKey{Name: *pvc.Spec.StorageClassName}, storageClass); err != nil { + return "", fmt.Errorf("[discoverProvisionerForPVC] error getting StorageClass %s: %v", *pvc.Spec.StorageClassName, err) + } + discoveredProvisioner = storageClass.Provisioner + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] discover provisioner %s in storageClass: %+v", discoveredProvisioner, storageClass)) + if discoveredProvisioner != "" { + return discoveredProvisioner, nil + } + } + + // Get provisioner from PV + if pvc.Spec.VolumeName != "" { + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] can't find provisioner in pvc annotations and StorageClass, check in PV with name: %s", pvc.Spec.VolumeName)) + pv := &corev1.PersistentVolume{} + if err := cl.Get(ctx, client.ObjectKey{Name: pvc.Spec.VolumeName}, pv); err != nil { + return "", fmt.Errorf("[discoverProvisionerForPVC] error getting PV %s: %v", pvc.Spec.VolumeName, err) + } + + if pv.Spec.CSI != nil { + discoveredProvisioner = pv.Spec.CSI.Driver + } + + log.Trace(fmt.Sprintf("[discoverProvisionerForPVC] discover provisioner %s in PV: %+v", discoveredProvisioner, pv)) + } + + return discoveredProvisioner, nil +} + +// Get all node names from the request +func getNodeNames(inputData ExtenderArgs) ([]string, error) { + if inputData.NodeNames != nil && len(*inputData.NodeNames) > 0 { + return *inputData.NodeNames, nil + } + + if inputData.Nodes != nil && len(inputData.Nodes.Items) > 0 { + nodeNames := make([]string, 0, len(inputData.Nodes.Items)) + for _, node := range inputData.Nodes.Items { + nodeNames = append(nodeNames, node.Name) + } + return nodeNames, nil + } + + return nil, fmt.Errorf("no nodes provided") +} + +// Get all PVCs from the Pod which are managed by our modules +// +// Params: +// ctx - context; +// cl - client; +// log - logger; +// pod - Pod; +// targetProvisioners - target provisioners; +// +// Return: map[pvcName]*corev1.PersistentVolumeClaim +func getManagedPVCsFromPod(ctx context.Context, cl client.Client, log logger.Logger, pod *corev1.Pod, targetProvisioners []string) (map[string]*corev1.PersistentVolumeClaim, error) { + var discoveredProvisioner string + managedPVCs := make(map[string]*corev1.PersistentVolumeClaim, len(pod.Spec.Volumes)) + var useLinstor *bool + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + pvcName := volume.PersistentVolumeClaim.ClaimName + log = log.WithValues("PVC", pvcName) + + pvc := &corev1.PersistentVolumeClaim{} + err := cl.Get(ctx, client.ObjectKey{Namespace: pod.Namespace, Name: pvcName}, pvc) + if err != nil { + return nil, fmt.Errorf("[getManagedPVCsFromPod] error getting PVC: %v", err) + } + + discoveredProvisioner, err = discoverProvisionerForPVC(ctx, cl, log, pvc) + if err != nil { + return nil, fmt.Errorf("[getManagedPVCsFromPod] error getting provisioner: %v", err) + } + log.Trace(fmt.Sprintf("[getManagedPVCsFromPod] discovered provisioner: %s", discoveredProvisioner)) + + if !slices.Contains(targetProvisioners, discoveredProvisioner) { + log.Debug(fmt.Sprintf("[getManagedPVCsFromPod] provisioner not matches targetProvisioners %+v", targetProvisioners)) + continue + } + + if discoveredProvisioner == consts.SdsReplicatedVolumeProvisioner { + if useLinstor == nil { + useLinstor, err = getUseLinstor(ctx, cl, log) + if err != nil { + return nil, fmt.Errorf("[getManagedPVCsFromPod] error getting useLinstor: %v", err) + } + } + + if *useLinstor { + log.Debug("[getManagedPVCsFromPod] filter out PVC due to used provisioner is managed by the Linstor") + 
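+ // When useLinstor is enabled in the sds-replicated-volume ModuleConfig, such PVCs
+ // are managed by Linstor itself, so this extender does not account for them.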
continue + } + } + + log.Debug("[getManagedPVCsFromPod] add PVC to the managed PVCs") + managedPVCs[pvcName] = pvc + } + } + + return managedPVCs, nil +} + +// Get all StorageClasses used by the PVCs +// +// Params: +// ctx - context; +// cl - client; +// pvcs - PVCs; +// +// Return: map[scName]*storagev1.StorageClass +func getStorageClassesUsedByPVCs(ctx context.Context, cl client.Client, pvcs map[string]*corev1.PersistentVolumeClaim) (map[string]*storagev1.StorageClass, error) { + scs := &storagev1.StorageClassList{} + err := cl.List(ctx, scs) + if err != nil { + return nil, err + } + + scMap := make(map[string]storagev1.StorageClass, len(scs.Items)) + for _, sc := range scs.Items { + scMap[sc.Name] = sc + } + + result := make(map[string]*storagev1.StorageClass, len(pvcs)) + for _, pvc := range pvcs { + if pvc.Spec.StorageClassName == nil { + err = fmt.Errorf("no StorageClass specified for PVC %s", pvc.Name) + return nil, err + } + + scName := *pvc.Spec.StorageClassName + if sc, match := scMap[scName]; match { + result[sc.Name] = &sc + } + } + + return result, nil +} + +// Get useLinstor value from the sds-replication-volume ModuleConfig +func getUseLinstor(ctx context.Context, cl client.Client, log logger.Logger) (*bool, error) { + // local variables to return pointers to + _true := true + _false := false + mc := &d8commonapi.ModuleConfig{} + err := cl.Get(ctx, client.ObjectKey{Name: "sds-replicated-volume"}, mc) + if err != nil { + if client.IgnoreNotFound(err) == nil { + log.Debug("[getUseLinstor] ModuleConfig sds-replicated-volume not found. Assume useLinstor is true") + return &_true, nil + } + return &_true, err + } + + if value, exists := mc.Spec.Settings["useLinstor"]; exists && value == true { + log.Debug("[getUseLinstor] ModuleConfig sds-replicated-volume found. Assume useLinstor is true") + return &_true, nil + } + + log.Debug("[getUseLinstor] ModuleConfig sds-replicated-volume found. Assume useLinstor is false") + return &_false, nil +} + +// extractRequestedSize extracts the requested size from the PVC based on the PVC status phase and the StorageClass parameters. 
+// +// Return: map[pvcName]PVCRequest +// Example: +// +// { +// "pvc1": { +// "deviceType": "Thick", +// "requestedSize": 100 +// } +// } +// { +// "pvc2": { +// "deviceType": "Thin", +// "requestedSize": 200 +// } +// } +func extractRequestedSize( + ctx context.Context, + cl client.Client, + log logger.Logger, + pvcs map[string]*corev1.PersistentVolumeClaim, + scs map[string]*storagev1.StorageClass, +) (map[string]PVCRequest, error) { + pvcRequests := make(map[string]PVCRequest, len(pvcs)) + for _, pvc := range pvcs { + sc := scs[*pvc.Spec.StorageClassName] + log.Debug(fmt.Sprintf("[extractRequestedSize] PVC %s/%s has status phase: %s", pvc.Namespace, pvc.Name, pvc.Status.Phase)) + + // Determine device type based on provisioner + var deviceType string + isReplicated := sc.Provisioner == consts.SdsReplicatedVolumeProvisioner + + if isReplicated { + // For replicated PVCs, get device type from RSP + rsc, err := getReplicatedStorageClassForExtract(ctx, cl, sc.Name) + if err != nil { + log.Error(err, fmt.Sprintf("[extractRequestedSize] unable to get RSC for SC %s", sc.Name)) + continue + } + rsp, err := getReplicatedStoragePoolForExtract(ctx, cl, rsc.Spec.StoragePool) + if err != nil { + log.Error(err, fmt.Sprintf("[extractRequestedSize] unable to get RSP %s", rsc.Spec.StoragePool)) + continue + } + switch rsp.Spec.Type { + case consts.RSPTypeLVM: + deviceType = consts.Thick + case consts.RSPTypeLVMThin: + deviceType = consts.Thin + default: + deviceType = consts.Thick + } + } else { + // For local PVCs, get device type from SC parameters + deviceType = sc.Parameters[consts.LvmTypeParamKey] + } + + if deviceType == "" { + log.Debug(fmt.Sprintf("[extractRequestedSize] unable to determine device type for PVC %s/%s", pvc.Namespace, pvc.Name)) + continue + } + + switch pvc.Status.Phase { + case corev1.ClaimPending: + pvcRequests[pvc.Name] = PVCRequest{ + DeviceType: deviceType, + RequestedSize: pvc.Spec.Resources.Requests.Storage().Value(), + } + + case corev1.ClaimBound: + pv := &corev1.PersistentVolume{} + if err := cl.Get(ctx, client.ObjectKey{Name: pvc.Spec.VolumeName}, pv); err != nil { + return nil, fmt.Errorf("[extractRequestedSize] error getting PV %s: %v", pvc.Spec.VolumeName, err) + } + pvcRequests[pvc.Name] = PVCRequest{ + DeviceType: deviceType, + RequestedSize: pvc.Spec.Resources.Requests.Storage().Value() - pv.Spec.Capacity.Storage().Value(), + } + } + } + + return pvcRequests, nil +} + +// getReplicatedStorageClassForExtract retrieves RSC by SC name for extractRequestedSize +func getReplicatedStorageClassForExtract(ctx context.Context, cl client.Client, scName string) (*snc.ReplicatedStorageClass, error) { + rsc := &snc.ReplicatedStorageClass{} + err := cl.Get(ctx, client.ObjectKey{Name: scName}, rsc) + if err != nil { + return nil, fmt.Errorf("unable to get ReplicatedStorageClass %s: %w", scName, err) + } + return rsc, nil +} + +// getReplicatedStoragePoolForExtract retrieves RSP by name for extractRequestedSize +func getReplicatedStoragePoolForExtract(ctx context.Context, cl client.Client, rspName string) (*snc.ReplicatedStoragePool, error) { + rsp := &snc.ReplicatedStoragePool{} + err := cl.Get(ctx, client.ObjectKey{Name: rspName}, rsp) + if err != nil { + return nil, fmt.Errorf("unable to get ReplicatedStoragePool %s: %w", rspName, err) + } + return rsp, nil +} + +// Get LVMVolumeGroups from StorageClasses +// +// Return: map[scName]LVMVolumeGroups +func GetLVGsFromStorageClasses(scs map[string]*storagev1.StorageClass) (map[string]LVMVolumeGroups, error) { + result := 
make(map[string]LVMVolumeGroups, len(scs)) + + for _, sc := range scs { + lvgs, err := ExtractLVGsFromSC(sc) + if err != nil { + return nil, err + } + + result[sc.Name] = append(result[sc.Name], lvgs...) + } + + return result, nil +} + +// Extract LVMVolumeGroups from StorageClass +func ExtractLVGsFromSC(sc *storagev1.StorageClass) (LVMVolumeGroups, error) { + var lvmVolumeGroups LVMVolumeGroups + err := yaml.Unmarshal([]byte(sc.Parameters[consts.LVMVolumeGroupsParamKey]), &lvmVolumeGroups) + if err != nil { + return nil, err + } + return lvmVolumeGroups, nil +} + +// Remove LVMVolumeGroups, which are not used in StorageClasses +// +// Params: +// lvgs - all LVMVolumeGroups in the cache; +// scsLVGs - LVMVolumeGroups for each StorageClass +// +// Return: map[lvgName]*snc.LVMVolumeGroup +func RemoveUnusedLVGs(lvgs map[string]*snc.LVMVolumeGroup, scsLVGs map[string]LVMVolumeGroups) map[string]*snc.LVMVolumeGroup { + result := make(map[string]*snc.LVMVolumeGroup, len(lvgs)) + usedLvgs := make(map[string]struct{}, len(lvgs)) + + for _, scLvgs := range scsLVGs { + for _, lvg := range scLvgs { + usedLvgs[lvg.Name] = struct{}{} + } + } + + for _, lvg := range lvgs { + if _, used := usedLvgs[lvg.Name]; used { + result[lvg.Name] = lvg + } + } + + return result +} + +// Params: +// lvgs - LVMVolumeGroups; +// +// Return: map[nodeName][]*snc.LVMVolumeGroup +func LVMVolumeGroupsByNodeName(lvgs map[string]*snc.LVMVolumeGroup) map[string][]*snc.LVMVolumeGroup { + sorted := make(map[string][]*snc.LVMVolumeGroup, len(lvgs)) + for _, lvg := range lvgs { + for _, node := range lvg.Status.Nodes { + sorted[node.Name] = append(sorted[node.Name], lvg) + } + } + + return sorted +} + +// Params: +// nodeLVGs - LVMVolumeGroups on the node; +// scLVGs - LVMVolumeGroups for the Storage Class; +// +// Return: *LVMVolumeGroup +// Example: +// +// { +// "name": "vg0", +// "status": { +// "nodes": ["node1", "node2"], +// }, +// } +func findMatchedLVG(nodeLVGs []*snc.LVMVolumeGroup, scLVGs LVMVolumeGroups) *LVMVolumeGroup { + nodeLVGNames := make(map[string]struct{}, len(nodeLVGs)) + for _, lvg := range nodeLVGs { + nodeLVGNames[lvg.Name] = struct{}{} + } + + for _, lvg := range scLVGs { + if _, match := nodeLVGNames[lvg.Name]; match { + return &lvg + } + } + + return nil +} + +// Params: +// thinPools - ThinPools of the LVMVolumeGroup; +// name - name of the ThinPool to find; +// +// Return: *snc.LVMVolumeGroupThinPoolStatus +// Example: +// +// { +// "name": "tp0", +// "availableSpace": 100, +// } +func findMatchedThinPool(thinPools []snc.LVMVolumeGroupThinPoolStatus, name string) *snc.LVMVolumeGroupThinPoolStatus { + for _, tp := range thinPools { + if tp.Name == name { + return &tp + } + } + + return nil +} + +// LVGSpaceInfo contains information about available space in LVG +type LVGSpaceInfo struct { + AvailableSpace int64 // available space considering reservations + TotalSize int64 // total LVG size +} + +// getLVGAvailableSpace gets available space in LVG considering reservations +// Works directly with LVG, without node binding +// +// Params: +// - schedulerCache - scheduler cache +// - lvg - LVMVolumeGroup from cache +// - deviceType - device type ("Thick" or "Thin") +// - thinPoolName - thin pool name (required for thin, can be empty for thick) +// +// Return: +// - LVGSpaceInfo with available space information +// - error if an error occurred +func getLVGAvailableSpace( + schedulerCache *cache.Cache, + lvg *snc.LVMVolumeGroup, + deviceType string, + thinPoolName string, +) (LVGSpaceInfo, error) { + var 
availableSpace int64 + var totalSize int64 + + switch deviceType { + case consts.Thick: + freeSpace := lvg.Status.VGFree.Value() + reserved, err := schedulerCache.GetLVGThickReservedSpace(lvg.Name) + if err != nil { + return LVGSpaceInfo{}, fmt.Errorf("unable to get reserved space for LVG %s: %w", lvg.Name, err) + } + availableSpace = freeSpace - reserved + totalSize = lvg.Status.VGSize.Value() + + case consts.Thin: + if thinPoolName == "" { + return LVGSpaceInfo{}, fmt.Errorf("thinPoolName is required for thin volumes") + } + + thinPool := findMatchedThinPool(lvg.Status.ThinPools, thinPoolName) + if thinPool == nil { + return LVGSpaceInfo{}, fmt.Errorf("thin pool %s not found in LVG %s", thinPoolName, lvg.Name) + } + + freeSpace := thinPool.AvailableSpace.Value() + reserved, err := schedulerCache.GetLVGThinReservedSpace(lvg.Name, thinPoolName) + if err != nil { + return LVGSpaceInfo{}, fmt.Errorf("unable to get reserved space for thin pool %s: %w", thinPoolName, err) + } + availableSpace = freeSpace - reserved + totalSize = lvg.Status.VGSize.Value() + + default: + return LVGSpaceInfo{}, fmt.Errorf("unknown device type: %s", deviceType) + } + + return LVGSpaceInfo{ + AvailableSpace: availableSpace, + TotalSize: totalSize, + }, nil +} + +// checkLVGHasSpace checks if LVG has enough space for the requested size +// Works directly with LVG, without node binding +// +// Params: +// - schedulerCache - scheduler cache +// - lvg - LVMVolumeGroup from cache +// - deviceType - device type ("Thick" or "Thin") +// - thinPoolName - thin pool name (required for thin, can be empty for thick) +// - requestedSize - requested size in bytes +// +// Return: +// - true if there is enough space +// - error if an error occurred +func checkLVGHasSpace( + schedulerCache *cache.Cache, + lvg *snc.LVMVolumeGroup, + deviceType string, + thinPoolName string, + requestedSize int64, +) (bool, error) { + spaceInfo, err := getLVGAvailableSpace(schedulerCache, lvg, deviceType, thinPoolName) + if err != nil { + return false, err + } + + return spaceInfo.AvailableSpace >= requestedSize, nil +} + +// calculateLVGScore calculates score for LVG based on available space +// Uses the same logic as getFreeSpaceLeftPercent and getNodeScore from prioritize.go +// Works directly with LVG, without node binding +// +// Params: +// - schedulerCache - scheduler cache +// - lvg - LVMVolumeGroup from cache +// - deviceType - device type ("Thick" or "Thin") +// - thinPoolName - thin pool name (required for thin, can be empty for thick) +// - requestedSize - requested size in bytes +// - divisor - divisor for score calculation +// +// Return: +// - score for LVG (1-10) +// - error if an error occurred +func calculateLVGScore( + schedulerCache *cache.Cache, + lvg *snc.LVMVolumeGroup, + deviceType string, + thinPoolName string, + requestedSize int64, + divisor float64, +) (int, error) { + spaceInfo, err := getLVGAvailableSpace(schedulerCache, lvg, deviceType, thinPoolName) + if err != nil { + return 0, err + } + + // Use the same logic as in prioritize.go + freeSpaceLeft := getFreeSpaceLeftPercent(spaceInfo.AvailableSpace, requestedSize, spaceInfo.TotalSize) + score := getNodeScore(freeSpaceLeft, divisor) + + return score, nil +} + +// getFreeSpaceLeftPercent calculates the percentage of free space left after placing the requested volume +func getFreeSpaceLeftPercent(freeSize, requestedSpace, totalSize int64) int64 { + leftFreeSize := freeSize - requestedSpace + fraction := float64(leftFreeSize) / float64(totalSize) + percent := fraction * 100 
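+ // Worked example with illustrative numbers: freeSize=100Gi, requestedSpace=10Gi,
+ // totalSize=200Gi gives leftFreeSize=90Gi, fraction=0.45, percent=45; getNodeScore
+ // below would then map 45 with divisor=1.0 to round(log2(45)) = 5.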
+ return int64(percent) +} + +// getNodeScore calculates score based on free space +func getNodeScore(freeSpace int64, divisor float64) int { + converted := int(math.Round(math.Log2(float64(freeSpace) / divisor))) + switch { + case converted < 1: + return 1 + case converted > 10: + return 10 + default: + return converted + } +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/func_test.go b/images/sds-common-scheduler-extender/pkg/scheduler/func_test.go new file mode 100644 index 000000000..e1ec7957c --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/func_test.go @@ -0,0 +1,246 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func TestShouldProcessPod(t *testing.T) { + log := logger.Logger{} + ctx := context.Background() + + tt := []struct { + name string + pod *corev1.Pod + objects []runtime.Object + targetProvisioner string + expectedShouldProcess bool + expectedError bool + }{ + { + name: "Provisioner in PVC annotations", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "volume1", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvc1", + }, + }, + }, + }, + }, + }, + objects: []runtime.Object{ + &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc1", + Namespace: "default", + Annotations: map[string]string{ + "volume.beta.kubernetes.io/storage-provisioner": "my-provisioner", + }, + }, + }, + }, + targetProvisioner: "my-provisioner", + expectedShouldProcess: true, + }, + { + name: "Provisioner in StorageClass", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "volume2", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvc2", + }, + }, + }, + }, + }, + }, + objects: []runtime.Object{ + &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc2", + Namespace: "default", + }, + Spec: corev1.PersistentVolumeClaimSpec{ + StorageClassName: stringPtr("sc2"), + }, + }, + &storagev1.StorageClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sc2", + }, + Provisioner: "my-provisioner", + }, + }, + targetProvisioner: "my-provisioner", + expectedShouldProcess: true, + }, + { + name: "Provisioner in PV", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod3", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: 
"volume3", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvc3", + }, + }, + }, + }, + }, + }, + objects: []runtime.Object{ + &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc3", + Namespace: "default", + }, + Spec: corev1.PersistentVolumeClaimSpec{ + VolumeName: "pv3", + }, + }, + &corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pv3", + }, + Spec: corev1.PersistentVolumeSpec{ + PersistentVolumeSource: corev1.PersistentVolumeSource{ + CSI: &corev1.CSIPersistentVolumeSource{ + Driver: "my-provisioner", + }, + }, + }, + }, + }, + targetProvisioner: "my-provisioner", + expectedShouldProcess: true, + }, + { + name: "No matching Provisioner", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod4", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "volume4", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "pvc4", + }, + }, + }, + }, + }, + }, + objects: []runtime.Object{ + &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc4", + Namespace: "default", + }, + }, + }, + targetProvisioner: "my-provisioner", + expectedShouldProcess: false, + }, + { + name: "Error getting PVC", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod5", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "volume5", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "nonexistent-pvc", + }, + }, + }, + }, + }, + }, + objects: []runtime.Object{}, + targetProvisioner: "my-provisioner", + expectedShouldProcess: false, + expectedError: true, + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + s := scheme.Scheme + _ = corev1.AddToScheme(s) + _ = storagev1.AddToScheme(s) + + cl := fake.NewFakeClient(tc.objects...) + targetProvisioners := []string{tc.targetProvisioner} + managedPVCs, err := getManagedPVCsFromPod(ctx, cl, log, tc.pod, targetProvisioners) + if (err != nil) != tc.expectedError { + t.Fatalf("Unexpected error: %v", err) + } + + shouldProcess := len(managedPVCs) > 0 + if shouldProcess != tc.expectedShouldProcess { + t.Errorf("Expected shouldProcess to be %v, but got %v", tc.expectedShouldProcess, shouldProcess) + } + }) + } +} + +func stringPtr(s string) *string { + return &s +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/prioritize.go b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize.go new file mode 100644 index 000000000..522e4ea29 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize.go @@ -0,0 +1,375 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package scheduler + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "sync" + + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/api/resource" + "sigs.k8s.io/controller-runtime/pkg/client" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +func (s *scheduler) prioritize(w http.ResponseWriter, r *http.Request) { + servingLog := logger.WithTraceIDLogger(r.Context(), s.log).WithName("prioritize") + + servingLog.Debug("starts the serving the request") + + var inputData ExtenderArgs + reader := http.MaxBytesReader(w, r.Body, 10<<20) // 10MB + err := json.NewDecoder(reader).Decode(&inputData) + if err != nil { + servingLog.Error(err, "unable to decode a request") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Trace(fmt.Sprintf("input data: %+v", inputData)) + + if inputData.Pod == nil { + servingLog.Error(errors.New("no pod in the request"), "unable to get a Pod from the request") + http.Error(w, "bad request", http.StatusBadRequest) + return + } + + servingLog = servingLog.WithValues("Pod", fmt.Sprintf("%s/%s", inputData.Pod.Namespace, inputData.Pod.Name)) + + nodeNames, err := getNodeNames(inputData) + if err != nil { + servingLog.Error(err, "unable to get node names from the request") + http.Error(w, "bad request", http.StatusBadRequest) + return + } + servingLog.Trace(fmt.Sprintf("NodeNames from the request: %+v", nodeNames)) + + managedPVCs, err := getManagedPVCsFromPod(s.ctx, s.client, servingLog, inputData.Pod, s.targetProvisioners) + if err != nil { + servingLog.Error(err, "unable to get managed PVCs from the Pod") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + if len(managedPVCs) == 0 { + servingLog.Debug("Pod uses PVCs which are not managed by our modules. Return the same nodes") + if err := writeNodeScoresResponse(w, servingLog, nodeNames, 0); err != nil { + servingLog.Error(err, "unable to write node names response") + http.Error(w, "internal server error", http.StatusInternalServerError) + } + return + } + for _, pvc := range managedPVCs { + servingLog.Trace(fmt.Sprintf("managed PVC: %s", pvc.Name)) + } + + scUsedByPVCs, err := getStorageClassesUsedByPVCs(s.ctx, s.client, managedPVCs) + if err != nil { + servingLog.Error(err, "unable to get StorageClasses from the PVC") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + for _, sc := range scUsedByPVCs { + servingLog.Trace(fmt.Sprintf("Pod uses StorageClass: %s", sc.Name)) + } + if len(scUsedByPVCs) != len(managedPVCs) { + servingLog.Error(errors.New("number of StorageClasses does not match the number of PVCs"), "unable to get StorageClasses from the PVC") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + + servingLog.Debug("starts to extract PVC requested sizes") + pvcRequests, err := extractRequestedSize(s.ctx, s.client, servingLog, managedPVCs, scUsedByPVCs) + if err != nil { + servingLog.Error(err, "unable to extract request size") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + if len(pvcRequests) == 0 { + servingLog.Debug("No PVC requests found. 
Return the same nodes with 0 score") + if err := writeNodeScoresResponse(w, servingLog, nodeNames, 0); err != nil { + servingLog.Error(err, "unable to write node scores response") + http.Error(w, "internal server error", http.StatusInternalServerError) + } + return + } + servingLog.Trace(fmt.Sprintf("PVC requests: %+v", pvcRequests)) + servingLog.Debug("successfully extracted the PVC requested sizes") + + servingLog.Debug("starts to score the nodes for Pod") + // TODO: In future, retrieve replica locations from DRBD/Linstor for replicated PVCs + // For now, pass empty map as we don't have replica information yet + replicaLocations := make(map[string][]string) + + scoredNodes, err := scoreNodes(servingLog, s.ctx, s.client, s.cache, &nodeNames, managedPVCs, scUsedByPVCs, pvcRequests, replicaLocations, s.defaultDivisor) + if err != nil { + servingLog.Error(err, "unable to score nodes") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Debug("successfully scored the nodes for Pod") + + // Log response body at DEBUG level + responseJSON, err := json.Marshal(scoredNodes) + if err != nil { + servingLog.Error(err, "unable to marshal response") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + servingLog.Debug(fmt.Sprintf("response: %s", string(responseJSON))) + + w.Header().Set("content-type", "application/json") + _, err = w.Write(responseJSON) + if err != nil { + servingLog.Error(err, "unable to write response") + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + + servingLog.Debug("ends the serving the request") +} + +func writeNodeScoresResponse(w http.ResponseWriter, log logger.Logger, nodeNames []string, score int) error { + scores := make([]HostPriority, 0, len(nodeNames)) + for _, nodeName := range nodeNames { + scores = append(scores, HostPriority{ + Host: nodeName, + Score: score, + }) + } + log.Trace(fmt.Sprintf("node scores: %+v", scores)) + + // Log response body at DEBUG level + responseJSON, err := json.Marshal(scores) + if err != nil { + return err + } + log.Debug(fmt.Sprintf("response: %s", string(responseJSON))) + + w.Header().Set("content-type", "application/json") + if _, err := w.Write(responseJSON); err != nil { + return err + } + return nil +} + +func scoreNodes( + log logger.Logger, + ctx context.Context, + cl client.Client, + schedulerCache *cache.Cache, + nodeNames *[]string, + managedPVCs map[string]*corev1.PersistentVolumeClaim, + scUsedByPVCs map[string]*storagev1.StorageClass, + pvcRequests map[string]PVCRequest, + replicaLocations map[string][]string, + divisor float64, +) ([]HostPriority, error) { + allLVGs := schedulerCache.GetAllLVG() + for _, lvg := range allLVGs { + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s in the cache", lvg.Name)) + } + + // Separate PVCs by provisioner + localPVCs := filterPVCsByProvisioner(managedPVCs, scUsedByPVCs, consts.SdsLocalVolumeProvisioner) + replicatedPVCs := filterPVCsByProvisioner(managedPVCs, scUsedByPVCs, consts.SdsReplicatedVolumeProvisioner) + + log.Debug(fmt.Sprintf("[scoreNodes] local PVCs count: %d, replicated PVCs count: %d", len(localPVCs), len(replicatedPVCs))) + + // Get LVGs from StorageClasses only for local PVCs + var scLVGs map[string]LVMVolumeGroups + var usedLVGs map[string]*snc.LVMVolumeGroup + var nodeLVGs map[string][]*snc.LVMVolumeGroup + var err error + + if len(localPVCs) > 0 { + log.Debug("[scoreNodes] starts to get LVMVolumeGroups for local Storage Classes") + localSCs := 
make(map[string]*storagev1.StorageClass) + for _, pvc := range localPVCs { + if pvc.Spec.StorageClassName != nil { + if sc, exists := scUsedByPVCs[*pvc.Spec.StorageClassName]; exists { + localSCs[sc.Name] = sc + } + } + } + + scLVGs, err = GetLVGsFromStorageClasses(localSCs) + if err != nil { + return nil, err + } + log.Debug("[scoreNodes] successfully got LVMVolumeGroups for local Storage Classes") + for scName, lvmVolumeGroups := range scLVGs { + for _, lvg := range lvmVolumeGroups { + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s belongs to Storage Class %s", lvg.Name, scName)) + } + } + + usedLVGs = RemoveUnusedLVGs(allLVGs, scLVGs) + for lvgName := range usedLVGs { + log.Trace(fmt.Sprintf("[scoreNodes] used LVMVolumeGroup %s", lvgName)) + } + + nodeLVGs = LVMVolumeGroupsByNodeName(usedLVGs) + for n, ls := range nodeLVGs { + for _, l := range ls { + log.Trace(fmt.Sprintf("[scoreNodes] the LVMVolumeGroup %s belongs to node %s", l.Name, n)) + } + } + } + + result := make([]HostPriority, 0, len(*nodeNames)) + resultMtx := &sync.Mutex{} + wg := &sync.WaitGroup{} + wg.Add(len(*nodeNames)) + errs := make(chan error, len(managedPVCs)*len(*nodeNames)) + + for i, nodeName := range *nodeNames { + go func(i int, nodeName string) { + log.Trace(fmt.Sprintf("[scoreNodes] goroutine %d starts the work for the node %s", i, nodeName)) + defer func() { + log.Trace(fmt.Sprintf("[scoreNodes] goroutine %d ends the work for the node %s", i, nodeName)) + wg.Done() + }() + + var totalScore int64 + pvcCount := 0 + + // === Score LOCAL PVCs === + if len(localPVCs) > 0 { + lvgsFromNode := nodeLVGs[nodeName] + for _, pvc := range localPVCs { + pvcReq := pvcRequests[pvc.Name] + lvgsFromSC := scLVGs[*pvc.Spec.StorageClassName] + commonLVG := findMatchedLVG(lvgsFromNode, lvgsFromSC) + if commonLVG == nil { + log.Debug(fmt.Sprintf("[scoreNodes] unable to match local LVG for SC %s on node %s, scoring 0", *pvc.Spec.StorageClassName, nodeName)) + pvcCount++ + continue + } + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s is common for storage class %s and node %s", commonLVG.Name, *pvc.Spec.StorageClassName, nodeName)) + + // Use common function to get available space in LVG + lvg := allLVGs[commonLVG.Name] + spaceInfo, err := getLVGAvailableSpace(schedulerCache, lvg, pvcReq.DeviceType, commonLVG.Thin.PoolName) + if err != nil { + log.Error(err, fmt.Sprintf("[scoreNodes] unable to get available space for LVG %s", lvg.Name)) + pvcCount++ + continue + } + + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s available space: %s, total size: %s", lvg.Name, resource.NewQuantity(spaceInfo.AvailableSpace, resource.BinarySI), resource.NewQuantity(spaceInfo.TotalSize, resource.BinarySI))) + totalScore += getFreeSpaceLeftPercent(spaceInfo.AvailableSpace, pvcReq.RequestedSize, spaceInfo.TotalSize) + pvcCount++ + } + } + + // === Score REPLICATED PVCs === + if len(replicatedPVCs) > 0 { + for _, pvc := range replicatedPVCs { + pvcReq := pvcRequests[pvc.Name] + sc := scUsedByPVCs[*pvc.Spec.StorageClassName] + + // Get RSC (name = SC name) + rsc, err := getReplicatedStorageClass(ctx, cl, sc.Name) + if err != nil { + log.Error(err, fmt.Sprintf("[scoreNodes] unable to get RSC for SC %s", sc.Name)) + pvcCount++ + continue + } + + // Get RSP + rsp, err := getReplicatedStoragePool(ctx, cl, rsc.Spec.StoragePool) + if err != nil { + log.Error(err, fmt.Sprintf("[scoreNodes] unable to get RSP %s", rsc.Spec.StoragePool)) + pvcCount++ + continue + } + + volumeAccess := rsc.Spec.VolumeAccess + if volumeAccess == "" { + 
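+ // VolumeAccess is optional in the ReplicatedStorageClass spec, so an empty value
+ // falls back to PreferablyLocal, matching the default used in filterNodeForReplicatedPVCs.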
volumeAccess = consts.VolumeAccessPreferablyLocal + } + + // For Bound PVC with volumeAccess != Local/EventuallyLocal: + // score 0 if no LVG or no space (to prefer nodes with actual storage over diskless) + if pvc.Status.Phase == corev1.ClaimBound && + volumeAccess != consts.VolumeAccessLocal && + volumeAccess != consts.VolumeAccessEventuallyLocal { + + hasLVGAndSpace, _ := checkNodeHasLVGWithSpaceForReplicated(log, schedulerCache, nodeName, rsp, pvcReq.RequestedSize) + if !hasLVGAndSpace { + log.Debug(fmt.Sprintf("[scoreNodes] node %s has no LVG/space for replicated PVC %s, scoring 0 for this PVC", nodeName, pvc.Name)) + // pvcScore = 0, just increment count + pvcCount++ + continue + } + } + + // Calculate score based on available space + pvcScore := calculateReplicatedPVCScore(log, schedulerCache, nodeName, rsp, pvcReq, divisor) + totalScore += pvcScore + pvcCount++ + } + } + + // Calculate replica bonus + replicaBonus := calculateReplicaBonus(nodeName, managedPVCs, replicaLocations) + totalScore += replicaBonus + + var averageScore int64 + if pvcCount > 0 { + averageScore = totalScore / int64(pvcCount) + } + log.Trace(fmt.Sprintf("[scoreNodes] average score for the node %s: %d (total: %d, pvcCount: %d, replicaBonus: %d)", nodeName, averageScore, totalScore, pvcCount, replicaBonus)) + score := getNodeScore(averageScore, divisor) + log.Trace(fmt.Sprintf("[scoreNodes] node %s has final score %d", nodeName, score)) + + resultMtx.Lock() + result = append(result, HostPriority{ + Host: nodeName, + Score: score, + }) + resultMtx.Unlock() + }(i, nodeName) + } + wg.Wait() + log.Debug("[scoreNodes] goroutines work is done") + if len(errs) != 0 { + for err = range errs { + log.Error(err, "[scoreNodes] an error occurs while scoring the nodes") + } + } + close(errs) + if err != nil { + return nil, err + } + + log.Trace("[scoreNodes] final result") + for _, n := range result { + log.Trace(fmt.Sprintf("[scoreNodes] host: %s", n.Host)) + log.Trace(fmt.Sprintf("[scoreNodes] score: %d", n.Score)) + } + + return result, nil +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_test.go b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_test.go new file mode 100644 index 000000000..480cd1b92 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_test.go @@ -0,0 +1,55 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package scheduler + +import ( + "math" + "testing" + + "k8s.io/apimachinery/pkg/api/resource" +) + +func TestPrioritize(t *testing.T) { + t.Run("getFreeSpaceLeftPercent", func(_ *testing.T) { + requested := resource.MustParse("1Gi") + devisor := 1.0 + + totalSizeString := "327676Mi" + totalSize := resource.MustParse(totalSizeString) + allocated := resource.MustParse("211Gi") + freeSize := resource.MustParse(totalSizeString) + freeSize.Sub(allocated) + + percent := getFreeSpaceLeftPercent(freeSize.Value(), requested.Value(), totalSize.Value()) + t.Logf("First freeSpacePercent %d", percent) + + rawScore := int(math.Round(math.Log2(float64(percent) / devisor))) + t.Logf("rawScore1=%d", rawScore) + + totalSizeString2 := "327676Mi" + totalSize2 := resource.MustParse(totalSizeString2) + allocated2 := resource.MustParse("301Gi") + freeSize2 := resource.MustParse(totalSizeString2) + freeSize2.Sub(allocated2) + + percent2 := getFreeSpaceLeftPercent(freeSize2.Value(), requested.Value(), totalSize2.Value()) + t.Logf("Second freeSpacePercent2 %d", percent2) + + rawScore2 := int(math.Round(math.Log2(float64(percent2) / devisor))) + t.Logf("rawScore2=%d", rawScore2) + }) +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/replicated.go b/images/sds-common-scheduler-extender/pkg/scheduler/replicated.go new file mode 100644 index 000000000..b7acfab3c --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/replicated.go @@ -0,0 +1,392 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package scheduler + +import ( + "context" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +// getReplicatedStorageClass retrieves RSC by SC name (they are equal) +func getReplicatedStorageClass(ctx context.Context, cl client.Client, scName string) (*snc.ReplicatedStorageClass, error) { + rsc := &snc.ReplicatedStorageClass{} + err := cl.Get(ctx, client.ObjectKey{Name: scName}, rsc) + if err != nil { + return nil, fmt.Errorf("unable to get ReplicatedStorageClass %s: %w", scName, err) + } + return rsc, nil +} + +// getReplicatedStoragePool retrieves RSP by name +func getReplicatedStoragePool(ctx context.Context, cl client.Client, rspName string) (*snc.ReplicatedStoragePool, error) { + rsp := &snc.ReplicatedStoragePool{} + err := cl.Get(ctx, client.ObjectKey{Name: rspName}, rsp) + if err != nil { + return nil, fmt.Errorf("unable to get ReplicatedStoragePool %s: %w", rspName, err) + } + return rsp, nil +} + +// getDeviceTypeFromRSP returns device type (Thick/Thin) from RSP +func getDeviceTypeFromRSP(rsp *snc.ReplicatedStoragePool) string { + switch rsp.Spec.Type { + case consts.RSPTypeLVM: + return consts.Thick + case consts.RSPTypeLVMThin: + return consts.Thin + default: + return consts.Thick + } +} + +// requiresLVGCheck returns true if volumeAccess requires LVG and space checks +func requiresLVGCheck(volumeAccess string) bool { + return volumeAccess == consts.VolumeAccessLocal || + volumeAccess == consts.VolumeAccessEventuallyLocal +} + +// isNodeInZones checks if the node is in one of the specified zones +func isNodeInZones(node *corev1.Node, zones []string) bool { + if len(zones) == 0 { + return true + } + nodeZone := node.Labels["topology.kubernetes.io/zone"] + for _, z := range zones { + if z == nodeZone { + return true + } + } + return false +} + +// hasReplicatedNodeLabel checks if node has the sds-replicated-volume-node label +func hasReplicatedNodeLabel(node *corev1.Node) bool { + _, exists := node.Labels[consts.LabelReplicatedNode] + return exists +} + +// lvgHasNode checks if the LVG belongs to the specified node +func lvgHasNode(lvg *snc.LVMVolumeGroup, nodeName string) bool { + for _, n := range lvg.Status.Nodes { + if n.Name == nodeName { + return true + } + } + return false +} + +// findLVGForNodeInRSP finds LVG from RSP that belongs to the node +func findLVGForNodeInRSP( + schedulerCache *cache.Cache, + nodeName string, + rsp *snc.ReplicatedStoragePool, +) (*snc.LVMVolumeGroup, *snc.ReplicatedStoragePoolLVG, bool) { + for i := range rsp.Spec.LvmVolumeGroups { + lvgRef := &rsp.Spec.LvmVolumeGroups[i] + lvg := schedulerCache.TryGetLVG(lvgRef.Name) + if lvg == nil { + continue + } + if lvgHasNode(lvg, nodeName) { + return lvg, lvgRef, true + } + } + return nil, nil, false +} + +// checkNodeHasLVGFromRSP checks if node has any LVG from RSP +func checkNodeHasLVGFromRSP( + schedulerCache *cache.Cache, + nodeName string, + rsp *snc.ReplicatedStoragePool, +) bool { + _, _, found := findLVGForNodeInRSP(schedulerCache, nodeName, rsp) + return found +} + +// checkNodeHasLVGWithSpaceForReplicated checks if node has LVG with enough space for 
replicated PVC +func checkNodeHasLVGWithSpaceForReplicated( + log logger.Logger, + schedulerCache *cache.Cache, + nodeName string, + rsp *snc.ReplicatedStoragePool, + requestedSize int64, +) (bool, string) { + deviceType := getDeviceTypeFromRSP(rsp) + + lvg, lvgRef, found := findLVGForNodeInRSP(schedulerCache, nodeName, rsp) + if !found { + return false, fmt.Sprintf("no LVG from RSP %s found on node %s", rsp.Name, nodeName) + } + + var hasSpace bool + var err error + + if deviceType == consts.Thin { + hasSpace, err = checkLVGHasSpace(schedulerCache, lvg, consts.Thin, lvgRef.ThinPoolName, requestedSize) + } else { + hasSpace, err = checkLVGHasSpace(schedulerCache, lvg, consts.Thick, "", requestedSize) + } + + if err != nil { + log.Error(err, fmt.Sprintf("unable to check space for LVG %s", lvgRef.Name)) + return false, fmt.Sprintf("error checking space for LVG %s: %v", lvgRef.Name, err) + } + + if !hasSpace { + return false, fmt.Sprintf("LVG %s on node %s does not have enough space", lvgRef.Name, nodeName) + } + + return true, "" +} + +// filterNodesByVolumeReplicas is a stub for filtering by volume replicas +// TODO: Implement replica-based filtering for Local volumeAccess with Bound PVC. +// This function should filter nodes to only include those that have volume replicas. +// For now, return all nodes unchanged. +func filterNodesByVolumeReplicas( + log logger.Logger, + nodeNames []string, + pvc *corev1.PersistentVolumeClaim, +) []string { + log.Debug(fmt.Sprintf("[filterNodesByVolumeReplicas] TODO: implement replica-based filtering for PVC %s/%s", pvc.Namespace, pvc.Name)) + return nodeNames +} + +// filterNodesByVolumeZone is a stub for filtering by volume zone +// TODO: Implement zone-based filtering for Zonal topology with Bound PVC. +// This function should filter nodes to only include those in the same zone as the volume. +// For now, return all nodes unchanged. 
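+// A possible approach (an assumption, not implemented in this change): resolve the PV
+// bound to the PVC, read its zone from the volume's node affinity or the
+// topology.kubernetes.io/zone label, and keep only nodes whose zone label matches.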
+func filterNodesByVolumeZone( + log logger.Logger, + nodeNames []string, + pvc *corev1.PersistentVolumeClaim, + rsc *snc.ReplicatedStorageClass, +) []string { + log.Debug(fmt.Sprintf("[filterNodesByVolumeZone] TODO: implement zone-based filtering for Zonal topology, PVC %s/%s", pvc.Namespace, pvc.Name)) + return nodeNames +} + +// filterPVCsByProvisioner filters PVCs by provisioner +func filterPVCsByProvisioner( + pvcs map[string]*corev1.PersistentVolumeClaim, + scs map[string]*storagev1.StorageClass, + provisioner string, +) map[string]*corev1.PersistentVolumeClaim { + result := make(map[string]*corev1.PersistentVolumeClaim) + for name, pvc := range pvcs { + if pvc.Spec.StorageClassName == nil { + continue + } + sc, exists := scs[*pvc.Spec.StorageClassName] + if !exists { + continue + } + if sc.Provisioner == provisioner { + result[name] = pvc + } + } + return result +} + +// filterNodeForReplicatedPVCs filters node for replicated PVCs +func filterNodeForReplicatedPVCs( + log logger.Logger, + ctx context.Context, + cl client.Client, + schedulerCache *cache.Cache, + nodeName string, + node *corev1.Node, + replicatedPVCs map[string]*corev1.PersistentVolumeClaim, + scUsedByPVCs map[string]*storagev1.StorageClass, + pvcRequests map[string]PVCRequest, +) (bool, string) { + var failReasons []string + + for _, pvc := range replicatedPVCs { + sc := scUsedByPVCs[*pvc.Spec.StorageClassName] + pvcReq := pvcRequests[pvc.Name] + + // Get RSC (name = SC name) + rsc, err := getReplicatedStorageClass(ctx, cl, sc.Name) + if err != nil { + failReasons = append(failReasons, fmt.Sprintf("PVC %s: unable to get RSC: %v", pvc.Name, err)) + continue + } + + // Get RSP + rsp, err := getReplicatedStoragePool(ctx, cl, rsc.Spec.StoragePool) + if err != nil { + failReasons = append(failReasons, fmt.Sprintf("PVC %s: unable to get RSP: %v", pvc.Name, err)) + continue + } + + volumeAccess := rsc.Spec.VolumeAccess + if volumeAccess == "" { + volumeAccess = consts.VolumeAccessPreferablyLocal + } + + // === R1: Check sds-replicated-volume-node label === + if !hasReplicatedNodeLabel(node) { + failReasons = append(failReasons, fmt.Sprintf("PVC %s: node %s missing label %s", pvc.Name, nodeName, consts.LabelReplicatedNode)) + continue + } + + // === R2: Check zones from RSC === + if len(rsc.Spec.Zones) > 0 && !isNodeInZones(node, rsc.Spec.Zones) { + failReasons = append(failReasons, fmt.Sprintf("PVC %s: node %s not in zones %v", pvc.Name, nodeName, rsc.Spec.Zones)) + continue + } + + // === R3 (TODO): For Zonal topology + Bound PVC === + if rsc.Spec.Topology == consts.TopologyZonal && pvc.Status.Phase == corev1.ClaimBound { + log.Debug(fmt.Sprintf("[filterNodeForReplicatedPVCs] TODO: zone filtering for Zonal topology, pvc=%s", pvc.Name)) + // filterNodesByVolumeZone will be called later when implemented + } + + // === LVG and space checks === + switch pvc.Status.Phase { + case corev1.ClaimPending: + // Volume not yet created + if requiresLVGCheck(volumeAccess) { + ok, reason := checkNodeHasLVGWithSpaceForReplicated(log, schedulerCache, nodeName, rsp, pvcReq.RequestedSize) + if !ok { + failReasons = append(failReasons, fmt.Sprintf("PVC %s: %s", pvc.Name, reason)) + } + } + + case corev1.ClaimBound: + // Volume already created + switch volumeAccess { + case consts.VolumeAccessLocal: + // TODO: Exclude all nodes except those where replicas exist + log.Debug(fmt.Sprintf("[filterNodeForReplicatedPVCs] TODO: filter by replicas for Local, pvc=%s", pvc.Name)) + + case consts.VolumeAccessEventuallyLocal: + ok, reason := 
checkNodeHasLVGWithSpaceForReplicated(log, schedulerCache, nodeName, rsp, pvcReq.RequestedSize) + if !ok { + failReasons = append(failReasons, fmt.Sprintf("PVC %s: %s", pvc.Name, reason)) + } + + default: + // PreferablyLocal, Any - do not check LVG + } + } + } + + if len(failReasons) > 0 { + return false, strings.Join(failReasons, "; ") + } + return true, "" +} + +// calculateReplicatedPVCScore calculates score for replicated PVC +func calculateReplicatedPVCScore( + log logger.Logger, + schedulerCache *cache.Cache, + nodeName string, + rsp *snc.ReplicatedStoragePool, + pvcReq PVCRequest, + divisor float64, +) int64 { + deviceType := getDeviceTypeFromRSP(rsp) + + lvg, lvgRef, found := findLVGForNodeInRSP(schedulerCache, nodeName, rsp) + if !found { + return 0 + } + + var thinPoolName string + if deviceType == consts.Thin { + thinPoolName = lvgRef.ThinPoolName + } + + score, err := calculateLVGScore(schedulerCache, lvg, deviceType, thinPoolName, pvcReq.RequestedSize, divisor) + if err != nil { + log.Error(err, fmt.Sprintf("unable to calculate score for LVG %s", lvgRef.Name)) + return 0 + } + + return int64(score) +} + +// calculateReplicaBonus calculates bonus for replicas on the node +func calculateReplicaBonus( + nodeName string, + managedPVCs map[string]*corev1.PersistentVolumeClaim, + replicaLocations map[string][]string, +) int64 { + var bonus int64 + + for pvcName := range managedPVCs { + nodeList, exists := replicaLocations[pvcName] + if !exists { + continue + } + + for _, n := range nodeList { + if n == nodeName { + bonus += 10 // 10 points for each replica on the node + break + } + } + } + + return bonus +} + +// getNodes retrieves nodes by names +func getNodes(ctx context.Context, cl client.Client, nodeNames []string) (map[string]*corev1.Node, error) { + nodes := make(map[string]*corev1.Node, len(nodeNames)) + for _, nodeName := range nodeNames { + node := &corev1.Node{} + err := cl.Get(ctx, client.ObjectKey{Name: nodeName}, node) + if err != nil { + return nil, fmt.Errorf("unable to get node %s: %w", nodeName, err) + } + nodes[nodeName] = node + } + return nodes, nil +} + +// hasReplicatedPVCs checks if there are any replicated PVCs in the map +func hasReplicatedPVCs(pvcs map[string]*corev1.PersistentVolumeClaim, scs map[string]*storagev1.StorageClass) bool { + for _, pvc := range pvcs { + if pvc.Spec.StorageClassName == nil { + continue + } + sc, exists := scs[*pvc.Spec.StorageClassName] + if !exists { + continue + } + if sc.Provisioner == consts.SdsReplicatedVolumeProvisioner { + return true + } + } + return false +} + diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/route.go b/images/sds-common-scheduler-extender/pkg/scheduler/route.go new file mode 100644 index 000000000..fba485ab9 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/route.go @@ -0,0 +1,254 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package scheduler + +import ( + "context" + "fmt" + "net/http" + "os" + "strings" + + "k8s.io/apimachinery/pkg/api/resource" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +const ( + envTargetProvisioners = "TARGET_PROVISIONERS" +) + +type scheduler struct { + defaultDivisor float64 + log logger.Logger + client client.Client + ctx context.Context + cache *cache.Cache + targetProvisioners []string + filterRequestCount int + prioritizeRequestCount int +} + +func (s *scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + // Get logger with trace ID from request context + requestLog := logger.WithTraceIDLogger(r.Context(), s.log) + + switch r.URL.Path { + case "/scheduler/filter": + s.filterRequestCount++ + requestLog.Debug("[ServeHTTP] filter route starts handling the request") + s.filter(w, r) + requestLog.Debug("[ServeHTTP] filter route ends handling the request") + case "/scheduler/prioritize": + s.prioritizeRequestCount++ + requestLog.Debug("[ServeHTTP] prioritize route starts handling the request") + s.prioritize(w, r) + requestLog.Debug("[ServeHTTP] prioritize route ends handling the request") + case "/api/v1/volumes/filter-prioritize": + requestLog.Debug("[ServeHTTP] filter-prioritize route starts handling the request") + s.filterAndPrioritize(w, r) + requestLog.Debug("[ServeHTTP] filter-prioritize route ends handling the request") + case "/status": + requestLog.Debug("[ServeHTTP] status route starts handling the request") + status(w, r) + requestLog.Debug("[ServeHTTP] status route ends handling the request") + case "/cache": + requestLog.Debug("[ServeHTTP] cache route starts handling the request") + s.getCache(w, r) + requestLog.Debug("[ServeHTTP] cache route ends handling the request") + case "/stat": + requestLog.Debug("[ServeHTTP] stat route starts handling the request") + s.getCacheStat(w, r) + requestLog.Debug("[ServeHTTP] stat route ends handling the request") + default: + http.Error(w, "not found", http.StatusNotFound) + } +} + +// getTargetProvisioners reads target provisioners from environment variable. +// If TARGET_PROVISIONERS is not set, returns default provisioners. +// The environment variable can contain comma-separated list of provisioners. 
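+// Example (illustrative provisioner names):
+//   TARGET_PROVISIONERS="local.csi.storage.deckhouse.io, replicated.csi.storage.deckhouse.io"
+// yields ["local.csi.storage.deckhouse.io", "replicated.csi.storage.deckhouse.io"]:
+// entries are split on commas, surrounding whitespace is trimmed, and empty entries are dropped.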
+func getTargetProvisioners(log logger.Logger) []string { + envValue := os.Getenv(envTargetProvisioners) + if envValue == "" { + // Return default provisioners if environment variable is not set + defaultProvisioners := []string{consts.SdsLocalVolumeProvisioner, consts.SdsReplicatedVolumeProvisioner} + log.Info(fmt.Sprintf("TARGET_PROVISIONERS environment variable is not set, using default provisioners: %v", defaultProvisioners)) + return defaultProvisioners + } + + // Parse comma-separated provisioners + provisioners := strings.Split(envValue, ",") + result := make([]string, 0, len(provisioners)) + for _, p := range provisioners { + trimmed := strings.TrimSpace(p) + if trimmed != "" { + result = append(result, trimmed) + } + } + + if len(result) == 0 { + // Fallback to default if parsing resulted in empty list + defaultProvisioners := []string{consts.SdsLocalVolumeProvisioner, consts.SdsReplicatedVolumeProvisioner} + log.Warning(fmt.Sprintf("TARGET_PROVISIONERS environment variable is set but empty after parsing, using default provisioners: %v", defaultProvisioners)) + return defaultProvisioners + } + + log.Info(fmt.Sprintf("Using target provisioners from TARGET_PROVISIONERS environment variable: %v", result)) + return result +} + +// NewHandler return new http.Handler of the scheduler extender +func NewHandler(ctx context.Context, cl client.Client, log logger.Logger, lvgCache *cache.Cache, defaultDiv float64) (http.Handler, error) { + targetProvisioners := getTargetProvisioners(log) + return &scheduler{ + defaultDivisor: defaultDiv, + log: log, + client: cl, + ctx: ctx, + cache: lvgCache, + targetProvisioners: targetProvisioners, + }, nil +} + +func status(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte("ok")) + if err != nil { + fmt.Printf("error occurs on status route, err: %s\n", err.Error()) + } +} + +func (s *scheduler) getCache(w http.ResponseWriter, r *http.Request) { + requestLog := logger.WithTraceIDLogger(r.Context(), s.log) + w.WriteHeader(http.StatusOK) + + s.cache.PrintTheCacheLog() + + lvgs := s.cache.GetAllLVG() + for _, lvg := range lvgs { + reserved, err := s.cache.GetLVGThickReservedSpace(lvg.Name) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + _, err = w.Write([]byte("unable to write the cache")) + if err != nil { + requestLog.Error(err, "error write response") + } + } + + _, err = w.Write([]byte(fmt.Sprintf("LVMVolumeGroup: %s Thick Reserved: %s\n", lvg.Name, resource.NewQuantity(reserved, resource.BinarySI).String()))) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + _, err = w.Write([]byte("unable to write the cache")) + if err != nil { + requestLog.Error(err, "error write response") + } + } + + thickPvcs, err := s.cache.GetAllThickPVCLVG(lvg.Name) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + _, err = w.Write([]byte("unable to write the cache")) + if err != nil { + requestLog.Error(err, "error write response") + } + } + for _, pvc := range thickPvcs { + _, err = w.Write([]byte(fmt.Sprintf("\t\tThick PVC: %s, reserved: %s, selected node: %s\n", pvc.Name, pvc.Spec.Resources.Requests.Storage().String(), pvc.Annotations[cache.SelectedNodeAnnotation]))) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + requestLog.Error(err, "error write response") + } + } + + for _, tp := range lvg.Status.ThinPools { + thinReserved, err := s.cache.GetLVGThinReservedSpace(lvg.Name, tp.Name) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + 
requestLog.Error(err, "error write response") + } + _, err = w.Write([]byte(fmt.Sprintf("\tThinPool: %s, reserved: %s\n", tp.Name, resource.NewQuantity(thinReserved, resource.BinarySI).String()))) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + requestLog.Error(err, "error write response") + } + + thinPvcs, err := s.cache.GetAllPVCFromLVGThinPool(lvg.Name, tp.Name) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + requestLog.Error(err, "error write response") + } + + for _, pvc := range thinPvcs { + _, err = w.Write([]byte(fmt.Sprintf("\t\tThin PVC: %s, reserved: %s, selected node:%s\n", pvc.Name, pvc.Spec.Resources.Requests.Storage().String(), pvc.Annotations[cache.SelectedNodeAnnotation]))) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + requestLog.Error(err, "error write response") + } + } + } + } +} + +func (s *scheduler) getCacheStat(w http.ResponseWriter, r *http.Request) { + requestLog := logger.WithTraceIDLogger(r.Context(), s.log) + w.WriteHeader(http.StatusOK) + + pvcTotalCount := 0 + var totalReserved int64 + var sb strings.Builder + lvgs := s.cache.GetAllLVG() + for _, lvg := range lvgs { + pvcs, err := s.cache.GetAllPVCForLVG(lvg.Name) + if err != nil { + requestLog.Error(err, "something bad") + } + + pvcTotalCount += len(pvcs) + + // sum thick reserved + thickReserved, err := s.cache.GetLVGThickReservedSpace(lvg.Name) + if err != nil { + requestLog.Error(err, "unable to get thick reserved space") + } + totalReserved += thickReserved + // sum thin reserved across all thin pools + for _, tp := range lvg.Status.ThinPools { + thinReserved, err := s.cache.GetLVGThinReservedSpace(lvg.Name, tp.Name) + if err != nil { + requestLog.Error(err, "unable to get thin reserved space") + continue + } + totalReserved += thinReserved + } + } + + sb.WriteString(fmt.Sprintf("Filter request count: %d, Prioritize request count: %d\n", s.filterRequestCount, s.prioritizeRequestCount)) + sb.WriteString(fmt.Sprintf("Total reserved (thick+thin) across all PVCs (%d items): %s\n", pvcTotalCount, resource.NewQuantity(totalReserved, resource.BinarySI).String())) + + _, err := w.Write([]byte(sb.String())) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + _, err = w.Write([]byte("unable to write the cache")) + if err != nil { + requestLog.Error(err, "error write response for cache stat") + } + } +} diff --git a/images/sds-common-scheduler-extender/werf.inc.yaml b/images/sds-common-scheduler-extender/werf.inc.yaml new file mode 100644 index 000000000..f90246c38 --- /dev/null +++ b/images/sds-common-scheduler-extender/werf.inc.yaml @@ -0,0 +1,62 @@ +--- +image: {{ .ModuleNamePrefix }}{{ .ImageName }}-src-artifact +fromImage: builder/src +final: false + +git: + - add: {{ .ModuleDir }} + to: /src + includePaths: + - api + - images/{{ $.ImageName }} + stageDependencies: + install: + - '**/*' + excludePaths: + - images/{{ $.ImageName }}/werf.yaml + +shell: + install: + - echo "src artifact" + +--- +image: {{ .ModuleNamePrefix }}{{ .ImageName }}-golang-artifact +fromImage: {{ eq .SVACE_ENABLED "false" | ternary "builder/golang-alpine" "builder/golang-alt-svace" }} +final: false + +import: + - image: {{ .ModuleNamePrefix }}{{ .ImageName }}-src-artifact + add: /src + to: /src + before: install + +mount: +{{ include "mount points for golang builds" . 
}} + +secrets: +- id: GOPROXY + value: {{ .GOPROXY }} + +shell: + setup: + - cd /src/images/{{ $.ImageName }}/cmd + - GOPROXY=$(cat /run/secrets/GOPROXY) go mod download + - export GOOS=linux GOARCH=amd64 CGO_ENABLED=0 + - | + {{- include "image-build.build" (set $ "BuildCommand" (printf `go build -ldflags="-s -w" -tags "%s" -o /%s` .MODULE_EDITION $.ImageName)) | nindent 6 }} + - chmod +x /{{ $.ImageName }} + +--- +image: {{ .ModuleNamePrefix }}{{ .ImageName }} +fromImage: base/distroless + +git: +{{- include "image mount points" . }} +import: + - image: {{ .ModuleNamePrefix }}{{ .ImageName }}-golang-artifact + add: /{{ $.ImageName }} + to: /{{ $.ImageName }} + before: install +imageSpec: + config: + entrypoint: ["/{{ $.ImageName }}"] diff --git a/openapi/values.yaml b/openapi/values.yaml index d1d8cb40d..c6d3c8696 100644 --- a/openapi/values.yaml +++ b/openapi/values.yaml @@ -28,6 +28,23 @@ properties: ca: type: string x-examples: ["YjY0ZW5jX3N0cmluZwo="] + customSchedulerExtenderCert: + type: object + default: {} + x-required-for-helm: + - crt + - key + - ca + properties: + crt: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] + key: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] + ca: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] registry: type: object description: "System field, overwritten by Deckhouse. Don't use" diff --git a/openapi/values_ce.yaml b/openapi/values_ce.yaml index d1d8cb40d..c6d3c8696 100644 --- a/openapi/values_ce.yaml +++ b/openapi/values_ce.yaml @@ -28,6 +28,23 @@ properties: ca: type: string x-examples: ["YjY0ZW5jX3N0cmluZwo="] + customSchedulerExtenderCert: + type: object + default: {} + x-required-for-helm: + - crt + - key + - ca + properties: + crt: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] + key: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] + ca: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] registry: type: object description: "System field, overwritten by Deckhouse. Don't use" diff --git a/templates/sds-common-scheduler-extender/configmap.yaml b/templates/sds-common-scheduler-extender/configmap.yaml new file mode 100644 index 000000000..5ed41f114 --- /dev/null +++ b/templates/sds-common-scheduler-extender/configmap.yaml @@ -0,0 +1,23 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . 
(dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +data: + scheduler-extender-config.yaml: |- + listen: ":8099" + health-probe-bind-address: ":8081" + default-divisor: 1 +{{- if eq .Values.sdsNodeConfigurator.logLevel "ERROR" }} + log-level: "0" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "WARN" }} + log-level: "1" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "INFO" }} + log-level: "2" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "DEBUG" }} + log-level: "3" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "TRACE" }} + log-level: "4" +{{- end }} diff --git a/templates/sds-common-scheduler-extender/deployment.yaml b/templates/sds-common-scheduler-extender/deployment.yaml new file mode 100644 index 000000000..7053c522e --- /dev/null +++ b/templates/sds-common-scheduler-extender/deployment.yaml @@ -0,0 +1,130 @@ +{{- define "sds_common_scheduler_extender_resources" }} +cpu: 10m +memory: 25Mi +{{- end }} + +{{- if (.Values.global.enabledModules | has "vertical-pod-autoscaler-crd") }} +--- +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +spec: + targetRef: + apiVersion: "apps/v1" + kind: Deployment + name: sds-common-scheduler-extender + updatePolicy: + updateMode: "Auto" + resourcePolicy: + containerPolicies: + - containerName: sds-common-scheduler-extender + minAllowed: + {{- include "sds_common_scheduler_extender_resources" . | nindent 8 }} + maxAllowed: + memory: 40Mi + cpu: 20m +{{- end }} +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender" )) | nindent 2 }} +spec: + minAvailable: {{ include "helm_lib_is_ha_to_value" (list . 1 0) }} + selector: + matchLabels: + app: sds-common-scheduler-extender +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender" )) | nindent 2 }} +spec: + {{- include "helm_lib_deployment_strategy_and_replicas_for_ha" . | nindent 2 }} + revisionHistoryLimit: 2 + selector: + matchLabels: + app: sds-common-scheduler-extender + template: + metadata: + annotations: + checksum/ca: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.ca | sha256sum | quote }} + labels: + app: sds-common-scheduler-extender + spec: + {{- include "helm_lib_priority_class" (tuple . "system-cluster-critical") | nindent 6 }} + {{- include "helm_lib_node_selector" (tuple . "system") | nindent 6 }} + {{- include "helm_lib_tolerations" (tuple . "system") | nindent 6 }} + {{- include "helm_lib_module_pod_security_context_run_as_user_nobody" . | nindent 6 }} + {{- include "helm_lib_pod_anti_affinity_for_ha" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 6 }} + imagePullSecrets: + - name: {{ .Chart.Name }}-module-registry + containers: + - name: sds-common-scheduler-extender + {{- include "helm_lib_module_container_security_context_read_only_root_filesystem_capabilities_drop_all" . | nindent 10 }} + image: {{ include "helm_lib_module_image" (list . 
"sdsCommonSchedulerExtender") }} + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /healthz + port: 8081 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 15 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + scheme: HTTP + periodSeconds: 1 + failureThreshold: 3 + args: + - --config=/etc/sds-common-scheduler-extender/scheduler-extender-config.yaml + env: + - name: LOG_LEVEL +{{- if eq .Values.sdsNodeConfigurator.logLevel "ERROR" }} + value: "0" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "WARN" }} + value: "1" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "INFO" }} + value: "2" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "DEBUG" }} + value: "3" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "TRACE" }} + value: "4" +{{- end }} + volumeMounts: + - name: scheduler-extender-config + mountPath: /etc/sds-common-scheduler-extender + readOnly: true + - name: scheduler-extender-certs + mountPath: /etc/sds-common-scheduler-extender/certs + readOnly: true + resources: + requests: + {{- include "helm_lib_module_ephemeral_storage_only_logs" . | nindent 14 }} + {{- if not ( .Values.global.enabledModules | has "vertical-pod-autoscaler-crd") }} + {{- include "sds_common_scheduler_extender_resources" . | nindent 14 }} + {{- end }} + ports: + - containerPort: 8099 + protocol: TCP + name: http + volumes: + - name: scheduler-extender-config + configMap: + defaultMode: 420 + name: sds-common-scheduler-extender + - name: scheduler-extender-certs + secret: + secretName: common-scheduler-extender-https-certs + serviceAccountName: sds-common-scheduler-extender diff --git a/templates/sds-common-scheduler-extender/kube-scheduler-webhook-configuration.yaml b/templates/sds-common-scheduler-extender/kube-scheduler-webhook-configuration.yaml new file mode 100644 index 000000000..527040e77 --- /dev/null +++ b/templates/sds-common-scheduler-extender/kube-scheduler-webhook-configuration.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: KubeSchedulerWebhookConfiguration +metadata: + name: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +webhooks: +- weight: 5 + # Do NOT change to "Fail" under any circumstances. + # + # When the scheduler-extender pod is (re)scheduled after restart/failure, the extender is not ready. + # With "Fail", kube-scheduler gets an error and refuses to schedule its own pod → pod stays Pending → extender never starts → scheduler can't schedule anything. + # Deadlock: pod never runs. + failurePolicy: Ignore + clientConfig: + service: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + port: 8099 + path: /scheduler + caBundle: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.ca | b64enc }} + timeoutSeconds: 5 diff --git a/templates/sds-common-scheduler-extender/rbac-for-us.yaml b/templates/sds-common-scheduler-extender/rbac-for-us.yaml new file mode 100644 index 000000000..cfac2966a --- /dev/null +++ b/templates/sds-common-scheduler-extender/rbac-for-us.yaml @@ -0,0 +1,61 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . 
(dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender-kube-scheduler + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:kube-scheduler +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender-volume-scheduler + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:volume-scheduler +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: [ "storage.deckhouse.io" ] + resources: [ "lvmvolumegroups" ] + verbs: [ "list", "watch", "get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} diff --git a/templates/sds-common-scheduler-extender/secret.yaml b/templates/sds-common-scheduler-extender/secret.yaml new file mode 100644 index 000000000..aeecf5b3e --- /dev/null +++ b/templates/sds-common-scheduler-extender/secret.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: common-scheduler-extender-https-certs + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +type: kubernetes.io/tls +data: + ca.crt: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.ca | b64enc }} + tls.crt: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.crt | b64enc }} + tls.key: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.key | b64enc }} diff --git a/templates/sds-common-scheduler-extender/service.yaml b/templates/sds-common-scheduler-extender/service.yaml new file mode 100644 index 000000000..22e1b15cc --- /dev/null +++ b/templates/sds-common-scheduler-extender/service.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender" )) | nindent 2 }} +spec: + type: ClusterIP + ports: + - port: 8099 + targetPort: http + protocol: TCP + name: http + selector: + app: sds-common-scheduler-extender