diff --git a/.github/workflows/build_prod.yml b/.github/workflows/build_prod.yml
index e57018e7e..d1ea58cfd 100644
--- a/.github/workflows/build_prod.yml
+++ b/.github/workflows/build_prod.yml
@@ -44,6 +44,22 @@ jobs:
           wget https://fox.flant.com/api/v4/projects/deckhouse%2Fbase-images/packages/generic/base_images/$BASE_IMAGES_VERSION/base_images.yml -O base_images.yml
           cat base_images.yml
 
+          eval $(ssh-agent)
+          trap "kill -3 ${SSH_AGENT_PID}" ERR EXIT HUP INT QUIT TERM
+          ssh-add - <<< "$SOURCE_REPO_SSH_KEY"
+          export SSH_KNOWN_HOSTS=~/.ssh/known_hosts
+
+          HOST=$(grep -oP '(?<=@)[^/:]+' <<< $SOURCE_REPO)
+          mkdir -p ~/.ssh
+          touch ~/.ssh/known_hosts
+          HOST_KEYS=$(ssh-keyscan -H "$HOST" 2>/dev/null)
+          while IFS= read -r KEY_LINE; do
+            CONSTANT_PART=$(awk '{print $2, $3}' <<< "$KEY_LINE")
+            if ! grep -q "$CONSTANT_PART" ~/.ssh/known_hosts; then
+              echo "$KEY_LINE" >> ~/.ssh/known_hosts
+            fi
+          done <<< "$HOST_KEYS"
+
       - uses: deckhouse/modules-actions/setup@v2
         with:
           registry: ${{ vars.PROD_REGISTRY }}
diff --git a/.werf/choose-edition.yaml b/.werf/choose-edition.yaml
index cee72f13c..590efe1d6 100644
--- a/.werf/choose-edition.yaml
+++ b/.werf/choose-edition.yaml
@@ -4,15 +4,13 @@ fromImage: builder/alt
 fromCacheVersion: {{ div .Commit.Date.Unix (mul 60 60 24 30) }}
 git:
-  - add: /
-    to: /
-    includePaths:
-      - openapi
+  - add: /openapi
+    to: /openapi
     stageDependencies:
       setup:
-        - openapi/values_*.yaml
+        - '**/*'
 
 shell:
   setup:
     - cd /openapi
-    - if [[ {{ .MODULE_EDITION }} == "ce" ]]; then cp -v values_ce.yaml values.yaml; else cp -v values_ee.yaml values.yaml; fi
+    - if [[ {{ .MODULE_EDITION }} == "ce" ]]; then cp -fv values_ce.yaml values.yaml; fi
     - rm -rf values_*.yaml
diff --git a/.werf/consts.yaml b/.werf/consts.yaml
index edea2786f..db8eaa433 100644
--- a/.werf/consts.yaml
+++ b/.werf/consts.yaml
@@ -5,9 +5,4 @@
 {{- $versions := dict }}
 {{- $_ := set $versions "UTIL_LINUX" "v2.39.3" }}
 {{- $_ := set $versions "LVM2" "d786a8f820d54ce87a919e6af5426c333c173b11" }}
-
-{{- $_ := set $ "VERSIONS" $versions }}
-
-# custom constants
-{{- $_ := set $ "DECKHOUSE_UID_GID" "64535" }}
-{{- $_ := set $ "ALT_CLEANUP_CMD" "rm -rf /var/lib/apt/lists/* /var/cache/apt/* && mkdir -p /var/lib/apt/lists/partial /var/cache/apt/archives/partial" }}
+{{- $_ := set . "VERSIONS" $versions }}
diff --git a/.werf/werf.inc.yaml b/.werf/werf.inc.yaml
new file mode 100644
index 000000000..9bab64296
--- /dev/null
+++ b/.werf/werf.inc.yaml
@@ -0,0 +1,60 @@
+---
+image: {{ .ModuleNamePrefix }}{{ .ImageName }}-src-artifact
+fromImage: builder/src
+final: false
+
+git:
+  - add: {{ .ModuleDir }}
+    to: /src
+    includePaths:
+      - api
+      - lib/go
+      - images/{{ $.ImageName }}
+    stageDependencies:
+      install:
+        - '**/*'
+    excludePaths:
+      - images/{{ $.ImageName }}/werf.yaml
+
+shell:
+  install:
+    - echo "src artifact"
+
+---
+image: {{ .ModuleNamePrefix }}{{ .ImageName }}-golang-artifact
+fromImage: builder/golang-alpine
+final: false
+
+import:
+  - image: {{ .ModuleNamePrefix }}{{ .ImageName }}-src-artifact
+    add: /src
+    to: /src
+    before: install
+
+mount:
+{{ include "mount points for golang builds" . }}
+
+secrets:
+- id: GOPROXY
+  value: {{ .GOPROXY }}
+
+shell:
+  setup:
+    - cd /src/images/{{ $.ImageName }}/cmd
+    - GOPROXY=$(cat /run/secrets/GOPROXY) go mod download
+    - GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags="-s -w" -tags {{ .MODULE_EDITION }} -o /{{ $.ImageName }}
+    - chmod +x /{{ $.ImageName }}
+
+---
+image: {{ .ModuleNamePrefix }}{{ .ImageName }}
+fromImage: base/distroless
+
+import:
+  - image: {{ .ModuleNamePrefix }}{{ .ImageName }}-golang-artifact
+    add: /{{ $.ImageName }}
+    to: /{{ $.ImageName }}
+    before: install
+
+imageSpec:
+  config:
+    entrypoint: ["/{{ $.ImageName }}"]
diff --git a/hooks/go/020-common-scheduler-extender-certs/common-scheduler-extender-certs.go b/hooks/go/020-common-scheduler-extender-certs/common-scheduler-extender-certs.go
new file mode 100644
index 000000000..d8f11948b
--- /dev/null
+++ b/hooks/go/020-common-scheduler-extender-certs/common-scheduler-extender-certs.go
@@ -0,0 +1,41 @@
+/*
+Copyright 2025 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package hooks_common
+
+import (
+	"fmt"
+
+	tlscertificate "github.com/deckhouse/module-sdk/common-hooks/tls-certificate"
+	consts "github.com/deckhouse/sds-node-configurator/hooks/go/consts"
+)
+
+var _ = tlscertificate.RegisterInternalTLSHookEM(tlscertificate.GenSelfSignedTLSHookConf{
+	CommonCACanonicalName: fmt.Sprintf("%s-%s", consts.ModulePluralName, consts.SdsCommonSchedulerExtenderCertCn),
+	CN:                    consts.SdsCommonSchedulerExtenderCertCn,
+	TLSSecretName:         consts.CommonSchedulerExtenderSecretName,
+	Namespace:             consts.ModuleNamespace,
+	SANs: tlscertificate.DefaultSANs([]string{
+		"localhost",
+		"127.0.0.1",
+		consts.SdsCommonSchedulerExtenderCertCn,
+		fmt.Sprintf("%s.%s", consts.SdsCommonSchedulerExtenderCertCn, consts.ModuleNamespace),
+		fmt.Sprintf("%s.%s.svc", consts.SdsCommonSchedulerExtenderCertCn, consts.ModuleNamespace),
+		// %CLUSTER_DOMAIN%:// is a special value to generate SAN like 'svc_name.svc_namespace.svc.cluster.local'
+		fmt.Sprintf("%%CLUSTER_DOMAIN%%://%s.%s.svc", consts.SdsCommonSchedulerExtenderCertCn, consts.ModuleNamespace),
+	}),
+	FullValuesPathPrefix: fmt.Sprintf("%s.internal.customSchedulerExtenderCert", consts.ModuleName),
+})
diff --git a/hooks/go/consts/consts.go b/hooks/go/consts/consts.go
index 72fd66783..05909a979 100644
--- a/hooks/go/consts/consts.go
+++ b/hooks/go/consts/consts.go
@@ -17,8 +17,10 @@ limitations under the License.
 package consts
 
 const (
-	ModuleName       string = "sdsNodeConfigurator"
-	ModuleNamespace  string = "d8-sds-node-configurator"
-	ModulePluralName string = "sds-node-configurator"
-	WebhookCertCn    string = "webhooks"
+	ModuleName                        string = "sdsNodeConfigurator"
+	ModuleNamespace                   string = "d8-sds-node-configurator"
+	ModulePluralName                  string = "sds-node-configurator"
+	WebhookCertCn                     string = "webhooks"
+	SdsCommonSchedulerExtenderCertCn  string = "sds-common-scheduler-extender"
+	CommonSchedulerExtenderSecretName string = "common-scheduler-extender-https-certs"
 )
diff --git a/hooks/go/main.go b/hooks/go/main.go
index e029551ab..6fb811fb2 100644
--- a/hooks/go/main.go
+++ b/hooks/go/main.go
@@ -18,6 +18,7 @@ package main
 
 import (
 	"github.com/deckhouse/module-sdk/pkg/app"
 
+	_ "github.com/deckhouse/sds-node-configurator/hooks/go/020-common-scheduler-extender-certs"
 	_ "github.com/deckhouse/sds-node-configurator/hooks/go/020-webhook-certs"
 )
diff --git a/images/sds-common-scheduler-extender/LICENSE b/images/sds-common-scheduler-extender/LICENSE
new file mode 100644
index 000000000..b77c0c92a
--- /dev/null
+++ b/images/sds-common-scheduler-extender/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/images/sds-common-scheduler-extender/cmd/access_log.go b/images/sds-common-scheduler-extender/cmd/access_log.go new file mode 100644 index 000000000..3f23c1e0a --- /dev/null +++ b/images/sds-common-scheduler-extender/cmd/access_log.go @@ -0,0 +1,74 @@ +/* +Copyright 2024 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "context" + "net" + "net/http" + "time" + + "sigs.k8s.io/controller-runtime/pkg/log" +) + +type accessLogResponseWriter struct { + http.ResponseWriter + statusCode int + size int +} + +func (wr *accessLogResponseWriter) Write(data []byte) (int, error) { + n, err := wr.ResponseWriter.Write(data) + wr.size += n + return n, err +} + +func (wr *accessLogResponseWriter) WriteHeader(statusCode int) { + wr.statusCode = statusCode + wr.ResponseWriter.WriteHeader(statusCode) +} + +func (wr *accessLogResponseWriter) accessLogHandler(ctx context.Context, next http.Handler) http.Handler { + logger := log.FromContext(ctx) + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + startTime := time.Now() + + next.ServeHTTP(w, r) + status := wr.statusCode + + fields := []interface{}{ + "type", "access", + "response_time", time.Since(startTime).Seconds(), + "protocol", r.Proto, + "http_status_code", status, + "http_method", r.Method, + "url", r.RequestURI, + "http_host", r.Host, + "request_size", r.ContentLength, + "response_size", wr.size, + } + ip, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + fields = append(fields, "remote_ipaddr", ip) + } + ua := r.Header.Get("User-Agent") + if len(ua) > 0 { + fields = append(fields, "http_user_agent", ua) + } + logger.Info("access", fields...) + }) +} diff --git a/images/sds-common-scheduler-extender/cmd/main.go b/images/sds-common-scheduler-extender/cmd/main.go new file mode 100644 index 000000000..68db6d62e --- /dev/null +++ b/images/sds-common-scheduler-extender/cmd/main.go @@ -0,0 +1,285 @@ +package main + +import ( + "context" + "errors" + "fmt" + "net/http" + "os" + "os/signal" + "sync" + "syscall" + "time" + + slv "github.com/deckhouse/sds-local-volume/api/v1alpha1" + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + lapi "github.com/deckhouse/sds-replicated-volume/api/linstor" + srv "github.com/deckhouse/sds-replicated-volume/api/v1alpha1" + srv2 "github.com/deckhouse/sds-replicated-volume/api/v1alpha2" + v1 "k8s.io/api/core/v1" + sv1 "k8s.io/api/storage/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/yaml" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/controller" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/kubutils" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/scheduler" + + "github.com/spf13/cobra" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + defaultDivisor = 1 + defaultListenAddr = ":8000" + defaultCacheSize = 10 + defaultcertFile = "/etc/sds-common-scheduler-extender/certs/tls.crt" + defaultkeyFile = "/etc/sds-common-scheduler-extender/certs/tls.key" + defaultConfigMapUpdateTimeout = 5 + defaultCacheCheckInterval = 1 + defaultCachePVCTTL = 3600 + defaultCachePVCCheckInterval = 3600 + defaultLogLevel = "3" +) + +type Config struct { + DefaultDivisor float64 `json:"default-divisor"` + ListenAddr string `json:"listen"` + LogLevel string `json:"log-level"` + HealthProbeBindAddress string `json:"health-probe-bind-address"` + CertFile string `json:"cert-file"` + KeyFile string `json:"key-file"` + CacheSize int `json:"cache-size"` + PVCTTL int `json:"pvc-ttl"` + 
CfgMapUpdateTimeout int `json:"configmap-update-timeout"` + CacheCheckInterval int `json:"cache-check-interval"` + CachePVCCheckInterval int `json:"cache-pvc-check-interval"` +} + +var cfgFilePath string + +var resourcesSchemeFuncs = []func(*runtime.Scheme) error{ + srv.AddToScheme, + snc.AddToScheme, + v1.AddToScheme, + sv1.AddToScheme, + slv.AddToScheme, + lapi.AddToScheme, + srv2.AddToScheme, +} + +var config = &Config{ + ListenAddr: defaultListenAddr, + DefaultDivisor: defaultDivisor, + LogLevel: defaultLogLevel, + CacheSize: defaultCacheSize, + CertFile: defaultcertFile, + KeyFile: defaultkeyFile, + PVCTTL: defaultCachePVCTTL, + CfgMapUpdateTimeout: defaultConfigMapUpdateTimeout, + CacheCheckInterval: defaultCacheCheckInterval, + CachePVCCheckInterval: defaultCachePVCCheckInterval, +} + +var rootCmd = &cobra.Command{ + Use: "sds-replicated-volume-scheduler", + Version: "development", + Short: "a scheduler-extender for sds-replicated-volume", + Long: `A scheduler-extender for sds-replicated-volume. +The extender implements filter and prioritize verbs. +The filter verb is "filter" and served at "/filter" via HTTP. +It filters out nodes that have less storage capacity than requested. +The prioritize verb is "prioritize" and served at "/prioritize" via HTTP. +It scores nodes with this formula: + min(10, max(0, log2(capacity >> 30 / divisor))) +The default divisor is 1. It can be changed with a command-line option. +`, + RunE: func(cmd *cobra.Command, _ []string) error { + // to avoid printing usage information when error is returned + cmd.SilenceUsage = true + // to avoid printing errors (we log it closer to the place where it has happened) + cmd.SilenceErrors = true + return subMain(cmd.Context()) + }, +} + +func init() { + rootCmd.PersistentFlags().StringVar(&cfgFilePath, "config", "", "config file") +} + +func main() { + ctx, _ := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + + if err := rootCmd.ExecuteContext(ctx); err != nil { + // we expect err to be logged already + os.Exit(1) + } +} + +func subMain(ctx context.Context) error { + if len(cfgFilePath) != 0 { + b, err := os.ReadFile(cfgFilePath) + if err != nil { + print(err) + return err + } + + if err = yaml.Unmarshal(b, config); err != nil { + print(err) + return err + } + } + + log, err := logger.NewLogger(logger.Verbosity("4")) + if err != nil { + print(fmt.Sprintf("[subMain] unable to initialize logger, err: %s", err)) + return err + } + log.Info(fmt.Sprintf("[subMain] logger has been initialized, log level: %s", config.LogLevel)) + ctrl.SetLogger(log.GetLogger()) + + kConfig, err := kubutils.KubernetesDefaultConfigCreate() + if err != nil { + log.Error(err, "[subMain] unable to KubernetesDefaultConfigCreate") + return err + } + log.Info("[subMain] kubernetes config has been successfully created.") + + scheme := runtime.NewScheme() + for _, f := range resourcesSchemeFuncs { + if err := f(scheme); err != nil { + log.Error(err, "[subMain] unable to add scheme to func") + return err + } + } + log.Info("[subMain] successfully read scheme CR") + + managerOpts := manager.Options{ + Scheme: scheme, + Logger: log.GetLogger(), + HealthProbeBindAddress: config.HealthProbeBindAddress, + BaseContext: func() context.Context { return ctx }, + } + + mgr, err := manager.New(kConfig, managerOpts) + if err != nil { + log.Error(err, "[subMain] unable to create manager for creating controllers") + return err + } + + сache := cache.NewCache(log) + cacheMrg := cache.NewCacheManager(сache, mgr, log) + 
log.Info("[subMain] scheduler cache manager initialized") + + go cacheMrg.RunCleaner(ctx, time.Duration(config.CachePVCCheckInterval)*time.Second) + log.Info("[subMain] scheduler cleanup process started") + + go cacheMrg.RunSaver(ctx, time.Duration(config.CacheCheckInterval)*time.Second, time.Duration(config.CfgMapUpdateTimeout)*time.Second) + log.Info("[subMain] scheduler cache saver started") + + client := mgr.GetClient() + s := scheduler.NewScheduler(ctx, client, log, cacheMrg, config.DefaultDivisor) + log.Info("[subMain] scheduler handler initialized") + + handler := scheduler.NewHandler(log, s) + + if err = controller.RunPVCWatcherCacheController(mgr, log, cacheMrg); err != nil { + log.Error(err, fmt.Sprintf("[subMain] unable to run %s controller", controller.PVCWatcherCacheCtrlName)) + return err + } + log.Info(fmt.Sprintf("[subMain] successfully ran %s controller", controller.PVCWatcherCacheCtrlName)) + + if err = controller.RunLVGWatcherCacheController(mgr, log, cacheMrg); err != nil { + log.Error(err, fmt.Sprintf("[subMain] unable to run %s controller", controller.LVGWatcherCacheCtrlName)) + return err + } + log.Info(fmt.Sprintf("[subMain] successfully ran %s controller", controller.LVGWatcherCacheCtrlName)) + + if err = controller.RunLayerResourceIDsWatcher(mgr, log); err != nil { + log.Error(err, fmt.Sprintf("[subMain] unable to run %s controller", controller.LVGLayerResourceIDsWatcherName)) + return err + } + log.Info(fmt.Sprintf("[subMain] successfully ran %s controller", controller.LVGLayerResourceIDsWatcherName)) + + if err = mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + log.Error(err, "[subMain] unable to mgr.AddHealthzCheck") + return err + } + log.Info("[subMain] successfully AddHealthzCheck") + + if err = mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + log.Error(err, "[subMain] unable to mgr.AddReadyzCheck") + return err + } + log.Info("[subMain] successfully AddReadyzCheck") + + mux := http.NewServeMux() + + //TODO may be this approach needs to be simplified somehow + filteringHandler := scheduler.BodyUnmarshalMiddleware( + scheduler.LogMiddleware( + scheduler.PodCheckMiddleware(ctx, client, http.HandlerFunc(handler.Filter), log), + log, + ), + log, + ) + + prioritizingHandler := scheduler.BodyUnmarshalMiddleware( + scheduler.LogMiddleware( + scheduler.PodCheckMiddleware(ctx, client, http.HandlerFunc(handler.Prioritize), log), + log, + ), + log, + ) + + mux.Handle("/scheduler/filter", filteringHandler) + mux.Handle("/scheduler/prioritize", prioritizingHandler) + mux.HandleFunc("/status", handler.Status) + + serv := &http.Server{ + Addr: config.ListenAddr, + Handler: mux, + ReadTimeout: 30 * time.Second, + WriteTimeout: 30 * time.Second, + } + log.Info("[subMain] server was initialized") + + return runServer(ctx, serv, mgr, log) +} + +func runServer(ctx context.Context, serv *http.Server, mgr manager.Manager, log *logger.Logger) error { + ctx, stop := context.WithCancel(ctx) + + var wg sync.WaitGroup + defer wg.Wait() + defer stop() // stop() should be called before wg.Wait() to stop the goroutine correctly. 
+ wg.Add(1) + + go func() { + defer wg.Done() + <-ctx.Done() + if err := serv.Shutdown(ctx); err != nil { + log.Error(err, "[runServer] failed to shutdown gracefully") + } + }() + + go func() { + log.Info("[runServer] kube manager will start now") + if err := mgr.Start(ctx); err != nil { + log.Error(err, "[runServer] unable to mgr.Start") + } + }() + + log.Info(fmt.Sprintf("[runServer] starts serving on: %s", config.ListenAddr)) + + if err := serv.ListenAndServeTLS(config.CertFile, config.KeyFile); !errors.Is(err, http.ErrServerClosed) { + log.Error(err, "[runServer] unable to run the server") + return err + } + + return nil +} diff --git a/images/sds-common-scheduler-extender/go.mod b/images/sds-common-scheduler-extender/go.mod new file mode 100644 index 000000000..da990a484 --- /dev/null +++ b/images/sds-common-scheduler-extender/go.mod @@ -0,0 +1,73 @@ +module github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender + +go 1.24.2 + +require ( + github.com/deckhouse/sds-node-configurator/api v0.0.0-20250123151518-099b1c39c216 + github.com/go-logr/logr v1.4.2 + github.com/spf13/cobra v1.8.1 + k8s.io/api v0.32.1 + k8s.io/apimachinery v0.33.1 + k8s.io/client-go v0.32.0 + k8s.io/klog/v2 v2.130.1 + k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 + sigs.k8s.io/controller-runtime v0.20.1 + sigs.k8s.io/yaml v1.4.0 +) + +replace github.com/deckhouse/sds-node-configurator/api => ../../api + +require ( + github.com/deckhouse/sds-local-volume/api v0.0.0-20250507092959-70b48eb00576 + github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250610081001-f06e0b517b0b +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.19.1 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.40.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/sync v0.14.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/term v0.32.0 // indirect + golang.org/x/text v0.25.0 // indirect + golang.org/x/time v0.9.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + 
google.golang.org/protobuf v1.36.5 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.32.0 // indirect + k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect + sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect +) diff --git a/images/sds-common-scheduler-extender/go.sum b/images/sds-common-scheduler-extender/go.sum new file mode 100644 index 000000000..952726907 --- /dev/null +++ b/images/sds-common-scheduler-extender/go.sum @@ -0,0 +1,207 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deckhouse/sds-local-volume/api v0.0.0-20250507092959-70b48eb00576 h1:p0OATWSzVJn4U8f+xuOk7SoZknyNhUEH+sx1ecSVSSY= +github.com/deckhouse/sds-local-volume/api v0.0.0-20250507092959-70b48eb00576/go.mod h1:LucHX2fB3tjvLZlGWyf8rJa7pTo0/8cxFCYzZ236bPo= +github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250605080358-5a38c8fe1203 h1:T1OoDF6S6e4ZxIut7JyKLMEVee1dE8t92upDdou+Nx0= +github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250605080358-5a38c8fe1203/go.mod h1:2NWJIfctU3XGDpwuVMRkaqzsGvcjEvMYHcuSwnO8fn4= +github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250609170352-4798fc2cf669 h1:XNuLdADaPc2/KGQCCDsEjFkfVSQKiZPec7ul1PLr/88= +github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250609170352-4798fc2cf669/go.mod h1:M4w6qypH4ak8ypHvN57ts+pbco0iJPkTFNfMIXKOE5Q= +github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250610081001-f06e0b517b0b h1:9CT3d8a1rCqjENG6ph1sLLmnTCE5j0Rcc23AT8qHdJQ= +github.com/deckhouse/sds-replicated-volume/api v0.0.0-20250610081001-f06e0b517b0b/go.mod h1:2NWJIfctU3XGDpwuVMRkaqzsGvcjEvMYHcuSwnO8fn4= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 
v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= +github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod 
h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= +github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= +github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 
h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= +golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.14.0 
h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= +golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= +golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= +golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= +golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
+k8s.io/api v0.32.1 h1:f562zw9cy+GvXzXf0CKlVQ7yHJVYzLfL6JAS4kOAaOc= +k8s.io/api v0.32.1/go.mod h1:/Yi/BqkuueW1BgpoePYBRdDYfjPF5sgTr5+YqDZra5k= +k8s.io/apiextensions-apiserver v0.32.0 h1:S0Xlqt51qzzqjKPxfgX1xh4HBZE+p8KKBq+k2SWNOE0= +k8s.io/apiextensions-apiserver v0.32.0/go.mod h1:86hblMvN5yxMvZrZFX2OhIHAuFIMJIZ19bTvzkP+Fmw= +k8s.io/apimachinery v0.33.1 h1:mzqXWV8tW9Rw4VeW9rEkqvnxj59k1ezDUl20tFK/oM4= +k8s.io/apimachinery v0.33.1/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/client-go v0.32.0 h1:DimtMcnN/JIKZcrSrstiwvvZvLjG0aSxy8PxN8IChp8= +k8s.io/client-go v0.32.0/go.mod h1:boDWvdM1Drk4NJj/VddSLnx59X3OPgwrOo0vGbtq9+8= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= +k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.20.1 h1:JbGMAG/X94NeM3xvjenVUaBjy6Ui4Ogd/J5ZtjZnHaE= +sigs.k8s.io/controller-runtime v0.20.1/go.mod h1:BrP3w158MwvB3ZbNpaAcIKkHQ7YGpYnzpoSTZ8E14WU= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/images/sds-common-scheduler-extender/pkg/cache/cache.go b/images/sds-common-scheduler-extender/pkg/cache/cache.go new file mode 100644 index 000000000..2861a1c69 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/cache/cache.go @@ -0,0 +1,570 @@ +package cache + +import ( + "encoding/json" + "errors" + "fmt" + "slices" + "time" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + slices2 "k8s.io/utils/strings/slices" +) + +const ( + SelectedNodeAnnotation = "volume.kubernetes.io/selected-node" + lvgsPerPVCCount = 5 + lvgsPerNodeCount = 5 +) + +type Cache struct { + storage *Storage + log *logger.Logger +} + +type Storage struct { + Lvgs map[string]*LvgCache `json:"lvgs"` + PvcLVGs map[string][]string `json:"pvc_lvgs"` + NodeLVGs map[string][]string `json:"node_lvgs"` +} + +type LvgCache struct { + Lvg *snc.LVMVolumeGroup `json:"lvg"` + ThickPVCs map[string]*pvcCache `json:"thick_pvcs"` + ThinPools map[string]map[string]*pvcCache `json:"thin_pools"` +} + +type pvcCache struct { + PVC *corev1.PersistentVolumeClaim `json:"pvc"` + 
SelectedNode string `json:"selected_node"` + Provisioner string `json:"provisioner"` +} + +func NewCache(log *logger.Logger) *Cache { + return &Cache{ + storage: &Storage{ + Lvgs: make(map[string]*LvgCache), + PvcLVGs: make(map[string][]string), + NodeLVGs: make(map[string][]string), + }, + log: log, + } +} + +func (c *Cache) String() string { + bytes, err := json.Marshal(c) + if err != nil { + c.log.Error(err, "failed to marshal cache. returning empty string") + return "" + } + return string(bytes) +} + +func (c *Cache) clearBoundExpiredPVC(pvcTTL time.Duration) int { + deletedPVCs := 0 + + for lvgName := range c.storage.Lvgs { + pvcs, err := c.GetAllPVCForLVG(lvgName) + if err != nil { + c.log.Error(err, fmt.Sprintf("[clearBoundExpiredPVC] unable to get PVCs for the LVMVolumeGroup %s", lvgName)) + continue + } + + for _, pvc := range pvcs { + if pvc.Status.Phase != v1.ClaimBound { + c.log.Trace(fmt.Sprintf("[clearBoundExpiredPVC] PVC %s is not in a Bound state", pvc.Name)) + continue + } + + if time.Since(pvc.CreationTimestamp.Time) > pvcTTL { + c.log.Warning(fmt.Sprintf("[clearBoundExpiredPVC] PVC %s is in a Bound state and expired, remove it from the cache", pvc.Name)) + c.RemovePVCFromTheCache(pvc) + deletedPVCs++ + } else { + c.log.Trace(fmt.Sprintf("[clearBoundExpiredPVC] PVC %s is in a Bound state but not expired yet.", pvc.Name)) + } + } + } + + c.log.Debug("[clearBoundExpiredPVC] finished the expired PVC clearing") + return deletedPVCs +} + +func (c *Cache) GetAllPVCForLVG(lvgName string) ([]*v1.PersistentVolumeClaim, error) { + lvgCh, found := c.storage.Lvgs[lvgName] + if !found { + err := fmt.Errorf("cache was not found for the LVMVolumeGroup %s", lvgName) + c.log.Error(err, fmt.Sprintf("[GetAllPVCForLVG] an error occurred while trying to get all PVC for the LVMVolumeGroup %s", lvgName)) + return nil, err + } + + size := len(lvgCh.ThickPVCs) + for _, pvcMap := range lvgCh.ThinPools { + size += len(pvcMap) + } + + result := make([]*v1.PersistentVolumeClaim, 0, size) + for _, pvcCh := range lvgCh.ThickPVCs { + result = append(result, pvcCh.PVC) + } + + for _, pvcMap := range lvgCh.ThinPools { + for _, pvcCh := range pvcMap { + result = append(result, pvcCh.PVC) + } + } + + return result, nil +} + +func (c *Cache) GetAllLVG() map[string]*snc.LVMVolumeGroup { + lvgs := make(map[string]*snc.LVMVolumeGroup) + for lvgName, lvgCh := range c.storage.Lvgs { + if lvgCh.Lvg == nil { + c.log.Error(fmt.Errorf("LVMVolumeGroup %s is not initialized", lvgName), "[GetAllLVG] an error occurs while iterating the LVMVolumeGroups") + continue + } + + lvgs[lvgName] = lvgCh.Lvg + } + + return lvgs +} + +func (c *Cache) GetLVGThickReservedSpace(lvgName string) (int64, error) { + lvg, found := c.storage.Lvgs[lvgName] + if !found { + c.log.Debug(fmt.Sprintf("[GetLVGThickReservedSpace] the LVMVolumeGroup %s was not found in the cache. Returns 0", lvgName)) + return 0, nil + } + + var space int64 + for _, pvcCh := range lvg.ThickPVCs { + space += pvcCh.PVC.Spec.Resources.Requests.Storage().Value() + } + + return space, nil +} + +func (c *Cache) GetLVGThinReservedSpace(lvgName string, thinPoolName string) (int64, error) { + lvgCh, found := c.storage.Lvgs[lvgName] + if !found { + c.log.Debug(fmt.Sprintf("[GetLVGThinReservedSpace] the LVMVolumeGroup %s was not found in the cache. Returns 0", lvgName)) + return 0, nil + } + + pvcMap, found := lvgCh.ThinPools[thinPoolName] + if !found { + c.log.Debug(fmt.Sprintf("[GetLVGThinReservedSpace] the Thin pool %s of the LVMVolumeGroup %s was not found in the cache. 
Returns 0", lvgName, thinPoolName)) + return 0, nil + } + + var space int64 + for _, pvcCh := range pvcMap { + space += pvcCh.PVC.Spec.Resources.Requests.Storage().Value() + } + + return space, nil +} + +func (c *Cache) RemovePVCFromTheCache(pvc *v1.PersistentVolumeClaim) { + pvcKey := configurePVCKey(pvc) + + c.log.Debug(fmt.Sprintf("[RemovePVCFromTheCache] run full cache wipe for PVC %s", pvcKey)) + lvgSlice, ok := c.storage.PvcLVGs[pvcKey] + if ok { + for _, lvgName := range lvgSlice { + lvgCh, found := c.storage.Lvgs[lvgName] + if found { + delete(lvgCh.ThickPVCs, pvcKey) + for _, pvcMap := range lvgCh.ThinPools { + delete(pvcMap, pvcKey) + } + } + } + } + + delete(c.storage.PvcLVGs, pvcKey) +} + +func (c *Cache) GetLVGNamesForPVC(pvc *v1.PersistentVolumeClaim) []string { + pvcKey := configurePVCKey(pvc) + lvgNames, found := c.storage.PvcLVGs[pvcKey] + if !found { + c.log.Warning(fmt.Sprintf("[GetLVGNamesForPVC] no cached LVMVolumeGroups were found for PVC %s", pvcKey)) + return nil + } + + return lvgNames +} + +func (c *Cache) GetLVGNamesByNodeName(nodeName string) []string { + lvgs, found := c.storage.NodeLVGs[nodeName] + if !found { + c.log.Debug(fmt.Sprintf("[GetLVGNamesByNodeName] no LVMVolumeGroup was found in the cache for the node %s. Return empty slice", nodeName)) + return []string{} + } + + return lvgs +} + +func (c *Cache) UpdateThickPVC(lvgName string, pvc *v1.PersistentVolumeClaim, provisioner string) error { + pvcKey := configurePVCKey(pvc) + + lvgCh, found := c.storage.Lvgs[lvgName] + if !found { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + } + + pvcCh, found := lvgCh.ThickPVCs[pvcKey] + if !found { + c.log.Warning(fmt.Sprintf("[UpdateThickPVC] PVC %s was not found in the cache for the LVMVolumeGroup %s. It will be added", pvcKey, lvgName)) + err := c.AddThickPVC(lvgName, pvc, provisioner) + if err != nil { + c.log.Error(err, fmt.Sprintf("[UpdateThickPVC] an error occurred while trying to update the PVC %s", pvcKey)) + return err + } + return nil + } + + pvcCh.PVC = pvc + pvcCh.SelectedNode = pvc.Annotations[SelectedNodeAnnotation] + pvcCh.Provisioner = provisioner + c.log.Debug(fmt.Sprintf("[UpdateThickPVC] successfully updated PVC %s with selected node %s in the cache for LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName)) + return nil +} + +func (c *Cache) AddThickPVC(lvgName string, pvc *v1.PersistentVolumeClaim, provisioner string) error { + if pvc.Status.Phase == v1.ClaimBound { + c.log.Warning(fmt.Sprintf("[AddThickPVC] PVC %s/%s has status phase BOUND. 
It will not be added to the cache", pvc.Namespace, pvc.Name)) + return nil + } + + pvcKey := configurePVCKey(pvc) + + lvgCh, found := c.storage.Lvgs[lvgName] + if !found { + err := fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + c.log.Error(err, fmt.Sprintf("[AddThickPVC] an error occurred while trying to add PVC %s to the cache", pvcKey)) + return err + } + + c.log.Trace(fmt.Sprintf("[AddThickPVC] PVC %s/%s annotations: %v", pvc.Namespace, pvc.Name, pvc.Annotations)) + + shouldAdd, err := c.shouldAddPVC(pvc, lvgCh, pvcKey, lvgName, "") + if err != nil { + return err + } + + if !shouldAdd { + c.log.Debug(fmt.Sprintf("[AddThickPVC] PVC %s should not be added", pvcKey)) + return nil + } + + c.log.Debug(fmt.Sprintf("[AddThickPVC] new PVC %s cache will be added to the LVMVolumeGroup %s", pvcKey, lvgName)) + c.addNewThickPVC(lvgCh, pvc, provisioner) + + return nil +} + +func (c *Cache) addNewThickPVC(lvgCh *LvgCache, pvc *v1.PersistentVolumeClaim, provisioner string) { + pvcKey := configurePVCKey(pvc) + lvgCh.ThickPVCs[pvcKey] = &pvcCache{ + PVC: pvc, + SelectedNode: pvc.Annotations[SelectedNodeAnnotation], + Provisioner: provisioner, + } + + c.AddLVGToPVC(lvgCh.Lvg.Name, pvcKey) +} + +func (c *Cache) AddLVGToPVC(lvgName, pvcKey string) { + // TODO protect from duplicates + lvgsForPVC, found := c.storage.PvcLVGs[pvcKey] + if !found || lvgsForPVC == nil { + lvgsForPVC = make([]string, 0, lvgsPerPVCCount) + } + + c.log.Trace(fmt.Sprintf("[addLVGToPVC] LVMVolumeGroups from the cache for PVC %s before append: %v", pvcKey, lvgsForPVC)) + lvgsForPVC = append(lvgsForPVC, lvgName) + c.log.Trace(fmt.Sprintf("[addLVGToPVC] LVMVolumeGroups from the cache for PVC %s after append: %v", pvcKey, lvgsForPVC)) + c.storage.PvcLVGs[pvcKey] = lvgsForPVC +} + +func (c *Cache) shouldAddPVC(pvc *v1.PersistentVolumeClaim, lvgCh *LvgCache, pvcKey, lvgName, thinPoolName string) (bool, error) { + if pvc.Annotations[SelectedNodeAnnotation] != "" { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] PVC %s/%s has selected node annotation, selected node: %s", pvc.Namespace, pvc.Name, pvc.Annotations[SelectedNodeAnnotation])) + + lvgsOnTheNode, found := c.storage.NodeLVGs[pvc.Annotations[SelectedNodeAnnotation]] + if !found { + err := fmt.Errorf("no LVMVolumeGroups found for the node %s", pvc.Annotations[SelectedNodeAnnotation]) + c.log.Error(err, fmt.Sprintf("[shouldAddPVC] an error occurred while trying to add PVC %s to the cache", pvcKey)) + return false, err + } + + if !slices2.Contains(lvgsOnTheNode, lvgName) { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] LVMVolumeGroup %s does not belong to PVC %s/%s selected node %s. It will be skipped", lvgName, pvc.Namespace, pvc.Name, pvc.Annotations[SelectedNodeAnnotation])) + return false, nil + } + + c.log.Debug(fmt.Sprintf("[shouldAddPVC] LVMVolumeGroup %s belongs to PVC %s/%s selected node %s", lvgName, pvc.Namespace, pvc.Name, pvc.Annotations[SelectedNodeAnnotation])) + + // if pvc is thick + if _, found := lvgCh.ThickPVCs[pvcKey]; found { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] PVC %s was found in the cache of the LVMVolumeGroup %s", pvcKey, lvgName)) + return false, nil + } + + // if pvc is thin + if thinPoolName != "" { + if thinPool, found := lvgCh.ThinPools[thinPoolName]; found { + if _, found := thinPool[pvcKey]; found { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] PVC %s was found in the Thin pool %s cache of the LVMVolumeGroup %s. 
No need to add", pvcKey, thinPoolName, lvgName)) + return false, nil + } + } else { + c.log.Debug(fmt.Sprintf("[shouldAddPVC] Thin pool %s was not found in the cache, PVC %s should be added", thinPoolName, pvcKey)) + return true, nil + } + } + } + + return true, nil +} + +func (c *Cache) UpdateThinPVC(lvgName, thinPoolName string, pvc *v1.PersistentVolumeClaim, provisioner string) error { + pvcKey := configurePVCKey(pvc) + lvgCh, found := c.storage.Lvgs[lvgName] + if !found { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the cache", lvgName) + } + + if _, found := lvgCh.ThinPools[thinPoolName]; !found { + c.log.Debug(fmt.Sprintf("[UpdateThinPVC] Thin Pool %s was not found in the LVMVolumeGroup %s, add it.", thinPoolName, lvgName)) + err := c.addThinPoolIfNotExists(lvgCh, thinPoolName) + if err != nil { + return err + } + } + + if _, found := lvgCh.ThinPools[thinPoolName][pvcKey]; !found { + c.log.Warning(fmt.Sprintf("[UpdateThinPVC] Thin PVC %s was not found in Thin pool %s in the cache for the LVMVolumeGroup %s. It will be added", pvcKey, thinPoolName, lvgName)) + err := c.addNewThinPVC(lvgCh, pvc, thinPoolName, provisioner) + if err != nil { + c.log.Error(err, fmt.Sprintf("[UpdateThinPVC] an error occurred while trying to update the PVC %s", pvcKey)) + return err + } + return nil + } + + lvgCh.ThinPools[thinPoolName][pvcKey].PVC = pvc + lvgCh.ThinPools[thinPoolName][pvcKey].SelectedNode = pvc.Annotations[SelectedNodeAnnotation] + lvgCh.ThinPools[thinPoolName][pvcKey].Provisioner = provisioner + c.log.Debug(fmt.Sprintf("[UpdateThinPVC] successfully updated THIN PVC %s with selected node %s in the cache for LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName)) + + return nil +} + +func (c *Cache) addThinPoolIfNotExists(lvgCh *LvgCache, thinPoolName string) error { + if thinPoolName == "" { + err := errors.New("no thin pool name specified") + c.log.Error(err, fmt.Sprintf("[addThinPoolIfNotExists] unable to add thin pool in the LVMVolumeGroup %s", lvgCh.Lvg.Name)) + return err + } + + if _, found := lvgCh.ThinPools[thinPoolName]; found { + c.log.Debug(fmt.Sprintf("[addThinPoolIfNotExists] Thin pool %s is already created in the LVMVolumeGroup %s. 
No need to add a new one", thinPoolName, lvgCh.Lvg.Name)) + return nil + } + + lvgCh.ThinPools[thinPoolName] = make(map[string]*pvcCache) + return nil +} + +func (c *Cache) addNewThinPVC(lvgCh *LvgCache, pvc *v1.PersistentVolumeClaim, thinPoolName string, provisioner string) error { + pvcKey := configurePVCKey(pvc) + err := c.addThinPoolIfNotExists(lvgCh, thinPoolName) + if err != nil { + c.log.Error(err, fmt.Sprintf("[addNewThinPVC] unable to add Thin pool %s in the LVMVolumeGroup %s cache for PVC %s", thinPoolName, lvgCh.Lvg.Name, pvc.Name)) + return err + } + + if _, found := lvgCh.ThinPools[thinPoolName]; !found { + err = fmt.Errorf("thin pool %s not found", thinPoolName) + c.log.Error(err, fmt.Sprintf("[addNewThinPVC] unable to add Thin PVC %s to the cache", pvcKey)) + return err + } + + lvgCh.ThinPools[thinPoolName][pvcKey] = &pvcCache{ + PVC: pvc, + SelectedNode: pvc.Annotations[SelectedNodeAnnotation], + Provisioner: provisioner, + } + c.log.Debug(fmt.Sprintf("[addNewThinPVC] THIN PVC %s was added to the cache to Thin Pool %s", pvcKey, thinPoolName)) + + c.AddLVGToPVC(lvgCh.Lvg.Name, pvcKey) + return nil +} + +func (c *Cache) RemoveSpaceReservationForPVCWithSelectedNode(pvc *v1.PersistentVolumeClaim, deviceType string) error { + pvcKey := configurePVCKey(pvc) + // the LVG which is used to store PVC + selectedLVGsNames := make([]string, 0, 3) + + lvgNamesForPVC, found := c.storage.PvcLVGs[pvcKey] + if !found { + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] cache for PVC %s has been already removed", pvcKey)) + return nil + } + + for _, lvgName := range lvgNamesForPVC { + lvgCh, found := c.storage.Lvgs[lvgName] + if !found || lvgCh == nil { + err := fmt.Errorf("no cache found for the LVMVolumeGroup %s", lvgName) + c.log.Error(err, fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] an error occurred while trying to remove space reservation for PVC %s", pvcKey)) + return err + } + + switch deviceType { + case consts.Thin: + for thinPoolName, thinPool := range lvgCh.ThinPools { + if pvcCh, found := thinPool[pvcKey]; found { + selectedNode := pvcCh.SelectedNode + if selectedNode == "" { + delete(lvgCh.ThinPools[thinPoolName], pvcKey) + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] removed space reservation for PVC %s in the Thin pool %s of the LVMVolumeGroup %s due the PVC was selected to the node %s", pvcKey, thinPoolName, lvgName, pvc.Annotations[SelectedNodeAnnotation])) + } else { + // TODO найти все лвг, хрянящие копии тома + selectedLVGsNames = append(selectedLVGsNames, lvgName) + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s was selected to the node %s. 
It should not be removed from the LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName)) + } + } else { + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s space reservation in the LVMVolumeGroup %s has been already removed", pvcKey, lvgName)) + } + } + case consts.Thick: + if pvcCh, found := lvgCh.ThickPVCs[pvcKey]; found { + selectedNode := pvcCh.SelectedNode + if selectedNode == "" { + delete(lvgCh.ThickPVCs, pvcKey) + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] removed space reservation for PVC %s in the LVMVolumeGroup %s due the PVC was selected to the node %s", pvcKey, lvgName, pvc.Annotations[SelectedNodeAnnotation])) + } else { + // TODO найти все лвг, хрянящие копии тома + selectedLVGsNames = append(selectedLVGsNames, lvgName) + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s was selected to the node %s. It should not be removed from the LVMVolumeGroup %s", pvcKey, pvc.Annotations[SelectedNodeAnnotation], lvgName)) + } + } else { + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s space reservation in the LVMVolumeGroup %s has been already removed", pvcKey, lvgName)) + } + } + } + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] PVC %s space reservation has been removed from LVMVolumeGroup cache", pvcKey)) + + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] cache for PVC %s will be wiped from unused LVMVolumeGroups", pvcKey)) + cleared := make([]string, 0, len(lvgNamesForPVC)) + + for _, lvgName := range lvgNamesForPVC { + if slices.Contains(selectedLVGsNames, lvgName) { + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] the LVMVolumeGroup %s will be saved for PVC %s cache as used", lvgName, pvcKey)) + cleared = append(cleared, lvgName) + } else { + c.log.Debug(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] the LVMVolumeGroup %s will be removed from PVC %s cache as not used", lvgName, pvcKey)) + } + } + + c.log.Trace(fmt.Sprintf("[RemoveSpaceReservationForPVCWithSelectedNode] cleared LVMVolumeGroups for PVC %s: %v", pvcKey, cleared)) + c.storage.PvcLVGs[pvcKey] = cleared + return nil +} + +func (c *Cache) TryGetLVG(name string) *snc.LVMVolumeGroup { + lvgCh, found := c.storage.Lvgs[name] + if !found { + c.log.Debug(fmt.Sprintf("[TryGetLVG] the LVMVolumeGroup %s was not found in the cache. 
Return nil", name)) + return nil + } + + return lvgCh.Lvg +} + +func (c *Cache) UpdateLVG(lvg *snc.LVMVolumeGroup) error { + lvgCh, found := c.storage.Lvgs[lvg.Name] + if !found { + return fmt.Errorf("the LVMVolumeGroup %s was not found in the lvgCh", lvg.Name) + } + + lvgCh.Lvg = lvg + + c.log.Trace(fmt.Sprintf("[UpdateLVG] the LVMVolumeGroup %s nodes: %v", lvg.Name, lvg.Status.Nodes)) + for _, node := range lvg.Status.Nodes { + lvgsOnTheNode, found := c.storage.NodeLVGs[node.Name] + if !found { + lvgsOnTheNode = make([]string, 0, lvgsPerNodeCount) + } + + if !slices2.Contains(lvgsOnTheNode, lvg.Name) { + lvgsOnTheNode = append(lvgsOnTheNode, lvg.Name) + c.log.Debug(fmt.Sprintf("[UpdateLVG] the LVMVolumeGroup %s has been added to the node %s", lvg.Name, node.Name)) + c.storage.NodeLVGs[node.Name] = lvgsOnTheNode + } else { + c.log.Debug(fmt.Sprintf("[UpdateLVG] the LVMVolumeGroup %s has been already added to the node %s", lvg.Name, node.Name)) + } + } + return nil +} + +func (c *Cache) AddLVG(lvg *snc.LVMVolumeGroup) { + if _, found := c.storage.Lvgs[lvg.Name]; found { + c.log.Debug(fmt.Sprintf("[AddLVG] the LVMVolumeGroup %s has been already added to the cache", lvg.Name)) + return + } + + c.storage.Lvgs[lvg.Name] = &LvgCache{ + Lvg: lvg, + ThickPVCs: make(map[string]*pvcCache), + ThinPools: make(map[string]map[string]*pvcCache), + } + + c.log.Trace(fmt.Sprintf("[AddLVG] the LVMVolumeGroup %s nodes: %v", lvg.Name, lvg.Status.Nodes)) + for _, node := range lvg.Status.Nodes { + lvgsOnTheNode, found := c.storage.NodeLVGs[node.Name] + if !found { + lvgsOnTheNode = make([]string, 0, lvgsPerNodeCount) + } + + lvgsOnTheNode = append(lvgsOnTheNode, lvg.Name) + c.log.Debug(fmt.Sprintf("[AddLVG] the LVMVolumeGroup %s has been added to the node %s", lvg.Name, node.Name)) + c.storage.NodeLVGs[node.Name] = lvgsOnTheNode + } +} + +func (c *Cache) DeleteLVG(lvgName string) { + delete(c.storage.Lvgs, lvgName) + + for nodeName, lvgNames := range c.storage.NodeLVGs { + for i, lvg := range lvgNames { + if lvg == lvgName { + c.storage.NodeLVGs[nodeName] = append(lvgNames[:i], lvgNames[i+1:]...) + break + } + } + } + + for pvcKey, lvgNames := range c.storage.PvcLVGs { + for i, lvg := range lvgNames { + if lvg == lvgName { + c.storage.PvcLVGs[pvcKey] = append(lvgNames[:i], lvgNames[i+1:]...) + break + } + } + } +} + +func configurePVCKey(pvc *v1.PersistentVolumeClaim) string { + return fmt.Sprintf("%s/%s", pvc.Namespace, pvc.Name) +} diff --git a/images/sds-common-scheduler-extender/pkg/cache/cache.md b/images/sds-common-scheduler-extender/pkg/cache/cache.md new file mode 100644 index 000000000..8750532a1 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/cache/cache.md @@ -0,0 +1,303 @@ +# SDS-Replicated-Volume Cache Control + +// TODO описать разные кейсы работы с кешем + +Cache это in-memory хранилище об LVG в кластере. Его цель - резервировать место в LVG для новых PVC, пока те не перейдут в состояние "Bound" +Данные в Cache помещает и актуализирует контроллер. + +Визуализация кэша с данными: +```go +Cache{ + Lvgs: sync.Map{ + // TODO убрать нэймспейс + "namespace-1/lvg-1": LvgCache{ + Lvg: &snc.LVMVolumeGroup{Name: "lvg-1"}, + thickPVCs: sync.Map{ + "namespace-1/pvc-1": &pvcCache{ + pvc: &v1.PersistentVolumeClaim{Name: "pvc-1"}, + selectedNode: "node-1", + }, + "namespace-1/pvc-2": &pvcCache{ + pvc: &v1.PersistentVolumeClaim{Name: "pvc-2"}, + selectedNode: "node-2", + }, + }, + thinPools: sync.Map{ + "pool-1": &thinPoolCache{ + // TODO убрать поле (? 
возможно нужно оставить из-за синкмапы) + // TODO исследовать возможность впилить мьютекс + pvcs: sync.Map{ + "namespace-1/pvc-2": &pvcCache{ + pvc: &v1.PersistentVolumeClaim{Name: "pvc-2"}, + selectedNode: "node-2", + }, + "namespace-1/pvc-3": &pvcCache{ + pvc: &v1.PersistentVolumeClaim{Name: "pvc-3"}, + selectedNode: "node-3", + }, + }, + } + }, + } + }, + pvcLVGs: sync.Map{ + "namespace-1/pvc-1": []string{"lvg-1"}, + "namespace-1/pvc-2": []string{"lvg-2"}, + "namespace-1/pvc-3": []string{"lvg-3"}, + }, + nodeLVGs: sync.Map{ + "namespace-1/node-1": []string{"lvg-1"}, + "namespace-1/node-2": []string{"lvg-2"}, + "namespace-1/node-3": []string{"lvg-3"}, + }, + log: logger.NewLogger(), + expiredDuration: time.Duration(5) * time.Second, +} +``` + +## Принцип работы +При запуске приложения указатель на экземпляр кэша передается в контроллер. +Контроллер реагирует на создание и изменение PVC или LVG в класетере с помощью методов CreateFunc, UpdateFunc и DeleteFunc. + +### 1. Контроллер 'pvc_watcher_cache' + +- CreateFunc +1. При создании новой PVC метод: + - Проверяет наличие у нее аннотаций (при отсутствии логгирует факт и завершает работу) + - Если аннотации есть, проверяет наличие в них значения по ключу "volume.kubernetes.io/selected-node" (SelectedNodeAnnotation) +2. Метод завершает работу если: + - Не найдено значение по ключу "SelectedNodeAnnotation" + - PVC находится в статусе "Bound" + - pvc.DeletionTimestamp != nil +3. После этого вызывается функция reconcilePVC() + + +- UpdateFunc +1. При обновлении существующей PVC метод: + - Проверяет наличие у нее аннотаций (при отсутствии логгирует факт и завершает работу) + - Если аннотации есть, проверяет наличие в них значения по ключу "volume.kubernetes.io/selected-node" (SelectedNodeAnnotation) +2. Метод завершает работу если: + - Не найдено значение по ключу "SelectedNodeAnnotation" + - pvc.DeletionTimestamp != nil +3. После этого вызывается функция reconcilePVC() + +Единственное отличие от CreateFunc здесь - метод не проверяет статус PVC. + + +- DeleteFunc +При удалении PVC из кластера метод вызывает метод кэша RemovePVCFromTheCache() + +### 2. Контроллер 'lvg_watcher_cache' + +- CreateFunc +При создании новой LVG метод: + 1. Проверяет наличие DeletionTimestamp. Завершает работу, если он существует + 2. Получает из кэша LVG по ее имени: + - если LVG найдена выобновляет данные о ней, вызывая метод UpdateLVG() + - если не найдена, создает запись в кэше методом AddLVG() + 3. Запрашивает в кэше все PVC для данной LVG + 4. Итерируя по PVC, удаляет из кэша все со статусом "Bound" (метод RemovePVCFromTheCache()) + +- UpdateFunc +При обновлении LVG в кластере, метод: + 1. Обновляет данные о ней в кеше методом UpdateLVG() + 2. Вызывает метод shouldReconcileLVG(), для проверки необходимости дальнейшей обработки LVG + 3. LVG *НЕ* будет обработана если: + - у новой версии присутствует DeletionTimestamp + - значения thinPools и аллоцированной памяти старой и новой версии LVG не изменились + 4. Запрашивает в кэше все PVC для данной LVG + 5. Итерируя по PVC, удаляет из кэша все со статусом "Bound" (метод RemovePVCFromTheCache()) + +- DeleteFunc +При удалении PVC из кластера метод вызывает метод кэша DeleteLVG() + + +## Методы кеша +```go +clearBoundExpiredPVC() +``` +Метод выполняет очистку кеша от устаревших PVC в состоянии Bound. + +Алгоритм работы: + 1. Метод итерирует по всем lvg поля Lvgs + 2. Запрашиваются все pvc относящиеся к каждой lvg + 3. 
PVC удаляется из кэша если *одновременно*: + - ее статус pvc.Status.Phase == v1.ClaimBound + - с момента ее создания (pvc.CreationTimestamp.Time) прошло больше времени, чем указано в поле expiredDuration кэша + +```go +GetAllPVCForLVG(lvgName string) ([]*v1.PersistentVolumeClaim, error) +``` +Метод получает все хранящиеся в кэше pvc, связанных с переданной lvg + +Алгоритм работы: + 1. Метод проверяет наличие pvc в кэше по переданному имени lvg + 2. Затем метод итерирует по всем объектам полей thickPVCs, thinPools. Для каждой существующей lvg счетчик size увеличивается на 1 + 3. Создается слайс для pvc, с cap == size, в него перед возвратом помещаются указатели на pvc из полей thickPVCs и thinPools + 4. Метод возвращает слайс + + +```go +GetAllLVG() map[string]*snc.LVMVolumeGroup +``` +Метод возвращает все хранящиеся LVG в виде мапы map[string]*snc.LVMVolumeGroup + + +```go +GetLVGThickReservedSpace(lvgName string) (int64, error) +``` +Метод возвращает суммарное зарезервированное место всеми PVC ThickLVG. +Значение зарезервированного места возвращает метод pvc.Spec.Resources.Requests.Storage().Value() + + +```go +GetLVGThinReservedSpace(lvgName string, thinPoolName string) (int64, error) +``` +Метод подсчитывает количество (в байтах) зарезервированного места всеми thin PVC выбранной LVG + + +```go +RemovePVCFromTheCache(pvc *v1.PersistentVolumeClaim) +``` +Метод удаляет из кэша все данные о переданной PVC + + +```go +GetLVGNamesForPVC(pvc *v1.PersistentVolumeClaim) []string +``` +Метод возвращает слайс имен LVG для выбранной PVC + + +```go +GetLVGNamesByNodeName(nodeName string) []string +``` +Метод возвращает слайс имен LVG для выбранной ноды + + +```go +UpdateThickPVC(lvgName string, pvc *v1.PersistentVolumeClaim) error +``` +Метод обновляет PVC переданной LVG. Если такой LVG в кэше нет - добавляет ее. + + +```go +AddThickPVC(lvgName string, pvc *v1.PersistentVolumeClaim) error +``` +Метод добавляет переданную PVC к выбранной LVG. Если такой LVG нет, возвращает ошибку. Если в кэше есть такая PVC, ничего не делает. + + +```go +addNewThickPVC(lvgCh *LvgCache, pvc *v1.PersistentVolumeClaim) +``` +Метод добавляет переданную PVC к выбранной LVG. + + +```go +addLVGToPVC(lvgName, pvcKey string) +``` +Метод добавляет имя LVG в мапу c.pvcLVGs + + +```go +shouldAddPVC(pvc *v1.PersistentVolumeClaim, lvgCh *LvgCache, pvcKey, lvgName, thinPoolName string) (bool, error) +``` +Метод проверяет, нужно ли добавлять PVC в кэш. + +Алгоритм работы: + 1. Если PVC не содержит аннотацию выбранного узла (pvc.Annotations["volume.kubernetes.io/selected-node"] == "") метод вернет true + 2. Если указанная аннотация присутствует, то: + - eсли для данного узла не найден список LVG в кэше (nodeLVGs), вернется false + - если переданная LVG (lvgName) не входит в найденных список, вернется false + - если LVG принадлежит узлу, производится дальнейшая проверка: + - если PVC уже присутствует в кэше для thick PVC (lvgCh.thickPVCs), метод логирует это и возвращает false (PVC уже добавлен, его не следует добавлять повторно) + - далее проверяется случай для thin PVC: + - если аргумент thinPoolName передан, то метод пытается найти кэш для соответствующего thin pool в lvgCh.thinPools + - если кэш для thin pool не найден, метод считает, что нужно добавить PVC (возвращает true) + - если thin pool найден, то производится проверка, существует ли PVC (по ключу pvcKey) внутри кэша thin pool. 
Если найден, то метод возвращает false (PVC уже добавлен в thin pool) + +Резюмируя: добавление происходит только если PVC соответствует выбранному узлу и группе, и ещё не присутствует в кэше thick или thin PVC + + +```go +UpdateThinPVC(lvgName, thinPoolName string, pvc *v1.PersistentVolumeClaim) error +``` +Метод обновляет объект thin PVC в кэше для указанной LVMVolumeGroup и thin pool + + +```go +addThinPoolIfNotExists(lvgCh *LvgCache, thinPoolName string) error +``` +Метод addThinPoolIfNotExists проверяет наличие thin pool с указанным именем в кэше для данной LVMVolumeGroup. + +Алгоритм работы: + 1. Если имя thin pool не задано (строка пуста), возвращается ошибка. + 2. Если thin pool с таким именем уже существует в кэше, метод ничего не делает и возвращает nil. + 3. Если thin pool отсутствует, создаётся новый объект thinPoolCache и сохраняется в кэш с ключом thinPoolName, после чего метод возвращает nil. + + +```go +addNewThinPVC(lvgCh *LvgCache, pvc *v1.PersistentVolumeClaim, thinPoolName string) error +``` +Метод создает новый thinPool если он не существует в кэше. +Добавляет pvc в pool, sызывает метод addLVGToPVC. +Чтобы связать PVC с LVMVolumeGroup по lvgCh.Lvg.Name + + +```go +PrintTheCacheLog() +``` +Метод выводит содержимое кэша в лог. + + +```go +RemoveSpaceReservationForPVCWithSelectedNode(pvc *v1.PersistentVolumeClaim, deviceType string) error +``` +Метод удаляет резервацию места для PVC в кэше для всех LVMVolumeGroups, за исключением той группы, в которой PVC закреплён за выбранным узлом. + +Алгоритм работы: + 1. Метод получает из кэша список LVG, которые связаны с переданной PVC (из pvcLVGs). Если кэш отсутствует, метод завершается + 2. Для каждой LVMVolumeGroup ([]string) из п.1: + - В зависимости от типа устройства (Thin или Thick): + - Для Thin: + - Проходит по thin pool’ам, ищет PVC по ключу. + - Если PVC найден, проверяет поле selectedNode + - Если selectedNode пустой, удаляет PVC из thin pool’а. + – Если не пустой, запоминает эту группу как выбранную (selectedLVGName) и PVC не удаляется. + - Для Thick: + - Ищет PVC в кэше thickPVCs. + - Если найдён, проверяет поле selectedNode и либо удаляет резервацию (при пустом selectedNode), либо запоминает группу как выбранную + 3. После прохода по всем группам метод обновляет кэш pvcLVGs: оставляет только выбранную LVMVolumeGroup (если таковая имеется) в списке для данного PVC, а для остальных удаляет привязку. + +```go +TryGetLVG(name string) *snc.LVMVolumeGroup +``` +Метод пытается получить объект LVMVolumeGroup из кэша по заданному имени +Если кэш содержит запись с таким именем, возвращается LVMVolumeGroup (из LvgCache), иначе метод выводит сообщение в лог и возвращает nil + +```go +UpdateLVG(lvg *snc.LVMVolumeGroup) error +``` +Метод обновляет информацию о LVG в кэше +Алгоритм работы: + 1. Проверяет наличие кэша для переданной LVG (c.Lvgs). Метод возвращает ошибку, если кэш не найден + 2. Если кэш найден, записывает в поле lvgCh.Lvg переданное значение LVG + 3. Для каждого узла из списка lvg.Status.Nodes метод проверяет, содержится ли имя LVG в поле nodeLVGs + - Если группа ещё не привязана к узлу, добавляет её в список и сохраняет обратно в nodeLVGs + - Если привязка уже существует, просто логирует этот факт + +```go +AddLVG(lvg *snc.LVMVolumeGroup) +``` +Метод AddLVG добавляет новый объект LVMVolumeGroup в кэш +Алгоритм работы: + 1. Добавляет новый объект в кэш (c.Lvgs) помощью syncMap.LoadOrStore(). Если группа уже там (loaded == true), выводится отладочное сообщение, и метод завершается + 2. 
Если группа добавляется впервые, для каждого узла из lvg.Status.Nodes: + - Проверяется, существует ли уже список групп для этого узла в nodeLVGs + - Если списка нет, создаётся новый с вместимостью, рассчитанной на определённое количество групп + - Добавляется имя группы в список для узла, после чего обновлённый список сохраняется в nodeLVGs + - Выполняется логирование добавления группы к конкретному узлу + +```go +DeleteLVG(lvgName string) +``` +Метод удаляет информацию об LVG из кэша (c.Lvgs, c.nodeLVGs, c.pvcLVGs) diff --git a/images/sds-common-scheduler-extender/pkg/cache/manager.go b/images/sds-common-scheduler-extender/pkg/cache/manager.go new file mode 100644 index 000000000..41509fbf7 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/cache/manager.go @@ -0,0 +1,301 @@ +package cache + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + v1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +const ( + nameSpace = "d8-sds-node-configurator" + configMapName = "sheduler-extender-cache" +) + +type CacheManager struct { + cache *Cache + locker sync.Locker + log *logger.Logger + mrg manager.Manager + isUpdated bool +} + +func NewCacheManager(c *Cache, mrg manager.Manager, log *logger.Logger) *CacheManager { + return &CacheManager{ + cache: c, + locker: &sync.Mutex{}, + mrg: mrg, + log: log, + } +} + +func (cm *CacheManager) RunCleaner(ctx context.Context, pvcCheckInterval time.Duration) { + t := time.NewTicker(pvcCheckInterval) + defer t.Stop() + + for { + select { + case <-ctx.Done(): + cm.log.Info("[CacheManager] Cleaner gracefully stops it's work") + return + case <-t.C: + cm.log.Info("[CacheManager] Starting pvc cleanup") + cm.locker.Lock() + deletedPVCs := cm.cache.clearBoundExpiredPVC(pvcCheckInterval) + if deletedPVCs > 0 { + cm.isUpdated = true + } + cm.locker.Unlock() + cm.log.Info("[CacheManager] pvc cleanup has finished") + } + } +} + +func (cm *CacheManager) RunSaver(ctx context.Context, cacheCheckInterval, configMapUpdateTimeout time.Duration) { + t := time.NewTicker(cacheCheckInterval) + + for { + select { + case <-ctx.Done(): + cm.log.Info("[CacheManager] Saver gracefully stops it's work") + return + case <-t.C: + if !cm.isUpdated { + continue + } + + cm.locker.Lock() + cacheStr := cm.cache.String() + cm.locker.Unlock() + if cacheStr == "" { + cm.log.Warning("[CacheManager] Cache returned an empty data string. 
Skipping iteration") + continue + } + + cwt, cancel := context.WithTimeout(ctx, configMapUpdateTimeout) + err := cm.SaveOrUpdate(cwt, cm.mrg, cacheStr) + if err != nil { + if errors.Is(err, context.DeadlineExceeded) { + cm.log.Error(err, "[CacheManager] cache saving process timed out") + } else { + cm.log.Error(err, "[CacheManager] cache saving process failed") + } + } + cancel() + } + } +} + +func (cm *CacheManager) SaveOrUpdate(ctx context.Context, mrg manager.Manager, data string) error { + cfgMap := &v1.ConfigMap{} + err := cm.mrg.GetClient().Get(ctx, client.ObjectKey{Name: configMapName, Namespace: nameSpace}, cfgMap) + if err != nil { + if k8serrors.IsNotFound(err) { + cfgMap.ObjectMeta = metav1.ObjectMeta{ + Name: configMapName, + Namespace: nameSpace, + } + cfgMap.Data = map[string]string{ + "cache": data, + } + return cm.mrg.GetClient().Create(ctx, cfgMap) + } + return err + } + + cfgMap.Data["cache"] = data + return cm.mrg.GetClient().Update(ctx, cfgMap) +} + +func (cm *CacheManager) GetAllLVG() map[string]*snc.LVMVolumeGroup { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.GetAllLVG() +} + +func (cm *CacheManager) GetLVGThickReservedSpace(lvgName string) (int64, error) { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.GetLVGThickReservedSpace(lvgName) +} + +func (cm *CacheManager) GetLVGThinReservedSpace(lvgName string, thinPoolName string) (int64, error) { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.GetLVGThinReservedSpace(lvgName, thinPoolName) +} + +func (cm *CacheManager) RemovePVCFromTheCache(pvc *v1.PersistentVolumeClaim) { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + cm.cache.RemovePVCFromTheCache(pvc) +} + +func (cm *CacheManager) GetLVGNamesForPVC(pvc *v1.PersistentVolumeClaim) []string { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.GetLVGNamesForPVC(pvc) +} + +func (cm *CacheManager) GetLVGNamesByNodeName(nodeName string) []string { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.GetLVGNamesByNodeName(nodeName) +} + +func (cm *CacheManager) UpdateThickPVC(lvgName string, pvc *v1.PersistentVolumeClaim, provisioner string) error { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + return cm.cache.UpdateThickPVC(lvgName, pvc, provisioner) +} + +func (cm *CacheManager) AddThickPVC(lvgName string, pvc *v1.PersistentVolumeClaim, provisioner string) error { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + return cm.cache.AddThickPVC(lvgName, pvc, provisioner) +} + +func (cm *CacheManager) UpdateThinPVC(lvgName, thinPoolName string, pvc *v1.PersistentVolumeClaim, provisioner string) error { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + return cm.cache.UpdateThinPVC(lvgName, thinPoolName, pvc, provisioner) +} + +func (cm *CacheManager) PrintTheCacheLog() { + cm.locker.Lock() + defer cm.locker.Unlock() + + cm.log.Cache("*******************CACHE BEGIN*******************") + cm.log.Cache("[LVMVolumeGroups BEGIN]") + for lvgName, lvgCh := range cm.cache.storage.Lvgs { + cm.log.Cache(fmt.Sprintf("[%s]", lvgName)) + + for pvcName, pvcCh := range lvgCh.ThickPVCs { + cm.log.Cache(fmt.Sprintf(" THICK PVC %s, selected node: %s", pvcName, pvcCh.SelectedNode)) + } + + for thinPoolName, thinPool := range lvgCh.ThinPools { + for pvcName, pvcCh := range thinPool { + cm.log.Cache(fmt.Sprintf(" THIN POOL %s PVC %s, selected node: %s", 
thinPoolName, pvcName, pvcCh.SelectedNode)) + } + } + } + cm.log.Cache("[LVMVolumeGroups ENDS]") + + cm.log.Cache("[PVC and LVG BEGINS]") + for pvcName, lvgs := range cm.cache.storage.PvcLVGs { + cm.log.Cache(fmt.Sprintf("[PVC: %s]", pvcName)) + + for _, lvgName := range lvgs { + cm.log.Cache(fmt.Sprintf(" LVMVolumeGroup: %s", lvgName)) + } + } + cm.log.Cache("[PVC and LVG ENDS]") + + cm.log.Cache("[Node and LVG BEGINS]") + for nodeName, lvgs := range cm.cache.storage.NodeLVGs { + cm.log.Cache(fmt.Sprintf("[Node: %s]", nodeName)) + + for _, lvgName := range lvgs { + cm.log.Cache(fmt.Sprintf(" LVMVolumeGroup name: %s", lvgName)) + } + } + cm.log.Cache("[Node and LVG ENDS]") + + cm.log.Cache("*******************CACHE END*******************") +} + +func (cm *CacheManager) TryGetLVG(name string) *snc.LVMVolumeGroup { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.TryGetLVG(name) +} + +func (cm *CacheManager) UpdateLVG(lvg *snc.LVMVolumeGroup) error { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + return cm.cache.UpdateLVG(lvg) +} + +func (cm *CacheManager) AddLVG(lvg *snc.LVMVolumeGroup) { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + cm.cache.AddLVG(lvg) +} + +func (cm *CacheManager) DeleteLVG(lvgName string) { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + cm.cache.DeleteLVG(lvgName) +} + +func (cm *CacheManager) GetAllPVCForLVG(lvgName string) ([]*v1.PersistentVolumeClaim, error) { + cm.locker.Lock() + defer cm.locker.Unlock() + + return cm.cache.GetAllPVCForLVG(lvgName) +} + +func (cm *CacheManager) RemoveSpaceReservationForPVCWithSelectedNode(pvc *v1.PersistentVolumeClaim, deviceType string) error { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + return cm.cache.RemoveSpaceReservationForPVCWithSelectedNode(pvc, deviceType) +} + +func (cm *CacheManager) AddLVGToPVC(lvgName, pvcKey string) { + cm.locker.Lock() + defer func() { + cm.isUpdated = true + cm.locker.Unlock() + }() + + cm.cache.AddLVGToPVC(lvgName, pvcKey) +} diff --git a/images/sds-common-scheduler-extender/pkg/consts/consts.go b/images/sds-common-scheduler-extender/pkg/consts/consts.go new file mode 100644 index 000000000..2cfa3fd93 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/consts/consts.go @@ -0,0 +1,12 @@ +package consts + +const ( + SdsReplicatedVolumeProvisioner = "replicated.csi.storage.deckhouse.io" + SdsLocalVolumeProvisioner = "local.csi.storage.deckhouse.io" + + LvmTypeParamKey = "csi.storage.deckhouse.io/lvm-type" + LVMVolumeGroupsParamKey = "csi.storage.deckhouse.io/lvm-volume-groups" + + Thick = "Thick" + Thin = "Thin" +) diff --git a/images/sds-common-scheduler-extender/pkg/controller/layer_resource_ids_watcher.go b/images/sds-common-scheduler-extender/pkg/controller/layer_resource_ids_watcher.go new file mode 100644 index 000000000..f94ddb8f7 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/controller/layer_resource_ids_watcher.go @@ -0,0 +1,46 @@ +package controller + +import ( + "context" + "fmt" + + lapi "github.com/deckhouse/sds-replicated-volume/api/linstor" + "k8s.io/client-go/util/workqueue" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + 
"sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +const ( + LVGLayerResourceIDsWatcherName = "layer-resource-ids-watcher" +) + +func RunLayerResourceIDsWatcher( + mgr manager.Manager, + log *logger.Logger, +) error { + log.Info("[RunLayerResourceIDsWatcher] starts the work") + + c, err := controller.New(LVGLayerResourceIDsWatcherName, mgr, controller.Options{ + Reconciler: reconcile.Func(func(_ context.Context, _ reconcile.Request) (reconcile.Result, error) { + return reconcile.Result{}, nil + }), + }) + if err != nil { + log.Error(err, "[RunLayerResourceIDsWatcher] unable to create a controller") + return err + } + + err = c.Watch(source.Kind(mgr.GetCache(), &lapi.LayerResourceIds{}, handler.TypedFuncs[*lapi.LayerResourceIds, reconcile.Request]{ + CreateFunc: func(_ context.Context, e event.TypedCreateEvent[*lapi.LayerResourceIds], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunLayerResourceIDsWatcher] res id created %s", e.Object.GetName())) + }, + })) + + return nil +} diff --git a/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache.go b/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache.go new file mode 100644 index 000000000..ea89b8c7c --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/controller/lvg_watcher_cache.go @@ -0,0 +1,156 @@ +package controller + +import ( + "context" + "fmt" + "reflect" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +const ( + LVGWatcherCacheCtrlName = "lvg-watcher-cache-controller" +) + +func RunLVGWatcherCacheController( + mgr manager.Manager, + log *logger.Logger, + cacheMgr *cache.CacheManager, +) error { + log.Info("[RunLVGWatcherCacheController] starts the work") + + c, err := controller.New(LVGWatcherCacheCtrlName, mgr, controller.Options{ + Reconciler: reconcile.Func(func(_ context.Context, _ reconcile.Request) (reconcile.Result, error) { + return reconcile.Result{}, nil + }), + }) + if err != nil { + log.Error(err, "[RunCacheWatcherController] unable to create a controller") + return err + } + + err = c.Watch(source.Kind(mgr.GetCache(), &snc.LVMVolumeGroup{}, handler.TypedFuncs[*snc.LVMVolumeGroup, reconcile.Request]{ + CreateFunc: func(_ context.Context, e event.TypedCreateEvent[*snc.LVMVolumeGroup], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] CreateFunc starts the cache reconciliation for the LVMVolumeGroup %s", e.Object.GetName())) + + lvg := e.Object + if lvg.DeletionTimestamp != nil { + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s should not be reconciled", lvg.Name)) + return + } + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] tries to get the LVMVolumeGroup %s from the cache", lvg.Name)) + existedLVG := cacheMgr.TryGetLVG(lvg.Name) + if existedLVG != nil { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s was found in 
the cache. It will be updated", lvg.Name)) + err := cacheMgr.UpdateLVG(lvg) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to update the LVMVolumeGroup %s in the cache", lvg.Name)) + } else { + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] cache was updated for the LVMVolumeGroup %s", lvg.Name)) + } + } else { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s was not found. It will be added to the cache", lvg.Name)) + cacheMgr.AddLVG(lvg) + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] cache was added for the LVMVolumeGroup %s", lvg.Name)) + } + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] starts to clear the cache for the LVMVolumeGroup %s", lvg.Name)) + pvcs, err := cacheMgr.GetAllPVCForLVG(lvg.Name) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to get all PVC for the LVMVolumeGroup %s", lvg.Name)) + return + } + + for _, pvc := range pvcs { + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] cached PVC %s/%s belongs to LVMVolumeGroup %s", pvc.Namespace, pvc.Name, lvg.Name)) + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] PVC %s/%s has status phase %s", pvc.Namespace, pvc.Name, pvc.Status.Phase)) + if pvc.Status.Phase == v1.ClaimBound { + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] cached PVC %s/%s has Status.Phase Bound. It will be removed from the cache for LVMVolumeGroup %s", pvc.Namespace, pvc.Name, lvg.Name)) + cacheMgr.RemovePVCFromTheCache(pvc) + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] PVC %s/%s was removed from the cache for LVMVolumeGroup %s", pvc.Namespace, pvc.Name, lvg.Name)) + } + } + + log.Info(fmt.Sprintf("[RunLVGWatcherCacheController] cache for the LVMVolumeGroup %s was reconciled by CreateFunc", lvg.Name)) + }, + UpdateFunc: func(_ context.Context, e event.TypedUpdateEvent[*snc.LVMVolumeGroup], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunCacheWatcherController] UpdateFunc starts the cache reconciliation for the LVMVolumeGroup %s", e.ObjectNew.GetName())) + oldLvg := e.ObjectOld + newLvg := e.ObjectNew + err := cacheMgr.UpdateLVG(newLvg) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to update the LVMVolumeGroup %s cache", newLvg.Name)) + return + } + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] successfully updated the LVMVolumeGroup %s in the cache", newLvg.Name)) + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] starts to calculate the size difference for LVMVolumeGroup %s", newLvg.Name)) + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] old state LVMVolumeGroup %s has size %s", oldLvg.Name, oldLvg.Status.AllocatedSize.String())) + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] new state LVMVolumeGroup %s has size %s", newLvg.Name, newLvg.Status.AllocatedSize.String())) + + if !shouldReconcileLVG(oldLvg, newLvg) { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s should not be reconciled", newLvg.Name)) + return + } + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] the LVMVolumeGroup %s should be reconciled by Update Func", newLvg.Name)) + + cachedPVCs, err := cacheMgr.GetAllPVCForLVG(newLvg.Name) + if err != nil { + log.Error(err, fmt.Sprintf("[RunLVGWatcherCacheController] unable to get all PVC for the LVMVolumeGroup %s", newLvg.Name)) + return + } + + for _, pvc := range cachedPVCs { + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] PVC %s/%s from 
the cache belongs to LVMVolumeGroup %s", pvc.Namespace, pvc.Name, newLvg.Name)) + log.Trace(fmt.Sprintf("[RunLVGWatcherCacheController] PVC %s/%s has status phase %s", pvc.Namespace, pvc.Name, pvc.Status.Phase)) + if pvc.Status.Phase == v1.ClaimBound { + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] PVC %s/%s from the cache has Status.Phase Bound. It will be removed from the reserved space in the LVMVolumeGroup %s", pvc.Namespace, pvc.Name, newLvg.Name)) + cacheMgr.RemovePVCFromTheCache(pvc) + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] PVC %s/%s was removed from the LVMVolumeGroup %s in the cache", pvc.Namespace, pvc.Name, newLvg.Name)) + } + } + + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] Update Func ends reconciliation the LVMVolumeGroup %s cache", newLvg.Name)) + }, + DeleteFunc: func(_ context.Context, e event.TypedDeleteEvent[*snc.LVMVolumeGroup], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info(fmt.Sprintf("[RunCacheWatcherController] DeleteFunc starts the cache reconciliation for the LVMVolumeGroup %s", e.Object.GetName())) + lvg := e.Object + cacheMgr.DeleteLVG(lvg.Name) + log.Debug(fmt.Sprintf("[RunLVGWatcherCacheController] LVMVolumeGroup %s was deleted from the cache", lvg.Name)) + }, + }, + ), + ) + if err != nil { + log.Error(err, "[RunCacheWatcherController] unable to watch the events") + return err + } + + return nil +} + +func shouldReconcileLVG(oldLVG, newLVG *snc.LVMVolumeGroup) bool { + if newLVG.DeletionTimestamp != nil { + return false + } + + if oldLVG.Status.AllocatedSize.Value() == newLVG.Status.AllocatedSize.Value() && + reflect.DeepEqual(oldLVG.Status.ThinPools, newLVG.Status.ThinPools) { + return false + } + + return true +} diff --git a/images/sds-common-scheduler-extender/pkg/controller/pvc_watcher_cache.go b/images/sds-common-scheduler-extender/pkg/controller/pvc_watcher_cache.go new file mode 100644 index 000000000..6b876d547 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/controller/pvc_watcher_cache.go @@ -0,0 +1,210 @@ +package controller + +import ( + "context" + "errors" + "fmt" + "slices" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/scheduler" + + slv "github.com/deckhouse/sds-local-volume/api/v1alpha1" + v1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +const ( + PVCWatcherCacheCtrlName = "pvc-watcher-cache-controller" +) + +func RunPVCWatcherCacheController( + mgr manager.Manager, + log *logger.Logger, + cacheMgr *cache.CacheManager, +) error { + log.Info("[RunPVCWatcherCacheController] starts the work") + + c, err := controller.New("pvc-watcher-cache-controller", mgr, controller.Options{ + Reconciler: reconcile.Func(func(_ context.Context, _ reconcile.Request) (reconcile.Result, error) { + return reconcile.Result{}, nil + }), + }) + if err 
!= nil { + log.Error(err, "[RunPVCWatcherCacheController] unable to create controller") + return err + } + + err = c.Watch(source.Kind(mgr.GetCache(), &v1.PersistentVolumeClaim{}, handler.TypedFuncs[*v1.PersistentVolumeClaim, reconcile.Request]{ + CreateFunc: func(ctx context.Context, e event.TypedCreateEvent[*v1.PersistentVolumeClaim], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info("[RunPVCWatcherCacheController] CreateFunc reconciliation starts") + pvc := e.Object + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] CreateFunc starts the reconciliation for the PVC %s/%s", pvc.Namespace, pvc.Name)) + + if pvc.Annotations == nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by CreateFunc due to annotations is nil", pvc.Namespace, pvc.Name)) + return + } + + selectedNodeName, wasSelected := pvc.Annotations[cache.SelectedNodeAnnotation] + if !wasSelected || pvc.Status.Phase == v1.ClaimBound || pvc.DeletionTimestamp != nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by CreateFunc due to no selected node annotation found or deletion timestamp is not nil", pvc.Namespace, pvc.Name)) + return + } + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has selected node annotation, it will be reconciled in CreateFunc", pvc.Namespace, pvc.Name)) + log.Trace(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has been selected to the node %s", pvc.Namespace, pvc.Name, selectedNodeName)) + + reconcilePVC(ctx, mgr, log, cacheMgr, pvc, selectedNodeName) + log.Info("[RunPVCWatcherCacheController] CreateFunc reconciliation ends") + }, + UpdateFunc: func(ctx context.Context, e event.TypedUpdateEvent[*v1.PersistentVolumeClaim], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info("[RunPVCWatcherCacheController] Update Func reconciliation starts") + pvc := e.ObjectNew + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] UpdateFunc starts the reconciliation for the PVC %s/%s", pvc.Namespace, pvc.Name)) + + if pvc.Annotations == nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by UpdateFunc due to annotations is nil", pvc.Namespace, pvc.Name)) + return + } + + selectedNodeName, wasSelected := pvc.Annotations[cache.SelectedNodeAnnotation] + if !wasSelected || pvc.DeletionTimestamp != nil { + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s should not be reconciled by UpdateFunc due to no selected node annotation found or deletion timestamp is not nil", pvc.Namespace, pvc.Name)) + return + } + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has selected node annotation, it will be reconciled in UpdateFunc", pvc.Namespace, pvc.Name)) + log.Trace(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s has been selected to the node %s", pvc.Namespace, pvc.Name, selectedNodeName)) + + reconcilePVC(ctx, mgr, log, cacheMgr, pvc, selectedNodeName) + log.Info("[RunPVCWatcherCacheController] Update Func reconciliation ends") + }, + DeleteFunc: func(_ context.Context, e event.TypedDeleteEvent[*v1.PersistentVolumeClaim], _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + log.Info("[RunPVCWatcherCacheController] Delete Func reconciliation starts") + pvc := e.Object + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] DeleteFunc starts the reconciliation for the PVC %s/%s", pvc.Namespace, pvc.Name)) + + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] PVC %s/%s was removed 
from the cluster. It will be fully removed from the cache", pvc.Namespace, pvc.Name)) + cacheMgr.RemovePVCFromTheCache(pvc) + log.Debug(fmt.Sprintf("[RunPVCWatcherCacheController] successfully fully removed PVC %s/%s from the cache", pvc.Namespace, pvc.Name)) + }, + }, + ), + ) + if err != nil { + log.Error(err, "[RunPVCWatcherCacheController] unable to controller Watch") + return err + } + + return nil +} + +func reconcilePVC(ctx context.Context, mgr manager.Manager, log *logger.Logger, cacheMgr *cache.CacheManager, pvc *v1.PersistentVolumeClaim, selectedNodeName string) { + sc := &storagev1.StorageClass{} + err := mgr.GetClient().Get(ctx, client.ObjectKey{ + Name: *pvc.Spec.StorageClassName, + }, sc) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to get Storage Class %s for PVC %s/%s", *pvc.Spec.StorageClassName, pvc.Namespace, pvc.Name)) + return + } + + if sc.Provisioner != consts.SdsReplicatedVolumeProvisioner { + log.Debug(fmt.Sprintf("[reconcilePVC] Storage Class %s for PVC %s/%s is not managed by sds-replicated-volume-provisioner. Ends the reconciliation", sc.Name, pvc.Namespace, pvc.Name)) + return + } + + log.Debug(fmt.Sprintf("[reconcilePVC] tries to extract LVGs from the Storage Class %s for PVC %s/%s", sc.Name, pvc.Namespace, pvc.Name)) + lvgsFromSc, err := scheduler.ExtractLVGsFromSC(sc) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to extract LVMVolumeGroups from the Storage Class %s", sc.Name)) + } + log.Debug(fmt.Sprintf("[reconcilePVC] successfully extracted LVGs from the Storage Class %s for PVC %s/%s", sc.Name, pvc.Namespace, pvc.Name)) + + lvgsForPVC := cacheMgr.GetLVGNamesForPVC(pvc) + if len(lvgsForPVC) == 0 { + log.Debug(fmt.Sprintf("[reconcilePVC] no LVMVolumeGroups were found in the cache for PVC %s/%s. 
Use Storage Class %s instead", pvc.Namespace, pvc.Name, *pvc.Spec.StorageClassName)) + + for _, lvg := range lvgsFromSc { + lvgsForPVC = append(lvgsForPVC, lvg.Name) + } + } + for _, lvgName := range lvgsForPVC { + log.Trace(fmt.Sprintf("[reconcilePVC] LVMVolumeGroup %s belongs to PVC %s/%s", lvgName, pvc.Namespace, pvc.Name)) + } + + log.Debug(fmt.Sprintf("[reconcilePVC] starts to find common LVMVolumeGroup for the selected node %s and PVC %s/%s", selectedNodeName, pvc.Namespace, pvc.Name)) + lvgsOnTheNode := cacheMgr.GetLVGNamesByNodeName(selectedNodeName) + for _, lvgName := range lvgsOnTheNode { + log.Trace(fmt.Sprintf("[reconcilePVC] LVMVolumeGroup %s belongs to the node %s", lvgName, selectedNodeName)) + } + + var commonLVGName string + for _, pvcLvg := range lvgsForPVC { + if slices.Contains(lvgsOnTheNode, pvcLvg) { + commonLVGName = pvcLvg + break + } + } + if commonLVGName == "" { + log.Error(errors.New("common LVMVolumeGroup was not found"), fmt.Sprintf("[reconcilePVC] unable to identify a LVMVolumeGroup for PVC %s/%s", pvc.Namespace, pvc.Name)) + return + } + + log.Debug(fmt.Sprintf("[reconcilePVC] successfully found common LVMVolumeGroup %s for the selected node %s and PVC %s/%s", commonLVGName, selectedNodeName, pvc.Namespace, pvc.Name)) + log.Debug(fmt.Sprintf("[reconcilePVC] starts to update PVC %s/%s in the cache", pvc.Namespace, pvc.Name)) + + log.Trace(fmt.Sprintf("[reconcilePVC] %s PVC %s/%s has status phase: %s", sc.Parameters[consts.LvmTypeParamKey], pvc.Namespace, pvc.Name, pvc.Status.Phase)) + switch sc.Parameters[consts.LvmTypeParamKey] { + case consts.Thick: + err = cacheMgr.UpdateThickPVC(commonLVGName, pvc, sc.Provisioner) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to update Thick PVC %s/%s in the cache", pvc.Namespace, pvc.Name)) + return + } + case consts.Thin: + for _, lvg := range lvgsFromSc { + if lvg.Name == commonLVGName { + err = cacheMgr.UpdateThinPVC(commonLVGName, lvg.Thin.PoolName, pvc, sc.Provisioner) + if err != nil { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to update Thin PVC %s/%s in the cache", pvc.Namespace, pvc.Name)) + return + } + break + } + } + } + log.Debug(fmt.Sprintf("[reconcilePVC] successfully updated %s PVC %s/%s in the cache", sc.Parameters[consts.LvmTypeParamKey], pvc.Namespace, pvc.Name)) + + log.Cache(fmt.Sprintf("[reconcilePVC] cache state BEFORE the removal space reservation for PVC %s/%s", pvc.Namespace, pvc.Name)) + cacheMgr.PrintTheCacheLog() + log.Debug(fmt.Sprintf("[reconcilePVC] starts to remove space reservation for PVC %s/%s with selected node from the cache", pvc.Namespace, pvc.Name)) + + err = mgr.GetClient().Get(ctx, client.ObjectKey{Name: sc.Name, Namespace: sc.Namespace}, &slv.LocalStorageClass{}) + if err != nil { + // Space reservation removal when node is selected now happens in sds-local-volume-only + if !k8serrors.IsNotFound(err) { + removalErr := cacheMgr.RemoveSpaceReservationForPVCWithSelectedNode(pvc, sc.Parameters[consts.LvmTypeParamKey]) + if removalErr != nil { + log.Error(removalErr, fmt.Sprintf("[reconcilePVC] unable to remove PVC %s/%s space reservation in the cache", pvc.Namespace, pvc.Name)) + } + } else { + log.Error(err, fmt.Sprintf("[reconcilePVC] unable to get LocalStorageClass %s/%s", sc.Namespace, sc.Name)) + return + } + } + log.Debug(fmt.Sprintf("[reconcilePVC] successfully removed space reservation for PVC %s/%s with selected node", pvc.Namespace, pvc.Name)) + + log.Cache(fmt.Sprintf("[reconcilePVC] cache state AFTER the removal space reservation for 
PVC %s/%s", pvc.Namespace, pvc.Name)) + cacheMgr.PrintTheCacheLog() +} diff --git a/images/sds-common-scheduler-extender/pkg/kubutils/kubernetes.go b/images/sds-common-scheduler-extender/pkg/kubutils/kubernetes.go new file mode 100644 index 000000000..0e0a69d22 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/kubutils/kubernetes.go @@ -0,0 +1,35 @@ +/* +Copyright 2024 Flant JSC +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubutils + +import ( + "fmt" + + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +func KubernetesDefaultConfigCreate() (*rest.Config, error) { + clientConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( + clientcmd.NewDefaultClientConfigLoadingRules(), + &clientcmd.ConfigOverrides{}, + ) + + // Get a config to talk to API server + config, err := clientConfig.ClientConfig() + if err != nil { + return nil, fmt.Errorf("config kubernetes error %w", err) + } + return config, nil +} diff --git a/images/sds-common-scheduler-extender/pkg/logger/logger.go b/images/sds-common-scheduler-extender/pkg/logger/logger.go new file mode 100644 index 000000000..455c03955 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/logger/logger.go @@ -0,0 +1,84 @@ +/* +Copyright 2024 Flant JSC +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package logger + +import ( + "fmt" + "strconv" + + "github.com/go-logr/logr" + "k8s.io/klog/v2/textlogger" +) + +type Verbosity string + +const ( + ErrorLevel Verbosity = "0" + WarningLevel Verbosity = "1" + InfoLevel Verbosity = "2" + DebugLevel Verbosity = "3" + TraceLevel Verbosity = "4" + CacheLevel Verbosity = "5" +) + +const ( + warnLvl = iota + 1 + infoLvl + debugLvl + traceLvl + cacheLvl +) + +type Logger struct { + log logr.Logger +} + +func NewLogger(level Verbosity) (*Logger, error) { + v, err := strconv.Atoi(string(level)) + if err != nil { + return nil, err + } + + log := textlogger.NewLogger(textlogger.NewConfig(textlogger.Verbosity(v))).WithCallDepth(1) + + return &Logger{log: log}, nil +} + +func (l Logger) GetLogger() logr.Logger { + return l.log +} + +func (l Logger) Error(err error, message string, keysAndValues ...interface{}) { + l.log.Error(err, fmt.Sprintf("ERROR %s", message), keysAndValues...) +} + +func (l Logger) Warning(message string, keysAndValues ...interface{}) { + l.log.V(warnLvl).Info(fmt.Sprintf("WARNING %s", message), keysAndValues...) +} + +func (l Logger) Info(message string, keysAndValues ...interface{}) { + l.log.V(infoLvl).Info(fmt.Sprintf("INFO %s", message), keysAndValues...) 
+} + +func (l Logger) Debug(message string, keysAndValues ...interface{}) { + l.log.V(debugLvl).Info(fmt.Sprintf("DEBUG %s", message), keysAndValues...) +} + +func (l Logger) Trace(message string, keysAndValues ...interface{}) { + l.log.V(traceLvl).Info(fmt.Sprintf("TRACE %s", message), keysAndValues...) +} + +func (l Logger) Cache(message string, keysAndValues ...interface{}) { + l.log.V(cacheLvl).Info(fmt.Sprintf("CACHE %s", message), keysAndValues...) +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/filter.go b/images/sds-common-scheduler-extender/pkg/scheduler/filter.go new file mode 100644 index 000000000..bb1ba13ab --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/filter.go @@ -0,0 +1,259 @@ +package scheduler + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + "sync" + + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + srv2 "github.com/deckhouse/sds-replicated-volume/api/v1alpha2" + corev1 "k8s.io/api/core/v1" +) + +// Filter processes the filtering logic for a given request. +func (s *scheduler) Filter(inputData ExtenderArgs) (*ExtenderFilterResult, error) { + nodeNames, err := getNodeNames(inputData, s.log) + if err != nil { + return nil, fmt.Errorf("unable to get node names: %w", err) + } + + s.log.Debug(fmt.Sprintf("[filter] filtering for Pod %s/%s", inputData.Pod.Namespace, inputData.Pod.Name)) + s.log.Trace(fmt.Sprintf("[filter] Pod: %+v, Nodes: %+v", inputData.Pod, nodeNames)) + + input, err := s.collectFilterInput(inputData.Pod, nodeNames) + if err != nil { + return nil, err + } + + return s.filterNodes(input, s.log) +} + +// collectFilterInput gathers all necessary data for filtering. +func (s *scheduler) collectFilterInput(pod *corev1.Pod, nodeNames []string) (*FilterInput, error) { + podRelatedPVCs, err := getPodRelatedPVCs(s.ctx, s.client, s.log, pod) + if err != nil { + return nil, fmt.Errorf("unable to get PVCs for Pod %s/%s: %w", pod.Name, pod.Namespace, err) + } + if len(podRelatedPVCs) == 0 { + return nil, errors.New("no PVCs found for Pod") + } + + scsUsedByPodPVCs, err := getStorageClassesUsedByPVCs(s.ctx, s.client, podRelatedPVCs) + if err != nil { + return nil, fmt.Errorf("unable to get StorageClasses: %w", err) + } + + replicatedPVCs, localPVCs := filterPVCsByProvisioner(s.log, podRelatedPVCs, scsUsedByPodPVCs) + if len(replicatedPVCs) == 0 && len(localPVCs) == 0 { + s.log.Warning(fmt.Sprintf("[filter] Pod %s/%s uses unmanaged PVCs. 
replicatedPVCs length %d, localPVCs length %d", pod.Namespace, pod.Name, len(replicatedPVCs), len(localPVCs))) + return nil, errors.New("no managed PVCs found") + } + + pvMap, err := getPersistentVolumes(s.ctx, s.client, s.log) + if err != nil { + return nil, fmt.Errorf("unable to get PersistentVolumes: %w", err) + } + + replicatedAndLocalPVCs := make(map[string]*corev1.PersistentVolumeClaim, len(replicatedPVCs)+len(localPVCs)) + for name, pvc := range replicatedPVCs { + replicatedAndLocalPVCs[name] = pvc + } + for name, pvc := range localPVCs { + replicatedAndLocalPVCs[name] = pvc + } + + pvcSizeRequests, err := extractRequestedSize(s.log, replicatedAndLocalPVCs, scsUsedByPodPVCs, pvMap) + if err != nil { + return nil, fmt.Errorf("unable to extract PVC request sizes: %w", err) + } + + replicatedSCSUsedByPodPVCs, localSCSUsedByPodPVCs, err := getRSCByCS(s.ctx, s.client, scsUsedByPodPVCs, s.log) + if err != nil { + return nil, fmt.Errorf("unable to filter replicated StorageClasses: %w", err) + } + + drbdReplicaList, err := getDRBDReplicaList(s.ctx, s.client) + if err != nil { + return nil, fmt.Errorf("unable to list DRBD replicas: %w", err) + } + + drbdReplicaMap := make(map[string]*srv2.DRBDResourceReplica, len(drbdReplicaList.Items)) + for _, replica := range drbdReplicaList.Items { + drbdReplicaMap[replica.Name] = &replica + } + b, _ := json.MarshalIndent(drbdReplicaMap, "", " ") + fmt.Printf("[collectFilterInput] drbdReplicaMap %+v\n", string(b)) + + drbdNodesMap, err := getDRBDNodesMap(s.ctx, s.client, s.log) + if err != nil { + return nil, fmt.Errorf("unable to get DRBD nodes map: %w", err) + } + return &FilterInput{ + Pod: pod, + NodeNames: nodeNames, + ReplicatedProvisionPVCs: replicatedPVCs, + LocalProvisionPVCs: localPVCs, + SCSUsedByPodPVCs: scsUsedByPodPVCs, + PVCSizeRequests: pvcSizeRequests, + ReplicatedSCSUsedByPodPVCs: replicatedSCSUsedByPodPVCs, + LocalSCSUsedByPodPVCs: localSCSUsedByPodPVCs, + DRBDNodesMap: drbdNodesMap, + DRBDResourceReplicaMap: drbdReplicaMap, + }, nil +} + +func (s *scheduler) filterNodes(input *FilterInput, log *logger.Logger) (*ExtenderFilterResult, error) { + log.Debug("[filterNodes] filtering nodes", "nodes", input.NodeNames) + + lvgInfo, err := collectLVGInfo(s, input.SCSUsedByPodPVCs) + if err != nil { + log.Error(err, "[filterNodes] unable to collect LVG info") + return nil, fmt.Errorf("unable to collect LVG info: %w", err) + } + + result, err := s.filterNodesParallel(input, lvgInfo) + if err != nil { + log.Error(err, "[filterNodes] failed to filter nodes") + return nil, err + } + + log.Trace("[filterNodes]", "filtered nodes result", result) + return result, nil +} + +func (s *scheduler) filterNodesParallel(input *FilterInput, lvgInfo *LVGInfo) (*ExtenderFilterResult, error) { + commonNodes, err := getSharedNodesByStorageClasses(input.SCSUsedByPodPVCs, lvgInfo.NodeToLVGs) + if err != nil { + s.log.Error(err, "[filterNodesParallel] failed to find any shared nodes") + return nil, fmt.Errorf("unable to get common nodes: %w", err) + } + + result := &ExtenderFilterResult{ + NodeNames: &[]string{}, + FailedNodes: map[string]string{}, + } + resCh := make(chan ResultWithError, len(input.NodeNames)) + var wg sync.WaitGroup + wg.Add(len(input.NodeNames)) + + for _, nodeName := range input.NodeNames { + go func(nodeName string) { + defer wg.Done() + + srvErr := s.filterSingleNodeSRV(nodeName, input, lvgInfo, commonNodes, s.log) + slvErr := s.filterSingleNodeSLV(nodeName, input, lvgInfo, commonNodes, s.log) + + if srvErr == nil && slvErr == nil { + 
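				// neither the replicated (SRV) nor the local (SLV) volume check rejected this node, so it is reported as schedulable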
+				s.log.Debug(fmt.Sprintf("[filterNodesParallel] node %s is ok to schedule a pod to", nodeName))
+				resCh <- ResultWithError{NodeName: nodeName}
+				return
+			}
+
+			// collect only the non-nil errors so that Error() is never called on a nil error
+			errMessages := make([]string, 0, 2)
+			if srvErr != nil {
+				errMessages = append(errMessages, srvErr.Error())
+			}
+			if slvErr != nil {
+				errMessages = append(errMessages, slvErr.Error())
+			}
+			nodeErr := errors.New(strings.Join(errMessages, ", "))
+			s.log.Debug(fmt.Sprintf("[filterNodesParallel] node %s is bad to schedule a pod to. Reason: %s", nodeName, nodeErr.Error()))
+			resCh <- ResultWithError{NodeName: nodeName, Err: nodeErr}
+		}(nodeName)
+	}
+
+	go func() {
+		wg.Wait()
+		close(resCh)
+	}()
+
+	for r := range resCh {
+		if r.Err == nil {
+			*result.NodeNames = append(*result.NodeNames, r.NodeName)
+		} else {
+			result.FailedNodes[r.NodeName] = r.Err.Error()
+		}
+	}
+
+	s.log.Debug("[filterNodes] filtered nodes", "nodes", result.NodeNames)
+	return result, nil
+}
+
+func (s *scheduler) filterSingleNodeSLV(nodeName string, filterInput *FilterInput, lvgInfo *LVGInfo, commonNodes map[string][]*snc.LVMVolumeGroup, log *logger.Logger) error {
+	log.Debug("[filterSingleNodeSLV] checking node", "node", nodeName)
+
+	nodeLvgs := commonNodes[nodeName]
+
+	hasEnoughSpace := true
+	for _, pvc := range filterInput.LocalProvisionPVCs {
+		lvgsFromSC := lvgInfo.SCLVGs[*pvc.Spec.StorageClassName]
+		sharedLVG := findSharedLVG(nodeLvgs, lvgsFromSC)
+		lvgs := s.cacheMgr.GetAllLVG()
+
+		hasEnoughSpace = nodeHasEnoughSpace(filterInput.PVCSizeRequests, lvgInfo.ThickFreeSpaces, lvgInfo.ThinFreeSpaces, sharedLVG, pvc, lvgs, s.log)
+		if !hasEnoughSpace {
+			return fmt.Errorf("[filterSingleNodeSLV] node %s has not enough space", nodeName)
+		}
+	}
+	return nil
+}
+
+func (s *scheduler) filterSingleNodeSRV(nodeName string, filterInput *FilterInput, lvgInfo *LVGInfo, commonNodes map[string][]*snc.LVMVolumeGroup, log *logger.Logger) error {
+	log.Debug("[filterSingleNodeSRV] filtering node", "node", nodeName)
+
+	nodeLvgs := commonNodes[nodeName]
+	for _, pvc := range filterInput.ReplicatedProvisionPVCs {
+		log.Debug("[filterSingleNodeSRV] processing PVC", "pvc", pvc.Name, "node", nodeName)
+		// the replica may be missing (for example, the PVC is not bound to a PV yet), so guard against a nil dereference
+		isNodeDiskless := false
+		if replica := filterInput.DRBDResourceReplicaMap[pvc.Spec.VolumeName]; replica != nil {
+			peer := replica.Spec.Peers[nodeName]
+			isNodeDiskless = peer.Diskless
+		}
+
+		lvgsFromSC := lvgInfo.SCLVGs[*pvc.Spec.StorageClassName]
+		pvcRSC := filterInput.ReplicatedSCSUsedByPodPVCs[*pvc.Spec.StorageClassName]
+		sharedLVG := findSharedLVG(nodeLvgs, lvgsFromSC)
+
+		lvgs := s.cacheMgr.GetAllLVG()
+		hasEnoughSpace := nodeHasEnoughSpace(filterInput.PVCSizeRequests, lvgInfo.ThickFreeSpaces, lvgInfo.ThinFreeSpaces, sharedLVG, pvc, lvgs, s.log)
+
+		switch pvcRSC.Spec.VolumeAccess {
+		case "Local":
+			if pvc.Spec.VolumeName == "" {
+				if sharedLVG == nil {
+					return fmt.Errorf("[filterSingleNodeSRV] node %s does not contain LVGs from storage class %s", nodeName, pvcRSC.Name)
+				}
+				if !hasEnoughSpace {
+					return fmt.Errorf("[filterSingleNodeSRV] node does not have enough space in LVG %s for PVC %s/%s", sharedLVG.Name, pvc.Namespace, pvc.Name)
+				}
+			} else if isNodeDiskless {
+				// a Local volume requires a diskful replica on the node, so a diskless peer disqualifies it
+				return fmt.Errorf("[filterSingleNodeSRV] node %s is not diskful for PV %s", nodeName, pvc.Spec.VolumeName)
+			}
+
+		case "EventuallyLocal":
+			if pvc.Spec.VolumeName == "" {
+				if sharedLVG == nil {
+					return fmt.Errorf("[filterSingleNodeSRV] node %s does not contain LVGs from storage class %s", nodeName, pvcRSC.Name)
+				}
+				if !hasEnoughSpace {
+					return fmt.Errorf("[filterSingleNodeSRV] node does not have enough space in LVG %s for PVC %s/%s", sharedLVG.Name, pvc.Namespace, pvc.Name)
+				}
+			} else if !isNodeDiskless {
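+				// the node already holds a diskful replica of this PV, so the EventuallyLocal requirement is already satisfied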
+				log.Trace("[filterSingleNodeSRV]", "node is diskful for EventuallyLocal PVC", "node", nodeName, "pvc", pvc.Name)
+				return nil
+			} else if sharedLVG == nil || !hasEnoughSpace {
+				return fmt.Errorf("[filterSingleNodeSRV] node %s does not meet EventuallyLocal criteria for PVC %s", nodeName, pvc.Name)
+			}
+
+		case "PreferablyLocal":
+			if pvc.Spec.VolumeName == "" && !hasEnoughSpace {
+				return fmt.Errorf("[filterSingleNodeSRV] node does not have enough space in LVG %s for PVC %s/%s", sharedLVG.Name, pvc.Namespace, pvc.Name)
+			}
+		}
+	}
+
+	if !isDrbdNode(nodeName, filterInput.DRBDNodesMap) {
+		return fmt.Errorf("[filterSingleNodeSRV] node %s is not a DRBD node", nodeName)
+	}
+	if !isOkNode(nodeName) {
+		return fmt.Errorf("[filterSingleNodeSRV] node %s is offline", nodeName)
+	}
+
+	log.Debug("[filterSingleNodeSRV] node is ok", "node", nodeName)
+	return nil
+}
diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/filter_doc.md b/images/sds-common-scheduler-extender/pkg/scheduler/filter_doc.md
new file mode 100644
index 000000000..2f3e1afd5
--- /dev/null
+++ b/images/sds-common-scheduler-extender/pkg/scheduler/filter_doc.md
@@ -0,0 +1,89 @@
+# Scheduler Extender
+
+Pods are handled by two endpoints: /filter and /prioritize.
+
+## /filter
+The endpoint's job is to filter out the nodes that are unsuitable for placing the Pod and to return the list of suitable ones.
+
+## Algorithm:
+
+1. The method receives an HTTP request and deserializes it into the ExtenderArgs structure:
+
+```go
+type ExtenderArgs struct {
+	Pod       *apiv1.Pod      `json:"pod"`
+	Nodes     *apiv1.NodeList `json:"nodes,omitempty"`
+	NodeNames *[]string       `json:"nodenames,omitempty"`
+}
+```
+Pod - the Pod that is waiting to be assigned to a node
+NodeNames - the names of the candidate nodes the Pod may be placed on
+In the current implementation only the NodeNames field is non-empty; Nodes is not used.
+
+2. If the Pod field is nil, an error is returned to the client.
+3. The candidate node names are extracted from ExtenderArgs by the getNodeNames() method.
+4. The shouldProcessPod() function decides whether this Pod needs to be processed at all:
+   - the method iterates over all of the Pod's volumes (shouldProcessPod)
+   - for every volume it finds the PVC bound to it
+   - based on the PVC data it determines which provisioner owns the PVC
+   - to keep processing the Pod, at least one volume must be backed by a PVC whose provisioner is one of the target provisioners ("replicated.csi.storage.deckhouse.io" or the sds-local-volume provisioner)
+5. If the Pod cannot be processed, an error is returned to the client.
+6. The filterNodes method is called to filter out the nodes that do not fit the Pod:
+   - the method iterates over the candidate node names
+   - for every name it iterates over all of the provided PVCs
+   - for every PVC:
+     - it gets the StorageClass and checks its Spec.VolumeAccess field
+     - if VolumeAccess == "Local" (the Pod must not be placed on a node that holds no copy of the PVC's volume):
+       - if a PV has already been provisioned for the PVC, a candidate node is *NOT* suitable if it is not a DRBD node (the DRBDResource CRD; the replica must be diskful)
+       - if no PV has been provisioned yet, a candidate node is *NOT* suitable if:
+         - it does not have enough space for the Pod's volumes
+         - it holds no copy of at least one of the PVC's volumes
+     - if VolumeAccess == "EventuallyLocal":
+       - for a PVC with a provisioned PV, a node is not suitable if:
+         - there is not enough space
+         - it holds no copy of the PVC's volumes
+         - it is not a diskful DRBD node
+       - if there is no PV yet, a node is not suitable if:
+         - there is not enough space
+         - it holds no copy of the PVC's volumes
+     - if VolumeAccess == "PreferablyLocal":
+       - if there is no PV yet, nodes that lack enough disk space are not suitable
+       - if a PV already exists, no filtering requirements are imposed
+   - Every node that passed these checks is then checked once more:
+     - nodes that are not diskful DRBD nodes are filtered out (in case a node skipped such a check above)
+     - offline nodes are also rejected
+
+   - Any node that passes all of these checks is considered suitable for placing the Pod
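+
+To make the data flow concrete, here is a minimal sketch of a /filter exchange expressed with the structures above (the node names, the `pendingPod` variable and the failure reasons are illustrative only):
+
+```go
+// Illustrative request: a Pod pending on three candidate nodes.
+args := ExtenderArgs{
+	Pod:       pendingPod, // *apiv1.Pod of the Pod being scheduled
+	NodeNames: &[]string{"node-1", "node-2", "node-3"},
+}
+
+// One possible result: only node-1 passes, the others are rejected with a reason.
+result := ExtenderFilterResult{
+	NodeNames: &[]string{"node-1"},
+	FailedNodes: map[string]string{
+		"node-2": "[filterSingleNodeSRV] node node-2 is not a DRBD node",
+		"node-3": "[filterSingleNodeSLV] node node-3 has not enough space",
+	},
+}
+```
+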
+## /prioritize
+The endpoint's job is to determine which nodes fit the Pod best by assigning each of them a relative score.
+
+## Algorithm:
+
+1. The method receives an HTTP request and deserializes it into the ExtenderArgs structure:
+
+```go
+type ExtenderArgs struct {
+	Pod       *apiv1.Pod      `json:"pod"`
+	Nodes     *apiv1.NodeList `json:"nodes,omitempty"`
+	NodeNames *[]string       `json:"nodenames,omitempty"`
+}
+```
+Pod - the Pod that is waiting to be assigned to a node
+NodeNames - the names of the candidate nodes the Pod may be placed on
+In the current implementation only the NodeNames field is non-empty; Nodes is not used.
+
+2. The scoreNodes method, which builds the rating of the candidate nodes, follows a principle similar to the node-filtering method described above:
+   - the method iterates over the candidate node names
+   - for every name it iterates over all of the provided PVCs
+   - for every PVC:
+     - the free space remaining on the node is calculated by calculateFreeSpace() as a percentage of the total space
+     - the space-related part of the score is computed with the formula:
+     ```go
+     int(math.Round(math.Log2(float64(freeSpace) * multiplier)))
+     ```
+     multiplier is a factor passed in the application configuration; it can be used to scale the computed score and defaults to 1.
+
+   - for every replica of the PVC's volume that is already present on the candidate node, the node gets +1 point
+
+   - the final score is the sum of the free-space score and the score for the volume replicas present on the node
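+
+A compact sketch of the scoring rule described above (the helper name `spaceScore` is made up for illustration and assumes `import "math"`; the 1..10 clamping mirrors `getNodeScore` in func.go):
+
+```go
+// spaceScore converts the remaining free space (in percent) into score points.
+func spaceScore(freeSpacePercent int64, multiplier float64) int {
+	score := int(math.Round(math.Log2(float64(freeSpacePercent) * multiplier)))
+	if score < 1 {
+		return 1
+	}
+	if score > 10 {
+		return 10
+	}
+	return score
+}
+```
+
+With multiplier = 1, a node left with 60% free space gets round(log2(60)) = 6 points and a node left with 5% gets round(log2(5)) = 2; each replica of the PVC's volume already present on the node then adds one more point to the final score.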
diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/filter_test.go b/images/sds-common-scheduler-extender/pkg/scheduler/filter_test.go
new file mode 100644
index 000000000..6c4075452
--- /dev/null
+++ b/images/sds-common-scheduler-extender/pkg/scheduler/filter_test.go
@@ -0,0 +1,160 @@
+package scheduler
+
+// import (
+// 	"fmt"
+// 	c "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache"
+// 	"github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts"
+// 	"github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger"
+// 	"testing"
+
+// 	apiv1 "k8s.io/api/core/v1"
+// 	v1 "k8s.io/api/core/v1"
+// 	storagev1 "k8s.io/api/storage/v1"
+// 	"k8s.io/apimachinery/pkg/api/resource"
+// 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+// )
+
+// const (
+// 	node_1 string = "node-1"
+// 	node_2 string = "node-2"
+// 	node_3 string = "node-3"
+// )
+
+// func mockPod(podName, pvcName string) *apiv1.Pod {
+// 	return &apiv1.Pod{
+// 		ObjectMeta: metav1.ObjectMeta{
+// 			Name: podName,
+// 		},
+// 		Spec: apiv1.PodSpec{
+// 			Volumes: []apiv1.Volume{
+// 				{
+// 					Name: fmt.Sprintf("volume/%s", pvcName),
+// 					VolumeSource: apiv1.VolumeSource{
+// 						PersistentVolumeClaim: &apiv1.PersistentVolumeClaimVolumeSource{
+// 							ClaimName: pvcName,
+// 						},
+// 					},
+// 				},
+// 			},
+// 		},
+// 	}
+// }
+
+// func TestFilterNodes(t *testing.T) {
+// 	log := logger.Logger{}
+
+// 	cache := c.Cache{}
+// 	lvgCache := []*c.LvgCache{
+// 		{
+// 			Lvg: mockLVG("lvg-1", node_1, "2Gi"),
+// 		},
+// 		{
+// 			Lvg: mockLVG("lvg-2", node_2, "1Gi"),
+// 		},
+// 		{
+// 			Lvg: mockLVG("lvg-3", node_2, "1Gi"),
+// 		},
+// 	}
+
+// 	for _, lvgC := range lvgCache {
+// 		cache.AddLVG(lvgC.Lvg)
+// 	}
+// 	cache.AddLVGToPVC("lvg-1", "pvc-1")
+// 	cache.AddLVGToPVC("lvg-2", "pvc-2")
+
+// 	nodeNames := []string{node_1, node_2, node_3}
+
+// 	inputData := ExtenderArgs{
+// 		NodeNames: &nodeNames,
+// 		Pod:       mockPod("pod-1", "pvc-1"),
+// 	}
+
+// 	namagedPvcs := map[string]*v1.PersistentVolumeClaim{
+// 		"pvc-1": {
+// 			ObjectMeta: metav1.ObjectMeta{
+// 				Name: "pvc-1",
+// 			},
+// 			Spec: v1.PersistentVolumeClaimSpec{
+// 				StorageClassName: &storageClassNameOne,
+// 				Resources: v1.VolumeResourceRequirements{
+// 					Requests: v1.ResourceList{
+// 						v1.ResourceStorage: resource.MustParse("1Gi"),
+// 					},
+// 				},
+// 			},
+// 		},
+// 		"pvc-2": {
+// 			ObjectMeta: metav1.ObjectMeta{
+// 				Name: "pvc-2",
+// 			},
+// 			Spec: v1.PersistentVolumeClaimSpec{
+// 				StorageClassName: &storageClassNameTwo,
+// 				Resources: v1.VolumeResourceRequirements{
+// 					Requests: v1.ResourceList{
+// 						v1.ResourceStorage: resource.MustParse("500Mi"),
+// 					},
+// 				},
+// 			},
+// 		},
+// 	}
+
+// 	// Do not change indentation here or else these LVGs will not be parsed
+// 	mockLVGYamlOne := `- name: lvg-1
+//   Thin:
+//     poolName: pool1
+// - name: lvg-2
+//   Thin:
+//     poolName: pool2`
+
+// 	mockLVGYamlTwo := `- name: lvg-3
+//   Thin:
+//     poolName: pool3`
+
+// 	storageClasses := map[string]*storagev1.StorageClass{
+// 		storageClassNameOne: {
+// 			ObjectMeta: metav1.ObjectMeta{
+// 				Name: storageClassNameOne,
+// 			},
+// 			Provisioner: "replicated.csi.storage.deckhouse.io",
+// 			Parameters: map[string]string{
+// 				consts.LVMVolumeGroupsParamKey: mockLVGYamlOne,
+// 			},
+// 		},
+// 		storageClassNameTwo: {
+// 			ObjectMeta: metav1.ObjectMeta{
+// 				Name: 
storageClassNameTwo, +// }, +// Provisioner: "replicated.csi.storage.deckhouse.io", +// Parameters: map[string]string{ +// consts.LVMVolumeGroupsParamKey: mockLVGYamlTwo, +// }, +// }, +// } + +// pvcRequests := map[string]PVCRequest{ +// "pvc-1": { +// DeviceType: consts.Thick, +// RequestedSize: 1073741824, // 1Gb +// }, +// "pvc-2": { +// DeviceType: consts.Thin, +// RequestedSize: 524288000, // 500mb +// }, +// } + +// tests := []struct { +// testName string +// nodeNames []string +// pvcRequests map[string]PVCRequest +// storageClasses map[string]*storagev1.StorageClass +// pvcs map[string]*v1.PersistentVolumeClaim +// expect map[string]int +// }{ +// { +// testName: "Test Case #1", +// nodeNames: []string{node1}, +// pvcs: pvcs, +// expect: map[string]int{node1: 11}, +// }, +// } +// } diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/func.go b/images/sds-common-scheduler-extender/pkg/scheduler/func.go new file mode 100644 index 000000000..e625b8eff --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/func.go @@ -0,0 +1,1012 @@ +package scheduler + +import ( + "context" + "errors" + "fmt" + "math" + "net/http" + "slices" + "strings" + "sync" + "time" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + + slv "github.com/deckhouse/sds-local-volume/api/v1alpha1" + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + lsrv "github.com/deckhouse/sds-replicated-volume/api/linstor" + srv "github.com/deckhouse/sds-replicated-volume/api/v1alpha1" + srv2 "github.com/deckhouse/sds-replicated-volume/api/v1alpha2" + corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + v1 "k8s.io/api/storage/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" +) + +const ( + annotationBetaStorageProvisioner = "volume.beta.kubernetes.io/storage-provisioner" + annotationStorageProvisioner = "volume.kubernetes.io/storage-provisioner" +) + +func shouldProcessPod(ctx context.Context, cl client.Client, pvcMap map[string]*corev1.PersistentVolumeClaim, log *logger.Logger, pod *corev1.Pod) ([]corev1.Volume, error) { + shouldProcessPod := false + targetProvisionerVolumes := make([]corev1.Volume, 0) + targetProvisioners := []string{consts.SdsLocalVolumeProvisioner, consts.SdsReplicatedVolumeProvisioner} + + log.Trace(fmt.Sprintf("[ShouldProcessPod] targetProvisioners=%+v, pod: %+v", targetProvisioners, pod)) + + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim == nil { + log.Trace(fmt.Sprintf("[ShouldProcessPod] skip volume %s because it doesn't have PVC", volume.Name)) + continue + } + + log.Trace(fmt.Sprintf("[ShouldProcessPod] process volume: %+v that has pvc: %+v", volume, volume.PersistentVolumeClaim)) + pvcName := volume.PersistentVolumeClaim.ClaimName + pvc, found := pvcMap[pvcName] + if !found { + return nil, fmt.Errorf("[ShouldProcessPod] error getting PVC %s/%s: %v", pod.Namespace, pvcName) + } + + log.Trace(fmt.Sprintf("[ShouldProcessPod] Successfully get PVC %s/%s: %+v", pod.Namespace, pvcName, pvc)) + + discoveredProvisioner, err := getProvisionerFromPVC(ctx, cl, log, pvc) + if err != nil { + return nil, fmt.Errorf("[ShouldProcessPod] error getting provisioner from PVC %s/%s: %v", 
pod.Namespace, pvcName) + } + log.Trace(fmt.Sprintf("[ShouldProcessPod] discovered provisioner: %s", discoveredProvisioner)) + if slices.Contains(targetProvisioners, discoveredProvisioner) { + log.Trace(fmt.Sprintf("[ShouldProcessPod] provisioner matches targetProvisioner %s. Pod: %s/%s", discoveredProvisioner, pod.Namespace, pod.Name)) + shouldProcessPod = true + targetProvisionerVolumes = append(targetProvisionerVolumes, volume) + } else { + log.Trace(fmt.Sprintf("[ShouldProcessPod] provisioner %s doesn't match targetProvisioner. Skip volume %s.", discoveredProvisioner, volume.Name)) + } + } + + if shouldProcessPod { + log.Trace(fmt.Sprintf("[ShouldProcessPod] targetProvisioner found in pod volumes. Pod: %s/%s. Volumes that match: %+v", pod.Namespace, pod.Name, targetProvisionerVolumes)) + return targetProvisionerVolumes, nil + } + + log.Trace(fmt.Sprintf("[ShouldProcessPod] can't find targetProvisioner in pod volumes. Skip pod: %s/%s", pod.Namespace, pod.Name)) + return nil, errors.New(fmt.Sprintf("[ShouldProcessPod] can't find targetProvisioner in pod volumes. Skip pod: %s/%s", pod.Namespace, pod.Name)) +} + +func getProvisionerFromPVC(ctx context.Context, cl client.Client, log *logger.Logger, pvc *corev1.PersistentVolumeClaim) (string, error) { + discoveredProvisioner := "" + log.Trace(fmt.Sprintf("[getProvisionerFromPVC] check provisioner in pvc annotations: %+v", pvc.Annotations)) + + discoveredProvisioner = pvc.Annotations[annotationStorageProvisioner] + if discoveredProvisioner != "" { + log.Trace(fmt.Sprintf("[getProvisionerFromPVC] discovered provisioner in pvc annotations: %s", discoveredProvisioner)) + } else { + discoveredProvisioner = pvc.Annotations[annotationBetaStorageProvisioner] + log.Trace(fmt.Sprintf("[getProvisionerFromPVC] discovered provisioner in beta pvc annotations: %s", discoveredProvisioner)) + } + + if discoveredProvisioner == "" && pvc.Spec.StorageClassName != nil && *pvc.Spec.StorageClassName != "" { + log.Trace(fmt.Sprintf("[getProvisionerFromPVC] can't find provisioner in pvc annotations, check in storageClass with name: %s", *pvc.Spec.StorageClassName)) + + storageClass := &storagev1.StorageClass{} + if err := cl.Get(ctx, client.ObjectKey{Name: *pvc.Spec.StorageClassName}, storageClass); err != nil { + if !k8serrors.IsNotFound(err) { + return "", fmt.Errorf("[getProvisionerFromPVC] error getting StorageClass %s: %v", *pvc.Spec.StorageClassName, err) + } + log.Warning(fmt.Sprintf("[getProvisionerFromPVC] StorageClass %s for PVC %s/%s not found", *pvc.Spec.StorageClassName, pvc.Namespace, pvc.Name)) + } + discoveredProvisioner = storageClass.Provisioner + log.Trace(fmt.Sprintf("[getProvisionerFromPVC] discover provisioner %s in storageClass: %+v", discoveredProvisioner, storageClass)) + } + + if discoveredProvisioner == "" && pvc.Spec.VolumeName != "" { + log.Trace(fmt.Sprintf("[getProvisionerFromPVC] can't find provisioner in pvc annotations and StorageClass, check in PV with name: %s", pvc.Spec.VolumeName)) + + pv := &corev1.PersistentVolume{} + if err := cl.Get(ctx, client.ObjectKey{Name: pvc.Spec.VolumeName}, pv); err != nil { + if !k8serrors.IsNotFound(err) { + return "", fmt.Errorf("[getProvisionerFromPVC] error getting PV %s for PVC %s/%s: %v", pvc.Spec.VolumeName, pvc.Namespace, pvc.Name, err) + } + log.Warning(fmt.Sprintf("[getProvisionerFromPVC] PV %s for PVC %s/%s not found", pvc.Spec.VolumeName, pvc.Namespace, pvc.Name)) + } + + if pv.Spec.CSI != nil { + discoveredProvisioner = pv.Spec.CSI.Driver + } + + 
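		// NOTE: if the PV lookup above returned NotFound, pv keeps its zero value, so pv.Spec.CSI is nil and no provisioner is discovered from it
+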
log.Trace(fmt.Sprintf("[getProvisionerFromPVC] discover provisioner %s in PV: %+v", discoveredProvisioner, pv)) + } + + return discoveredProvisioner, nil +} + +func getReplicatedStoragePools(ctx context.Context, cl client.Client, log *logger.Logger) (map[string]*srv.ReplicatedStoragePool, error) { + rsp := &srv.ReplicatedStoragePoolList{} + err := cl.List(ctx, rsp) + if err != nil { + log.Error(err, "[getReplicatedStoragePools] failed to list replicated storage pools") + return nil, err + } + + rpsMap := make(map[string]*srv.ReplicatedStoragePool, len(rsp.Items)) + for _, rp := range rsp.Items { + rpsMap[rp.Name] = &rp + } + + log.Trace("[getReplicatedStoragePools]", "replicated storage pools", rpsMap) + return rpsMap, nil +} + +func getReplicatedStorageClasses(ctx context.Context, cl client.Client, log *logger.Logger) (map[string]*srv.ReplicatedStorageClass, error) { + rscs := &srv.ReplicatedStorageClassList{} + err := cl.List(ctx, rscs) + if err != nil { + log.Error(err, "[getReplicatedStorageClasses] failed to list replicated storage classes") + return nil, err + } + + rscMap := make(map[string]*srv.ReplicatedStorageClass, len(rscs.Items)) + for _, rsc := range rscs.Items { + rscMap[rsc.Name] = &rsc + } + + log.Trace("[getReplicatedStorageClasses]", "replicated storage classes", rscMap) + return rscMap, nil +} + +func getlvmVolumeGroups(ctx context.Context, cl client.Client, log *logger.Logger) (map[string]*snc.LVMVolumeGroup, error) { + lvmList := &snc.LVMVolumeGroupList{} + err := cl.List(ctx, lvmList) + if err != nil { + log.Error(err, "[getlvmVolumeGroups] failed to list LVM volume groups") + return nil, err + } + + lvmMap := make(map[string]*snc.LVMVolumeGroup, len(lvmList.Items)) + for _, lvm := range lvmList.Items { + lvmMap[lvm.Name] = &lvm + } + + log.Trace("[getlvmVolumeGroups]", "LVM volume groups map", lvmMap) + return lvmMap, nil +} + +func getNodeWithLvmVgsMap(ctx context.Context, cl client.Client, log *logger.Logger) (map[string][]*snc.LVMVolumeGroup, error) { + lvmList := &snc.LVMVolumeGroupList{} + err := cl.List(ctx, lvmList) + if err != nil { + log.Error(err, "[getNodeWithLvmVgsMap] failed to list LVM volume groups") + return nil, err + } + + nodeToLvmMap := make(map[string][]*snc.LVMVolumeGroup, len(lvmList.Items)) + for _, lvm := range lvmList.Items { + nodeToLvmMap[lvm.Spec.Local.NodeName] = append(nodeToLvmMap[lvm.Spec.Local.NodeName], &lvm) + } + + log.Trace("[getNodeWithLvmVgsMap]", "node to LVM volume groups map", nodeToLvmMap) + return nodeToLvmMap, nil +} + +// func getDRBDResourceMap(ctx context.Context, cl client.Client) (map[string]*srv.DRBDResource, error) { +// // TODO +// // drbdList := &srv.DRBDResourceList{} +// // err := cl.List(ctx, drbdList) +// // if err != nil { +// // return nil, err +// // } + +// // drbdMap := make(map[string]*srv.DRBDResource, len(drbdList.Items)) +// // for _, drbd := range drbdList.Items { +// // drbdMap[drbd.Name] = &drbd +// // } +// drbdMap := map[string]*srv.DRBDResource{} +// return drbdMap, nil +// } + +func getDRBDNodesMap(ctx context.Context, cl client.Client, log *logger.Logger) (map[string]struct{}, error) { + result := make(map[string]struct{}) + + lvgList := &snc.LVMVolumeGroupList{} + err := cl.List(ctx, lvgList) + if err != nil { + log.Error(err, "[getDRBDNodesMap] failed to list LVM volume groups") + return result, err + } + + lvgMap := make(map[string]*snc.LVMVolumeGroup, len(lvgList.Items)) + for _, lvg := range lvgList.Items { + lvgMap[lvg.Name] = &lvg + } + log.Trace("[getDRBDNodesMap]", "LVM volume group 
map", lvgMap) + + rspList := &srv.ReplicatedStoragePoolList{} + err = cl.List(ctx, rspList) + if err != nil { + log.Error(err, "[getDRBDNodesMap] failed to list replicated storage pools") + return result, err + } + log.Trace("[getDRBDNodesMap]", "LVM volume group map", lvgMap) + + for _, rsc := range rspList.Items { + for _, rscLVG := range rsc.Spec.LVMVolumeGroups { + lvg, found := lvgMap[rscLVG.Name] + if !found { + log.Warning("[getDRBDNodesMap]", fmt.Sprintf("no LVM volume group %s found, skipping iteration", rscLVG.Name)) + } + result[lvg.Spec.Local.NodeName] = struct{}{} + } + } + + log.Trace("[getDRBDNodesMap]", "DRBD nodes map", result) + return result, nil +} + +func getPersistentVolumeClaims(ctx context.Context, cl client.Client, log *logger.Logger) (map[string]*corev1.PersistentVolumeClaim, error) { + pvs := &corev1.PersistentVolumeClaimList{} + err := cl.List(ctx, pvs) + if err != nil { + log.Error(err, "[getPersistentVolumeClaims] failed to list persistent volume claims") + return nil, err + } + + pvcMap := make(map[string]*corev1.PersistentVolumeClaim, len(pvs.Items)) + for _, pvc := range pvs.Items { + pvcMap[pvc.Name] = &pvc + } + + log.Trace("[getPersistentVolumeClaims]", "persistent volume claims map", pvcMap) + return pvcMap, nil +} + +func getPersistentVolumes(ctx context.Context, cl client.Client, log *logger.Logger) (map[string]*corev1.PersistentVolume, error) { + pvs := &corev1.PersistentVolumeList{} + err := cl.List(ctx, pvs) + if err != nil { + log.Error(err, "[getPersistentVolumes] failed to list persistent volumes") + return nil, err + } + + pvMap := make(map[string]*corev1.PersistentVolume, len(pvs.Items)) + for _, pv := range pvs.Items { + pvMap[pv.Name] = &pv + } + + log.Trace("[getPersistentVolumes]", "persistent volumes map", pvMap) + return pvMap, nil +} + +func getLayerStorageVolumes(ctx context.Context, cl client.Client) (*lsrv.LayerStorageVolumesList, error) { + cwt, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + layerStorageVolumes := &lsrv.LayerStorageVolumesList{} + err := cl.List(cwt, layerStorageVolumes) + if err != nil { + return nil, err + } + + return layerStorageVolumes, nil +} + +func getDRBDReplicaList(ctx context.Context, cl client.Client) (*srv2.DRBDResourceReplicaList, error) { + cwt, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + rrl := &srv2.DRBDResourceReplicaList{} + if err := cl.List(cwt, rrl); err != nil { + return nil, err + } + + return rrl, nil +} + +func getNodeNames(inputData ExtenderArgs, log *logger.Logger) ([]string, error) { + if inputData.NodeNames != nil && len(*inputData.NodeNames) > 0 { + log.Trace("[getNodeNames]", "node names from input", *inputData.NodeNames) + return *inputData.NodeNames, nil + } + + if inputData.Nodes != nil && len(inputData.Nodes.Items) > 0 { + nodeNames := make([]string, 0, len(inputData.Nodes.Items)) + for _, node := range inputData.Nodes.Items { + nodeNames = append(nodeNames, node.Name) + } + log.Trace("[getNodeNames]", "node names from nodes", nodeNames) + return nodeNames, nil + } + + log.Error(nil, "[getNodeNames] no nodes provided") + return nil, fmt.Errorf("no nodes provided") +} + +// collectLVGInfo gathers LVMVolumeGroup data. 
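+// It maps each StorageClass to its LVMVolumeGroups, keeps only the cached LVGs used by those
+// StorageClasses, subtracts the space already reserved in the scheduler cache from the thick and
+// thin free space, and builds a node-to-LVG map for the subsequent per-node checks.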
+func collectLVGInfo(s *scheduler, storageClasses map[string]*storagev1.StorageClass) (*LVGInfo, error) { + lvgs := s.cacheMgr.GetAllLVG() + for _, lvg := range lvgs { + s.log.Trace(fmt.Sprintf("[filterNodes] LVMVolumeGroup %s in cache", lvg.Name)) + } + + scLVGs, err := CreateLVGsMapFromStorageClasses(storageClasses) + if err != nil { + return nil, err + } + + filteredLVGs := GetCachedLVGsUsedByPodStorageClases(lvgs, scLVGs) + thickFreeSpaces := getLVGThickFreeSpaces(filteredLVGs) + thinFreeSpaces := getLVGThinFreeSpaces(filteredLVGs) + + for lvgName, freeSpace := range thickFreeSpaces { + reserved, err := s.cacheMgr.GetLVGThickReservedSpace(lvgName) + if err != nil { + s.log.Error(err, fmt.Sprintf("[filterNodes] unable to get reserved space for LVMVolumeGroup %s", lvgName)) + continue + } + thickFreeSpaces[lvgName] = freeSpace - reserved + s.log.Trace(fmt.Sprintf("[filterNodes] LVMVolumeGroup %s thick free space %s", lvgName, resource.NewQuantity(thickFreeSpaces[lvgName], resource.BinarySI))) + } + + for lvgName, thinPools := range thinFreeSpaces { + for tpName, freeSpace := range thinPools { + reserved, err := s.cacheMgr.GetLVGThinReservedSpace(lvgName, tpName) + if err != nil { + s.log.Error(err, fmt.Sprintf("[filterNodes] unable to get reserved space for Thin pool %s of LVMVolumeGroup %s", tpName, lvgName)) + continue + } + thinFreeSpaces[lvgName][tpName] = freeSpace - reserved + s.log.Trace(fmt.Sprintf("[filterNodes] LVMVolumeGroup %s Thin pool %s free space %s", lvgName, tpName, resource.NewQuantity(thinFreeSpaces[lvgName][tpName], resource.BinarySI))) + } + } + + nodeToLVGs := CreateNodeToCachedLVGsMap(filteredLVGs) + s.log.Trace(fmt.Sprintf("[filterNodes] node name to LVM volume group map %+v", nodeToLVGs)) + return &LVGInfo{ + ThickFreeSpaces: thickFreeSpaces, + ThinFreeSpaces: thinFreeSpaces, + NodeToLVGs: nodeToLVGs, + SCLVGs: scLVGs, + }, nil +} + +func collectLVGScoreInfo(s *scheduler, storageClasses map[string]*storagev1.StorageClass) (*LVGScoreInfo, error) { + lvgs := s.cacheMgr.GetAllLVG() + scLVGs, err := CreateLVGsMapFromStorageClasses(storageClasses) + if err != nil { + return nil, err + } + + usedLVGs := GetCachedLVGsUsedByPodStorageClases(lvgs, scLVGs) + for lvgName := range usedLVGs { + s.log.Trace(fmt.Sprintf("[collectLVGScoreInfo] used LVMVolumeGroup %s", lvgName)) + } + + nodeToLVGs := CreateNodeToCachedLVGsMap(usedLVGs) + for nodeName, lvgList := range nodeToLVGs { + for _, lvg := range lvgList { + s.log.Trace(fmt.Sprintf("[collectLVGScoreInfo] LVMVolumeGroup %s belongs to node %s", lvg.Name, nodeName)) + } + } + + res := &LVGScoreInfo{ + NodeToLVGs: nodeToLVGs, + SCLVGs: scLVGs, + LVGs: lvgs, + } + + s.log.Trace(fmt.Sprintf("[collectLVGScoreInfo] LVGScoreInfo %+v", res)) + return res, nil +} + +func calculateFreeSpace( + lvg *snc.LVMVolumeGroup, + schedulerCache *cache.CacheManager, + pvcReq *PVCRequest, + commonLVG *LVMVolumeGroup, + log *logger.Logger, + pvc *corev1.PersistentVolumeClaim, + nodeName string, +) (resource.Quantity, error) { + var freeSpace resource.Quantity + + switch pvcReq.DeviceType { + case consts.Thick: + freeSpace = lvg.Status.VGFree + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s free Thick space before PVC reservation: %s", lvg.Name, freeSpace.String())) + + reserved, err := schedulerCache.GetLVGThickReservedSpace(lvg.Name) + if err != nil { + return freeSpace, errors.New(fmt.Sprintf("[scoreNodes] unable to count reserved space for the LVMVolumeGroup %s", lvg.Name)) + } + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s PVC 
Space reservation: %s", lvg.Name, resource.NewQuantity(reserved, resource.BinarySI))) + + freeSpace = *resource.NewQuantity(freeSpace.Value()-reserved, resource.BinarySI) + log.Trace(fmt.Sprintf("[scoreNodes] LVMVolumeGroup %s free Thick space after PVC reservation: %s", lvg.Name, freeSpace.String())) + case consts.Thin: + thinPool := findMatchedThinPool(lvg.Status.ThinPools, commonLVG.Thin.PoolName) + if thinPool == nil { + return freeSpace, errors.New(fmt.Sprintf("[scoreNodes] unable to match Storage Class's ThinPools with the node's one, Storage Class: %s, node: %s", *pvc.Spec.StorageClassName, nodeName)) + } + + freeSpace = thinPool.AvailableSpace + } + + return freeSpace, nil +} + +// TODO pick better naming to freeSize and method name +++ +func getFreeSpaceLeftAsPercent(freeSpaceBytes, requestedSpace, totalSpace int64) int64 { + freeSpaceLeft := freeSpaceBytes - requestedSpace + fraction := float64(freeSpaceLeft) / float64(totalSpace) + percent := fraction * 100 + return int64(percent) +} + +// TODO change divisor to multiplier +++ +func getNodeScore(freeSpace int64, multiplier float64) int { + converted := int(math.Round(math.Log2(float64(freeSpace) * multiplier))) + switch { + case converted < 1: + return 1 + case converted > 10: + return 10 + default: + return converted + } +} + +// func isDrbdDiskfulNode(drbdResourceMap map[string]*srv.DRBDResource, pvName string, nodeName string) bool { +// //TODO implement logic later when DRBDResource becomes available in the cluster +// return true +// // resource, found := drbdResourceMap[pvName] +// // if !found { +// // return false +// // } + +// // for _, node := range resource.Spec.Peers { +// // if node.NodeName == nodeName && !node.Diskless { +// // return true +// // } +// // } + +// // return false +// } + +func isOkNode(_ string) bool { + // TODO implement node online check + return true +} + +func getRSCByCS(ctx context.Context, cl client.Client, scs map[string]*v1.StorageClass, log *logger.Logger) (map[string]*srv.ReplicatedStorageClass, map[string]*slv.LocalStorageClass, error) { + SRVresult := map[string]*srv.ReplicatedStorageClass{} + SLVresult := map[string]*slv.LocalStorageClass{} + + rscList := &srv.ReplicatedStorageClassList{} + err := cl.List(ctx, rscList) + if err != nil { + log.Error(err, "[getRSCByCS] failed to list replicated storage classes") + return nil, nil, err + } + + rscMap := make(map[string]*srv.ReplicatedStorageClass, len(rscList.Items)) + for _, rsc := range rscList.Items { + rscMap[rsc.Name] = &rsc + } + + lscList := &slv.LocalStorageClassList{} + err = cl.List(ctx, lscList) + if err != nil { + log.Error(err, "[getRSCByCS] failed to list local storage classes") + return nil, nil, err + } + + lscMap := make(map[string]*slv.LocalStorageClass, len(lscList.Items)) + for _, lsc := range lscList.Items { + lscMap[lsc.Name] = &lsc + } + + for _, sc := range scs { + if sc.Provisioner == consts.SdsReplicatedVolumeProvisioner { + SRVresult[sc.Name] = rscMap[sc.Name] + } + if sc.Provisioner == consts.SdsLocalVolumeProvisioner { + SLVresult[sc.Name] = lscMap[sc.Name] + } + } + + log.Debug("[getRSCByCS]", "replicated storage classes map", SRVresult, "local storage classes map", SLVresult) + return SRVresult, SLVresult, nil +} + +func isDrbdNode(targetNode string, drbdNodesMap map[string]struct{}) bool { + _, ok := drbdNodesMap[targetNode] + return ok +} + +func nodeHasEnoughSpace( + pvcRequests map[string]PVCRequest, + lvgsThickFree map[string]int64, + lvgsThinFree map[string]map[string]int64, + commonLVG 
*LVMVolumeGroup, + pvc *corev1.PersistentVolumeClaim, + lvgMap map[string]*snc.LVMVolumeGroup, + log *logger.Logger, +) bool { + log.Debug("[nodeHasEnoughSpace] checking space for PVC", "pvc", pvc.Name) + + nodeIsOk := true + pvcReq := pvcRequests[pvc.Name] + thickMapMtx := &sync.RWMutex{} + thinMapMtx := &sync.RWMutex{} + + switch pvcReq.DeviceType { + case consts.Thick: + thickMapMtx.RLock() + freeSpace := lvgsThickFree[commonLVG.Name] + thickMapMtx.RUnlock() + + if freeSpace < pvcReq.RequestedSize { + log.Warning("[nodeHasEnoughSpace]", "insufficient thick space for PVC", "pvc", pvc.Name, "freeSpace", freeSpace, "requested", pvcReq.RequestedSize) + nodeIsOk = false + break + } + + thickMapMtx.Lock() + lvgsThickFree[commonLVG.Name] -= pvcReq.RequestedSize + thickMapMtx.Unlock() + log.Trace("[nodeHasEnoughSpace]", "updated thick free space", "lvg", commonLVG.Name, "remaining", lvgsThickFree[commonLVG.Name]) + + case consts.Thin: + lvg := lvgMap[commonLVG.Name] + targetThinPool := findMatchedThinPool(lvg.Status.ThinPools, commonLVG.Thin.PoolName) + + thinMapMtx.RLock() + freeSpace := lvgsThinFree[lvg.Name][targetThinPool.Name] + thinMapMtx.RUnlock() + + if freeSpace < pvcReq.RequestedSize { + log.Warning("[nodeHasEnoughSpace]", "insufficient thin space for PVC", "pvc", pvc.Name, "freeSpace", freeSpace, "requested", pvcReq.RequestedSize) + nodeIsOk = false + break + } + + thinMapMtx.Lock() + lvgsThinFree[lvg.Name][targetThinPool.Name] -= pvcReq.RequestedSize + thinMapMtx.Unlock() + log.Trace("[nodeHasEnoughSpace]", "updated thin free space", "lvg", lvg.Name, "thinPool", targetThinPool.Name, "remaining", lvgsThinFree[lvg.Name][targetThinPool.Name]) + } + + log.Trace("[nodeHasEnoughSpace]", "space check result", "pvc", pvc.Name, "nodeIsOk", nodeIsOk) + return nodeIsOk +} + +func findMatchedThinPool(thinPools []snc.LVMVolumeGroupThinPoolStatus, name string) *snc.LVMVolumeGroupThinPoolStatus { + for _, tp := range thinPools { + if tp.Name == name { + return &tp + } + } + + return nil +} + +// func findMatchedLVG(nodeLVGs []*snc.LVMVolumeGroup, scLVGs []srv.ReplicatedStoragePoolLVMVolumeGroups) *srv.ReplicatedStoragePoolLVMVolumeGroups { +// nodeLVGNames := make(map[string]struct{}, len(nodeLVGs)) +// for _, lvg := range nodeLVGs { +// nodeLVGNames[lvg.Name] = struct{}{} +// } + +// for _, lvg := range scLVGs { +// if _, match := nodeLVGNames[lvg.Name]; match { +// return &lvg +// } +// } + +// return nil +// } + +func findSharedLVG(nodeLVGs []*snc.LVMVolumeGroup, scLVGs []LVMVolumeGroup) *LVMVolumeGroup { + nodeLVGNames := make(map[string]struct{}, len(nodeLVGs)) + for _, lvg := range nodeLVGs { + nodeLVGNames[lvg.Name] = struct{}{} + } + + for _, lvg := range scLVGs { + if _, match := nodeLVGNames[lvg.Name]; match { + return &lvg + } + } + + return nil +} + +// func getAllNodesWithLVGs(ctx context.Context, cl client.Client) (map[string]*snc.LVMVolumeGroup, error) { +// result := map[string]*snc.LVMVolumeGroup{} +// lvgs := &snc.LVMVolumeGroupList{} +// err := cl.List(ctx, lvgs) +// if err != nil { +// return nil, err +// } + +// for _, lvg := range lvgs.Items { +// result[lvg.Spec.Local.NodeName] = &lvg +// } + +// return result, nil +// } + +// func getAllLvgsFromPod(pvcs map[string]*corev1.PersistentVolumeClaim, rscMap map[string]*srv.ReplicatedStorageClass, spMap map[string]*srv.ReplicatedStoragePool, lvgMap map[string]*snc.LVMVolumeGroup) map[string]*snc.LVMVolumeGroup { +// result := map[string]*snc.LVMVolumeGroup{} + +// for _, pvc := range pvcs { +// scName := *pvc.Spec.StorageClassName 
+// sc, found := rscMap[scName] +// if !found { +// continue //TODO +// } + +// sp := spMap[sc.Spec.StoragePool] + +// for _, lvgGr := range sp.Spec.LVMVolumeGroups { +// result[lvgGr.Name] = lvgMap[lvgGr.Name] +// } +// } + +// return result +// } + +func getLVGThinFreeSpaces(lvgs map[string]*snc.LVMVolumeGroup) map[string]map[string]int64 { + result := make(map[string]map[string]int64, len(lvgs)) + + for _, lvg := range lvgs { + if result[lvg.Name] == nil { + result[lvg.Name] = make(map[string]int64, len(lvg.Status.ThinPools)) + } + + for _, tp := range lvg.Status.ThinPools { + result[lvg.Name][tp.Name] = tp.AvailableSpace.Value() + } + } + + return result +} + +func getLVGThickFreeSpaces(lvgs map[string]*snc.LVMVolumeGroup) map[string]int64 { + result := make(map[string]int64, len(lvgs)) + + for _, lvg := range lvgs { + result[lvg.Name] = lvg.Status.VGFree.Value() + } + + return result +} + +// func filterDRBDNodes(nodes []string, sp *srv.ReplicatedStoragePool, lvmGrMap map[string]*snc.LVMVolumeGroup) []string { +// result := []string{} +// allowedNodes := map[string]struct{}{} // nodes which contain lvgs + +// for _, lvmVolGr := range sp.Spec.LVMVolumeGroups { +// lvmGr, found := lvmGrMap[lvmVolGr.Name] +// if !found { +// continue +// } +// allowedNodes[lvmGr.Spec.Local.NodeName] = struct{}{} +// } + +// for _, nodeName := range nodes { +// if _, allowed := allowedNodes[nodeName]; allowed { +// result = append(result, nodeName) +// } +// } + +// return result +// } + +type PVCRequest struct { + DeviceType string + RequestedSize int64 +} + +func extractRequestedSize( + log *logger.Logger, + pvcs map[string]*corev1.PersistentVolumeClaim, + scs map[string]*v1.StorageClass, + pvs map[string]*corev1.PersistentVolume, +) (map[string]PVCRequest, error) { + pvcRequests := make(map[string]PVCRequest, len(pvcs)) + for _, pvc := range pvcs { + sc := scs[*pvc.Spec.StorageClassName] + log.Debug(fmt.Sprintf("[extractRequestedSize] PVC %s/%s has status phase: %s", pvc.Namespace, pvc.Name, pvc.Status.Phase)) + switch pvc.Status.Phase { + case corev1.ClaimPending: + switch sc.Parameters[consts.LvmTypeParamKey] { + case consts.Thick: + reqSize := pvc.Spec.Resources.Requests.Storage().Value() + if reqSize < 0 { + reqSize = 0 + } + pvcRequests[pvc.Name] = PVCRequest{ + DeviceType: consts.Thick, + RequestedSize: reqSize, + } + case consts.Thin: + reqSize := pvc.Spec.Resources.Requests.Storage().Value() + if reqSize < 0 { + reqSize = 0 + } + pvcRequests[pvc.Name] = PVCRequest{ + DeviceType: consts.Thin, + RequestedSize: pvc.Spec.Resources.Requests.Storage().Value(), + } + } + + case corev1.ClaimBound: + pv := pvs[pvc.Spec.VolumeName] + switch sc.Parameters[consts.LvmTypeParamKey] { + case consts.Thick: + reqSize := pvc.Spec.Resources.Requests.Storage().Value() - pv.Spec.Capacity.Storage().Value() + if reqSize < 0 { + reqSize = 0 + } + pvcRequests[pvc.Name] = PVCRequest{ + DeviceType: consts.Thick, + RequestedSize: reqSize, + } + case consts.Thin: + reqSize := pvc.Spec.Resources.Requests.Storage().Value() - pv.Spec.Capacity.Storage().Value() + if reqSize < 0 { + reqSize = 0 + } + pvcRequests[pvc.Name] = PVCRequest{ + DeviceType: consts.Thin, + RequestedSize: reqSize, + } + } + } + } + + for name, req := range pvcRequests { + log.Trace(fmt.Sprintf("[extractRequestedSize] pvc %s has requested size: %d, device type: %s", name, req.RequestedSize, req.DeviceType)) + } + + return pvcRequests, nil +} + +func getPodRelatedPVCs(ctx context.Context, cl client.Client, log *logger.Logger, pod *corev1.Pod) 
(map[string]*corev1.PersistentVolumeClaim, error) { + pvcMap, err := getAllPVCsFromNamespace(ctx, cl, pod.Namespace) + if err != nil { + log.Error(err, fmt.Sprintf("[getUsedPVC] unable to get all PVC for Pod %s in the namespace %s", pod.Name, pod.Namespace)) + return nil, err + } + + for pvcName := range pvcMap { + log.Trace(fmt.Sprintf("[getUsedPVC] PVC %s is in namespace %s", pvcName, pod.Namespace)) + } + + usedPvc := make(map[string]*corev1.PersistentVolumeClaim, len(pod.Spec.Volumes)) + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + log.Trace(fmt.Sprintf("[getUsedPVC] Pod %s/%s uses PVC %s", pod.Namespace, pod.Name, volume.PersistentVolumeClaim.ClaimName)) + pvc := pvcMap[volume.PersistentVolumeClaim.ClaimName] + usedPvc[volume.PersistentVolumeClaim.ClaimName] = &pvc + } + } + + return usedPvc, err +} + +func getAllPVCsFromNamespace(ctx context.Context, cl client.Client, namespace string) (map[string]corev1.PersistentVolumeClaim, error) { + list := &corev1.PersistentVolumeClaimList{} + err := cl.List(ctx, list, &client.ListOptions{Namespace: namespace}) + if err != nil { + return nil, err + } + + pvcs := make(map[string]corev1.PersistentVolumeClaim, len(list.Items)) + for _, pvc := range list.Items { + pvcs[pvc.Name] = pvc + } + + return pvcs, nil +} + +func getStorageClassesUsedByPVCs(ctx context.Context, cl client.Client, pvcs map[string]*corev1.PersistentVolumeClaim) (map[string]*v1.StorageClass, error) { + scs := &v1.StorageClassList{} + err := cl.List(ctx, scs) + if err != nil { + return nil, err + } + + scMap := make(map[string]v1.StorageClass, len(scs.Items)) + for _, sc := range scs.Items { + scMap[sc.Name] = sc + } + + result := make(map[string]*v1.StorageClass, len(pvcs)) + for _, pvc := range pvcs { + if pvc.Spec.StorageClassName == nil { + err = fmt.Errorf("no StorageClass specified for PVC %s", pvc.Name) + return nil, err + } + + scName := *pvc.Spec.StorageClassName + if sc, match := scMap[scName]; match { + result[sc.Name] = &sc + } + } + + fmt.Printf("[getStorageClassesUsedByPVCs] result: %+v\n", result) + return result, nil +} + +func filterPVCsByProvisioner(log *logger.Logger, podRelatedPVCs map[string]*corev1.PersistentVolumeClaim, scsUsedByPodPVCs map[string]*v1.StorageClass) (map[string]*corev1.PersistentVolumeClaim, map[string]*corev1.PersistentVolumeClaim) { + replicatedPVCs := make(map[string]*corev1.PersistentVolumeClaim, len(podRelatedPVCs)) + localPVCs := make(map[string]*corev1.PersistentVolumeClaim, len(podRelatedPVCs)) + + for _, pvc := range podRelatedPVCs { + sc := scsUsedByPodPVCs[*pvc.Spec.StorageClassName] + if sc.Provisioner == consts.SdsLocalVolumeProvisioner { + localPVCs[pvc.Name] = pvc + continue + } + if sc.Provisioner == consts.SdsReplicatedVolumeProvisioner { + replicatedPVCs[pvc.Name] = pvc + continue + } + log.Debug(fmt.Sprintf("[filterNotManagedPVC] filter out PVC %s/%s due to used Storage class %s is not managed by sds-replicated-volume-provisioner", pvc.Name, pvc.Namespace, sc.Name)) + } + + return replicatedPVCs, localPVCs +} + +// func getSortedLVGsFromStorageClasses(replicatedSCs map[string]*srv.ReplicatedStorageClass, spMap map[string]*srv.ReplicatedStoragePool) (map[string][]srv.ReplicatedStoragePoolLVMVolumeGroups, error) { +// result := make(map[string][]srv.ReplicatedStoragePoolLVMVolumeGroups, len(replicatedSCs)) + +// for _, sc := range replicatedSCs { +// pool := spMap[sc.Spec.StoragePool] +// result[sc.Name] = pool.Spec.LVMVolumeGroups +// } + +// return result, nil +// } + +func 
CreateLVGsMapFromStorageClasses(scs map[string]*v1.StorageClass) (map[string][]LVMVolumeGroup, error) { + result := make(map[string][]LVMVolumeGroup, len(scs)) + + for _, sc := range scs { + lvgs, err := ExtractLVGsFromSC(sc) + if err != nil { + return nil, err + } + + result[sc.Name] = append(result[sc.Name], lvgs...) + } + + return result, nil +} + +func ExtractLVGsFromSC(sc *v1.StorageClass) ([]LVMVolumeGroup, error) { + lvms, ok := sc.Parameters[consts.LVMVolumeGroupsParamKey] + if !ok { + return nil, fmt.Errorf("key is %s not found in StorageClass parameters", consts.LvmTypeParamKey) + } + + lvms = strings.Trim(lvms, "'") + var lvmVolumeGroups []LVMVolumeGroup + err := yaml.Unmarshal([]byte(lvms), &lvmVolumeGroups) + if err != nil { + return nil, err + } + return lvmVolumeGroups, nil +} + +func GetCachedLVGsUsedByPodStorageClases(lvgs map[string]*snc.LVMVolumeGroup, scsLVGs map[string][]LVMVolumeGroup) map[string]*snc.LVMVolumeGroup { + result := make(map[string]*snc.LVMVolumeGroup, len(lvgs)) + usedLvgs := make(map[string]struct{}, len(lvgs)) + + for _, scLvgs := range scsLVGs { + for _, lvg := range scLvgs { + usedLvgs[lvg.Name] = struct{}{} + } + } + + for _, lvg := range lvgs { + if _, used := usedLvgs[lvg.Name]; used { + result[lvg.Name] = lvg + } + } + + return result +} + +func CreateNodeToCachedLVGsMap(lvgs map[string]*snc.LVMVolumeGroup) map[string][]*snc.LVMVolumeGroup { + sorted := make(map[string][]*snc.LVMVolumeGroup, len(lvgs)) + for _, lvg := range lvgs { + for _, node := range lvg.Status.Nodes { + sorted[node.Name] = append(sorted[node.Name], lvg) + } + } + + return sorted +} + +// func isOnSameNode(nodeLVGs []*snc.LVMVolumeGroup, scLVGs []LVMVolumeGroup) bool { +// nodeLVGNames := make(map[string]struct{}, len(nodeLVGs)) +// for _, lvg := range nodeLVGs { +// nodeLVGNames[lvg.Name] = struct{}{} +// } + +// for _, lvg := range scLVGs { +// if _, found := nodeLVGNames[lvg.Name]; !found { +// return false +// } +// } + +// return true +// } + +func findMatchedLVGs(nodeLVGs []*snc.LVMVolumeGroup, scLVGs []LVMVolumeGroup) *LVMVolumeGroup { + nodeLVGNames := make(map[string]struct{}, len(nodeLVGs)) + for _, lvg := range nodeLVGs { + nodeLVGNames[lvg.Name] = struct{}{} + } + + for _, lvg := range scLVGs { + if _, match := nodeLVGNames[lvg.Name]; match { + return &lvg + } + } + + return nil +} + +func getSharedNodesByStorageClasses(podStorageClasses map[string]*v1.StorageClass, nodeToCachedLVGsMap map[string][]*snc.LVMVolumeGroup) (map[string][]*snc.LVMVolumeGroup, error) { + result := make(map[string][]*snc.LVMVolumeGroup, len(nodeToCachedLVGsMap)) + + for nodeName, lvgs := range nodeToCachedLVGsMap { + lvgNames := make(map[string]struct{}, len(lvgs)) + for _, l := range lvgs { + lvgNames[l.Name] = struct{}{} + } + + nodeIncludesLVG := true + for _, sc := range podStorageClasses { + scLvgs, err := ExtractLVGsFromSC(sc) + if err != nil { + return nil, err + } + + contains := false + for _, lvg := range scLvgs { + if _, exist := lvgNames[lvg.Name]; exist { + contains = true + break + } + } + + if !contains { + nodeIncludesLVG = false + break + } + } + + if nodeIncludesLVG { + result[nodeName] = lvgs + } + } + + return result, nil +} + +func Status(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte("ok")) + if err != nil { + fmt.Printf("error occurs on status route, err: %s\n", err.Error()) + } +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/handler.go 
b/images/sds-common-scheduler-extender/pkg/scheduler/handler.go new file mode 100644 index 000000000..02c404572 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/handler.go @@ -0,0 +1,99 @@ +package scheduler + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" +) + +type FiltererPrioritizer interface { + Filter(inputData ExtenderArgs) (*ExtenderFilterResult, error) + Prioritize(inputData ExtenderArgs) ([]HostPriority, error) +} + +type Handler struct { + log *logger.Logger + scheduler FiltererPrioritizer +} + +func NewHandler(log *logger.Logger, sheduler FiltererPrioritizer) *Handler { + return &Handler{ + log: log, + scheduler: sheduler, + } +} + +func (h *Handler) Filter(w http.ResponseWriter, r *http.Request) { + h.log.Debug("[Filter] starts filtering") + + inputData, ok := r.Context().Value("inputData").(ExtenderArgs) + if !ok { + h.log.Error(errors.New("pod data not found in context"), "[Filter] missing pod data") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + h.log.Trace(fmt.Sprintf("[Filter] filter input data: %+v", inputData)) + if inputData.Pod == nil { + h.log.Error(errors.New("no pod in request"), "[Filter] no pod provided for filtering") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + result, err := h.scheduler.Filter(inputData) + if err != nil { + h.log.Error(err, "[Filter] filtering failed") + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + w.Header().Set("content-type", "application/json") + if err := json.NewEncoder(w).Encode(result); err != nil { + h.log.Error(err, "[Filter] unable to encode filter response") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + h.log.Debug(fmt.Sprintf("[filter] completed filtering for Pod %s/%s", inputData.Pod.Namespace, inputData.Pod.Name)) +} + +func (h *Handler) Prioritize(w http.ResponseWriter, r *http.Request) { + h.log.Debug("[Prioritize] starts serving") + + inputData, ok := r.Context().Value("inputData").(ExtenderArgs) + if !ok { + h.log.Error(errors.New("pod data not found in context"), "[Prioritize] missing pod data") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + h.log.Trace(fmt.Sprintf("[Prioritize] filter input data: %+v", inputData)) + if inputData.Pod == nil { + h.log.Error(errors.New("no pod in request"), "[Prioritize] no pod provided for filtering") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + result, err := h.scheduler.Prioritize(inputData) + if err != nil { + h.log.Error(err, "[Prioritize] prioritization failed") + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + w.Header().Set("content-type", "application/json") + if err := json.NewEncoder(w).Encode(result); err != nil { + h.log.Error(err, "[Prioritize] unable to encode response") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + h.log.Debug(fmt.Sprintf("[Prioritize] completed serving for Pod %s/%s", inputData.Pod.Namespace, inputData.Pod.Name)) +} + +func (h *Handler) Status(w http.ResponseWriter, r *http.Request) { + Status(w, r) +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/middleware.go b/images/sds-common-scheduler-extender/pkg/scheduler/middleware.go new file mode 100644 index 000000000..cdb099799 --- /dev/null +++ 
b/images/sds-common-scheduler-extender/pkg/scheduler/middleware.go @@ -0,0 +1,111 @@ +package scheduler + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "time" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// type Middleware struct { +// Handler http.Handler +// Log *logger.Logger +// } + +// func NewMiddleware(handler http.Handler, log *logger.Logger) *Middleware { +// return &Middleware{ +// Handler: handler, +// Log: log, +// } +// } + +func BodyUnmarshalMiddleware(next http.Handler, log *logger.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var inputData ExtenderArgs + reader := http.MaxBytesReader(w, r.Body, 10<<20) + if err := json.NewDecoder(reader).Decode(&inputData); err != nil { + log.Error(err, "[BodyUnmarshalMiddleware] unable to decode request") + http.Error(w, "unable to decode request", http.StatusBadRequest) + return + } + + cwv := context.WithValue(r.Context(), "inputData", inputData) + req := r.WithContext(cwv) + next.ServeHTTP(w, req) + }) +} + +func LogMiddleware(next http.Handler, log *logger.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // capture the start time before the wrapped handler runs so response_time reflects the actual handling duration + startTime := time.Now() + next.ServeHTTP(w, r) + + // status := m.handler.Status + + fields := []interface{}{ + "type", "access", + "response_time", time.Since(startTime).Seconds(), + "protocol", r.Proto, + // "http_status_code", status, + "http_method", r.Method, + "url", r.RequestURI, + "http_host", r.Host, + "request_size", r.ContentLength, + // "response_size", wr.size, + } + ip, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + fields = append(fields, "remote_ipaddr", ip) + } + ua := r.Header.Get("User-Agent") + if len(ua) > 0 { + fields = append(fields, "http_user_agent", ua) + } + log.Info("access", fields...)
+ }) } + +func PodCheckMiddleware(ctx context.Context, cl client.Client, next http.Handler, log *logger.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + inputData, ok := r.Context().Value("inputData").(ExtenderArgs) + if !ok { + log.Error(errors.New("pod data not found in context"), "[WithPodCheck] missing pod data") + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + pod := inputData.Pod + + pvcs := &corev1.PersistentVolumeClaimList{} + if err := cl.List(ctx, pvcs); err != nil { + log.Error(err, "[WithPodCheck] error listing PVCs") + http.Error(w, "error listing PVCs", http.StatusInternalServerError) + return + } + + pvcMap := make(map[string]*corev1.PersistentVolumeClaim, len(pvcs.Items)) + for i := range pvcs.Items { + pvcMap[pvcs.Items[i].Name] = &pvcs.Items[i] + } + + volumes, err := shouldProcessPod(ctx, cl, pvcMap, log, pod) + if err != nil { + log.Error(err, fmt.Sprintf("[WithPodCheck] error processing pod %s/%s", pod.Namespace, pod.Name)) + result := &ExtenderFilterResult{NodeNames: inputData.NodeNames} + if err := json.NewEncoder(w).Encode(result); err != nil { + log.Error(err, "[WithPodCheck] unable to encode response") + http.Error(w, "unable to encode response", http.StatusInternalServerError) + return + } + return + } + + log.Trace(fmt.Sprintf("[WithPodCheck] pod %s/%s is eligible, matched volumes: %+v", pod.Namespace, pod.Name, volumes)) + next.ServeHTTP(w, r) + }) +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/prioritize.go b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize.go new file mode 100644 index 000000000..7ebb76d0e --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize.go @@ -0,0 +1,211 @@ +package scheduler + +import ( + "errors" + "fmt" + "sync" + + srv "github.com/deckhouse/sds-replicated-volume/api/v1alpha1" + srv2 "github.com/deckhouse/sds-replicated-volume/api/v1alpha2" + + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" +) + +func (s *scheduler) Prioritize(inputData ExtenderArgs) ([]HostPriority, error) { + nodeNames, err := getNodeNames(inputData, s.log) + if err != nil { + return nil, fmt.Errorf("unable to get node names: %w", err) + } + + s.log.Debug(fmt.Sprintf("[prioritize] prioritizing for Pod %s/%s", inputData.Pod.Namespace, inputData.Pod.Name)) + s.log.Trace(fmt.Sprintf("[prioritize] Pod: %+v, Nodes: %+v", inputData.Pod, nodeNames)) + + input, err := s.collectPrioritizeInput(inputData.Pod, nodeNames) + if err != nil { + return nil, err + } + + return s.scoreNodes(input) +} + +// collectPrioritizeInput gathers all necessary data for prioritization. +func (s *scheduler) collectPrioritizeInput(pod *v1.Pod, nodeNames []string) (*PrioritizeInput, error) { + pvcs, err := getPodRelatedPVCs(s.ctx, s.client, s.log, pod) + if err != nil { + return nil, fmt.Errorf("unable to get PVCs for Pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + if len(pvcs) == 0 { + return nil, errors.New("no PVCs found for Pod") + } + + scs, err := getStorageClassesUsedByPVCs(s.ctx, s.client, pvcs) + if err != nil { + return nil, fmt.Errorf("unable to get StorageClasses: %w", err) + } + + replicatedPVCs, localPVCs := filterPVCsByProvisioner(s.log, pvcs, scs) + if len(replicatedPVCs) == 0 && len(localPVCs) == 0 { + s.log.Warning(fmt.Sprintf("[collectPrioritizeInput] Pod %s/%s uses unmanaged PVCs.
replicatedPVCs length %d, localPVCs length %d", pod.Namespace, pod.Name, len(replicatedPVCs), len(localPVCs))) + return nil, errors.New("no managed PVCs found") + } + + replicatedAndLocalPVCs := make(map[string]*corev1.PersistentVolumeClaim, len(replicatedPVCs)+len(localPVCs)) + for name, pvc := range replicatedPVCs { + replicatedAndLocalPVCs[name] = pvc + } + for name, pvc := range localPVCs { + replicatedAndLocalPVCs[name] = pvc + } + + pvMap, err := getPersistentVolumes(s.ctx, s.client, s.log) + if err != nil { + return nil, fmt.Errorf("unable to get PersistentVolumes: %w", err) + } + + pvcRequests, err := extractRequestedSize(s.log, replicatedAndLocalPVCs, scs, pvMap) + if err != nil { + return nil, fmt.Errorf("unable to extract PVC request sizes: %w", err) + } + + storagePoolList := &srv.ReplicatedStoragePoolList{} + if err := s.client.List(s.ctx, storagePoolList); err != nil { + return nil, fmt.Errorf("unable to list replicated storage pools: %w", err) + } + storagePoolMap := make(map[string]*srv.ReplicatedStoragePool, len(storagePoolList.Items)) + for i := range storagePoolList.Items { + storagePool := &storagePoolList.Items[i] + storagePoolMap[storagePool.Name] = storagePool + } + + drbdReplicaList, err := getDRBDReplicaList(s.ctx, s.client) + if err != nil { + return nil, fmt.Errorf("unable to list DRBD replicas: %w", err) + } + + drbdReplicaMap := make(map[string]*srv2.DRBDResourceReplica, len(drbdReplicaList.Items)) + for i := range drbdReplicaList.Items { + replica := &drbdReplicaList.Items[i] + drbdReplicaMap[replica.Name] = replica + } + + res := &PrioritizeInput{ + Pod: pod, + NodeNames: nodeNames, + ReplicatedProvisionPVCs: replicatedPVCs, + LocalProvisionPVCs: localPVCs, + StorageClasses: scs, + PVCRequests: pvcRequests, + StoragePoolMap: storagePoolMap, + DefaultDivisor: s.defaultDivisor, + DRBDResourceReplicaMap: drbdReplicaMap, + } + // b, _ := json.MarshalIndent(res, "", " ") + // s.log.Trace(fmt.Sprintf("[collectPrioritizeInput] PrioritizeInput: %+v", string(b))) + return res, nil +} + +// scoreNodes prioritizes nodes based on storage criteria.
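+// The score for a node is built in two steps (see scoreSingleNode below):
+// every PVC whose StorageClass references an LVMVolumeGroup present on the
+// node adds a fixed 10 points, and the free space left in the matched
+// LVMVolumeGroups after subtracting the PVC requests is averaged as a
+// percentage and converted into additional points via getNodeScore.
+// A node that is a diskless DRBD peer for any of the Pod's PVCs scores 0.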
+func (s *scheduler) scoreNodes(input *PrioritizeInput) ([]HostPriority, error) { + s.log.Debug("[scoreNodes] prioritizing nodes", "nodes", input.NodeNames) + + lvgInfo, err := collectLVGScoreInfo(s, input.StorageClasses) + if err != nil { + return nil, fmt.Errorf("unable to collect LVG info: %w", err) + } + + return s.scoreNodesParallel(input, lvgInfo) +} + +func (s *scheduler) scoreNodesParallel(input *PrioritizeInput, lvgInfo *LVGScoreInfo) ([]HostPriority, error) { + result := make([]HostPriority, 0, len(input.NodeNames)) + resultCh := make(chan HostPriority, len(input.NodeNames)) + var wg sync.WaitGroup + wg.Add(len(input.NodeNames)) + + for _, nodeName := range input.NodeNames { + go func(nodeName string) { + defer wg.Done() + score := s.scoreSingleNode(input, lvgInfo, nodeName) + resultCh <- HostPriority{Host: nodeName, Score: score} + }(nodeName) + } + + go func() { + wg.Wait() + close(resultCh) + }() + + for score := range resultCh { + result = append(result, score) + } + + s.log.Debug("[scoreNodes] scored nodes", "results", result) + return result, nil +} + +func (s *scheduler) scoreSingleNode(input *PrioritizeInput, lvgInfo *LVGScoreInfo, nodeName string) int { + s.log.Debug(fmt.Sprintf("[scoreSingleNode] scoring node %s", nodeName)) + + lvgsFromNode := lvgInfo.NodeToLVGs[nodeName] + s.log.Trace(fmt.Sprintf("[scoreSingleNode] LVMVolumeGroups from node %s: %+v", nodeName, lvgsFromNode)) + var totalFreeSpaceLeftPercent int64 + nodeScore := 0 + + PVCs := make(map[string]*corev1.PersistentVolumeClaim, len(input.LocalProvisionPVCs)+len(input.ReplicatedProvisionPVCs)) + for name, pvc := range input.LocalProvisionPVCs { + PVCs[name] = pvc + } + for name, pvc := range input.ReplicatedProvisionPVCs { + PVCs[name] = pvc + } + + for _, pvc := range PVCs { + replica := input.DRBDResourceReplicaMap[pvc.Spec.VolumeName] + s.log.Info(fmt.Sprintf("[scoreSingleNode] pvc: %+v", pvc)) + s.log.Info(fmt.Sprintf("[scoreSingleNode] replica: %+v", replica)) + s.log.Info(fmt.Sprintf("[scoreSingleNode] node Name %s", nodeName)) + // replica is nil for local (non-replicated) PVCs; the diskless check only applies when a DRBD replica exists + if replica != nil { + peer := replica.Spec.Peers[nodeName] + if peer.Diskless { + s.log.Info(fmt.Sprintf("[scoreSingleNode] node %s is diskless for pvc %s, returning 0 score points", nodeName, pvc.Name)) + return 0 + } + } + + pvcReq := input.PVCRequests[pvc.Name] + s.log.Trace(fmt.Sprintf("[scoreSingleNode] pvc %s size request: %+v", pvc.Name, pvcReq)) + + lvgsFromSC := lvgInfo.SCLVGs[*pvc.Spec.StorageClassName] + s.log.Trace(fmt.Sprintf("[scoreSingleNode] LVMVolumeGroups %+v from SC: %s", lvgsFromSC, *pvc.Spec.StorageClassName)) + commonLVG := findMatchedLVGs(lvgsFromNode, lvgsFromSC) + s.log.Trace(fmt.Sprintf("[scoreSingleNode] Common LVMVolumeGroup %+v of node %s and SC %s", commonLVG, nodeName, *pvc.Spec.StorageClassName)) + + if commonLVG == nil { + s.log.Warning(fmt.Sprintf("[scoreSingleNode] unable to match Storage Class's LVMVolumeGroup with node %s for Storage Class %s", nodeName, *pvc.Spec.StorageClassName)) + continue + } + + nodeScore += 10 + lvg := lvgInfo.LVGs[commonLVG.Name] + s.log.Trace(fmt.Sprintf("[scoreSingleNode] LVMVolumeGroup %s data: %+v", lvg.Name, lvg)) + + freeSpace, err := calculateFreeSpace(lvg, s.cacheMgr, &pvcReq, commonLVG, s.log, pvc, nodeName) + if err != nil { + s.log.Error(err, fmt.Sprintf("[scoreSingleNode] unable to calculate free space for LVMVolumeGroup %s, PVC: %s, node: %s", lvg.Name, pvc.Name, nodeName)) + continue + } + s.log.Trace(fmt.Sprintf("[scoreSingleNode] LVMVolumeGroup %s freeSpace: %s", lvg.Name, freeSpace.String())) +
s.log.Trace(fmt.Sprintf("[scoreSingleNode] LVMVolumeGroup %s total size: %s", lvg.Name, lvg.Status.VGSize.String())) + totalFreeSpaceLeftPercent += getFreeSpaceLeftAsPercent(freeSpace.Value(), pvcReq.RequestedSize, lvg.Status.VGSize.Value()) + + s.log.Trace(fmt.Sprintf("[scoreSingleNode] totalFreeSpaceLeftPercent: %d", totalFreeSpaceLeftPercent)) + } + + averageFreeSpace := int64(0) + if len(PVCs) > 0 { + averageFreeSpace = totalFreeSpaceLeftPercent / int64(len(PVCs)) + } + s.log.Trace(fmt.Sprintf("[scoreNodes] average free space left for node %s: %d%%", nodeName, averageFreeSpace)) + + nodeScore += getNodeScore(averageFreeSpace, 1/input.DefaultDivisor) + s.log.Trace(fmt.Sprintf("[scoreNodes] node %s has score %d with average free space left %d%%", nodeName, nodeScore, averageFreeSpace)) + + return nodeScore +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_doc.md b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_doc.md new file mode 100644 index 000000000..e69de29bb diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_test.go b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_test.go new file mode 100644 index 000000000..fb934bd8c --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/prioritize_test.go @@ -0,0 +1,194 @@ +package scheduler + +// import ( +// "testing" + +// c "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" +// "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/consts" +// "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + +// snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" +// v1 "k8s.io/api/core/v1" +// storagev1 "k8s.io/api/storage/v1" +// "k8s.io/apimachinery/pkg/api/resource" +// metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +// ) + +// var ( +// storageClassNameOne string = "storage-class-1" +// storageClassNameTwo string = "storage-class-2" +// ) + +// const ( +// node1 string = "node-1" +// node2 string = "node-2" +// node3 string = "node-3" +// ) + +// func mockLVG(lvgName, nodeName, lvgFreeSize string) *snc.LVMVolumeGroup { +// return &snc.LVMVolumeGroup{ +// ObjectMeta: metav1.ObjectMeta{Name: lvgName}, +// Spec: snc.LVMVolumeGroupSpec{ +// Local: snc.LVMVolumeGroupLocalSpec{ +// NodeName: nodeName, +// }, +// }, +// Status: snc.LVMVolumeGroupStatus{ +// Nodes: []snc.LVMVolumeGroupNode{ +// { +// Name: nodeName, +// }, +// }, +// VGFree: resource.MustParse(lvgFreeSize), +// }, +// } +// } + +// func mockPVC(pvcName, requestedSize string) *v1.PersistentVolumeClaim { +// return &v1.PersistentVolumeClaim{ +// ObjectMeta: metav1.ObjectMeta{Name: pvcName}, +// Spec: v1.PersistentVolumeClaimSpec{ +// StorageClassName: &storageClassNameOne, +// Resources: v1.VolumeResourceRequirements{ +// Requests: v1.ResourceList{ +// v1.ResourceStorage: resource.MustParse(requestedSize), +// }, +// }, +// }, +// } +// } + +// func TestScoreNodes(t *testing.T) { +// log := logger.Logger{} + +// // cacheMgr := c.CacheManager{cache: &c.Cache{}} +// cacheMgr := c.NewCacheManager(&c.Cache{}, nil, &log) +// lvgCache := []*c.LvgCache{ +// { +// Lvg: mockLVG("lvg-1", node1, "2Gi"), +// }, +// { +// Lvg: mockLVG("lvg-2", node2, "1Gi"), +// ThickPVCs: map[string]*c.pvcCache{}, +// ThinPools: map[string]map[string]*c.pvcCache{}, +// }, +// { +// Lvg: mockLVG("lvg-3", node2, "1Gi"), +// }, +// } + +// for _, lvgC := range lvgCache { +// cacheMgr.AddLVG(lvgC.Lvg) +// } 
+// cacheMgr.AddLVGToPVC("lvg-1", "pvc-1") +// cacheMgr.AddLVGToPVC("lvg-2", "pvc-2") + +// pvcRequests := map[string]PVCRequest{ +// "pvc-1": { +// DeviceType: consts.Thick, +// RequestedSize: 1073741824, // 1Gb +// }, +// "pvc-2": { +// DeviceType: consts.Thin, +// RequestedSize: 524288000, // 500mb +// }, +// } + +// // Do not change intendation here or else these LVGs will not be parsed +// mockLVGYamlOne := `- name: lvg-1 +// Thin: +// poolName: pool1 +// - name: lvg-2 +// Thin: +// poolName: pool2` + +// mockLVGYamlTwo := `- name: lvg-3 +// Thin: +// poolName: pool3` + +// scs := map[string]*storagev1.StorageClass{ +// storageClassNameOne: { +// ObjectMeta: metav1.ObjectMeta{ +// Name: storageClassNameOne, +// }, +// Provisioner: "replicated.csi.storage.deckhouse.io", +// Parameters: map[string]string{ +// consts.LVMVolumeGroupsParamKey: mockLVGYamlOne, +// }, +// }, +// storageClassNameTwo: { +// ObjectMeta: metav1.ObjectMeta{ +// Name: storageClassNameTwo, +// }, +// Provisioner: "replicated.csi.storage.deckhouse.io", +// Parameters: map[string]string{ +// consts.LVMVolumeGroupsParamKey: mockLVGYamlTwo, +// }, +// }, +// } + +// pvcs := map[string]*v1.PersistentVolumeClaim{ +// "pvc-1": { +// ObjectMeta: metav1.ObjectMeta{ +// Name: "pvc-1", +// }, +// Spec: v1.PersistentVolumeClaimSpec{ +// StorageClassName: &storageClassNameOne, +// Resources: v1.VolumeResourceRequirements{ +// Requests: v1.ResourceList{ +// v1.ResourceStorage: resource.MustParse("1Gi"), +// }, +// }, +// }, +// }, +// "pvc-2": { +// ObjectMeta: metav1.ObjectMeta{ +// Name: "pvc-2", +// }, +// Spec: v1.PersistentVolumeClaimSpec{ +// StorageClassName: &storageClassNameTwo, +// Resources: v1.VolumeResourceRequirements{ +// Requests: v1.ResourceList{ +// v1.ResourceStorage: resource.MustParse("500Mi"), +// }, +// }, +// }, +// }, +// } + +// tests := []struct { +// testName string +// nodeNames []string +// pvcs map[string]*v1.PersistentVolumeClaim +// expect map[string]int +// }{ +// { +// testName: "Test Case #1", +// nodeNames: []string{node1}, +// pvcs: pvcs, +// expect: map[string]int{node1: 11}, +// }, +// { +// testName: "Test Case #2", +// nodeNames: []string{node2}, +// pvcs: pvcs, +// expect: map[string]int{node2: 3}, +// }, +// } + +// for _, tt := range tests { +// t.Run(tt.testName, func(t *testing.T) { +// score, err := scoreNodes(log, cacheMgr, &tt.nodeNames, tt.pvcs, scs, pvcRequests, 1) +// if err != nil { +// t.Error(err) +// } +// t.Logf("Node score: %v", score) + +// for _, res := range score { +// if tt.expect[res.Host] != res.Score { +// t.Errorf("Expected score for node %s to be %d, got %d", res.Host, tt.expect[res.Host], res.Score) +// } +// } +// }) +// } +// } diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/sheduler.go b/images/sds-common-scheduler-extender/pkg/scheduler/sheduler.go new file mode 100644 index 000000000..59dbf121a --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/sheduler.go @@ -0,0 +1,28 @@ +package scheduler + +import ( + "context" + + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/cache" + "github.com/deckhouse/sds-node-configurator/images/sds-common-scheduler-extender/pkg/logger" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type scheduler struct { + ctx context.Context + log *logger.Logger + client client.Client + cacheMgr *cache.CacheManager + defaultDivisor float64 +} + +func NewScheduler(ctx context.Context, cl client.Client, log *logger.Logger, cacheMgr *cache.CacheManager, defaultDiv 
float64) *scheduler { + return &scheduler{ + defaultDivisor: defaultDiv, + log: log, + client: cl, + ctx: ctx, + cacheMgr: cacheMgr, + } +} diff --git a/images/sds-common-scheduler-extender/pkg/scheduler/types.go b/images/sds-common-scheduler-extender/pkg/scheduler/types.go new file mode 100644 index 000000000..c4e184fc0 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/scheduler/types.go @@ -0,0 +1,117 @@ +/* +Copyright 2024 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + slv "github.com/deckhouse/sds-local-volume/api/v1alpha1" + snc "github.com/deckhouse/sds-node-configurator/api/v1alpha1" + srv "github.com/deckhouse/sds-replicated-volume/api/v1alpha1" + srv2 "github.com/deckhouse/sds-replicated-volume/api/v1alpha2" + apiv1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" +) + +// ExtenderArgs is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#ExtenderArgs +type ExtenderArgs struct { + // Pod being scheduled + Pod *apiv1.Pod `json:"pod"` + // List of candidate nodes where the pod can be scheduled; to be populated + // only if ExtenderConfig.NodeCacheCapable == false + Nodes *apiv1.NodeList `json:"nodes,omitempty"` + // List of candidate node names where the pod can be scheduled; to be + // populated only if ExtenderConfig.NodeCacheCapable == true + NodeNames *[]string `json:"nodenames,omitempty"` +} + +// ExtenderFilterResult is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#ExtenderFilterResult +type ExtenderFilterResult struct { + // Filtered set of nodes where the pod can be scheduled; to be populated + // only if ExtenderConfig.NodeCacheCapable == false + Nodes *apiv1.NodeList `json:"nodes,omitempty"` + // Filtered set of nodes where the pod can be scheduled; to be populated + // only if ExtenderConfig.NodeCacheCapable == true + NodeNames *[]string `json:"nodenames,omitempty"` + // Filtered out nodes where the pod can't be scheduled and the failure messages + FailedNodes map[string]string `json:"failedNodes,omitempty"` + // Error message indicating failure + Error string `json:"error,omitempty"` +} + +// HostPriority is copied from https://godoc.org/k8s.io/kubernetes/pkg/scheduler/api/v1#HostPriority +type HostPriority struct { + // Name of the host + Host string `json:"host"` + // Score associated with the host + Score int `json:"score"` +} + +// FilterInput holds input data for filtering nodes +type FilterInput struct { + Pod *v1.Pod + NodeNames []string + ReplicatedProvisionPVCs map[string]*v1.PersistentVolumeClaim + LocalProvisionPVCs map[string]*v1.PersistentVolumeClaim + SCSUsedByPodPVCs map[string]*storagev1.StorageClass + PVCSizeRequests map[string]PVCRequest + ReplicatedSCSUsedByPodPVCs map[string]*srv.ReplicatedStorageClass + LocalSCSUsedByPodPVCs map[string]*slv.LocalStorageClass + // DRBDResourceMap map[string]*srv.DRBDResource + DRBDNodesMap map[string]struct{} + DRBDResourceReplicaMap map[string]*srv2.DRBDResourceReplica +} + +// LVGInfo holds 
LVMVolumeGroup-related data +type LVGInfo struct { + ThickFreeSpaces map[string]int64 + ThinFreeSpaces map[string]map[string]int64 + NodeToLVGs map[string][]*snc.LVMVolumeGroup + SCLVGs map[string][]LVMVolumeGroup +} + +// ResultWithError holds the result of filtering a single node +type ResultWithError struct { + NodeName string + Err error +} + +// PrioritizeInput holds input data for prioritizing nodes +type PrioritizeInput struct { + Pod *v1.Pod + NodeNames []string + ReplicatedProvisionPVCs map[string]*v1.PersistentVolumeClaim + LocalProvisionPVCs map[string]*v1.PersistentVolumeClaim + StorageClasses map[string]*storagev1.StorageClass + PVCRequests map[string]PVCRequest + StoragePoolMap map[string]*srv.ReplicatedStoragePool + DefaultDivisor float64 + DRBDResourceReplicaMap map[string]*srv2.DRBDResourceReplica +} + +// LVGScoreInfo holds LVMVolumeGroup-related data for scoring +type LVGScoreInfo struct { + NodeToLVGs map[string][]*snc.LVMVolumeGroup + SCLVGs map[string][]LVMVolumeGroup + LVGs map[string]*snc.LVMVolumeGroup +} + +type LVMVolumeGroup struct { + Name string `yaml:"name"` + Thin struct { + PoolName string `yaml:"poolName"` + } `yaml:"Thin"` +} diff --git a/images/sds-common-scheduler-extender/pkg/test.go b/images/sds-common-scheduler-extender/pkg/test.go new file mode 100644 index 000000000..682ff1693 --- /dev/null +++ b/images/sds-common-scheduler-extender/pkg/test.go @@ -0,0 +1,238 @@ +package main + +import ( + "context" + "crypto/md5" + "encoding/hex" + "fmt" + "math/rand" + "slices" + "strings" + "sync" + "time" +) + +const ( + LeaderboardTopSize = 20 + AdderWorkerNum = 5 + AdderMaxItemsPerPeriod = 1000 + AdderDelay = 10 * time.Millisecond + ClearDelay = 60 * time.Second + PrinterDelay = 100 * time.Millisecond +) + +func main() { + lb := NewLeaderboard() + + // ADDERS + for range AdderWorkerNum { + go func() { + for { + time.Sleep(AdderDelay) + // add some random amount of random candidates + for range rand.Intn(AdderMaxItemsPerPeriod) { + lb.AddCandidate(randomName()) + } + } + }() + } + + // CLEARER + go func() { + for { + time.Sleep(ClearDelay) + lb.Clear() + } + }() + + // PRINTER + prev := summary{} + maxWaited := time.Duration(0) + immediateReads := 0 + timeoutedReads := 0 + + for { + time.Sleep(PrinterDelay) + + ctx, cancel := context.WithTimeout(context.Background(), PrinterDelay) + + next := lb.ReadOrWait(ctx, prev.Version) // blocks until next update + + if ctx.Err() != nil { + // timeout + timeoutedReads++ + } else { + cancel() + } + + if next.Waited < 0 { + immediateReads++ + } else { + maxWaited = max(maxWaited, next.Waited) + } + + fmt.Print("\033[H\033[2J") // clear screen + fmt.Println(time.Now().Format(time.TimeOnly)) + fmt.Printf( + "ImmediateReads: %d; TimeoutedReads: %d; MaxWaited: %v; LastWait: %v\n", + immediateReads, timeoutedReads, maxWaited, next.Waited, + ) + fmt.Print(next.String()) + + if next.Lifetime != prev.Lifetime { + // clear has happened, reset stats + maxWaited = time.Duration(0) + immediateReads = 0 + timeoutedReads = 0 + } + + prev = next + } +} + +type leaderboard struct { + mu *sync.Mutex + cond *sync.Cond + // Why lifetime is needed, why not just zero the version? + // Simply zeroing the version may be insufficient for + // [leaderboard.ReadOrWait] to detect reset, because "Signal() does not + // affect goroutine scheduling priority", and [leaderboard.AddCandidate] + // may be awoken before a [leaderboard.ReadOrWait] goroutine. 
It will + // increment the zeroed version, and [leaderboard.ReadOrWait] may never + // detect the reset. + lifetime, version int + board []candidate +} + +func NewLeaderboard() *leaderboard { + lb := &leaderboard{} + lb.mu = &sync.Mutex{} + lb.cond = sync.NewCond(lb.mu) + return lb +} + +// Reads summary or blocks calling goroutine, if lastVersion is the same as +// current version, i.e. no changes were made since last read. +// Unblocks as soon as any change is made, or context canceled. +// Duration of wait is returned in summary. It will be -1 if there was no wait. +func (s *leaderboard) ReadOrWait(ctx context.Context, lastVersion int) summary { + s.mu.Lock() + defer s.mu.Unlock() + + if s.version != lastVersion { + // read immediately + return s.summary(-1) + } + + // If ctx canceled at the same time as Wait unblocks, + // there's a minor chance of a race, when we will be signaling to the next + // iteration of [leaderboard.ReadOrWait]. + // "awakenerDone" protects against this, because this iteration won't end, + // until awakener is done. + awakenerDone := make(chan struct{}) + defer func() { + <-awakenerDone + }() + + // childCtx, cancel := context.WithCancel(ctx) + // defer cancel() + + // "awakener" will awake us, when parent context will be canceled + go func() { + <-ctx.Done() + s.cond.Signal() + awakenerDone <- struct{}{} + }() + + // wait + start := time.Now() + s.cond.Wait() + + return s.summary(time.Since(start)) +} + +// Attempts to add candidate's name to the leaderboard. +// If candidate's result is worse then the worst existing candidate, +// there will be no change. +// The probability of the change shrinks with more calls, +// until Clear is called. +func (s *leaderboard) AddCandidate(name string) { + s.mu.Lock() + defer s.mu.Unlock() + + compareCandidates := func(a candidate, b candidate) int { + return strings.Compare(a.Result, b.Result) + } + + cand := NewCandidate(name) + if len(s.board) == LeaderboardTopSize && compareCandidates(cand, s.board[len(s.board)-1]) > 0 { + // skip this looser + return + } + + // signal is required as soon as we know we will be changing state + defer s.cond.Signal() + + s.board = append(s.board, cand) + slices.SortStableFunc(s.board, compareCandidates) + s.board = s.board[:min(LeaderboardTopSize, len(s.board))] + s.version++ +} + +func (s *leaderboard) Clear() { + s.mu.Lock() + defer s.mu.Unlock() + defer s.cond.Signal() + + s.board = nil + s.version = 0 + s.lifetime++ +} + +func (s *leaderboard) summary(waited time.Duration) summary { + return summary{ + Version: s.version, + Lifetime: s.lifetime, + Waited: waited, + Top: slices.Clone(s.board[0:min(LeaderboardTopSize, len(s.board))]), + } +} + +type candidate struct { + Name string + Result string +} + +func NewCandidate(name string) candidate { + hash := md5.Sum([]byte(name)) + return candidate{ + Name: name, + Result: hex.EncodeToString(hash[:]), + } +} + +type summary struct { + Lifetime, Version int + Waited time.Duration + Top []candidate +} + +func (s summary) String() string { + sb := &strings.Builder{} + sb.WriteString(fmt.Sprintf("Lifetime: %d; Version: %d\n\n", s.Lifetime, s.Version)) + for i, c := range s.Top { + sb.WriteString(fmt.Sprintf("\t%d) %s <= %s\n", i+1, c.Result, c.Name)) + } + + return sb.String() +} + +func randomName() string { + nameLen := 2 + rand.Intn(14) + name := make([]byte, nameLen) + name[0] = byte(rand.Intn('Z'-'A')) + 'A' + for i := 1; i < nameLen; i++ { + name[i] = byte(rand.Intn('z'-'a')) + 'a' + } + return string(name) +} diff --git 
a/images/sds-common-scheduler-extender/werf.inc.yaml b/images/sds-common-scheduler-extender/werf.inc.yaml new file mode 100644 index 000000000..5306d6fc9 --- /dev/null +++ b/images/sds-common-scheduler-extender/werf.inc.yaml @@ -0,0 +1,83 @@ +{{- $csiBinaries := "/usr/bin/curl" }} +--- +# Required for external analytics. Do not remove! +image: {{ $.ImageName }}-src-artifact +fromImage: builder/src +final: false + +git: + - add: / + to: /src + includePaths: + - api + - images/{{ $.ImageName }} + stageDependencies: + install: + - '**/*' + excludePaths: + - images/{{ $.ImageName }}/werf.yaml + +shell: + install: + - echo "src artifact" + +--- +image: {{ $.ImageName }}-golang-artifact +fromImage: builder/golang-alpine +fromCacheVersion: 2025-04-30-1 +final: false + +import: + - image: {{ $.ImageName }}-src-artifact + add: /src + to: /src + before: install + +mount: + - fromPath: ~/go-pkg-cache + to: /go/pkg + +shell: + setup: + - cd /src/images/{{ $.ImageName }}/cmd + - GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags="-s -w" -o /{{ $.ImageName }} + - chmod +x /{{ $.ImageName }} + +--- +image: {{ $.ImageName }}-binaries-artifact +fromImage: builder/alt +final: false + +git: + - add: /tools/dev_images/additional_tools/binary_replace.sh + to: /binary_replace.sh + stageDependencies: + install: + - '**/*' + +shell: + install: + - apt-get update + - apt-get -y install glibc-utils curl + - {{ $.Root.ALT_CLEANUP_CMD }} + - chmod +x /binary_replace.sh + - /binary_replace.sh -i "{{ $csiBinaries }}" -o /relocate + +--- +image: {{ $.ImageName }} +fromImage: base/distroless + +import: + - image: {{ $.ImageName }}-golang-artifact + add: /{{ $.ImageName }} + to: /{{ $.ImageName }} + before: install + - image: {{ $.ImageName }}-binaries-artifact + add: /relocate + to: / + before: install + includePaths: + - '**/*' +docker: + ENTRYPOINT: ["/{{ $.ImageName }}"] + USER: deckhouse:deckhouse diff --git a/images/webhooks/werf.inc.yaml b/images/webhooks/werf.inc.yaml index 2cc3ca8e0..9bab64296 100644 --- a/images/webhooks/werf.inc.yaml +++ b/images/webhooks/werf.inc.yaml @@ -1,10 +1,10 @@ --- -# do not remove this image: used in external audits (DKP CSE) -image: {{ $.ImageName }}-src-artifact +image: {{ .ModuleNamePrefix }}{{ .ImageName }}-src-artifact fromImage: builder/src final: false + git: - - add: / + - add: {{ .ModuleDir }} to: /src includePaths: - api @@ -18,35 +18,43 @@ git: shell: install: - - rm -rf /src/.git + - echo "src artifact" --- -image: {{ $.ImageName }}-golang-artifact +image: {{ .ModuleNamePrefix }}{{ .ImageName }}-golang-artifact fromImage: builder/golang-alpine final: false + import: - - image: {{ $.ImageName }}-src-artifact + - image: {{ .ModuleNamePrefix }}{{ .ImageName }}-src-artifact add: /src to: /src - before: setup + before: install + mount: - - fromPath: ~/go-pkg-cache - to: /go/pkg +{{ include "mount points for golang builds" . 
}} + +secrets: +- id: GOPROXY + value: {{ .GOPROXY }} + shell: setup: - cd /src/images/{{ $.ImageName }}/cmd - - export CGO_ENABLED=0 GOOS=linux GOARCH=amd64 - - go build -ldflags="-s -w" -tags {{ $.Root.MODULE_EDITION }} -o /{{ $.ImageName }} + - GOPROXY=$(cat /run/secrets/GOPROXY) go mod download + - GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags="-s -w" -tags {{ .MODULE_EDITION }} -o /{{ $.ImageName }} - chmod +x /{{ $.ImageName }} --- -image: {{ $.ImageName }} +image: {{ .ModuleNamePrefix }}{{ .ImageName }} fromImage: base/distroless + import: - - image: {{ $.ImageName }}-golang-artifact - add: /webhooks - to: /webhooks - before: setup -docker: - ENTRYPOINT: ["/{{ $.ImageName }}"] - USER: deckhouse:deckhouse + - image: {{ .ModuleNamePrefix }}{{ .ImageName }}-golang-artifact + add: /{{ $.ImageName }} + to: /{{ $.ImageName }} + before: install + +imageSpec: + config: + entrypoint: ["/{{ $.ImageName }}"] diff --git a/openapi/values_ce.yaml b/openapi/values_ce.yaml index d1d8cb40d..c6d3c8696 100644 --- a/openapi/values_ce.yaml +++ b/openapi/values_ce.yaml @@ -28,6 +28,23 @@ properties: ca: type: string x-examples: ["YjY0ZW5jX3N0cmluZwo="] + customSchedulerExtenderCert: + type: object + default: {} + x-required-for-helm: + - crt + - key + - ca + properties: + crt: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] + key: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] + ca: + type: string + x-examples: ["YjY0ZW5jX3N0cmluZwo="] registry: type: object description: "System field, overwritten by Deckhouse. Don't use" diff --git a/openapi/values_ee.yaml b/openapi/values_ee.yaml deleted file mode 100644 index d1d8cb40d..000000000 --- a/openapi/values_ee.yaml +++ /dev/null @@ -1,33 +0,0 @@ -x-extend: - schema: config-values.yaml -type: object -properties: - internal: - type: object - default: {} - properties: - pythonVersions: - type: array - default: [] - items: - type: string - customWebhookCert: - type: object - default: {} - x-required-for-helm: - - crt - - key - - ca - properties: - crt: - type: string - x-examples: ["YjY0ZW5jX3N0cmluZwo="] - key: - type: string - x-examples: ["YjY0ZW5jX3N0cmluZwo="] - ca: - type: string - x-examples: ["YjY0ZW5jX3N0cmluZwo="] - registry: - type: object - description: "System field, overwritten by Deckhouse. Don't use" diff --git a/templates/sds-common-scheduler-extender/configmap.yaml b/templates/sds-common-scheduler-extender/configmap.yaml new file mode 100644 index 000000000..f01938ab9 --- /dev/null +++ b/templates/sds-common-scheduler-extender/configmap.yaml @@ -0,0 +1,26 @@ +{{- if or (or (hasPrefix "dev" .Values.global.deckhouseVersion) (hasSuffix "dev" .Values.global.deckhouseVersion)) (semverCompare ">=1.64" .Values.global.deckhouseVersion) }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . 
(dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +data: + scheduler-extender-config.yaml: |- + listen: ":8099" + health-probe-bind-address: ":8081" + default-divisor: 1 +{{- if eq .Values.sdsNodeConfigurator.logLevel "ERROR" }} + log-level: "0" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "WARN" }} + log-level: "1" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "INFO" }} + log-level: "2" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "DEBUG" }} + log-level: "3" +{{- else if eq .Values.sdsNodeConfigurator.logLevel "TRACE" }} + log-level: "4" + {{- end }} + +{{- end }} diff --git a/templates/sds-common-scheduler-extender/deployment.yaml b/templates/sds-common-scheduler-extender/deployment.yaml new file mode 100644 index 000000000..ad84f32a0 --- /dev/null +++ b/templates/sds-common-scheduler-extender/deployment.yaml @@ -0,0 +1,122 @@ +{{- define "sds_common_scheduler_extender_resources" }} +cpu: 10m +memory: 25Mi +{{- end }} + +{{- if or (or (hasPrefix "dev" .Values.global.deckhouseVersion) (hasSuffix "dev" .Values.global.deckhouseVersion)) (semverCompare ">=1.64" .Values.global.deckhouseVersion) }} + +{{- if (.Values.global.enabledModules | has "vertical-pod-autoscaler-crd") }} +--- +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +spec: + targetRef: + apiVersion: "apps/v1" + kind: Deployment + name: sds-common-scheduler-extender + updatePolicy: + updateMode: "Auto" + resourcePolicy: + containerPolicies: + - containerName: sds-common-scheduler-extender + minAllowed: + {{- include "sds_common_scheduler_extender_resources" . | nindent 8 }} + maxAllowed: + memory: 40Mi + cpu: 20m +{{- end }} +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender" )) | nindent 2 }} +spec: + minAvailable: {{ include "helm_lib_is_ha_to_value" (list . 1 0) }} + selector: + matchLabels: + app: sds-common-scheduler-extender +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender" )) | nindent 2 }} +spec: + {{- include "helm_lib_deployment_strategy_and_replicas_for_ha" . | nindent 2 }} + revisionHistoryLimit: 2 + selector: + matchLabels: + app: sds-common-scheduler-extender + template: + metadata: + annotations: + checksum/ca: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.ca | sha256sum | quote }} + labels: + app: sds-common-scheduler-extender + spec: + {{- include "helm_lib_priority_class" (tuple . "system-cluster-critical") | nindent 6 }} + {{- include "helm_lib_node_selector" (tuple . "system") | nindent 6 }} + {{- include "helm_lib_tolerations" (tuple . "system") | nindent 6 }} + {{- include "helm_lib_module_pod_security_context_run_as_user_nobody" . | nindent 6 }} + {{- include "helm_lib_pod_anti_affinity_for_ha" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 6 }} + imagePullSecrets: + - name: {{ .Chart.Name }}-module-registry + containers: + - name: sds-common-scheduler-extender + {{- include "helm_lib_module_container_security_context_read_only_root_filesystem_capabilities_drop_all" . 
| nindent 10 }} + image: {{ include "helm_lib_module_image" (list . "sdsCommonSchedulerExtender") }} + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 8 + httpGet: + path: /healthz + port: 8081 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 15 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + scheme: HTTP + periodSeconds: 1 + failureThreshold: 3 + args: + - sds-common-scheduler-extender + - --config=/etc/sds-common-scheduler-extender/scheduler-extender-config.yaml + volumeMounts: + - name: scheduler-extender-config + mountPath: /etc/sds-common-scheduler-extender + readOnly: true + - name: scheduler-extender-certs + mountPath: /etc/sds-common-scheduler-extender/certs + readOnly: true + resources: + requests: + {{- include "helm_lib_module_ephemeral_storage_only_logs" . | nindent 14 }} + {{- if not ( .Values.global.enabledModules | has "vertical-pod-autoscaler-crd") }} + {{- include "sds_common_scheduler_extender_resources" . | nindent 14 }} + {{- end }} + ports: + - containerPort: 8099 + protocol: TCP + name: http + volumes: + - name: scheduler-extender-config + configMap: + defaultMode: 420 + name: sds-common-scheduler-extender + - name: scheduler-extender-certs + secret: + secretName: common-scheduler-extender-https-certs + serviceAccountName: sds-common-scheduler-extender + +{{- end }} diff --git a/templates/sds-common-scheduler-extender/kube-scheduler-webhook-configuration.yaml b/templates/sds-common-scheduler-extender/kube-scheduler-webhook-configuration.yaml new file mode 100644 index 000000000..7d165b3cb --- /dev/null +++ b/templates/sds-common-scheduler-extender/kube-scheduler-webhook-configuration.yaml @@ -0,0 +1,18 @@ +{{- if or (or (hasPrefix "dev" .Values.global.deckhouseVersion) (hasSuffix "dev" .Values.global.deckhouseVersion)) (semverCompare ">=1.64" .Values.global.deckhouseVersion) }} +apiVersion: deckhouse.io/v1alpha1 +kind: KubeSchedulerWebhookConfiguration +metadata: + name: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +webhooks: +- weight: 5 + failurePolicy: Ignore + clientConfig: + service: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + port: 8099 + path: /scheduler + caBundle: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.ca | b64enc }} + timeoutSeconds: 5 +{{- end }} diff --git a/templates/sds-common-scheduler-extender/rbac-for-us.yaml b/templates/sds-common-scheduler-extender/rbac-for-us.yaml new file mode 100644 index 000000000..eba6daedc --- /dev/null +++ b/templates/sds-common-scheduler-extender/rbac-for-us.yaml @@ -0,0 +1,187 @@ +{{- if or (or (hasPrefix "dev" .Values.global.deckhouseVersion) (hasSuffix "dev" .Values.global.deckhouseVersion)) (semverCompare ">=1.64" .Values.global.deckhouseVersion) }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender-volume-scheduler + {{- include "helm_lib_module_labels" (list . 
(dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:volume-scheduler +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create", "get", "update"] + - apiGroups: [ "storage.deckhouse.io" ] + resources: [ "lvmvolumegroups", "localstorageclasses", "replicatedstorageclasses" ] + verbs: [ "list", "watch", "get"] + - apiGroups: ["v1"] + resources: ["persistentvolumeclaims"] + verbs: ["list", "watch", "get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:rsc-reader + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: ["storage.deckhouse.io"] + resources: ["replicatedstorageclasses", "replicatedstoragepools"] + verbs: ["get", "update", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:rsc-reader + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:rsc-reader +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:configmap-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["list", "watch", "get", "create", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:configmap-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . 
(dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:configmap-manager +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:layerstoragevolumes-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: ["internal.linstor.linbit.com"] + resources: ["layerstoragevolumes"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:layerstoragevolumes-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +roleRef: + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:layerstoragevolumes-manager + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:resource-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: ["storage.deckhouse.io"] + resources: ["drbdresourcereplicas"] + verbs: ["get", "update", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:resource-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:resource-manager +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:crd-resource-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +rules: + - apiGroups: ["internal.linstor.linbit.com"] + resources: ["layerresourceids"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:crd-resource-manager + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . 
(dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: d8:{{ .Chart.Name }}:sds-common-scheduler-extender:crd-resource-manager +subjects: + - kind: ServiceAccount + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} \ No newline at end of file diff --git a/templates/sds-common-scheduler-extender/secret.yaml b/templates/sds-common-scheduler-extender/secret.yaml new file mode 100644 index 000000000..88fe2e864 --- /dev/null +++ b/templates/sds-common-scheduler-extender/secret.yaml @@ -0,0 +1,14 @@ +{{- if or (or (hasPrefix "dev" .Values.global.deckhouseVersion) (hasSuffix "dev" .Values.global.deckhouseVersion)) (semverCompare ">=1.64" .Values.global.deckhouseVersion) }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: common-scheduler-extender-https-certs + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender")) | nindent 2 }} +type: kubernetes.io/tls +data: + ca.crt: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.ca | b64enc | quote }} + tls.crt: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.crt | b64enc | quote }} + tls.key: {{ .Values.sdsNodeConfigurator.internal.customSchedulerExtenderCert.key | b64enc | quote }} +{{- end }} diff --git a/templates/sds-common-scheduler-extender/service.yaml b/templates/sds-common-scheduler-extender/service.yaml new file mode 100644 index 000000000..7e2055f3e --- /dev/null +++ b/templates/sds-common-scheduler-extender/service.yaml @@ -0,0 +1,18 @@ +{{- if or (or (hasPrefix "dev" .Values.global.deckhouseVersion) (hasSuffix "dev" .Values.global.deckhouseVersion)) (semverCompare ">=1.64" .Values.global.deckhouseVersion) }} +--- +apiVersion: v1 +kind: Service +metadata: + name: sds-common-scheduler-extender + namespace: d8-{{ .Chart.Name }} + {{- include "helm_lib_module_labels" (list . (dict "app" "sds-common-scheduler-extender" )) | nindent 2 }} +spec: + type: ClusterIP + ports: + - port: 8099 + targetPort: http + protocol: TCP + name: http + selector: + app: sds-common-scheduler-extender +{{- end }}