diff --git a/operator/api/loki/v1/lokistack_types.go b/operator/api/loki/v1/lokistack_types.go index 99f5e2d9723a9..f30aca3c0014c 100644 --- a/operator/api/loki/v1/lokistack_types.go +++ b/operator/api/loki/v1/lokistack_types.go @@ -1295,6 +1295,8 @@ const ( ReasonZoneAwareEmptyLabel LokiStackConditionReason = "ReasonZoneAwareEmptyLabel" // ReasonStorageNeedsSchemaUpdate when the object storage schema version is older than V13 ReasonStorageNeedsSchemaUpdate LokiStackConditionReason = "StorageNeedsSchemaUpdate" + // ReasonInvalidReplicationFactor when the replication factor is equal to or more than the component replicas + ReasonInvalidReplicationFactor LokiStackConditionReason = "ReasonInvalidReplicationFactor" ) // PodStatus is a short description of the status a Pod can be in. diff --git a/operator/api/loki/v1/v1.go b/operator/api/loki/v1/v1.go index 96053c7c268de..0b2f84e54cf34 100644 --- a/operator/api/loki/v1/v1.go +++ b/operator/api/loki/v1/v1.go @@ -106,4 +106,6 @@ var ( ErrSummaryAnnotationMissing = errors.New("rule requires annotation: summary") // ErrDescriptionAnnotationMissing indicates that an alerting rule is missing the description annotation ErrDescriptionAnnotationMissing = errors.New("rule requires annotation: description") + // ErrReplicationFactorTooHigh indicates that the lokistack's replication factor must always be less than the number of ingester replicas + ErrReplicationFactorTooHigh = errors.New("replication factor must be less than replicas") ) diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index 5b248e3a20337..14deb7f6f7ee2 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -2,6 +2,7 @@ package handlers import ( "context" + "errors" "fmt" "os" @@ -164,7 +165,13 @@ func CreateOrUpdateLokiStack( } objects, err := manifests.BuildAll(opts) - if err != nil { + if errors.Is(err, lokiv1.ErrReplicationFactorTooHigh) { + return nil, &status.DegradedError{ + Message: "Invalid configuration: replication factor should be less than ingester replicas", + Reason: lokiv1.ReasonInvalidReplicationFactor, + Requeue: true, + } + } else if err != nil { ll.Error(err, "failed to build manifests") return nil, err } diff --git a/operator/internal/manifests/ingester.go b/operator/internal/manifests/ingester.go index 28c7e7c067f03..627d41077cb0e 100644 --- a/operator/internal/manifests/ingester.go +++ b/operator/internal/manifests/ingester.go @@ -14,6 +14,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + lokiv1 "github.com/grafana/loki/operator/api/loki/v1" "github.com/grafana/loki/operator/internal/manifests/internal/config" "github.com/grafana/loki/operator/internal/manifests/storage" ) @@ -62,11 +63,16 @@ func BuildIngester(opts Options) ([]client.Object, error) { return nil, err } + pdb, err := newIngesterPodDisruptionBudget(opts) + if err != nil { + return nil, err + } + return []client.Object{ statefulSet, NewIngesterGRPCService(opts), NewIngesterHTTPService(opts), - newIngesterPodDisruptionBudget(opts), + pdb, }, nil } @@ -288,13 +294,24 @@ func configureIngesterGRPCServicePKI(sts *appsv1.StatefulSet, opts Options) erro // newIngesterPodDisruptionBudget returns a PodDisruptionBudget for the LokiStack // Ingester pods. -func newIngesterPodDisruptionBudget(opts Options) *policyv1.PodDisruptionBudget { +func newIngesterPodDisruptionBudget(opts Options) (*policyv1.PodDisruptionBudget, error) { l := ComponentLabels(LabelIngesterComponent, opts.Name) - // Default to 1 if not defined in ResourceRequirementsTable for a given size - mu := intstr.FromInt(1) - if opts.ResourceRequirements.Ingester.PDBMinAvailable > 0 { - mu = intstr.FromInt(opts.ResourceRequirements.Ingester.PDBMinAvailable) + pdbMinAvailable := intstr.FromInt32(int32(1)) + switch opts.Stack.Size { + case lokiv1.SizeOneXPico, lokiv1.SizeOneXMedium: + // For these sizes, default Replication.Factor = 2 and Ingester.Replicas = 3 + if opts.Stack.Template.Ingester.Replicas <= opts.Stack.Replication.Factor { + return nil, lokiv1.ErrReplicationFactorTooHigh + } + pdbMinAvailable = intstr.FromInt32(opts.Stack.Replication.Factor) + case lokiv1.SizeOneXExtraSmall, lokiv1.SizeOneXSmall: + // For these sizes, default Replication.Factor = 2 and Ingester.Replicas = 2 + // Therefore set the pdbMinAvailable to 1 to keep 1 spare pod for rolling updates + if opts.Stack.Template.Ingester.Replicas > opts.Stack.Replication.Factor { + pdbMinAvailable = intstr.FromInt32(opts.Stack.Replication.Factor - 1) + } } + return &policyv1.PodDisruptionBudget{ TypeMeta: metav1.TypeMeta{ Kind: "PodDisruptionBudget", @@ -309,7 +326,7 @@ func newIngesterPodDisruptionBudget(opts Options) *policyv1.PodDisruptionBudget Selector: &metav1.LabelSelector{ MatchLabels: l, }, - MinAvailable: &mu, + MinAvailable: &pdbMinAvailable, }, - } + }, nil } diff --git a/operator/internal/manifests/ingester_test.go b/operator/internal/manifests/ingester_test.go index c491b580ad7cf..94b85cfb226c8 100644 --- a/operator/internal/manifests/ingester_test.go +++ b/operator/internal/manifests/ingester_test.go @@ -12,7 +12,6 @@ import ( v1 "github.com/grafana/loki/operator/api/config/v1" lokiv1 "github.com/grafana/loki/operator/api/loki/v1" - "github.com/grafana/loki/operator/internal/manifests/internal" "github.com/grafana/loki/operator/internal/manifests/storage" ) @@ -108,16 +107,19 @@ func TestNewIngesterStatefulSet_SelectorMatchesLabels(t *testing.T) { func TestBuildIngester_PodDisruptionBudget(t *testing.T) { for _, tc := range []struct { Name string + Size lokiv1.LokiStackSizeType PDBMinAvailable int ExpectedMinAvailable int }{ { Name: "Small stack", + Size: lokiv1.SizeOneXSmall, PDBMinAvailable: 1, ExpectedMinAvailable: 1, }, { Name: "Medium stack", + Size: lokiv1.SizeOneXMedium, PDBMinAvailable: 2, ExpectedMinAvailable: 2, }, @@ -127,12 +129,8 @@ func TestBuildIngester_PodDisruptionBudget(t *testing.T) { Name: "abcd", Namespace: "efgh", Gates: v1.FeatureGates{}, - ResourceRequirements: internal.ComponentResources{ - Ingester: internal.ResourceRequirements{ - PDBMinAvailable: tc.PDBMinAvailable, - }, - }, Stack: lokiv1.LokiStackSpec{ + Size: tc.Size, Template: &lokiv1.LokiTemplateSpec{ Ingester: &lokiv1.LokiComponentSpec{ Replicas: rand.Int31(), @@ -141,6 +139,9 @@ func TestBuildIngester_PodDisruptionBudget(t *testing.T) { Tenants: &lokiv1.TenantsSpec{ Mode: lokiv1.OpenshiftLogging, }, + Replication: &lokiv1.ReplicationSpec{ + Factor: int32(2), + }, }, } objs, err := BuildIngester(opts) @@ -158,6 +159,62 @@ func TestBuildIngester_PodDisruptionBudget(t *testing.T) { } } +func TestBuildIngester_PodDisruptionBudgetWithReplicationFactor(t *testing.T) { + ingesterReplicas := 3 + for _, tc := range []struct { + Name string + CustomReplicationFactor int32 + PDBMinAvailable int + ExpectedMinAvailable int + }{ + { + Name: "ingester replicas <= replication factor", + CustomReplicationFactor: 4, + PDBMinAvailable: 2, + ExpectedMinAvailable: 0, + }, + { + Name: "ingester replicas > replication factor", + CustomReplicationFactor: 2, + PDBMinAvailable: 1, + ExpectedMinAvailable: 2, + }, + } { + t.Run(tc.Name, func(t *testing.T) { + opts := Options{ + Name: "abcd", + Namespace: "efgh", + Gates: v1.FeatureGates{}, + Stack: lokiv1.LokiStackSpec{ + Template: &lokiv1.LokiTemplateSpec{ + Ingester: &lokiv1.LokiComponentSpec{ + Replicas: int32(ingesterReplicas), + }, + }, + Tenants: &lokiv1.TenantsSpec{ + Mode: lokiv1.OpenshiftLogging, + }, + Size: lokiv1.SizeOneXPico, + Replication: &lokiv1.ReplicationSpec{ + Factor: tc.CustomReplicationFactor, + }, + }, + } + objs, err := BuildIngester(opts) + + if err != nil { + require.Error(t, err) + } else { + require.NoError(t, err) + pdb := objs[3].(*policyv1.PodDisruptionBudget) + require.NotNil(t, pdb) + require.NotNil(t, pdb.Spec.MinAvailable.IntVal) + require.Equal(t, int32(tc.ExpectedMinAvailable), pdb.Spec.MinAvailable.IntVal) + } + }) + } +} + func TestNewIngesterStatefulSet_TopologySpreadConstraints(t *testing.T) { obj, _ := BuildIngester(Options{ Name: "abcd", diff --git a/operator/internal/manifests/internal/sizes.go b/operator/internal/manifests/internal/sizes.go index 98847cca0760d..b94550df1aae1 100644 --- a/operator/internal/manifests/internal/sizes.go +++ b/operator/internal/manifests/internal/sizes.go @@ -37,18 +37,16 @@ func (c ComponentResources) DeepCopy() ComponentResources { // ResourceRequirements sets CPU, Memory, and PVC requirements for a component type ResourceRequirements struct { - Limits corev1.ResourceList - Requests corev1.ResourceList - PVCSize resource.Quantity - PDBMinAvailable int + Limits corev1.ResourceList + Requests corev1.ResourceList + PVCSize resource.Quantity } func (r *ResourceRequirements) DeepCopy() *ResourceRequirements { return &ResourceRequirements{ - Limits: r.Limits.DeepCopy(), - Requests: r.Requests.DeepCopy(), - PVCSize: r.PVCSize.DeepCopy(), - PDBMinAvailable: r.PDBMinAvailable, + Limits: r.Limits.DeepCopy(), + Requests: r.Requests.DeepCopy(), + PVCSize: r.PVCSize.DeepCopy(), } } @@ -91,7 +89,6 @@ var resourceRequirementsTable = map[lokiv1.LokiStackSizeType]ComponentResources{ corev1.ResourceCPU: resource.MustParse("500m"), corev1.ResourceMemory: resource.MustParse("3Gi"), }, - PDBMinAvailable: 2, }, Distributor: corev1.ResourceRequirements{ Requests: map[corev1.ResourceName]resource.Quantity{ @@ -149,7 +146,6 @@ var resourceRequirementsTable = map[lokiv1.LokiStackSizeType]ComponentResources{ corev1.ResourceCPU: resource.MustParse("2"), corev1.ResourceMemory: resource.MustParse("8Gi"), }, - PDBMinAvailable: 1, }, Distributor: corev1.ResourceRequirements{ Requests: map[corev1.ResourceName]resource.Quantity{ @@ -207,7 +203,6 @@ var resourceRequirementsTable = map[lokiv1.LokiStackSizeType]ComponentResources{ corev1.ResourceCPU: resource.MustParse("4"), corev1.ResourceMemory: resource.MustParse("20Gi"), }, - PDBMinAvailable: 1, }, Distributor: corev1.ResourceRequirements{ Requests: map[corev1.ResourceName]resource.Quantity{ @@ -265,7 +260,6 @@ var resourceRequirementsTable = map[lokiv1.LokiStackSizeType]ComponentResources{ corev1.ResourceCPU: resource.MustParse("6"), corev1.ResourceMemory: resource.MustParse("30Gi"), }, - PDBMinAvailable: 2, }, Distributor: corev1.ResourceRequirements{ Requests: map[corev1.ResourceName]resource.Quantity{