diff --git a/pkg/investigations/pruningcronjoberror/pruningcronjoberror.go b/pkg/investigations/pruningcronjoberror/pruningcronjoberror.go
new file mode 100644
index 00000000..f6b7c974
--- /dev/null
+++ b/pkg/investigations/pruningcronjoberror/pruningcronjoberror.go
@@ -0,0 +1,351 @@
+// Package pruningcronjoberror remediates the PruningCronjobErrorSRE alerts
+// SOP https://github.com/openshift/ops-sop/blob/master/v4/alerts/PruningCronjobErrorSRE.md
+package pruningcronjoberror
+
+import (
+	"bufio"
+	"context"
+	"errors"
+	"fmt"
+	"os/exec"
+	"strings"
+
+	"github.com/openshift/configuration-anomaly-detection/pkg/investigations/investigation"
+	k8sclient "github.com/openshift/configuration-anomaly-detection/pkg/k8s"
+	"github.com/openshift/configuration-anomaly-detection/pkg/logging"
+	"github.com/openshift/configuration-anomaly-detection/pkg/notewriter"
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+type Investigation struct {
+	// kclient provides access to on-cluster resources
+	kclient client.Client
+	// notes holds the messages that will be shared with Primary upon completion
+	notes *notewriter.NoteWriter
+	// recommendations holds the set of actions CAD recommends primary to take
+	recommendations investigationRecommendations
+}
+
+// setup initializes the investigation's k8s client, note writer, and recommendations
+func (i *Investigation) setup(r *investigation.Resources) error {
+	k, err := k8sclient.New(r.Cluster.ID(), r.OcmClient, r.Name)
+	if err != nil {
+		return fmt.Errorf("failed to initialize kubernetes client: %w", err)
+	}
+	i.kclient = k
+	i.notes = notewriter.New(r.Name, logging.RawLogger)
+	i.recommendations = investigationRecommendations{}
+
+	return nil
+}
+
+// Run uses named return values so the deferred cleanup can join its error into
+// the error returned to the caller.
+func (i *Investigation) Run(r *investigation.Resources) (result investigation.InvestigationResult, err error) {
+	// Initialize the k8s client, note writer, and recommendations
+	if err := i.setup(r); err != nil {
+		return result, fmt.Errorf("failed to setup investigation: %w", err)
+	}
+	defer func() {
+		if cleaner, ok := i.kclient.(interface{ Clean() error }); ok {
+			deferErr := cleaner.Clean()
+			if deferErr != nil {
+				logging.Error(deferErr)
+				err = errors.Join(err, deferErr)
+			}
+		}
+	}()
+
+	// Execute the remediation decision tree; a failure here is surfaced in the
+	// PD notes rather than aborting the investigation
+	if stepErr := i.executeRemediationSteps(i.kclient, r); stepErr != nil {
+		i.notes.AppendWarning(fmt.Sprintf("Error during remediation: %v", stepErr))
+	}
+
+	// Summarize recommendations from investigation in PD notes, if any found
+	if len(i.recommendations) > 0 {
+		i.notes.AppendWarning(i.recommendations.summarize())
+	} else {
+		i.notes.AppendSuccess("no recommended actions to take against cluster")
+	}
+
+	return result, r.PdClient.EscalateIncidentWithNote(i.notes.String())
+}
+
+// FilterLines filters the input string line by line, returning only the lines that contain the filter.
+func FilterLines(input string, filter string) (string, error) {
+	var filteredLines strings.Builder
+	reader := strings.NewReader(input)
+	scanner := bufio.NewScanner(reader)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.Contains(line, filter) {
+			filteredLines.WriteString(line)
+			filteredLines.WriteString("\n") // Add newline to preserve original line breaks
+		}
+	}
+	// Check for any errors that occurred during scanning.
+	if err := scanner.Err(); err != nil {
+		return "", fmt.Errorf("error reading input: %w", err)
+	}
+	return filteredLines.String(), nil
+}
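+
+// Illustrative use of FilterLines (the log text here is made up): given
+//
+//	logs := "pulling image\nseccomp filter: errno 524\ndone"
+//	matches, _ := FilterLines(logs, "errno 524")
+//
+// matches is "seccomp filter: errno 524\n".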
+// ExecuteCommand runs a command directly (no shell is involved) and returns
+// its combined stdout and stderr.
+func ExecuteCommand(command string, args ...string) (string, error) {
+	cmd := exec.Command(command, args...)
+	output, err := cmd.CombinedOutput() // Get both stdout and stderr
+	if err != nil {
+		return "", fmt.Errorf("error executing command: %w, output: %s", err, output)
+	}
+	return string(output), nil
+}
+
+type investigationRecommendations []string
+
+func (r investigationRecommendations) summarize() string {
+	return strings.Join(r, "; ")
+}
+
+// addRecommendation adds a recommendation to the investigation
+func (i *Investigation) addRecommendation(recommendation string) {
+	i.recommendations = append(i.recommendations, recommendation)
+}
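+
+// The decision tree below mirrors the SOP's triage order; each step
+// short-circuits as soon as it matches:
+//
+//	1. seccomp errno 524      -> servicelog; drain and reboot or replace node
+//	2. ImagePullBackOff       -> check pull secret and image operator logs
+//	3. ResourceQuota exceeded -> servicelog
+//	4. OVN sandbox errors     -> restart the OVN masters
+//	5. fallback               -> surface collected errors and a retry command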
Errors found: %s", errors)) + i.notes.AppendSuccess(fmt.Sprintf("Restart command: %s", restartCommand)) + i.addRecommendation("Review the errors and execute the restart command if appropriate") + + return nil +} + +// checkSeccompError524 checks if there's a seccomp error 524 in the pruning pods +func (i *Investigation) checkSeccompError524(k8scli client.Client) (bool, error) { + prunerPods := &corev1.PodList{} + err := k8scli.List(context.TODO(), prunerPods, client.InNamespace("openshift-sre-pruning")) + if err != nil { + return false, fmt.Errorf("failed to list pods in openshift-sre-pruning namespace: %w", err) + } + + for _, pod := range prunerPods.Items { + // Check pod events for seccomp error + for _, condition := range pod.Status.Conditions { + if strings.Contains(condition.Message, "seccomp filter: errno 524") { + return true, nil + } + } + + // Check container statuses for seccomp error + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil && + strings.Contains(containerStatus.State.Waiting.Message, "seccomp filter: errno 524") { + return true, nil + } + if containerStatus.State.Terminated != nil && + strings.Contains(containerStatus.State.Terminated.Message, "seccomp filter: errno 524") { + return true, nil + } + } + } + + return false, nil +} + +// checkImagePullBackOffPods checks if there are pods in ImagePullBackOff state +func (i *Investigation) checkImagePullBackOffPods(k8scli client.Client) (bool, error) { + prunerPods := &corev1.PodList{} + err := k8scli.List(context.TODO(), prunerPods, client.InNamespace("openshift-sre-pruning")) + if err != nil { + return false, fmt.Errorf("failed to list pods in openshift-sre-pruning namespace: %w", err) + } + + for _, pod := range prunerPods.Items { + if pod.Status.Phase == corev1.PodPending { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil && + containerStatus.State.Waiting.Reason == "ImagePullBackOff" { + return true, nil + } + } + } + } + + return false, nil +} + +// checkResourceQuotaIssues checks if there are ResourceQuota issues preventing pod creation +func (i *Investigation) checkResourceQuotaIssues(k8scli client.Client) (bool, error) { + jobs := &batchv1.JobList{} + err := k8scli.List(context.TODO(), jobs, client.InNamespace("openshift-sre-pruning")) + if err != nil { + return false, fmt.Errorf("failed to list jobs in openshift-sre-pruning namespace: %w", err) + } + + for _, job := range jobs.Items { + for _, condition := range job.Status.Conditions { + if condition.Type == batchv1.JobFailed && + strings.Contains(condition.Message, "quota") { + return true, nil + } + } + } + + // Also check events in the namespace for quota-related failures + events := &corev1.EventList{} + err = k8scli.List(context.TODO(), events, client.InNamespace("openshift-sre-pruning")) + if err != nil { + return false, fmt.Errorf("failed to list events in openshift-sre-pruning namespace: %w", err) + } + + for _, event := range events.Items { + if strings.Contains(event.Message, "quota") || + strings.Contains(event.Message, "ResourceQuota") { + return true, nil + } + } + + return false, nil +} + +// checkOVNIssues checks if there are OVN-related issues +func (i *Investigation) checkOVNIssues(k8scli client.Client) (bool, error) { + prunerPods := &corev1.PodList{} + err := k8scli.List(context.TODO(), prunerPods, client.InNamespace("openshift-sre-pruning")) + if err != nil { + return false, fmt.Errorf("failed to list pods in openshift-sre-pruning 
namespace: %w", err) + } + + for _, pod := range prunerPods.Items { + for _, condition := range pod.Status.Conditions { + if strings.Contains(condition.Message, "context deadline exceeded while waiting for annotations") || + strings.Contains(condition.Message, "failed to create pod network sandbox") || + strings.Contains(condition.Message, "ovn-kubernetes") { + return true, nil + } + } + } + + // Check events for OVN-related failures + events := &corev1.EventList{} + err = k8scli.List(context.TODO(), events, client.InNamespace("openshift-sre-pruning")) + if err != nil { + return false, fmt.Errorf("failed to list events in openshift-sre-pruning namespace: %w", err) + } + + for _, event := range events.Items { + if strings.Contains(event.Message, "ovn-kubernetes") || + strings.Contains(event.Message, "context deadline exceeded") { + return true, nil + } + } + + return false, nil +} + +// getErrorsAndRestartCommand collects errors and provides restart command +func (i *Investigation) getErrorsAndRestartCommand(k8scli client.Client) (string, string) { + var errors []string + var failedJobs []string + + // Get failed jobs + jobs := &batchv1.JobList{} + err := k8scli.List(context.TODO(), jobs, client.InNamespace("openshift-sre-pruning")) + if err != nil { + errors = append(errors, fmt.Sprintf("Failed to list jobs: %v", err)) + } else { + for _, job := range jobs.Items { + for _, condition := range job.Status.Conditions { + if condition.Type == batchv1.JobFailed { + errors = append(errors, fmt.Sprintf("Job %s failed: %s", job.Name, condition.Message)) + failedJobs = append(failedJobs, job.Name) + } + } + } + } + + // Get pod errors + pods := &corev1.PodList{} + err = k8scli.List(context.TODO(), pods, client.InNamespace("openshift-sre-pruning")) + if err != nil { + errors = append(errors, fmt.Sprintf("Failed to list pods: %v", err)) + } else { + for _, pod := range pods.Items { + if pod.Status.Phase == corev1.PodFailed { + errors = append(errors, fmt.Sprintf("Pod %s failed: %s", pod.Name, pod.Status.Message)) + } + } + } + + // Generate restart command + restartCommand := "ocm backplane managedjob create SREP/retry-failed-pruning-cronjob" + if len(failedJobs) > 0 { + restartCommand += fmt.Sprintf(" # This will retry failed jobs: %s", strings.Join(failedJobs, ", ")) + } + + errorSummary := "No specific errors found" + if len(errors) > 0 { + errorSummary = strings.Join(errors, "; ") + } + + return errorSummary, restartCommand +} diff --git a/pkg/investigations/pruningcronjoberror/pruningcronjoberror_test.go b/pkg/investigations/pruningcronjoberror/pruningcronjoberror_test.go new file mode 100644 index 00000000..4c8ed9a7 --- /dev/null +++ b/pkg/investigations/pruningcronjoberror/pruningcronjoberror_test.go @@ -0,0 +1,643 @@ +package pruningcronjoberror + +import ( + "strings" + "testing" + "time" + + "github.com/openshift/configuration-anomaly-detection/pkg/investigations/investigation" + "github.com/openshift/configuration-anomaly-detection/pkg/logging" + "github.com/openshift/configuration-anomaly-detection/pkg/notewriter" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestInvestigation_checkSeccompError524(t *testing.T) { + tests := []struct { + name string + objects []client.Object + expected bool + wantErr bool + }{ + { + name: "detects seccomp error in pod condition", + objects: 
diff --git a/pkg/investigations/pruningcronjoberror/pruningcronjoberror_test.go b/pkg/investigations/pruningcronjoberror/pruningcronjoberror_test.go
new file mode 100644
index 00000000..4c8ed9a7
--- /dev/null
+++ b/pkg/investigations/pruningcronjoberror/pruningcronjoberror_test.go
@@ -0,0 +1,643 @@
+package pruningcronjoberror
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/openshift/configuration-anomaly-detection/pkg/investigations/investigation"
+	"github.com/openshift/configuration-anomaly-detection/pkg/logging"
+	"github.com/openshift/configuration-anomaly-detection/pkg/notewriter"
+
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/scheme"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+func TestInvestigation_checkSeccompError524(t *testing.T) {
+	tests := []struct {
+		name     string
+		objects  []client.Object
+		expected bool
+		wantErr  bool
+	}{
+		{
+			name: "detects seccomp error in pod condition",
+			objects: []client.Object{
+				newPodWithCondition("pod-with-seccomp-condition", "Error: container create failed: unable to init seccomp: error loading seccomp filter: errno 524"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "detects seccomp error in container waiting state",
+			objects: []client.Object{
+				newPodWithWaitingContainer("pod-with-seccomp-waiting", "Error: container create failed: seccomp filter: errno 524"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "detects seccomp error in container terminated state",
+			objects: []client.Object{
+				newPodWithTerminatedContainer("pod-with-seccomp-terminated", "Error: container create failed: seccomp filter: errno 524"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "no seccomp error found",
+			objects: []client.Object{
+				newPodWithCondition("pod-without-seccomp", "Normal pod condition"),
+				newPodWithWaitingContainer("pod-without-seccomp-waiting", "ImagePullBackOff"),
+			},
+			expected: false,
+			wantErr:  false,
+		},
+		{
+			name:     "no pods in namespace",
+			objects:  []client.Object{},
+			expected: false,
+			wantErr:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			i, err := newTestInvestigation(tt.objects...)
+			if err != nil {
+				t.Fatalf("failed to create test investigation: %v", err)
+			}
+
+			got, err := i.checkSeccompError524(i.kclient)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("checkSeccompError524() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.expected {
+				t.Errorf("checkSeccompError524() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestInvestigation_checkImagePullBackOffPods(t *testing.T) {
+	tests := []struct {
+		name     string
+		objects  []client.Object
+		expected bool
+		wantErr  bool
+	}{
+		{
+			name: "detects ImagePullBackOff pod",
+			objects: []client.Object{
+				newPodWithImagePullBackOff("pod-with-imagepullbackoff"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "no ImagePullBackOff pods",
+			objects: []client.Object{
+				newRunningPod("running-pod"),
+				newPodWithWaitingContainer("pod-waiting", "ContainerCreating"),
+			},
+			expected: false,
+			wantErr:  false,
+		},
+		{
+			name:     "no pods in namespace",
+			objects:  []client.Object{},
+			expected: false,
+			wantErr:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			i, err := newTestInvestigation(tt.objects...)
+			if err != nil {
+				t.Fatalf("failed to create test investigation: %v", err)
+			}
+
+			got, err := i.checkImagePullBackOffPods(i.kclient)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("checkImagePullBackOffPods() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.expected {
+				t.Errorf("checkImagePullBackOffPods() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
+func TestInvestigation_checkResourceQuotaIssues(t *testing.T) {
+	tests := []struct {
+		name     string
+		objects  []client.Object
+		expected bool
+		wantErr  bool
+	}{
+		{
+			name: "detects quota issue in job condition",
+			objects: []client.Object{
+				newFailedJobWithQuotaError("job-with-quota-error"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "detects quota issue in event",
+			objects: []client.Object{
+				newEventWithQuotaError("quota-event"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "no quota issues",
+			objects: []client.Object{
+				newSuccessfulJob("successful-job"),
+				newEvent("normal-event", "Successfully created pod"),
+			},
+			expected: false,
+			wantErr:  false,
+		},
+		{
+			name:     "no objects in namespace",
+			objects:  []client.Object{},
+			expected: false,
+			wantErr:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			i, err := newTestInvestigation(tt.objects...)
+			if err != nil {
+				t.Fatalf("failed to create test investigation: %v", err)
+			}
+
+			got, err := i.checkResourceQuotaIssues(i.kclient)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("checkResourceQuotaIssues() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.expected {
+				t.Errorf("checkResourceQuotaIssues() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestInvestigation_checkOVNIssues(t *testing.T) {
+	tests := []struct {
+		name     string
+		objects  []client.Object
+		expected bool
+		wantErr  bool
+	}{
+		{
+			name: "detects OVN issue in pod condition",
+			objects: []client.Object{
+				newPodWithCondition("pod-with-ovn-condition", "context deadline exceeded while waiting for annotations"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "detects OVN issue in event",
+			objects: []client.Object{
+				newEvent("ovn-event", "failed to create pod network sandbox: ovn-kubernetes error"),
+			},
+			expected: true,
+			wantErr:  false,
+		},
+		{
+			name: "no OVN issues",
+			objects: []client.Object{
+				newRunningPod("normal-pod"),
+				newEvent("normal-event", "Successfully created pod"),
+			},
+			expected: false,
+			wantErr:  false,
+		},
+		{
+			name:     "no objects in namespace",
+			objects:  []client.Object{},
+			expected: false,
+			wantErr:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			i, err := newTestInvestigation(tt.objects...)
+			if err != nil {
+				t.Fatalf("failed to create test investigation: %v", err)
+			}
+
+			got, err := i.checkOVNIssues(i.kclient)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("checkOVNIssues() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.expected {
+				t.Errorf("checkOVNIssues() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
+func TestInvestigation_getErrorsAndRestartCommand(t *testing.T) {
+	tests := []struct {
+		name            string
+		objects         []client.Object
+		expectedErrors  string
+		expectedCommand string
+	}{
+		{
+			name: "failed job and pod",
+			objects: []client.Object{
+				newFailedJob("failed-job", "Job failed due to timeout"),
+				newFailedPod("failed-pod"),
+			},
+			expectedErrors:  "Job failed-job failed: Job failed due to timeout; Pod failed-pod failed:",
+			expectedCommand: "ocm backplane managedjob create SREP/retry-failed-pruning-cronjob # This will retry failed jobs: failed-job",
+		},
+		{
+			name: "no failures",
+			objects: []client.Object{
+				newSuccessfulJob("successful-job"),
+				newRunningPod("running-pod"),
+			},
+			expectedErrors:  "No specific errors found",
+			expectedCommand: "ocm backplane managedjob create SREP/retry-failed-pruning-cronjob",
+		},
+		{
+			name:            "no objects",
+			objects:         []client.Object{},
+			expectedErrors:  "No specific errors found",
+			expectedCommand: "ocm backplane managedjob create SREP/retry-failed-pruning-cronjob",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			i, err := newTestInvestigation(tt.objects...)
+			if err != nil {
+				t.Fatalf("failed to create test investigation: %v", err)
+			}
+
+			errors, command := i.getErrorsAndRestartCommand(i.kclient)
+			if !strings.Contains(errors, tt.expectedErrors) {
+				t.Errorf("getErrorsAndRestartCommand() errors = %v, want to contain %v", errors, tt.expectedErrors)
+			}
+			if !strings.Contains(command, "ocm backplane managedjob create SREP/retry-failed-pruning-cronjob") {
+				t.Errorf("getErrorsAndRestartCommand() command = %v, want to contain 'ocm backplane managedjob create SREP/retry-failed-pruning-cronjob'", command)
+			}
+		})
+	}
+}
+
+func TestInvestigation_executeRemediationSteps(t *testing.T) {
+	type result struct {
+		recommendations []string
+		notes           []string
+	}
+
+	tests := []struct {
+		name    string
+		objects []client.Object
+		want    result
+	}{
+		{
+			name: "seccomp error detected",
+			objects: []client.Object{
+				newPodWithWaitingContainer("seccomp-pod", "seccomp filter: errno 524"),
+			},
+			want: result{
+				recommendations: []string{"Send Servicelog for Seccomp Error 524", "Drain and reboot or replace the affected node"},
+				notes:           []string{"Seccomp Error 524 detected"},
+			},
+		},
+		{
+			name: "imagepullbackoff detected",
+			objects: []client.Object{
+				newPodWithImagePullBackOff("imagepull-pod"),
+			},
+			want: result{
+				recommendations: []string{"Check whether the pull secret is valid", "Check cluster-image-operator logs for errors"},
+				notes:           []string{"ImagePullBackOff state detected"},
+			},
+		},
+		{
+			name: "resource quota detected",
+			objects: []client.Object{
+				newFailedJobWithQuotaError("quota-job"),
+			},
+			want: result{
+				recommendations: []string{"Send Servicelog for ResourceQuota issue"},
+				notes:           []string{"ResourceQuota issue detected"},
+			},
+		},
+		{
+			name: "ovn issue detected",
+			objects: []client.Object{
+				newPodWithCondition("ovn-pod", "context deadline exceeded while waiting for annotations"),
+			},
+			want: result{
+				recommendations: []string{"Restart OVN masters: oc delete po -n openshift-ovn-kubernetes -l app=ovnkube-master"},
+				notes:           []string{"OVN issue detected"},
+			},
+		},
+		{
+			name: "fallback case - no specific issues",
+			objects: []client.Object{
+				newFailedJob("generic-failed-job", "Generic failure"),
+			},
+			want: result{
+				recommendations: []string{"Review the errors and execute the restart command if appropriate"},
+				notes:           []string{"No specific issue detected"},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			i, err := newTestInvestigation(tt.objects...)
+			if err != nil {
+				t.Fatalf("failed to create test investigation: %v", err)
+			}
+
+			// Mock investigation.Resources (minimal setup for testing)
+			mockResources := &investigation.Resources{}
+
+			err = i.executeRemediationSteps(i.kclient, mockResources)
+			if err != nil {
+				t.Errorf("executeRemediationSteps() error = %v", err)
+				return
+			}
+
+			// Verify recommendations
+			if len(i.recommendations) != len(tt.want.recommendations) {
+				t.Errorf("executeRemediationSteps() recommendations count = %d, want %d", len(i.recommendations), len(tt.want.recommendations))
+			}
+
+			for _, expectedRec := range tt.want.recommendations {
+				found := false
+				for _, rec := range i.recommendations {
+					if rec == expectedRec {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Errorf("executeRemediationSteps() missing recommendation: %s", expectedRec)
+				}
+			}
+
+			// Verify notes contain expected messages
+			notes := i.notes.String()
+			for _, expectedNote := range tt.want.notes {
+				if !strings.Contains(notes, expectedNote) {
+					t.Errorf("executeRemediationSteps() notes = %v, want to contain %v", notes, expectedNote)
+				}
+			}
+		})
+	}
+}
+
+// Helper functions to create test objects
+
+func newPodWithCondition(name, message string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: corev1.PodStatus{
+			Conditions: []corev1.PodCondition{
+				{
+					Type:    corev1.PodReady,
+					Status:  corev1.ConditionFalse,
+					Message: message,
+				},
+			},
+		},
+	}
+}
+
+func newPodWithWaitingContainer(name, message string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodPending,
+			ContainerStatuses: []corev1.ContainerStatus{
+				{
+					Name: "container",
+					State: corev1.ContainerState{
+						Waiting: &corev1.ContainerStateWaiting{
+							Reason:  "Error",
+							Message: message,
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func newPodWithTerminatedContainer(name, message string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodFailed,
+			ContainerStatuses: []corev1.ContainerStatus{
+				{
+					Name: "container",
+					State: corev1.ContainerState{
+						Terminated: &corev1.ContainerStateTerminated{
+							Reason:  "Error",
+							Message: message,
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func newPodWithImagePullBackOff(name string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodPending,
+			ContainerStatuses: []corev1.ContainerStatus{
+				{
+					Name: "container",
+					State: corev1.ContainerState{
+						Waiting: &corev1.ContainerStateWaiting{
+							Reason: "ImagePullBackOff",
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func newRunningPod(name string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodRunning,
+			ContainerStatuses: []corev1.ContainerStatus{
+				{
+					Name: "container",
+					State: corev1.ContainerState{
+						Running: &corev1.ContainerStateRunning{
+							StartedAt: metav1.Time{Time: time.Now()},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func newFailedPod(name string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: corev1.PodStatus{
+			Phase:   corev1.PodFailed,
+			Message: "Pod failed",
+		},
+	}
+}
+
+func newFailedJobWithQuotaError(name string) *batchv1.Job {
+	return &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: batchv1.JobStatus{
+			Conditions: []batchv1.JobCondition{
+				{
+					Type:    batchv1.JobFailed,
+					Status:  corev1.ConditionTrue,
+					Message: "pods are forbidden: failed quota: must specify limits.cpu,limits.memory",
+				},
+			},
+		},
+	}
+}
+
+func newFailedJob(name, message string) *batchv1.Job {
+	return &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: batchv1.JobStatus{
+			Conditions: []batchv1.JobCondition{
+				{
+					Type:    batchv1.JobFailed,
+					Status:  corev1.ConditionTrue,
+					Message: message,
+				},
+			},
+		},
+	}
+}
+
+func newSuccessfulJob(name string) *batchv1.Job {
+	return &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Status: batchv1.JobStatus{
+			Conditions: []batchv1.JobCondition{
+				{
+					Type:   batchv1.JobComplete,
+					Status: corev1.ConditionTrue,
+				},
+			},
+		},
+	}
+}
+
+func newEventWithQuotaError(name string) *corev1.Event {
+	return &corev1.Event{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Message: "Error creating: pods are forbidden: failed quota: ResourceQuota exceeded",
+	}
+}
+
+func newEvent(name, message string) *corev1.Event {
+	return &corev1.Event{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: "openshift-sre-pruning",
+		},
+		Message: message,
+	}
+}
+
+func newFakeClient(objs ...client.Object) (client.Client, error) {
+	s := scheme.Scheme
+	err := batchv1.AddToScheme(s)
+	if err != nil {
+		return nil, err
+	}
+
+	c := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build()
+	return c, nil
+}
+
+type clientImpl struct {
+	client.Client
+}
+
+func (c clientImpl) Clean() error {
+	return nil
+}
+
+func newTestInvestigation(testObjects ...client.Object) (Investigation, error) {
+	fakeClient, err := newFakeClient(testObjects...)
+	if err != nil {
+		return Investigation{}, err
+	}
+
+	i := Investigation{
+		kclient:         clientImpl{fakeClient},
+		notes:           notewriter.New("testing", logging.RawLogger),
+		recommendations: investigationRecommendations{},
+	}
+	return i, nil
+}
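+
+// A minimal sketch of a unit test for the FilterLines helper; the sample log
+// lines are illustrative.
+func TestFilterLines(t *testing.T) {
+	input := "keep: errno 524\ndrop: healthy\nkeep: errno 524 again\n"
+	got, err := FilterLines(input, "errno 524")
+	if err != nil {
+		t.Fatalf("FilterLines() error = %v", err)
+	}
+	want := "keep: errno 524\nkeep: errno 524 again\n"
+	if got != want {
+		t.Errorf("FilterLines() = %q, want %q", got, want)
+	}
+}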