Sonobuoy Conformance Results #204

Closed · wants to merge 2 commits
2 changes: 2 additions & 0 deletions .gitignore
@@ -11,3 +11,5 @@ workloads/*/dynamic-ingressroute.yaml
/config/*.env.*
/config/*.tfvars
tmp/
entrypoint/conformance/my-sonobuoy-plugins
entrypoint/conformance/*.tar.gz
4 changes: 4 additions & 0 deletions Makefile
@@ -71,6 +71,9 @@ test-upgrade-manual:
test-upgrade-node-replacement:
@go test -timeout=120m -v -tags=upgradereplacement -count=1 ./entrypoint/upgradecluster/... -installVersionOrCommit ${INSTALL_VERSION_OR_COMMIT} -channel ${CHANNEL}

test-run-sonobuoy:
@go test -timeout=170m -v -count=1 ./entrypoint/conformance/... $(if ${SONOBUOY_VERSION},-sonobuoyVersion ${SONOBUOY_VERSION}) --ginkgo.timeout=170m

test-create-mixedos:
@go test -timeout=45m -v -count=1 ./entrypoint/mixedoscluster/... $(if ${SONOBUOY_VERSION},-sonobuoyVersion ${SONOBUOY_VERSION})

@@ -125,3 +128,4 @@ go-check:
shell-check:
@shellcheck modules/airgap/setup/*.sh
@shellcheck modules/ipv6only/scripts/*.sh
@shellcheck scripts/*.sh
19 changes: 18 additions & 1 deletion docs/testing.md
@@ -284,4 +284,21 @@ S3_FOLDER=snapshots
### Not supported/implemented currently for cluster restore:
- Hardened Cluster Setup
- ExternalDB Setup
- Selinux Setup
- Selinux Setup

## Validating Conformance Tests with Sonobuoy
- Please note that the Sonobuoy version has not been updated for a year, and Sonobuoy's functionality is degrading with newer minor versions of k8s.
- Full conformance tests done for patch validations should use at least 3 servers and 1 agent.
- You can use the Makefile target `make test-run-sonobuoy` to run the conformance tests (see the example invocation at the end of this section).
- Alternatively, you can run `go test -timeout=140m -v -count=1 ./entrypoint/conformance/... --ginkgo.timeout=140m`; the Ginkgo timeout must be extended in addition to the `go test` timeout.
- Required vars in the `*.tfvars` file for a minimum conformance configuration:
```
no_of_server_nodes = 1
no_of_worker_nodes = 1
```
- Sonobuoy's output is becoming unreliable for status checks; note the incorrect remaining count of 404 in the sample output below:
```
sonobuoy status
PLUGIN         STATUS     RESULT   COUNT   PROGRESS
e2e            complete   passed   1       Passed: 0, Failed: 0, Remaining:404
systemd-logs   complete   passed   2
Sonobuoy has completed. Use `sonobuoy retrieve` to get results
```
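A typical invocation might look like the following (a sketch, assuming the `*.tfvars` configuration above is already in place; `SONOBUOY_VERSION` is optional and falls back to the version pinned in the test flags):
```
# Run the conformance suite through the Makefile target,
# optionally pinning the Sonobuoy binary version.
SONOBUOY_VERSION=0.57.2 make test-run-sonobuoy

# Or call go test directly; the Ginkgo timeout must be extended
# in addition to the go test timeout.
go test -timeout=170m -v -count=1 ./entrypoint/conformance/... --ginkgo.timeout=170m
```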
99 changes: 99 additions & 0 deletions entrypoint/conformance/conformance_suite_test.go
@@ -0,0 +1,99 @@
package sonobuoyconformance

import (
"flag"
"os"
"strconv"
"strings"
"testing"

"github.com/rancher/distros-test-framework/config"
"github.com/rancher/distros-test-framework/pkg/customflag"
"github.com/rancher/distros-test-framework/pkg/qase"
"github.com/rancher/distros-test-framework/shared"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var (
qaseReport = os.Getenv("REPORT_TO_QASE")
kubeconfig string
cluster *shared.Cluster
cfg *config.Env
err error
)

func TestMain(m *testing.M) {
flag.StringVar(&customflag.ServiceFlag.External.SonobuoyVersion, "sonobuoyVersion", "0.57.2", "Sonobuoy binary version")
flag.Var(&customflag.ServiceFlag.Destroy, "destroy", "Destroy cluster after test")
flag.Parse()

verifyClusterNodes()

cfg, err = config.AddEnv()
if err != nil {
shared.LogLevel("error", "error adding env vars: %w\n", err)
os.Exit(1)
}

kubeconfig = os.Getenv("KUBE_CONFIG")
if kubeconfig == "" {
// gets a cluster from terraform.
cluster = shared.ClusterConfig(cfg)
} else {
// gets a cluster from kubeconfig.
cluster = shared.KubeConfigCluster(kubeconfig)
}

os.Exit(m.Run())
os.Exit(1)
Review comment (Contributor): this code line is unreachable.

}

func TestConformance(t *testing.T) {
RegisterFailHandler(Fail)

RunSpecs(t, "Run Conformance Suite")
}

var _ = ReportAfterSuite("Conformance Suite", func(report Report) {

if strings.ToLower(qaseReport) == "true" {
qaseClient, err := qase.AddQase()
Expect(err).ToNot(HaveOccurred(), "error adding qase")

qaseClient.SpecReportTestResults(qaseClient.Ctx, &report, cfg.InstallVersion)
} else {
shared.LogLevel("info", "Qase reporting is not enabled")
}
})

var _ = AfterSuite(func() {
Review comment (Contributor): will this test have an automatic Qase report? If yes, we need to add it here like the other suites.

if customflag.ServiceFlag.Destroy {
status, err := shared.DestroyCluster(cfg)
Expect(err).NotTo(HaveOccurred())
Expect(status).To(Equal("cluster destroyed"))
}
})

func verifyClusterNodes() {
// if re-running locally, the env variables are not set after cleanup.
shared.LogLevel("info", "verifying cluster configuration matches minimum requirements for conformance tests")
serverNum, err := strconv.Atoi(os.Getenv("no_of_server_nodes"))
if err != nil {
shared.LogLevel("error", "error converting no_of_server_nodes to int: %w", err)
os.Exit(1)
}

agentNum, err := strconv.Atoi(os.Getenv("no_of_agent_nodes"))
if err != nil {
shared.LogLevel("error", "error converting no_of_agent_nodes to int: %w", err)
os.Exit(1)
}

if serverNum < 1 || agentNum < 1 {
shared.LogLevel("error", "%s", "cluster must consist of at least 1 server and 1 agent")
os.Exit(1)
}

}
44 changes: 44 additions & 0 deletions entrypoint/conformance/sonobuoy_test.go
@@ -0,0 +1,44 @@
package sonobuoyconformance

import (
"fmt"

"github.com/rancher/distros-test-framework/pkg/assert"
"github.com/rancher/distros-test-framework/pkg/testcase"

. "github.com/onsi/ginkgo/v2"
)

var _ = Describe("Sonobuoy Conformance Tests...", func() {

It("Starts Up with no issues", func() {
testcase.TestBuildCluster(cluster)
})

It("Validates Node", func() {
testcase.TestNodeStatus(
cluster,
assert.NodeAssertReadyStatus(),
nil,
)
})

It("Validate Pods", func() {
testcase.TestPodStatus(
cluster,
assert.PodAssertRestart(),
assert.PodAssertReady())
})

It("Validates the release's conformance with upstream requirements", func() {
testcase.ConformanceTest("certified-conformance")
})
})

var _ = AfterEach(func() {
if CurrentSpecReport().Failed() {
fmt.Printf("\nFAILED! %s\n\n", CurrentSpecReport().FullText())
} else {
fmt.Printf("\nPASSED! %s\n\n", CurrentSpecReport().FullText())
}
})
40 changes: 10 additions & 30 deletions pkg/testcase/cluster.go
@@ -4,11 +4,9 @@
"fmt"
"strings"

"github.com/rancher/distros-test-framework/pkg/customflag"
"github.com/rancher/distros-test-framework/pkg/testcase/support"
"github.com/rancher/distros-test-framework/shared"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

@@ -50,35 +48,17 @@
}
}

// TestSonobuoyMixedOS runs sonobuoy tests for mixed os cluster (linux + windows) node.
func TestSonobuoyMixedOS(deleteWorkload bool) {
sonobuoyVersion := customflag.ServiceFlag.External.SonobuoyVersion
err := shared.SonobuoyMixedOS("install", sonobuoyVersion)
Expect(err).NotTo(HaveOccurred())
func checkAndPrintAgentNodeIPs(agentNum int, agentIPs []string, isWindows bool) {

Check failure (GitHub Actions / Build and Lint) on line 51 in pkg/testcase/cluster.go: func `checkAndPrintAgentNodeIPs` is unused (unused)
info := "Agent Node IPs:"

cmd := "sonobuoy run --kubeconfig=" + shared.KubeConfigFile +
" --plugin my-sonobuoy-plugins/mixed-workload-e2e/mixed-workload-e2e.yaml" +
" --aggregator-node-selector kubernetes.io/os:linux --wait"
res, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed output: "+res)

cmd = "sonobuoy retrieve --kubeconfig=" + shared.KubeConfigFile
testResultTar, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)

cmd = "sonobuoy results " + testResultTar
res, err = shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)
Expect(res).Should(ContainSubstring("Plugin: mixed-workload-e2e\nStatus: passed\n"))
if isWindows {
info = "Windows " + info
}

if deleteWorkload {
cmd = "sonobuoy delete --all --wait --kubeconfig=" + shared.KubeConfigFile
_, err = shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)
err = shared.SonobuoyMixedOS("delete", sonobuoyVersion)
if err != nil {
GinkgoT().Errorf("error: %v", err)
return
}
if agentNum > 0 {
Expect(agentIPs).ShouldNot(BeEmpty())
shared.LogLevel("info", info+" %v", agentIPs)
} else {
Expect(agentIPs).Should(BeEmpty())
}
}
138 changes: 138 additions & 0 deletions pkg/testcase/conformance.go
@@ -0,0 +1,138 @@
package testcase

import (
"os"
"strings"

"github.com/rancher/distros-test-framework/shared"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestSonobuoyMixedOS runs sonobuoy tests for mixed os cluster (linux + windows) node.
func TestSonobuoyMixedOS(deleteWorkload bool) {
installConformanceBinary()

cmd := "sonobuoy run --kubeconfig=" + shared.KubeConfigFile +
" --plugin my-sonobuoy-plugins/mixed-workload-e2e/mixed-workload-e2e.yaml" +
" --aggregator-node-selector kubernetes.io/os:linux --wait"
res, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed output: "+res)

cmd = "sonobuoy retrieve --kubeconfig=" + shared.KubeConfigFile
testResultTar, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)

cmd = "sonobuoy results " + testResultTar
res, err = shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)
Expect(res).Should(ContainSubstring("Plugin: mixed-workload-e2e\nStatus: passed\n"))
Review comment (Contributor) on lines +23 to +30: Can probably re-use the getResults and parseResults funcs here.

if deleteWorkload {
cmd = "sonobuoy delete --all --wait --kubeconfig=" + shared.KubeConfigFile
_, err = shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)
err = shared.InstallSonobuoy("delete")
if err != nil {
GinkgoT().Errorf("error: %v", err)
return
}
}
Review comment (Contributor) on lines +32 to +41: Can probably re-use the cleanupTests func here.

Reply (Contributor Author): When I started this some months ago, I specifically didn't want to change the existing mixedOS test functionality, but good call-out now.

}

func ConformanceTest(testName string) {
installConformanceBinary()
launchSonobuoyTests(testName)
checkStatus()
testResultTar := getResults()
shared.LogLevel("info", "%s", "testResultTar: "+testResultTar)
rerunFailedTests(testResultTar)
Review comment (Contributor): On every run we wait on a re-run of failed tests, even when one may not be needed?

Reply (Contributor Author): It is not a long wait, and it automatically resolves any flaky tests except for the Cilium tests, which are known/expected conformance failures at this time. It makes more sense to have Sonobuoy retry any tests that do fail, which someone would have to do anyway in response to failing conformance tests. This way, whoever responds to failed conformance tests knows they were already retried in case they were flaky.

Reply (Contributor): Yeah, sure, but can't we simply parse the result in getResults(), check whether there are failed tests, and only call the re-run if there are?

Reply (Contributor Author): This performs essentially identically to what you're recommending, just not in the same steps. Instead of us handling the strings emitted by the results, Sonobuoy itself already parses those results; handling them ourselves would add code complexity. I'm voting we leave the logic in Sonobuoy for this type of conformance test, as it's likely more conclusive.

Reply (Contributor, @fmoral2, Jan 17, 2025): Sorry, I am not understanding. What I am suggesting is to re-run only if we actually find failed tests. Another thing: you are returning on line 106 if it is Cilium, right? Why wait for the call into rerunFailedTests() just to check whether it is Cilium and return? That doesn't make much sense; you should check before, and if it's Cilium, not go in there at all. You are always calling the re-run, but in real usage it will only be needed a few times. I get the idea that if a failure occurs we are already taking care of it in the logic (because you are always re-running, except for Cilium), but my suggestion is that your code should do what it needs to do only when it needs to (see the sketch after this comment):

getResults() --- checkResults() --- failed ones? yes, is it Cilium? don't re-run
getResults() --- checkResults() --- failed ones? yes, is it not Cilium? re-run
getResults() --- checkResults() --- failed ones? no, don't re-run

This way the code is clear, doesn't spend unnecessary time and resources, and is more maintainable.
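A minimal Go sketch of the flow being suggested (the helper names and the `sonobuoy results` failure markers below are assumptions for illustration, not part of this PR):
```
package testcase

import (
	"os"
	"strings"
)

// hasFailedTests is a hypothetical helper: it scans the output of
// `sonobuoy results <tarball>` for failure markers (the exact strings are assumed).
func hasFailedTests(resultsOutput string) bool {
	return strings.Contains(resultsOutput, "Status: failed") ||
		strings.Contains(resultsOutput, "Failed tests:")
}

// shouldRerunFailed encodes the suggested decision: re-run only when failures
// exist and the CNI is not Cilium, since Cilium has known/expected conformance failures.
func shouldRerunFailed(resultsOutput string) bool {
	if !hasFailedTests(resultsOutput) {
		return false
	}

	return !strings.Contains(os.Getenv("cni"), "cilium")
}
```
With something like this, ConformanceTest could call rerunFailedTests only when shouldRerunFailed returns true, instead of unconditionally.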

testResultTar = getResults()
shared.LogLevel("info", "%s", "testResultTar: "+testResultTar)
parseResults(testResultTar)
cleanupTests()
}

func installConformanceBinary() {
shared.LogLevel("info", "installing sonobuoy binary")
err := shared.InstallSonobuoy("install")
Expect(err).NotTo(HaveOccurred())
}

func launchSonobuoyTests(testMode string) {
shared.LogLevel("info", "checking namespace existence")
cmds := "kubectl get namespace sonobuoy --kubeconfig=" + shared.KubeConfigFile
res, _ := shared.RunCommandHost(cmds)

if strings.Contains(res, "Active") {
shared.LogLevel("info", "%s", "sonobuoy namespace is active, waiting for it to complete")
return
}

if strings.Contains(res, "Error from server (NotFound): namespaces \"sonobuoy\" not found") {
cmd := "sonobuoy run --kubeconfig=" + shared.KubeConfigFile +
" --mode=" + testMode + " --kubernetes-version=" + shared.ExtractKubeImageVersion()
_, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred())
}
}

func checkStatus() {
shared.LogLevel("info", "checking status of running tests")
cmd := "sonobuoy status --kubeconfig=" + shared.KubeConfigFile
Eventually(func() string {
res, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred())
return res
}, "170m", "10m").Should(ContainSubstring("Sonobuoy has completed"), "timed out waiting for sonobuoy")
}

func getResults() string {
shared.LogLevel("info", "getting sonobuoy results")
cmd := "sonobuoy retrieve --kubeconfig=" + shared.KubeConfigFile
res, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred())

return res
}

func rerunFailedTests(testResultTar string) {
ciliumExpectedFailures := `
[sig-network] Services should serve endpoints on same port and different protocols
Services should be able to switch session affinity for service with type clusterIP
Services should have session affinity work for service with type clusterIP`

if strings.Contains(os.Getenv("cni"), "cilium") {
shared.LogLevel("info", "Cilium has known issues with conformance tests, skipping re-run")
shared.LogLevel("info", "ciliumExpectedFailures: %s", ciliumExpectedFailures)

return
}

shared.LogLevel("info", "re-running tests that failed from previous run")

cmd := "sonobuoy run --rerun-failed=" + testResultTar + " --kubeconfig=" + shared.KubeConfigFile +
" --kubernetes-version=" + shared.ExtractKubeImageVersion()

res, err := shared.RunCommandHost(cmd)
Expect(err).To(HaveOccurred(), "failed cmd: "+cmd)
Expect(res).Should(ContainSubstring("no tests failed for plugin"))
}

func parseResults(testResultTar string) {
shared.LogLevel("info", "parsing sonobuoy results")
cmd := "sonobuoy results " + testResultTar
res, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)
Expect(res).Should(ContainSubstring("Status: passed"))
shared.LogLevel("info", "%s", "sonobuoy results: "+res)
}

func cleanupTests() {
shared.LogLevel("info", "cleaning up cluster conformance tests and deleting sonobuoy namespace")
cmd := "sonobuoy delete --all --wait --kubeconfig=" + shared.KubeConfigFile
res, err := shared.RunCommandHost(cmd)
Expect(err).NotTo(HaveOccurred(), "failed cmd: "+cmd)
Expect(res).Should(ContainSubstring("deleted"))
}