Skip to content

Commit 61ecf05

Browse files
authored
fix cluster restore and cert rotate (#209)
Signed-off-by: Francisco <[email protected]>
1 parent 83e207d commit 61ecf05

13 files changed

+321
-192
lines changed

docs/examples/.env.example

+35-32
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,56 @@
11
# Framework config variables
2+
3+
### Variables that would need to change for different products,test cases,pem key ####
4+
### Required variables to be set by user ###
5+
6+
# Note: PRODUCT can be k3s or rke2
27
ENV_PRODUCT="{{PRODUCT}}"
38
ENV_TFVARS="{{PRODUCT}}".tfvars
4-
# Note: PRODUCT can be k3s or rke2
59

10+
# test pkg name # which is located on /entrypoint/
11+
TEST_DIR=upgradecluster
612

7-
#ACCESS_KEY_LOCAL="˜/aws-key.pem"
8-
ACCESS_KEY_LOCAL="~/aws-key.pem"
13+
# test tag name # only needed for tests pkg with multiple test cases.
14+
# can find example here - https://github.com/rancher/distros-test-framework/blob/83e207dcf2aa4964db881f87e55f1eb0b031887f/entrypoint/upgradecluster/upgrademanual_test.go#L1
15+
TEST_TAG=upgrademanual
916

1017

18+
# aws key path locally stored
19+
ACCESS_KEY_LOCAL="~/{key-name}.pem"
20+
21+
##### Variables that would not need to change ######
22+
### Optional variables to be set by user ###
1123

1224
###### Test runner config variables ########
1325
# image name #
14-
IMG_NAME=er
26+
IMG_NAME=test
1527

1628
# container name , default == distros #
1729
TAG_NAME=
1830

19-
# test pkg name #
20-
TEST_DIR=createcluster
21-
2231
# test state img name #
23-
TEST_STATE=2
24-
25-
# test tag name #
26-
TEST_TAG=upgrademanual
32+
TEST_STATE=
2733

2834
# log level could be "" = (INFO default), INFO, DEBUG or WARN #
29-
LOG_LEVEL=info
30-
###### Test runner config variables ########
35+
LOG_LEVEL=debug
3136

37+
# test state img name #
38+
TEST_STATE=
39+
40+
41+
####### custom tfvars override ###########
42+
INSTALL_VERSION=v1.30.2+k3s1 OR
43+
INSTALL_VERSION=v1.30.2+rke2r1
44+
#
45+
RKE2_CHANNEL=testing
46+
K3S_CHANNEL=testing
47+
NO_OF_SERVER_NODES=1
48+
NO_OF_WORKER_NODES=1
49+
SERVER_FLAGS=protect-kernel-defaults: true\\nselinux: true
50+
WORKER_FLAGS=protect-kernel-defaults: true\\nselinux: true
51+
ARCH=arm
52+
DATASTORE_TYPE=etcd
53+
####### custom vars tfvars override ###########
3254

3355

3456
####### Version bump test variables + cli flags ###########
@@ -46,22 +68,3 @@ DELETE_WORKLAOD=
4668
####### Version bump test variables ###########
4769

4870

49-
50-
####### custom tfvars override ###########
51-
INSTALL_VERSION=v1.30.2+k3s1 OR
52-
INSTALL_VERSION=v1.30.2+rke2r1
53-
54-
RKE2_CHANNEL=testing
55-
K3S_CHANNEL=testing
56-
NO_OF_SERVER_NODES=1
57-
NO_OF_WORKER_NODES=1
58-
SERVER_FLAGS=protect-kernel-defaults: true\\nselinux: true
59-
WORKER_FLAGS=protect-kernel-defaults: true\\nselinux: true
60-
VOLUME_SIZE=40
61-
NODE_OS=
62-
AWS_AMI=
63-
AWS_USER=
64-
INSTALL_MODE=
65-
DATASTORE_TYPE=
66-
VOLUME_SIZE=40
67-
####### custom vars tfvars override ###########

entrypoint/clusterrestore/clusterrestore_suite_test.go

+4-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package clusterrestore
22

33
import (
44
"flag"
5-
"fmt"
65
"os"
76
"strings"
87
"testing"
@@ -30,7 +29,8 @@ func TestMain(m *testing.M) {
3029
var err error
3130
flags = &customflag.ServiceFlag
3231
flag.Var(&flags.Destroy, "destroy", "Destroy cluster after test")
33-
flag.StringVar(&flags.S3Flags.Bucket, "s3Bucket", "distros_qa", "s3 bucket to store snapshots")
32+
flag.Var(&flags.Channel, "channel", "channel to use on install")
33+
flag.StringVar(&flags.S3Flags.Bucket, "s3Bucket", "distrosqa", "s3 bucket to store snapshots")
3434
flag.StringVar(&flags.S3Flags.Folder, "s3Folder", "snapshots", "s3 folder to store snapshots")
3535
flag.Parse()
3636

@@ -94,7 +94,6 @@ func FailWithReport(message string, callerSkip ...int) {
9494

9595
func checkUnsupportedFlags() {
9696
serverFlags := os.Getenv("server_flags")
97-
fmt.Printf("server flags: %s\n", serverFlags)
9897

9998
if strings.Contains(serverFlags, "profile") ||
10099
strings.Contains(serverFlags, "selinux") ||
@@ -108,8 +107,8 @@ func checkUnsupportedFlags() {
108107
func cleanS3Snapshot() {
109108
shared.LogLevel("info", "cleaning s3 snapshots")
110109

111-
err := awsClient.DeleteS3Object(customflag.ServiceFlag.S3Flags.Bucket, "on-demand-ip")
110+
err := awsClient.DeleteS3Object(customflag.ServiceFlag.S3Flags.Bucket, customflag.ServiceFlag.S3Flags.Folder)
112111
if err != nil {
113-
shared.LogLevel("error", "error deleting object: %s", err)
112+
shared.LogLevel("error", "error deleting object: %v", err)
114113
}
115114
}

pkg/aws/s3.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ func (c Client) GetObjects(bucket string) ([]*s3.Object, error) {
2323
return output.Contents, nil
2424
}
2525

26-
func (c Client) DeleteS3Object(bucket, prefix string) error {
26+
func (c Client) DeleteS3Object(bucket, folder string) error {
2727
input := &s3.ListObjectsV2Input{
2828
Bucket: aws.String(bucket),
29-
Prefix: aws.String(prefix),
29+
Prefix: aws.String(folder),
3030
}
3131

3232
objList, err := c.s3.ListObjectsV2(input)
@@ -35,14 +35,14 @@ func (c Client) DeleteS3Object(bucket, prefix string) error {
3535
}
3636

3737
if len(objList.Contents) == 0 {
38-
return fmt.Errorf("no objects found with prefix %s", prefix)
38+
return fmt.Errorf("no objects found with prefix %s", *input.Prefix)
3939
}
4040

4141
sort.Slice(objList.Contents, func(i, j int) bool {
4242
return objList.Contents[i].LastModified.After(*objList.Contents[j].LastModified)
4343
})
44-
key := *objList.Contents[0].Key
4544

45+
key := aws.StringValue(objList.Contents[0].Key)
4646
delInput := &s3.DeleteObjectInput{
4747
Bucket: aws.String(bucket),
4848
Key: aws.String(key),

pkg/testcase/certrotate.go

+53-16
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,64 @@ import (
1010
)
1111

1212
func TestCertRotate(cluster *shared.Cluster) {
13-
certRotate(cluster.Config.Product, cluster.ServerIPs)
14-
15-
ip, manageError := shared.ManageService(cluster.Config.Product, "restart", "agent", cluster.AgentIPs)
16-
Expect(manageError).NotTo(HaveOccurred(), "error restarting agent node ip"+ip)
13+
ms := shared.NewManageService(5, 5)
14+
certRotate(ms, cluster.Config.Product, cluster.ServerIPs)
15+
16+
actions := []shared.ServiceAction{
17+
{Service: cluster.Config.Product,
18+
Action: "restart",
19+
NodeType: "agent",
20+
},
21+
{
22+
Service: cluster.Config.Product,
23+
Action: "status",
24+
NodeType: "agent",
25+
},
26+
}
27+
for _, agentIP := range cluster.AgentIPs {
28+
output, err := ms.ManageService(agentIP, actions)
29+
if output != "" {
30+
Expect(output).To(ContainSubstring("active "), fmt.Sprintf("error restarting %s agent service for node ip: %s",
31+
cluster.Config.Product, agentIP))
32+
}
33+
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("error restarting %s service on %s", cluster.Config.Product, agentIP))
34+
}
1735

1836
verifyTLSDirContent(cluster.Config.Product, cluster.ServerIPs)
1937
}
2038

2139
// certRotate Rotate certificate for etcd only and cp only nodes.
22-
func certRotate(product string, ips []string) {
23-
ip, stopError := shared.ManageService(product, "stop", "server", ips)
24-
Expect(stopError).NotTo(HaveOccurred(),
25-
fmt.Sprintf("error stopping %s service for node ip: %s", product, ip))
26-
27-
ip, rotateError := shared.CertRotate(product, ips)
28-
Expect(rotateError).NotTo(HaveOccurred(),
29-
fmt.Sprintf("error running certificate rotate for %s service on %s", product, ip))
30-
31-
ip, startError := shared.ManageService(product, "start", "server", ips)
32-
Expect(startError).NotTo(HaveOccurred(),
33-
fmt.Sprintf("error starting %s service for node ip: %s", product, ip))
40+
func certRotate(ms *shared.ManageService, product string, ips []string) {
41+
for _, ip := range ips {
42+
actions := []shared.ServiceAction{
43+
{
44+
Service: product,
45+
Action: "stop",
46+
NodeType: "server",
47+
},
48+
{
49+
Service: product,
50+
Action: "rotate",
51+
},
52+
{
53+
Service: product,
54+
Action: "start",
55+
NodeType: "server",
56+
},
57+
{
58+
Service: product,
59+
Action: "status",
60+
NodeType: "server",
61+
},
62+
}
63+
64+
output, err := ms.ManageService(ip, actions)
65+
if output != "" {
66+
Expect(output).To(ContainSubstring("active "),
67+
fmt.Sprintf("error restarting %s service for node ip: %s", product, ip))
68+
}
69+
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("error rotating certificate for %s service on %s", product, ip))
70+
}
3471
}
3572

3673
// verifyIdenticalFiles Verify the actual and expected identical file lists match.

pkg/testcase/clusterreset.go

+25-10
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ func TestClusterReset(cluster *shared.Cluster) {
1212
killall(cluster)
1313
shared.LogLevel("info", "%s-service killed", cluster.Config.Product)
1414

15-
stopServer(cluster)
15+
ms := shared.NewManageService(5, 5)
16+
stopServer(cluster, ms)
1617
shared.LogLevel("info", "%s-service stopped", cluster.Config.Product)
1718

1819
productLocationCmd, findErr := shared.FindPath(cluster.Config.Product, cluster.ServerIPs[0])
@@ -27,7 +28,7 @@ func TestClusterReset(cluster *shared.Cluster) {
2728
deleteDataDirectories(cluster)
2829
shared.LogLevel("info", "data directories deleted")
2930

30-
restartServer(cluster)
31+
restartServer(cluster, ms)
3132
shared.LogLevel("info", "%s-service restarted", cluster.Config.Product)
3233
}
3334

@@ -68,17 +69,22 @@ func killall(cluster *shared.Cluster) {
6869
Expect(res).To(SatisfyAny(ContainSubstring("timed out"), ContainSubstring("refused")))
6970
}
7071

71-
func stopServer(cluster *shared.Cluster) {
72-
_, stopErr := shared.ManageService(cluster.Config.Product, "stop", "server", []string{cluster.ServerIPs[0]})
72+
func stopServer(cluster *shared.Cluster, ms *shared.ManageService) {
73+
action := shared.ServiceAction{
74+
Service: cluster.Config.Product,
75+
Action: "stop",
76+
NodeType: "server",
77+
}
78+
_, stopErr := ms.ManageService(cluster.ServerIPs[0], []shared.ServiceAction{action})
7379
Expect(stopErr).NotTo(HaveOccurred())
7480

75-
// due to the stop command, the service will be inactive, so ssh command to node will fail.
81+
// due to the stop command, this should fail.
7682
cmd := fmt.Sprintf("sudo systemctl status %s-server", cluster.Config.Product)
7783
_, err := shared.RunCommandOnNode(cmd, cluster.ServerIPs[0])
7884
Expect(err).To(HaveOccurred())
7985
}
8086

81-
func restartServer(cluster *shared.Cluster) {
87+
func restartServer(cluster *shared.Cluster, ms *shared.ManageService) {
8288
var startFirst []string
8389
var startLast []string
8490

@@ -91,11 +97,20 @@ func restartServer(cluster *shared.Cluster) {
9197
startLast = append(startLast, serverIP)
9298
}
9399

94-
_, startErr := shared.ManageService(cluster.Config.Product, "restart", "server", startFirst)
95-
Expect(startErr).NotTo(HaveOccurred())
100+
action := shared.ServiceAction{
101+
Service: cluster.Config.Product,
102+
Action: "restart",
103+
NodeType: "server",
104+
}
105+
for _, ip := range startFirst {
106+
_, startErr := ms.ManageService(ip, []shared.ServiceAction{action})
107+
Expect(startErr).NotTo(HaveOccurred())
108+
}
96109

97-
_, startLastErr := shared.ManageService(cluster.Config.Product, "restart", "server", startLast)
98-
Expect(startLastErr).NotTo(HaveOccurred())
110+
for _, ip := range startLast {
111+
_, startLastErr := ms.ManageService(ip, []shared.ServiceAction{action})
112+
Expect(startLastErr).NotTo(HaveOccurred())
113+
}
99114
}
100115

101116
func deleteDataDirectories(cluster *shared.Cluster) {

0 commit comments

Comments (0)