Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Stretch cluster] Support zone unaware apps for zone shutdown tests #11000

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions ocs_ci/helpers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
query_nb_db_psql_version,
)

from ocs_ci.ocs.node import get_worker_nodes
from ocs_ci.ocs import constants, defaults, node, ocp, exceptions
from ocs_ci.ocs.exceptions import (
CommandFailed,
Expand Down Expand Up @@ -5744,3 +5745,106 @@ def verify_reclaimspacecronjob_suspend_state_for_pvc(pvc_obj):

logger.info(f"ReclaimSpace operation is enabled for PVC '{pvc_obj.name}'")
return False


def get_rbd_daemonset_csi_addons_node_object(node):
    """
    Fetch the CSIAddonsNode resource of the CSI RBD plugin daemonset on a node

    Args:
        node (str): Name of the node

    Returns:
        dict: CSI addons node object info

    """
    cluster_ns = config.ENV_DATA["cluster_namespace"]
    # The resource is looked up as <node>-<namespace>-daemonset-csi-rbdplugin
    addons_node_name = f"{node}-{cluster_ns}-daemonset-csi-rbdplugin"
    return OCP(kind=constants.CSI_ADDONS_NODE_KIND, namespace=cluster_ns).get(
        resource_name=addons_node_name
    )


def create_network_fence_class():
    """
    Create NetworkFenceClass CR and verify that exactly one cidr is populated
    in the CsiAddonsNode object of every worker node

    Raises:
        AssertionError: If the CsiAddonsNode object of a worker node does not
            report exactly one cidr

    """

    logger.info("Creating NetworkFenceClass")
    network_fence_class_dict = templating.load_yaml(constants.NETWORK_FENCE_CLASS_CRD)
    network_fence_class_obj = create_resource(**network_fence_class_dict)
    if network_fence_class_obj.ocp.get(
        resource_name=network_fence_class_obj.name, dont_raise=True
    ):
        logger.info(
            f"NetworkFenceClass {network_fence_class_obj.name} created successfully"
        )
    else:
        # Do not stay silent when the freshly created resource cannot be read back
        logger.warning(
            f"NetworkFenceClass {network_fence_class_obj.name} was not found "
            "after creation"
        )

    logger.info("Verifying CsiAddonsNode object for CSI RBD daemonset")
    all_nodes = get_worker_nodes()

    for node_name in all_nodes:
        cidrs = get_rbd_daemonset_csi_addons_node_object(node_name)["status"][
            "networkFenceClientStatus"
        ][0]["ClientDetails"][0]["cidrs"]
        # The check fails for zero cidrs as well as for more than one, so the
        # message reports what was actually found instead of claiming "none"
        assert len(cidrs) == 1, (
            f"Expected exactly one cidr in the CSI Addons node object of "
            f"{node_name}, found {len(cidrs)}: {cidrs}"
        )
        logger.info(f"Cidr: {cidrs[0]} populated in {node_name} CSI addons node object")


def create_network_fence(node_name, cidr):
    """
    Create a NetworkFence resource named after the node, using the given cidr
    as the first entry of its cidr list

    Args:
        node_name (str): Name of the node
        cidr (str): cidr

    Returns:
        OCS: NetworkFence object

    """
    logger.info("Creating NetworkFence")
    fence_data = templating.load_yaml(constants.NETWORK_FENCE_CRD)
    # Fill in the two placeholders of the template
    fence_data["metadata"]["name"] = node_name
    fence_data["spec"]["cidrs"][0] = cidr

    fence = create_resource(**fence_data)
    found = fence.ocp.get(resource_name=fence.name, dont_raise=True)
    if found:
        logger.info(
            f"NetworkFence {fence.name} for node {node_name} created successfully"
        )
    return fence


def unfence_node(node_name, delete=False):
    """
    Un-fence node by patching the fenceState of its NetworkFence object to
    "Unfenced". Nothing is changed if no NetworkFence named after the node exists.

    Args:
        node_name (str): Name of the node
        delete (bool): If True, delete the network fence object

    Raises:
        AssertionError: If the fenceState is still "Fenced" after the patch

    """

    network_fence_obj = OCP(
        kind=constants.NETWORK_FENCE, namespace=config.ENV_DATA["cluster_namespace"]
    )
    if not network_fence_obj.get(resource_name=node_name, dont_raise=True):
        logger.info(f"No networkfence found for node {node_name}")
        return

    network_fence_obj.patch(
        resource_name=node_name,
        params='{"spec":{"fenceState":"Unfenced"}}',
        format_type="merge",
    )
    assert (
        network_fence_obj.get(resource_name=node_name)["spec"]["fenceState"]
        != "Fenced"
    ), f"{node_name} doesnt seem to be unfenced"
    logger.info(f"Unfenced node {node_name} successfully!")

    if delete:
        # The OCP handle was created without a resource name, so the object
        # to delete has to be named explicitly
        network_fence_obj.delete(resource_name=node_name)
        logger.info(f"Deleted network fence object for node {node_name}")
13 changes: 13 additions & 0 deletions ocs_ci/ocs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@
IMAGECONTENTSOURCEPOLICY_KIND = "ImageContentSourcePolicy"
NOOBAA_ACCOUNT = "NoobaaAccount"
EXTERNAL_CLUSTER_SCRIPT_CONFIG = "rook-ceph-external-cluster-script-config"
# Resource kinds of the network fencing objects
NETWORK_FENCE_CLASS = "NetworkFenceClass"
NETWORK_FENCE = "NetworkFence"

# Provisioners
AWS_EFS_PROVISIONER = "openshift.org/aws-efs"
Expand Down Expand Up @@ -302,6 +304,9 @@
RAM = "rss"
VIRT = "vms"

# Name of the NetworkFenceClass resource (same value as metadata.name in the
# network-fence-class.yaml template) and the CSIAddonsNode resource kind
ODF_NETWORK_FENCE_CLASS = "odf-networkfenceclass"
CSI_ADDONS_NODE_KIND = "CSIAddonsNode"

# cluster types
MS_CONSUMER_TYPE = "consumer"
MS_PROVIDER_TYPE = "provider"
Expand Down Expand Up @@ -2425,6 +2430,12 @@
LOGWRITER_CEPHFS_WRITER = os.path.join(LOGWRITER_DIR, "cephfs.logwriter.yaml")
LOGWRITER_STS_PATH = os.path.join(LOGWRITER_DIR, "logwriter.rbd.yaml")

# Network Fence templates: paths to the YAML manifests of the
# NetworkFenceClass and NetworkFence resources (the constant names say CRD,
# but the files hold resource manifests, not CustomResourceDefinitions)
NETWORK_FENCE_CLASS_CRD = os.path.join(
TEMPLATE_DIR, "network", "network-fence-class.yaml"
)
NETWORK_FENCE_CRD = os.path.join(TEMPLATE_DIR, "network", "network-fence.yaml")

# MCG namespace constants
MCG_NS_AWS_ENDPOINT = "https://s3.amazonaws.com"
MCG_NS_AZURE_ENDPOINT = "https://blob.core.windows.net"
Expand Down Expand Up @@ -3011,6 +3022,8 @@
f"{ARBITER_ZONE}{DATA_ZONE_1}-{DATA_ZONE_1}{DATA_ZONE_2}"
)

# Taint string in <key>=<value>:<effect> form
# NOTE(review): presumably applied to nodes that were shut down - confirm against callers
NODE_OUT_OF_SERVICE_TAINT = "node.kubernetes.io/out-of-service=nodeshutdown:NoExecute"

# Logwriter workload labels

LOGWRITER_CEPHFS_LABEL = "app=logwriter-cephfs"
Expand Down
2 changes: 1 addition & 1 deletion ocs_ci/ocs/resources/stretchcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def get_logfile_map(self, label):
self.logfile_map[label][0] = list(set(self.logfile_map[label][0]))
logger.info(self.logfile_map[label][0])

@retry(UnexpectedBehaviour, tries=6, delay=5)
@retry(UnexpectedBehaviour, tries=8, delay=5)
def get_logwriter_reader_pods(
self,
label,
Expand Down
10 changes: 10 additions & 0 deletions ocs_ci/templates/network/network-fence-class.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# NetworkFenceClass template, loaded through constants.NETWORK_FENCE_CLASS_CRD
# by create_network_fence_class() in ocs_ci/helpers/helpers.py
apiVersion: csiaddons.openshift.io/v1alpha1
kind: NetworkFenceClass
metadata:
name: odf-networkfenceclass
spec:
provisioner: openshift-storage.rbd.csi.ceph.com
parameters:
clusterID: openshift-storage
csiaddons.openshift.io/networkfence-secret-name: rook-csi-rbd-node
csiaddons.openshift.io/networkfence-secret-namespace: openshift-storage
14 changes: 14 additions & 0 deletions ocs_ci/templates/network/network-fence.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# NetworkFence template, loaded through constants.NETWORK_FENCE_CRD.
# metadata.name and the first spec.cidrs entry are placeholders that
# create_network_fence() in ocs_ci/helpers/helpers.py overwrites with the
# node name and the cidr before the resource is created.
apiVersion: csiaddons.openshift.io/v1alpha1
kind: NetworkFence
metadata:
name: <openshift-node-name>
spec:
cidrs:
- <ip/32>
driver: openshift-storage.rbd.csi.ceph.com
fenceState: Fenced
parameters:
clusterID: openshift-storage
secret:
name: rook-csi-rbd-provisioner
namespace: openshift-storage
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
spec:
completions: 6
parallelism: 6
backoffLimit: 10
completionMode: Indexed
template:
metadata:
Expand Down
91 changes: 72 additions & 19 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@
get_current_test_name,
modify_deployment_replica_count,
modify_statefulset_replica_count,
create_network_fence_class,
)
from ocs_ci.ocs.ceph_debug import CephObjectStoreTool, MonStoreTool, RookCephPlugin
from ocs_ci.ocs.bucket_utils import get_rgw_restart_counts
Expand Down Expand Up @@ -7910,7 +7911,7 @@ def setup_logwriter_workload(request, teardown_factory):

"""

def factory(pvc, logwriter_path):
def factory(pvc, logwriter_path, zone_aware=True):
"""
Args:
pvc (PVC): PVC object
Expand All @@ -7929,6 +7930,10 @@ def factory(pvc, logwriter_path):
dc_data["spec"]["template"]["spec"]["volumes"][0]["persistentVolumeClaim"][
"claimName"
] = pvc.name

if not zone_aware:
dc_data["spec"]["template"]["spec"].pop("topologySpreadConstraints")

logwriter_dc = helpers.create_resource(**dc_data)
teardown_factory(logwriter_dc)

Expand Down Expand Up @@ -7963,7 +7968,7 @@ def logreader_workload_factory(request, teardown_factory):


def setup_logreader_workload(request, teardown_factory):
def factory(pvc, logreader_path, duration=30):
def factory(pvc, logreader_path, duration=30, zone_aware=True):
"""
Args:
pvc (PVC): PVC object
Expand All @@ -7988,6 +7993,10 @@ def factory(pvc, logreader_path, duration=30):
job_data["spec"]["template"]["spec"]["containers"][0]["command"][
2
] = f"/opt/logreader.py -t {duration} *.log -d"

if not zone_aware:
job_data["spec"]["template"]["spec"].pop("topologySpreadConstraints")

logreader_job = helpers.create_resource(**job_data)
teardown_factory(logreader_job)

Expand Down Expand Up @@ -8063,7 +8072,7 @@ def setup_logwriter_cephfs_workload(

"""

def factory(read_duration=30):
def factory(read_duration=30, **kwargs):
"""
Args:
read_duration (int): Time duration in minutes
Expand All @@ -8078,10 +8087,10 @@ def factory(read_duration=30):
project_name=setup_stretch_cluster_project
)
logwriter_workload = logwriter_workload_factory(
pvc=pvc, logwriter_path=logwriter_path
pvc=pvc, logwriter_path=logwriter_path, **kwargs
)
logreader_workload = logreader_workload_factory(
pvc=pvc, logreader_path=logreader_path, duration=read_duration
pvc=pvc, logreader_path=logreader_path, duration=read_duration, **kwargs
)
return logwriter_workload, logreader_workload

Expand Down Expand Up @@ -8117,22 +8126,31 @@ def setup_logwriter_rbd_workload(

"""

logwriter_sts_path = constants.LOGWRITER_STS_PATH
sts_data = templating.load_yaml(logwriter_sts_path)
sts_data["metadata"]["namespace"] = setup_stretch_cluster_project.namespace
logwriter_sts = helpers.create_resource(**sts_data)
teardown_factory(logwriter_sts)
logwriter_sts_pods = [
pod["metadata"]["name"]
for pod in get_pods_having_label(
label="app=logwriter-rbd", namespace=setup_stretch_cluster_project.namespace
def factory(zone_aware=True):

logwriter_sts_path = constants.LOGWRITER_STS_PATH
sts_data = templating.load_yaml(logwriter_sts_path)
sts_data["metadata"]["namespace"] = setup_stretch_cluster_project.namespace
if not zone_aware:
sts_data["spec"]["template"]["spec"].pop("topologySpreadConstraints")

logwriter_sts = helpers.create_resource(**sts_data)
teardown_factory(logwriter_sts)
logwriter_sts_pods = [
pod["metadata"]["name"]
for pod in get_pods_having_label(
label="app=logwriter-rbd",
namespace=setup_stretch_cluster_project.namespace,
)
]
wait_for_pods_to_be_running(
namespace=setup_stretch_cluster_project.namespace,
pod_names=logwriter_sts_pods,
)
]
wait_for_pods_to_be_running(
namespace=setup_stretch_cluster_project.namespace, pod_names=logwriter_sts_pods
)

return logwriter_sts
return logwriter_sts

return factory


@pytest.fixture()
Expand Down Expand Up @@ -9082,3 +9100,38 @@ def teardown():

# Add the teardown function to the request's finalizer
request.addfinalizer(teardown)


@pytest.fixture(scope="session")
def setup_network_fence_class(request):
    """
    Setup NetworkFenceClass for ODF if not present

    When the NetworkFenceClass already exists nothing is created and nothing
    is removed at teardown. Otherwise it is created here and deleted again by
    the finalizer.

    """
    network_fence_class = OCP(
        kind=constants.NETWORK_FENCE_CLASS,
        namespace=ocsci_config.ENV_DATA["cluster_namespace"],
        resource_name=constants.ODF_NETWORK_FENCE_CLASS,
    )
    if network_fence_class.get(dont_raise=True):
        log.info(
            f"NetworkFenceClass {network_fence_class.resource_name} already exists!"
        )
        return

    def finalizer():
        """
        Delete the NetworkFenceClass if this fixture created it

        """
        # Creation may have failed before the resource existed, so only
        # delete what is really there
        if network_fence_class.get(dont_raise=True):
            network_fence_class.delete(
                resource_name=constants.ODF_NETWORK_FENCE_CLASS
            )

    # Register the cleanup before creating: create_network_fence_class() can
    # raise after the resource exists, and it must still be removed then
    request.addfinalizer(finalizer)
    create_network_fence_class()
Loading
Loading