run workloads with device replacement
Signed-off-by: Mahesh Shetty <[email protected]>
Authored and committed by Mahesh Shetty on Aug 29, 2024
1 parent 7e65182 · commit 84a914b
Showing 1 changed file with 85 additions and 51 deletions (136 changes).

tests/functional/disaster-recovery/sc_arbiter/test_device_replacement.py
# import random
import logging
from datetime import datetime, timezone

from ocs_ci.ocs import constants
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_in_statuses

# from ocs_ci.ocs import constants
# from ocs_ci.ocs.resources.pv import get_pv_in_status
# from ocs_ci.ocs.resources.deployment import get_osd_deployments
# from ocs_ci.helpers.helpers import modify_deployment_replica_count
# from ocs_ci.ocs.resources.pod import (
#     wait_for_pods_by_label_count,
#     delete_all_osd_removal_jobs,
#     run_osd_removal_job,
#     verify_osd_removal_job_completed_successfully,
# )
from ocs_ci.ocs.osd_operations import osd_device_replacement
from ocs_ci.ocs.resources.stretchcluster import StretchCluster

logger = logging.getLogger(__name__)


class TestDeviceReplacementInStretchCluster:
    def test_device_replacement(
        self, nodes, setup_logwriter_cephfs_workload_factory, logreader_workload_factory
    ):
        """
        Test device replacement in a stretch cluster while logwriter workloads
        for both CephFS and RBD are running

        Steps:
            1) Run logwriter/reader workloads for both CephFS and RBD volumes
            2) Perform the device replacement procedure
            3) Verify there is no data loss
            4) Verify there is no data corruption
        """

        sc_obj = StretchCluster()

        # setup logwriter workloads in the background
        (
            sc_obj.cephfs_logwriter_dep,
            sc_obj.cephfs_logreader_job,
        ) = setup_logwriter_cephfs_workload_factory(read_duration=0)

        sc_obj.get_logwriter_reader_pods(label=constants.LOGWRITER_CEPHFS_LABEL)
        sc_obj.get_logwriter_reader_pods(label=constants.LOGREADER_CEPHFS_LABEL)
        sc_obj.get_logwriter_reader_pods(
            label=constants.LOGWRITER_RBD_LABEL, exp_num_replicas=2
        )
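        # NOTE: get_logwriter_reader_pods() presumably also caches the matching
        # pod objects on sc_obj (e.g. sc_obj.cephfs_logreader_pods), which the
        # cleanup and verification steps below rely on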
logger.info("All the workloads pods are successfully up and running") | ||

        start_time = datetime.now(timezone.utc)
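        # start_time, together with end_time captured after the replacement,
        # bounds the window that post_failure_checks() inspects for IO failures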

        sc_obj.get_logfile_map(label=constants.LOGWRITER_CEPHFS_LABEL)
        sc_obj.get_logfile_map(label=constants.LOGWRITER_RBD_LABEL)
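        # snapshot the log files written so far; presumably check_for_data_loss()
        # compares against this map after the replacement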

        # run device replacement procedure
        logger.info("Running device replacement procedure now")
        osd_device_replacement(nodes)
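        # osd_device_replacement() presumably automates the manual flow kept
        # commented out below for reference: pick an OSD, scale its deployment
        # down, run the ocs-osd-removal job, and delete the released local PV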

        # # Choose the OSD that needs to be replaced
        # all_osd_deployments = get_osd_deployments()
        # osd_deployment = random.choice(all_osd_deployments)
        # osd_id = osd_deployment.get()["metadata"]["labels"]["ceph-osd-id"]
        # logger.info(f"osd-{osd_id} needs to be removed")
        #
        # # Scale down the osd deployment
        # logger.info(f"scaling down {osd_deployment.name} now...")
        # modify_deployment_replica_count(
        #     osd_deployment.name,
        #     replica_count=0,
        # )
        #
        # # Verify that osd pods are terminated
        # wait_for_pods_by_label_count(
        #     f"ceph-osd-id={osd_id}",
        #     expected_count=0,
        # )
        # logger.info("osd pods are terminated successfully")
        #
        # # Remove the osd from the cluster
        # # to add new osd
        # delete_all_osd_removal_jobs()
        # run_osd_removal_job(osd_ids=[osd_id])
        #
        # # Verify that OSDs are removed successfully
        # verify_osd_removal_job_completed_successfully(osd_id)
        #
        # # Find the persistent volumes (PVs) that need to be deleted and delete them
        # pvs = get_pv_in_status(
        #     storage_class=constants.LOCALSTORAGE_SC, status=constants.STATUS_RELEASED
        # )
        # for pv in pvs:
        #     pv.delete()

        # Track the provisioning of PVs for the devices that match the deviceInclusionSpec

        # Delete the ocs-osd-removal job(s).

        # check IO for any failures
        end_time = datetime.now(timezone.utc)
        sc_obj.post_failure_checks(start_time, end_time, wait_for_read_completion=False)
        logger.info("Successfully verified with post failure checks for the workloads")

        sc_obj.cephfs_logreader_job.delete()
        logger.info(sc_obj.cephfs_logreader_pods)
        for pod in sc_obj.cephfs_logreader_pods:
            pod.wait_for_pod_delete(timeout=120)
        logger.info("All old CephFS logreader pods are deleted")

        # check for any data loss
        assert sc_obj.check_for_data_loss(
            constants.LOGWRITER_CEPHFS_LABEL
        ), "[CephFS] Data is lost"
        logger.info("[CephFS] No data loss is seen")
        assert sc_obj.check_for_data_loss(
            constants.LOGWRITER_RBD_LABEL
        ), "[RBD] Data is lost"
        logger.info("[RBD] No data loss is seen")

        # check for data corruption
        logreader_workload_factory(
            pvc=sc_obj.get_workload_pvc_obj(constants.LOGWRITER_CEPHFS_LABEL)[0],
            logreader_path=constants.LOGWRITER_CEPHFS_READER,
            duration=5,
        )
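        # this runs a fresh logreader job against the CephFS logwriter PVC; its
        # pods are collected below and must reach the Completed state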
        sc_obj.get_logwriter_reader_pods(constants.LOGREADER_CEPHFS_LABEL)

        wait_for_pods_to_be_in_statuses(
            expected_statuses=constants.STATUS_COMPLETED,
            pod_names=[pod.name for pod in sc_obj.cephfs_logreader_pods],
            timeout=900,
            namespace=constants.STRETCH_CLUSTER_NAMESPACE,
        )
        logger.info("[CephFS] Logreader job pods have reached 'Completed' state!")

        assert sc_obj.check_for_data_corruption(
            label=constants.LOGREADER_CEPHFS_LABEL
        ), "Data is corrupted for CephFS workloads"
        logger.info("No data corruption is seen in CephFS workloads")

        assert sc_obj.check_for_data_corruption(
            label=constants.LOGWRITER_RBD_LABEL
        ), "Data is corrupted for RBD workloads"
        logger.info("No data corruption is seen in RBD workloads")