run workloads with device replacement
Signed-off-by: Mahesh Shetty <[email protected]>
Mahesh Shetty authored and Mahesh Shetty committed Aug 29, 2024
1 parent 7e65182 commit 84a914b
Showing 1 changed file with 85 additions and 51 deletions.
# import random
import logging
from datetime import datetime, timezone

from ocs_ci.ocs import constants
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_in_statuses

# from ocs_ci.ocs import constants
# from ocs_ci.ocs.resources.pv import get_pv_in_status
# from ocs_ci.ocs.resources.deployment import get_osd_deployments
# from ocs_ci.helpers.helpers import modify_deployment_replica_count
# from ocs_ci.ocs.resources.pod import (
# wait_for_pods_by_label_count,
# delete_all_osd_removal_jobs,
# run_osd_removal_job,
# verify_osd_removal_job_completed_successfully,
# )
from ocs_ci.ocs.osd_operations import osd_device_replacement
from ocs_ci.ocs.resources.stretchcluster import StretchCluster

logger = logging.getLogger(__name__)


class TestDeviceReplacementInStretchCluster:
def test_device_replacement(
self, nodes, setup_logwriter_cephfs_workload_factory, logreader_workload_factory
):
"""
Test device replacement in a stretch cluster while logwriter workloads
for both CephFS and RBD are running
Steps:
1) Run logwriter/reader workload for both CephFS and RBD volumes
2) Perform device replacement procedure
3) Verify no data loss
4) Verify no data corruption
"""

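# StretchCluster helper object used to keep track of the workload pods and their log files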
sc_obj = StretchCluster()

# setup logwriter workloads in the background
(
sc_obj.cephfs_logwriter_dep,
sc_obj.cephfs_logreader_job,
) = setup_logwriter_cephfs_workload_factory(read_duration=0)

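# make sure the CephFS and RBD logwriter/logreader pods are up and running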
sc_obj.get_logwriter_reader_pods(label=constants.LOGWRITER_CEPHFS_LABEL)
sc_obj.get_logwriter_reader_pods(label=constants.LOGREADER_CEPHFS_LABEL)
sc_obj.get_logwriter_reader_pods(
label=constants.LOGWRITER_RBD_LABEL, exp_num_replicas=2
)
logger.info("All the workload pods are successfully up and running")

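# note the time just before the device replacement for the post failure checks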
start_time = datetime.now(timezone.utc)

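# capture the log files written by each workload so data loss can be checked later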
sc_obj.get_logfile_map(label=constants.LOGWRITER_CEPHFS_LABEL)
sc_obj.get_logfile_map(label=constants.LOGWRITER_RBD_LABEL)

# run device replacement procedure
logger.info("Running device replacement procedure now")
osd_device_replacement(nodes)

# # Choose the OSD that needs to be replaced
# all_osd_deployments = get_osd_deployments()
# osd_deployment = random.choice(all_osd_deployments)
# osd_id = osd_deployment.get()["metadata"]["labels"]["ceph-osd-id"]
# logger.info(f"osd-{osd_id} needs to be removed")
#
# # Scale down the osd deployment
# logger.info(f"scaling down {osd_deployment.name} now...")
# modify_deployment_replica_count(
# osd_deployment.name,
# replica_count=0,
# )
#
# # Verify that osd pods are terminated
# wait_for_pods_by_label_count(
# f"ceph-osd-id={osd_id}",
# exptected_count=0,
# )
# logger.info("osd pods are terminated successfully")
#
# # Remove the osd from the cluster
# # to add new osd
# delete_all_osd_removal_jobs()
# run_osd_removal_job(osd_ids=[osd_id])
#
# # Verify that OSDs are removed successfully
# verify_osd_removal_job_completed_successfully(osd_id)
#
# # Find the persistent volume (PV) that need to be deleted and delete it
# pvs = get_pv_in_status(
# storage_class=constants.LOCALSTORAGE_SC, status=constants.STATUS_RELEASED
# )
# for pv in pvs:
# pv.delete()

# Tracking the provisioning of PVs for the devices that match the deviceInclusionSpec
# and deleting the ocs-osd-removal job(s) are expected to be handled by
# osd_device_replacement above

# check IO for any failures
end_time = datetime.now(timezone.utc)
sc_obj.post_failure_checks(start_time, end_time, wait_for_read_completion=False)
logger.info("Successfully verified the workloads with post failure checks")

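# delete the old CephFS logreader job and wait for its pods to be removed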
sc_obj.cephfs_logreader_job.delete()
logger.info(sc_obj.cephfs_logreader_pods)
for pod in sc_obj.cephfs_logreader_pods:
pod.wait_for_pod_delete(timeout=120)
logger.info("All old CephFS logreader pods are deleted")

# check for any data loss
assert sc_obj.check_for_data_loss(
constants.LOGWRITER_CEPHFS_LABEL
), "[CephFS] Data is lost"
logger.info("[CephFS] No data loss is seen")
assert sc_obj.check_for_data_loss(
constants.LOGWRITER_RBD_LABEL
), "[RBD] Data is lost"
logger.info("[RBD] No data loss is seen")

# check for data corruption
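# spin up a fresh CephFS logreader on the existing logwriter PVC to re-read all the log files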
logreader_workload_factory(
pvc=sc_obj.get_workload_pvc_obj(constants.LOGWRITER_CEPHFS_LABEL)[0],
logreader_path=constants.LOGWRITER_CEPHFS_READER,
duration=5,
)
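# fetch the newly created logreader pods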
sc_obj.get_logwriter_reader_pods(constants.LOGREADER_CEPHFS_LABEL)

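# wait for the new logreader pods to finish reading and reach Completed state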
wait_for_pods_to_be_in_statuses(
expected_statuses=constants.STATUS_COMPLETED,
pod_names=[pod.name for pod in sc_obj.cephfs_logreader_pods],
timeout=900,
namespace=constants.STRETCH_CLUSTER_NAMESPACE,
)
logger.info("[CephFS] Logreader job pods have reached 'Completed' state!")

assert sc_obj.check_for_data_corruption(
label=constants.LOGREADER_CEPHFS_LABEL
), "Data is corrupted for CephFS workloads"
logger.info("No data corruption is seen in CephFS workloads")

assert sc_obj.check_for_data_corruption(
label=constants.LOGWRITER_RBD_LABEL
), "Data is corrupted for RBD workloads"
logger.info("No data corruption is seen in RBD workloads")
