Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions docker/mongodb-kubernetes-tests/kubetester/mongodb_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,26 @@

class MongoDBCommon:
@TRACER.start_as_current_span("wait_for")
def wait_for(self, fn, timeout=None, should_raise=True):
def wait_for(self, fn, timeout=None, should_raise=True, persist_for=1):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is persist_for essentially equivalent to "required consecutive successes to pass"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if yes, then pls leave a brief comment or rename it, it took me a bit thinking to get the meaning of this param

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and one of the reasons was that "persist for" suggests that the problem persists, or that the goal is not achieved yet and the situation persists. When something succeeds, it's not usually described as a situation that "persists". But I might be nitpicking here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is persist_for essentially equivalent to "required consecutive successes to pass"?

Yes

Not the best name, I know — I couldn't find a better one. I can add a comment, but do you have a better option? no_of_successful_passes?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I'm not a fan of Gomega and Ginkgo, I think we should separate "wait for something" from "consistently meets something", like they do:

if timeout is None:
timeout = 600
initial_timeout = timeout

wait = 3
retries = 0
while timeout > 0:
try:
self.reload()
except Exception as e:
print(f"Caught error: {e} while waiting for {fn.__name__}")
pass
if fn(self):
return True
retries += 1
if retries == persist_for:
return True
else:
retries = 0

timeout -= wait
time.sleep(wait)

Expand Down
17 changes: 6 additions & 11 deletions docker/mongodb-kubernetes-tests/kubetester/opsmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,13 +1017,7 @@ def is_om_multi_cluster(self):
return self["spec"].get("topology", "") == "MultiCluster"

class StatusCommon:
def assert_reaches_phase(
self,
phase: Phase,
msg_regexp=None,
timeout=None,
ignore_errors=False,
):
def assert_reaches_phase(self, phase: Phase, msg_regexp=None, timeout=None, ignore_errors=False, persist_for=1):
intermediate_events = (
# This can be an intermediate error, right before we check for this secret we create it.
# The cluster might just be slow
Expand All @@ -1046,6 +1040,7 @@ def assert_reaches_phase(
),
timeout,
should_raise=True,
persist_for=persist_for,
)
end_time = time.time()
span = trace.get_current_span()
Expand Down Expand Up @@ -1110,8 +1105,8 @@ def __init__(self, ops_manager: MongoDBOpsManager):
def assert_abandons_phase(self, phase: Phase, timeout=400):
    """Forward to the base-class implementation, overriding only the default timeout (400s).

    NOTE(review): this subclass changes nothing except the default `timeout`;
    verify it matches the base-class signature if that signature changes.
    """
    super().assert_abandons_phase(phase, timeout)

def assert_reaches_phase(self, phase: Phase, msg_regexp=None, timeout=1000, ignore_errors=False):
super().assert_reaches_phase(phase, msg_regexp, timeout, ignore_errors)
def assert_reaches_phase(self, phase: Phase, msg_regexp=None, timeout=1000, ignore_errors=False, persist_for=1):
    """Forward to the base-class assert_reaches_phase with a 1000s default timeout.

    persist_for: number of consecutive successful phase checks required before
    the phase is considered reached (forwarded to the base implementation;
    default 1 preserves the previous single-check behavior).
    """
    super().assert_reaches_phase(phase, msg_regexp, timeout, ignore_errors, persist_for=persist_for)

def get_phase(self) -> Optional[Phase]:
try:
Expand Down Expand Up @@ -1156,8 +1151,8 @@ def __init__(self, ops_manager: MongoDBOpsManager):
def assert_abandons_phase(self, phase: Phase, timeout=400):
    """Forward to the base-class implementation, overriding only the default timeout (400s).

    NOTE(review): this subclass changes nothing except the default `timeout`;
    verify it matches the base-class signature if that signature changes.
    """
    super().assert_abandons_phase(phase, timeout)

def assert_reaches_phase(self, phase: Phase, msg_regexp=None, timeout=1200, ignore_errors=False):
super().assert_reaches_phase(phase, msg_regexp, timeout, ignore_errors)
def assert_reaches_phase(self, phase: Phase, msg_regexp=None, timeout=1200, ignore_errors=False, persist_for=1):
    """Forward to the base-class assert_reaches_phase with a 1200s default timeout.

    persist_for: number of consecutive successful phase checks required before
    the phase is considered reached (forwarded to the base implementation;
    default 1 preserves the previous single-check behavior).
    """
    super().assert_reaches_phase(phase, msg_regexp, timeout, ignore_errors, persist_for=persist_for)

def get_phase(self) -> Optional[Phase]:
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def test_scale_appdb(self, ops_manager: MongoDBOpsManager):
# Reordering the clusters triggers a change in the state
ops_manager["spec"]["applicationDatabase"]["clusterSpecList"] = scale_on_upgrade.cluster_spec
ops_manager.update()
ops_manager.appdb_status().assert_reaches_phase(Phase.Running, timeout=500)
ops_manager.appdb_status().assert_reaches_phase(Phase.Running, timeout=600)
ops_manager.om_status().assert_reaches_phase(Phase.Running, timeout=250)

def test_migrated_state_correctness(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_running(namespace: str):
try:
logger.debug(f"Waiting for {sc.name} to reach Running phase")
# Once the first resource reached Running, it shouldn't take more than ~300s for the others to do so
sc.assert_reaches_phase(Phase.Running, timeout=900 if first_iter else 300)
sc.assert_reaches_phase(Phase.Running, timeout=1200 if first_iter else 300)
succeeded_resources.append(sc.name)
first_iter = False
logger.info(f"{sc.name} reached Running phase")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def test_upgrade_operator(
@mark.e2e_appdb_tls_operator_upgrade_v1_32_to_mck
def test_om_tls_ok(ops_manager_tls: MongoDBOpsManager):
ops_manager_tls.load()
ops_manager_tls.appdb_status().assert_reaches_phase(Phase.Running, timeout=900)
ops_manager_tls.om_status().assert_reaches_phase(Phase.Running, timeout=900)
ops_manager_tls.appdb_status().assert_reaches_phase(Phase.Running, timeout=900, persist_for=3)
ops_manager_tls.om_status().assert_reaches_phase(Phase.Running, timeout=900, persist_for=3)
ops_manager_tls.get_om_tester().assert_healthiness()


Expand Down
Loading