From 74cc5f131ff55b86173471cf7f970bd9604ecdf2 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 14:06:54 +0200 Subject: [PATCH 01/10] Dual branch config --- .github/renovate.json5 | 26 +------------------------- .github/workflows/check_pr.yaml | 2 +- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 34085c9225..cd60ef68a5 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -6,6 +6,7 @@ reviewers: [ 'team:data-platform-postgresql', ], + "baseBranches": ["main", "/^*\\/edge$/"], packageRules: [ { matchPackageNames: [ @@ -13,32 +14,7 @@ ], allowedVersions: '<2.0.0', }, - { - matchManagers: [ - 'custom.regex', - ], - matchDepNames: [ - 'juju', - ], - matchDatasources: [ - 'pypi', - ], - allowedVersions: '<3', - groupName: 'Juju agents', - }, ], customManagers: [ - { - customType: 'regex', - fileMatch: [ - '^\\.github/workflows/[^/]+\\.ya?ml$', - ], - matchStrings: [ - '(libjuju: )==(?.*?) +# renovate: latest libjuju 2', - ], - depNameTemplate: 'juju', - datasourceTemplate: 'pypi', - versioningTemplate: 'loose', - }, ], } diff --git a/.github/workflows/check_pr.yaml b/.github/workflows/check_pr.yaml index beaa1541a3..6eb3823585 100644 --- a/.github/workflows/check_pr.yaml +++ b/.github/workflows/check_pr.yaml @@ -15,4 +15,4 @@ on: jobs: check-pr: name: Check pull request - uses: canonical/data-platform-workflows/.github/workflows/check_charm_pr.yaml@v31.0.0 + uses: canonical/data-platform-workflows/.github/workflows/check_charm_pr.yaml@v30.2.0 From 5d56fb0e8285fbf781b16eee787072c6351b05a7 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 14:22:29 +0200 Subject: [PATCH 02/10] Try alternative endpoints when checking for primary --- src/charm.py | 6 +++++- tests/unit/test_charm.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/charm.py b/src/charm.py index 30ca8117a9..d4159533c6 100755 --- a/src/charm.py +++ b/src/charm.py @@ -387,7 +387,11 @@ def primary_endpoint(self) -> str | None: try: for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)): with attempt: - primary = self._patroni.get_primary() + primary = self._patroni.get_primary( + alternative_endpoints=[ + self._get_unit_ip(unit) for unit in self._peers.units + ] + ) if primary is None and (standby_leader := self._patroni.get_standby_leader()): primary = standby_leader primary_endpoint = self._patroni.get_member_ip(primary) diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 7de1a502cf..3820a258e0 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -195,7 +195,7 @@ def test_primary_endpoint(harness): _wait_fixed.assert_called_once_with(3) _patroni.return_value.get_member_ip.assert_called_once_with(sentinel.primary) - _patroni.return_value.get_primary.assert_called_once_with() + _patroni.return_value.get_primary.assert_called_once_with(alternative_endpoints=[]) def test_primary_endpoint_no_peers(harness): From bd7da9b57e7659e0b846bf89e04a344ebdacdf7d Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 16:29:00 +0200 Subject: [PATCH 03/10] Only check alternative endpoints if the cluster was initialised --- src/charm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/charm.py b/src/charm.py index d4159533c6..ab9f81b81b 100755 --- a/src/charm.py +++ b/src/charm.py @@ -385,12 +385,15 @@ def primary_endpoint(self) -> str | None: logger.debug("primary endpoint early exit: Peer relation not joined yet.") return None try: + alternative_endpoints = ( + [] + if not self.is_cluster_initialised + else [self._get_unit_ip(unit) for unit in self._peers.units] + ) for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)): with attempt: primary = self._patroni.get_primary( - alternative_endpoints=[ - self._get_unit_ip(unit) for unit in self._peers.units - ] + alternative_endpoints=alternative_endpoints ) if primary is None and (standby_leader := self._patroni.get_standby_leader()): primary = standby_leader From d996c9c7744db7d7f0ea2367eb93733e8bfcd7df Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 17:16:51 +0200 Subject: [PATCH 04/10] Remove retry --- src/charm.py | 56 ++++++++++++++-------------------------- tests/unit/test_charm.py | 7 +---- 2 files changed, 21 insertions(+), 42 deletions(-) diff --git a/src/charm.py b/src/charm.py index ab9f81b81b..6023b04483 100755 --- a/src/charm.py +++ b/src/charm.py @@ -384,42 +384,26 @@ def primary_endpoint(self) -> str | None: if not self._peers: logger.debug("primary endpoint early exit: Peer relation not joined yet.") return None - try: - alternative_endpoints = ( - [] - if not self.is_cluster_initialised - else [self._get_unit_ip(unit) for unit in self._peers.units] - ) - for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)): - with attempt: - primary = self._patroni.get_primary( - alternative_endpoints=alternative_endpoints - ) - if primary is None and (standby_leader := self._patroni.get_standby_leader()): - primary = standby_leader - primary_endpoint = self._patroni.get_member_ip(primary) - # Force a retry if there is no primary or the member that was - # returned is not in the list of the current cluster members - # (like when the cluster was not updated yet after a failed switchover). - if not primary_endpoint or primary_endpoint not in self._units_ips: - # TODO figure out why peer data is not available - if ( - primary_endpoint - and len(self._units_ips) == 1 - and len(self._peers.units) > 1 - ): - logger.warning( - "Possibly incoplete peer data: Will not map primary IP to unit IP" - ) - return primary_endpoint - logger.debug( - "primary endpoint early exit: Primary IP not in cached peer list." - ) - primary_endpoint = None - except RetryError: - return None - else: - return primary_endpoint + alternative_endpoints = ( + None + if not self.is_cluster_initialised + else [self._get_unit_ip(unit) for unit in self._peers.units] + ) + primary = self._patroni.get_primary(alternative_endpoints=alternative_endpoints) + if primary is None and (standby_leader := self._patroni.get_standby_leader()): + primary = standby_leader + primary_endpoint = self._patroni.get_member_ip(primary) + # Force a retry if there is no primary or the member that was + # returned is not in the list of the current cluster members + # (like when the cluster was not updated yet after a failed switchover). + if not primary_endpoint or primary_endpoint not in self._units_ips: + # TODO figure out why peer data is not available + if primary_endpoint and len(self._units_ips) == 1 and len(self._peers.units) > 1: + logger.warning("Possibly incoplete peer data: Will not map primary IP to unit IP") + return primary_endpoint + logger.debug("primary endpoint early exit: Primary IP not in cached peer list.") + primary_endpoint = None + return primary_endpoint def get_hostname_by_unit(self, _) -> str: """Create a DNS name for a PostgreSQL unit. diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 3820a258e0..3bb34d54a9 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -177,8 +177,6 @@ def test_patroni_scrape_config_tls(harness): def test_primary_endpoint(harness): with ( - patch("charm.stop_after_delay", new_callable=PropertyMock) as _stop_after_delay, - patch("charm.wait_fixed", new_callable=PropertyMock) as _wait_fixed, patch( "charm.PostgresqlOperatorCharm._units_ips", new_callable=PropertyMock, @@ -191,11 +189,8 @@ def test_primary_endpoint(harness): assert harness.charm.primary_endpoint == "1.1.1.1" # Check needed to ensure a fast charm deployment. - _stop_after_delay.assert_called_once_with(5) - _wait_fixed.assert_called_once_with(3) - _patroni.return_value.get_member_ip.assert_called_once_with(sentinel.primary) - _patroni.return_value.get_primary.assert_called_once_with(alternative_endpoints=[]) + _patroni.return_value.get_primary.assert_called_once_with(alternative_endpoints=None) def test_primary_endpoint_no_peers(harness): From 143bad661f49966984287cc95ae60bb8505ea3fc Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 18:00:54 +0200 Subject: [PATCH 05/10] Add back retry exception handling --- src/charm.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/charm.py b/src/charm.py index 6023b04483..a692547a0a 100755 --- a/src/charm.py +++ b/src/charm.py @@ -384,25 +384,30 @@ def primary_endpoint(self) -> str | None: if not self._peers: logger.debug("primary endpoint early exit: Peer relation not joined yet.") return None - alternative_endpoints = ( - None - if not self.is_cluster_initialised - else [self._get_unit_ip(unit) for unit in self._peers.units] - ) - primary = self._patroni.get_primary(alternative_endpoints=alternative_endpoints) - if primary is None and (standby_leader := self._patroni.get_standby_leader()): - primary = standby_leader - primary_endpoint = self._patroni.get_member_ip(primary) - # Force a retry if there is no primary or the member that was - # returned is not in the list of the current cluster members - # (like when the cluster was not updated yet after a failed switchover). - if not primary_endpoint or primary_endpoint not in self._units_ips: - # TODO figure out why peer data is not available - if primary_endpoint and len(self._units_ips) == 1 and len(self._peers.units) > 1: - logger.warning("Possibly incoplete peer data: Will not map primary IP to unit IP") - return primary_endpoint - logger.debug("primary endpoint early exit: Primary IP not in cached peer list.") - primary_endpoint = None + try: + alternative_endpoints = ( + None + if not self.is_cluster_initialised + else [self._get_unit_ip(unit) for unit in self._peers.units] + ) + primary = self._patroni.get_primary(alternative_endpoints=alternative_endpoints) + if primary is None and (standby_leader := self._patroni.get_standby_leader()): + primary = standby_leader + primary_endpoint = self._patroni.get_member_ip(primary) + # Force a retry if there is no primary or the member that was + # returned is not in the list of the current cluster members + # (like when the cluster was not updated yet after a failed switchover). + if not primary_endpoint or primary_endpoint not in self._units_ips: + # TODO figure out why peer data is not available + if primary_endpoint and len(self._units_ips) == 1 and len(self._peers.units) > 1: + logger.warning( + "Possibly incoplete peer data: Will not map primary IP to unit IP" + ) + return primary_endpoint + logger.debug("primary endpoint early exit: Primary IP not in cached peer list.") + primary_endpoint = None + except RetryError: + return None return primary_endpoint def get_hostname_by_unit(self, _) -> str: From dbebea89a7a4a726eb7938b052c3dc7d3d340152 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 18:52:24 +0200 Subject: [PATCH 06/10] Add back else --- src/charm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/charm.py b/src/charm.py index a692547a0a..bb19032a39 100755 --- a/src/charm.py +++ b/src/charm.py @@ -408,7 +408,8 @@ def primary_endpoint(self) -> str | None: primary_endpoint = None except RetryError: return None - return primary_endpoint + else: + return primary_endpoint def get_hostname_by_unit(self, _) -> str: """Create a DNS name for a PostgreSQL unit. From 81fcc63811a824842f3f8bc25e7586e6e23286bf Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 19:33:39 +0200 Subject: [PATCH 07/10] Revert retry --- .github/workflows/check_pr.yaml | 3 ++- src/charm.py | 40 ++++++++++++++++++++------------- tests/unit/test_charm.py | 5 +++++ 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/.github/workflows/check_pr.yaml b/.github/workflows/check_pr.yaml index 6eb3823585..ce3d160399 100644 --- a/.github/workflows/check_pr.yaml +++ b/.github/workflows/check_pr.yaml @@ -11,8 +11,9 @@ on: - edited branches: - main + - '*/edge' jobs: check-pr: name: Check pull request - uses: canonical/data-platform-workflows/.github/workflows/check_charm_pr.yaml@v30.2.0 + uses: canonical/data-platform-workflows/.github/workflows/check_charm_pr.yaml@v31.0.0 diff --git a/src/charm.py b/src/charm.py index bb19032a39..92b3e35b65 100755 --- a/src/charm.py +++ b/src/charm.py @@ -390,22 +390,32 @@ def primary_endpoint(self) -> str | None: if not self.is_cluster_initialised else [self._get_unit_ip(unit) for unit in self._peers.units] ) - primary = self._patroni.get_primary(alternative_endpoints=alternative_endpoints) - if primary is None and (standby_leader := self._patroni.get_standby_leader()): - primary = standby_leader - primary_endpoint = self._patroni.get_member_ip(primary) - # Force a retry if there is no primary or the member that was - # returned is not in the list of the current cluster members - # (like when the cluster was not updated yet after a failed switchover). - if not primary_endpoint or primary_endpoint not in self._units_ips: - # TODO figure out why peer data is not available - if primary_endpoint and len(self._units_ips) == 1 and len(self._peers.units) > 1: - logger.warning( - "Possibly incoplete peer data: Will not map primary IP to unit IP" + for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)): + with attempt: + primary = self._patroni.get_primary( + alternative_endpoints=alternative_endpoints ) - return primary_endpoint - logger.debug("primary endpoint early exit: Primary IP not in cached peer list.") - primary_endpoint = None + if primary is None and (standby_leader := self._patroni.get_standby_leader()): + primary = standby_leader + primary_endpoint = self._patroni.get_member_ip(primary) + # Force a retry if there is no primary or the member that was + # returned is not in the list of the current cluster members + # (like when the cluster was not updated yet after a failed switchover). + if not primary_endpoint or primary_endpoint not in self._units_ips: + # TODO figure out why peer data is not available + if ( + primary_endpoint + and len(self._units_ips) == 1 + and len(self._peers.units) > 1 + ): + logger.warning( + "Possibly incoplete peer data: Will not map primary IP to unit IP" + ) + return primary_endpoint + logger.debug( + "primary endpoint early exit: Primary IP not in cached peer list." + ) + primary_endpoint = None except RetryError: return None else: diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 3bb34d54a9..a97ec3eb96 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -177,6 +177,8 @@ def test_patroni_scrape_config_tls(harness): def test_primary_endpoint(harness): with ( + patch("charm.stop_after_delay", new_callable=PropertyMock) as _stop_after_delay, + patch("charm.wait_fixed", new_callable=PropertyMock) as _wait_fixed, patch( "charm.PostgresqlOperatorCharm._units_ips", new_callable=PropertyMock, @@ -189,6 +191,9 @@ def test_primary_endpoint(harness): assert harness.charm.primary_endpoint == "1.1.1.1" # Check needed to ensure a fast charm deployment. + _stop_after_delay.assert_called_once_with(5) + _wait_fixed.assert_called_once_with(3) + _patroni.return_value.get_member_ip.assert_called_once_with(sentinel.primary) _patroni.return_value.get_primary.assert_called_once_with(alternative_endpoints=None) From d481f6d77ea72aef90ebc0bf97daa0add1980abd Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 22:00:35 +0200 Subject: [PATCH 08/10] Use _peer_members_ips --- src/charm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/charm.py b/src/charm.py index 92b3e35b65..67503b6879 100755 --- a/src/charm.py +++ b/src/charm.py @@ -386,9 +386,7 @@ def primary_endpoint(self) -> str | None: return None try: alternative_endpoints = ( - None - if not self.is_cluster_initialised - else [self._get_unit_ip(unit) for unit in self._peers.units] + None if not self.is_cluster_initialised else list(self._peer_members_ips) ) for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)): with attempt: From e51d76f6adf9d8e2605f847b71c7507e3ec51312 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Wed, 26 Mar 2025 22:43:01 +0200 Subject: [PATCH 09/10] Filter out none address --- src/charm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/charm.py b/src/charm.py index 67503b6879..a5e5342490 100755 --- a/src/charm.py +++ b/src/charm.py @@ -957,6 +957,8 @@ def _units_ips(self) -> set[str]: # Get all members IPs and remove the current unit IP from the list. addresses = {self._get_unit_ip(unit) for unit in self._peers.units} addresses.add(self._unit_ip) + if None in addresses: + addresses.remove(None) return addresses @property From 0797b6e7801344172480a16e4d0b411ef361834a Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Thu, 27 Mar 2025 03:00:40 +0200 Subject: [PATCH 10/10] Switch to generator --- src/charm.py | 43 ++++++++-------------- src/cluster.py | 74 +++++++++++++++++++------------------- tests/unit/test_charm.py | 8 +---- tests/unit/test_cluster.py | 73 ++++++++++++++++++++++--------------- 4 files changed, 97 insertions(+), 101 deletions(-) diff --git a/src/charm.py b/src/charm.py index a5e5342490..ad4f6c2d1a 100755 --- a/src/charm.py +++ b/src/charm.py @@ -385,35 +385,22 @@ def primary_endpoint(self) -> str | None: logger.debug("primary endpoint early exit: Peer relation not joined yet.") return None try: - alternative_endpoints = ( - None if not self.is_cluster_initialised else list(self._peer_members_ips) - ) - for attempt in Retrying(stop=stop_after_delay(5), wait=wait_fixed(3)): - with attempt: - primary = self._patroni.get_primary( - alternative_endpoints=alternative_endpoints + primary = self._patroni.get_primary() + if primary is None and (standby_leader := self._patroni.get_standby_leader()): + primary = standby_leader + primary_endpoint = self._patroni.get_member_ip(primary) + # Force a retry if there is no primary or the member that was + # returned is not in the list of the current cluster members + # (like when the cluster was not updated yet after a failed switchover). + if not primary_endpoint or primary_endpoint not in self._units_ips: + # TODO figure out why peer data is not available + if primary_endpoint and len(self._units_ips) == 1 and len(self._peers.units) > 1: + logger.warning( + "Possibly incoplete peer data: Will not map primary IP to unit IP" ) - if primary is None and (standby_leader := self._patroni.get_standby_leader()): - primary = standby_leader - primary_endpoint = self._patroni.get_member_ip(primary) - # Force a retry if there is no primary or the member that was - # returned is not in the list of the current cluster members - # (like when the cluster was not updated yet after a failed switchover). - if not primary_endpoint or primary_endpoint not in self._units_ips: - # TODO figure out why peer data is not available - if ( - primary_endpoint - and len(self._units_ips) == 1 - and len(self._peers.units) > 1 - ): - logger.warning( - "Possibly incoplete peer data: Will not map primary IP to unit IP" - ) - return primary_endpoint - logger.debug( - "primary endpoint early exit: Primary IP not in cached peer list." - ) - primary_endpoint = None + return primary_endpoint + logger.debug("primary endpoint early exit: Primary IP not in cached peer list.") + primary_endpoint = None except RetryError: return None else: diff --git a/src/cluster.py b/src/cluster.py index b321a4cac4..b5af840169 100644 --- a/src/cluster.py +++ b/src/cluster.py @@ -21,7 +21,6 @@ from ops import BlockedStatus from pysyncobj.utility import TcpUtility, UtilityException from tenacity import ( - AttemptManager, RetryError, Retrying, retry, @@ -233,11 +232,12 @@ def get_member_ip(self, member_name: str) -> str: IP address of the cluster member. """ # Request info from cluster endpoint (which returns all members of the cluster). - for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)): + attempts = 2 * len(self.peers_ips) + 2 + urls = self._get_alternative_patroni_url() + for attempt in Retrying(stop=stop_after_attempt(attempts)): with attempt: - url = self._get_alternative_patroni_url(attempt) cluster_status = requests.get( - f"{url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", + f"{next(urls)}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", verify=self.verify, timeout=API_REQUEST_TIMEOUT, auth=self._patroni_auth, @@ -257,11 +257,12 @@ def get_member_status(self, member_name: str) -> str: couldn't be retrieved yet. """ # Request info from cluster endpoint (which returns all members of the cluster). - for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)): + attempts = 2 * len(self.peers_ips) + 2 + urls = self._get_alternative_patroni_url() + for attempt in Retrying(stop=stop_after_attempt(attempts)): with attempt: - url = self._get_alternative_patroni_url(attempt) cluster_status = requests.get( - f"{url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", + f"{next(urls)}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", verify=self.verify, timeout=API_REQUEST_TIMEOUT, auth=self._patroni_auth, @@ -284,11 +285,15 @@ def get_primary( primary pod or unit name. """ # Request info from cluster endpoint (which returns all members of the cluster). - for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)): + if alternative_endpoints: + attempts = 2 * len(alternative_endpoints) + 2 + else: + attempts = 2 * len(self.peers_ips) + 2 + urls = self._get_alternative_patroni_url(alternative_endpoints) + for attempt in Retrying(stop=stop_after_attempt(attempts)): with attempt: - url = self._get_alternative_patroni_url(attempt, alternative_endpoints) cluster_status = requests.get( - f"{url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", + f"{next(urls)}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", verify=self.verify, timeout=API_REQUEST_TIMEOUT, auth=self._patroni_auth, @@ -314,11 +319,12 @@ def get_standby_leader( standby leader pod or unit name. """ # Request info from cluster endpoint (which returns all members of the cluster). - for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)): + attempts = 2 * len(self.peers_ips) + 2 + urls = self._get_alternative_patroni_url() + for attempt in Retrying(stop=stop_after_attempt(attempts)): with attempt: - url = self._get_alternative_patroni_url(attempt) cluster_status = requests.get( - f"{url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", + f"{next(urls)}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", verify=self.verify, timeout=API_REQUEST_TIMEOUT, auth=self._patroni_auth, @@ -338,11 +344,12 @@ def get_sync_standby_names(self) -> list[str]: """Get the list of sync standby unit names.""" sync_standbys = [] # Request info from cluster endpoint (which returns all members of the cluster). - for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)): + attempts = 2 * len(self.peers_ips) + 2 + urls = self._get_alternative_patroni_url() + for attempt in Retrying(stop=stop_after_attempt(attempts)): with attempt: - url = self._get_alternative_patroni_url(attempt) r = requests.get( - f"{url}/cluster", + f"{next(urls)}/cluster", verify=self.verify, auth=self._patroni_auth, timeout=PATRONI_TIMEOUT, @@ -352,33 +359,26 @@ def get_sync_standby_names(self) -> list[str]: sync_standbys.append("/".join(member["name"].rsplit("-", 1))) return sync_standbys - def _get_alternative_patroni_url( - self, attempt: AttemptManager, alternative_endpoints: list[str] | None = None - ) -> str: + def _get_alternative_patroni_url(self, alternative_endpoints: list[str] | None = None) -> str: """Get an alternative REST API URL from another member each time. When the Patroni process is not running in the current unit it's needed to use a URL from another cluster member REST API to do some operations. """ - if alternative_endpoints is not None: - return self._patroni_url.replace( - self.unit_ip, alternative_endpoints[attempt.retry_state.attempt_number - 1] - ) - attempt_number = attempt.retry_state.attempt_number - if attempt_number > 1: - url = self._patroni_url - # Build the URL using http and later using https for each peer. - if (attempt_number - 1) <= len(self.peers_ips): - url = url.replace("https://", "http://") - unit_number = attempt_number - 2 - else: - url = url.replace("http://", "https://") - unit_number = attempt_number - 2 - len(self.peers_ips) - other_unit_ip = list(self.peers_ips)[unit_number] - url = url.replace(self.unit_ip, other_unit_ip) + if not alternative_endpoints: + alternative_endpoints = list(self.peers_ips) + urls = [self._patroni_url] + urls += [ + self._patroni_url.replace(self.unit_ip, endpoint) for endpoint in alternative_endpoints + ] + if self.tls_enabled: + default_schema = "https" + new_schema = "http" else: - url = self._patroni_url - return url + default_schema = "http" + new_schema = "https" + urls += [url.replace(default_schema, new_schema) for url in urls] + yield from urls def are_all_members_ready(self) -> bool: """Check if all members are correctly running Patroni and PostgreSQL. diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index a97ec3eb96..e500a8d099 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -177,8 +177,6 @@ def test_patroni_scrape_config_tls(harness): def test_primary_endpoint(harness): with ( - patch("charm.stop_after_delay", new_callable=PropertyMock) as _stop_after_delay, - patch("charm.wait_fixed", new_callable=PropertyMock) as _wait_fixed, patch( "charm.PostgresqlOperatorCharm._units_ips", new_callable=PropertyMock, @@ -190,12 +188,8 @@ def test_primary_endpoint(harness): _patroni.return_value.get_primary.return_value = sentinel.primary assert harness.charm.primary_endpoint == "1.1.1.1" - # Check needed to ensure a fast charm deployment. - _stop_after_delay.assert_called_once_with(5) - _wait_fixed.assert_called_once_with(3) - _patroni.return_value.get_member_ip.assert_called_once_with(sentinel.primary) - _patroni.return_value.get_primary.assert_called_once_with(alternative_endpoints=None) + _patroni.return_value.get_primary.assert_called_once_with() def test_primary_endpoint_no_peers(harness): diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py index 7dd1e1ddf7..6bcf643f7f 100644 --- a/tests/unit/test_cluster.py +++ b/tests/unit/test_cluster.py @@ -11,8 +11,6 @@ from ops.testing import Harness from pysyncobj.utility import UtilityException from tenacity import ( - AttemptManager, - RetryCallState, RetryError, Retrying, stop_after_delay, @@ -95,49 +93,59 @@ def patroni(harness, peers_ips): def test_get_alternative_patroni_url(peers_ips, patroni): - # Mock tenacity attempt. - retry = Retrying() - retry_state = RetryCallState(retry, None, None, None) - attempt = AttemptManager(retry_state) - # Test the first URL that is returned (it should have the current unit IP). - url = patroni._get_alternative_patroni_url(attempt) + url = next(patroni._get_alternative_patroni_url()) assert url == f"http://{patroni.unit_ip}:8008" - # Test returning the other servers URLs. - for attempt_number in range(attempt.retry_state.attempt_number + 1, len(peers_ips) + 2): - attempt.retry_state.attempt_number = attempt_number - url = patroni._get_alternative_patroni_url(attempt) - assert url.split("http://")[1].split(":8008")[0] in peers_ips + other_urls = list(patroni._get_alternative_patroni_url()) + for ip in list(peers_ips): + assert f"http://{ip}:8008" in other_urls + assert f"https://{ip}:8008" in other_urls + + # Test alternatives + assert [ + f"http://{patroni.unit_ip}:8008", + "http://a:8008", + "http://b:8008", + "http://c:8008", + f"https://{patroni.unit_ip}:8008", + "https://a:8008", + "https://b:8008", + "https://c:8008", + ] == list(patroni._get_alternative_patroni_url(["a", "b", "c"])) def test_get_member_ip(peers_ips, patroni): with ( patch("requests.get", side_effect=mocked_requests_get) as _get, - patch("charm.Patroni._get_alternative_patroni_url") as _get_alternative_patroni_url, + patch("charm.Patroni._patroni_url", new_callable=PropertyMock) as _patroni_url, ): # Test error on trying to get the member IP. - _get_alternative_patroni_url.side_effect = "http://server2" + _patroni_url.return_value = "http://server2" + patroni.unit_ip = "server2" + patroni.peers_ips = [] with pytest.raises(RetryError): patroni.get_member_ip(patroni.member_name) assert False # Test using an alternative Patroni URL. - _get_alternative_patroni_url.side_effect = [ - "http://server3", - "http://server2", - "http://server1", + _patroni_url.return_value = "http://server3" + patroni.unit_ip = "server3" + patroni.peers_ips = [ + "server2", + "server1", ] ip = patroni.get_member_ip(patroni.member_name) assert ip == "1.1.1.1" # Test using the current Patroni URL. - _get_alternative_patroni_url.side_effect = ["http://server1"] + _patroni_url.return_value = "http://server1" + patroni.unit_ip = "server1" + patroni.peers_ips = [] ip = patroni.get_member_ip(patroni.member_name) assert ip == "1.1.1.1" # Test when not having that specific member in the cluster. - _get_alternative_patroni_url.side_effect = ["http://server1"] ip = patroni.get_member_ip("other-member-name") assert ip is None @@ -184,30 +192,36 @@ def test_get_postgresql_version(peers_ips, patroni): def test_get_primary(peers_ips, patroni): with ( patch("requests.get", side_effect=mocked_requests_get) as _get, - patch("charm.Patroni._get_alternative_patroni_url") as _get_alternative_patroni_url, + patch("charm.Patroni._patroni_url", new_callable=PropertyMock) as _patroni_url, ): # Test error on trying to get the member IP. - _get_alternative_patroni_url.side_effect = "http://server2" + _patroni_url.return_value = "http://server2" + patroni.unit_ip = "server2" + patroni.peers_ips = [] with pytest.raises(RetryError): patroni.get_primary(patroni.member_name) assert False # Test using an alternative Patroni URL. - _get_alternative_patroni_url.side_effect = [ - "http://server3", - "http://server2", - "http://server1", + _patroni_url.return_value = "http://server3" + patroni.unit_ip = "server3" + patroni.peers_ips = [ + "server2", + "server1", ] primary = patroni.get_primary() assert primary == "postgresql-0" # Test using the current Patroni URL. - _get_alternative_patroni_url.side_effect = ["http://server1"] + _patroni_url.return_value = "http://server1" + patroni.unit_ip = "server1" + patroni.peers_ips = [] primary = patroni.get_primary() assert primary == "postgresql-0" # Test requesting the primary in the unit name pattern. - _get_alternative_patroni_url.side_effect = ["http://server1"] + patroni.unit_ip = "server1" + patroni.peers_ips = [] primary = patroni.get_primary(unit_name_pattern=True) assert primary == "postgresql/0" @@ -235,6 +249,7 @@ def test_is_replication_healthy(peers_ips, patroni): with ( patch("requests.get") as _get, patch("charm.Patroni.get_primary"), + patch("charm.Patroni.get_member_ip"), patch("cluster.stop_after_delay", return_value=stop_after_delay(0)), ): # Test when replication is healthy.