Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MISC] Suppress oversee users in standby clusters #507

Merged
merged 7 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/relations/async_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool:
filename = f"{POSTGRESQL_DATA_PATH}-{str(datetime.now()).replace(' ', '-').replace(':', '-')}.tar.gz"
subprocess.check_call(f"tar -zcf {filename} {POSTGRESQL_DATA_PATH}".split())
logger.warning("Please review the backup file %s and handle its removal", filename)
self.charm.app_peer_data["suppress-oversee-users"] = "true"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Set a flag when a cluster is made a standby

return True

def get_all_primary_cluster_endpoints(self) -> List[str]:
Expand Down Expand Up @@ -481,7 +482,7 @@ def is_primary_cluster(self) -> bool:
return self.charm.app == self._get_primary_cluster()

def _on_async_relation_broken(self, _) -> None:
if "departing" in self.charm._peers.data[self.charm.unit]:
if not self.charm._peers or "departing" in self.charm._peers.data[self.charm.unit]:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seen it fail in CI.

logger.debug("Early exit on_async_relation_broken: Skipping departing unit.")
return

Expand Down
19 changes: 12 additions & 7 deletions src/relations/postgresql_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ def oversee_users(self) -> None:
if not self.charm.unit.is_leader():
return

delete_user = "suppress-oversee-users" not in self.charm.app_peer_data

Comment on lines +140 to +141
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the cluster is or was a standby, it will stop deleting relation users.

# Retrieve database users.
try:
database_users = {
Expand All @@ -159,13 +161,16 @@ def oversee_users(self) -> None:

# Delete that users that exist in the database but not in the active relations.
for user in database_users - relation_users:
try:
logger.info("Remove relation user: %s", user)
self.charm.set_secret(APP_SCOPE, user, None)
self.charm.set_secret(APP_SCOPE, f"{user}-database", None)
self.charm.postgresql.delete_user(user)
except PostgreSQLDeleteUserError:
logger.error(f"Failed to delete user {user}")
if delete_user:
try:
logger.info("Remove relation user: %s", user)
self.charm.set_secret(APP_SCOPE, user, None)
self.charm.set_secret(APP_SCOPE, f"{user}-database", None)
self.charm.postgresql.delete_user(user)
except PostgreSQLDeleteUserError:
logger.error("Failed to delete user %s", user)
else:
logger.info("Stale relation user detected: %s", user)

def update_endpoints(self, event: DatabaseRequestedEvent = None) -> None:
"""Set the read/write and read-only endpoints."""
Expand Down
48 changes: 47 additions & 1 deletion tests/integration/ha_tests/test_async_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
IDLE_PERIOD = 5
TIMEOUT = 2000

DATA_INTEGRATOR_APP_NAME = "data-integrator"


@contextlib.asynccontextmanager
async def fast_forward(
Expand Down Expand Up @@ -115,6 +117,14 @@ async def test_deploy_async_replication_setup(
num_units=CLUSTER_SIZE,
config={"profile": "testing"},
)
if not await app_name(ops_test, DATA_INTEGRATOR_APP_NAME):
await ops_test.model.deploy(
DATA_INTEGRATOR_APP_NAME,
num_units=1,
channel="latest/edge",
config={"database-name": "testdb"},
)
await ops_test.model.relate(DATABASE_APP_NAME, DATA_INTEGRATOR_APP_NAME)
if not await app_name(ops_test, model=second_model):
charm = await ops_test.build_charm(".")
await second_model.deploy(
Expand All @@ -128,7 +138,7 @@ async def test_deploy_async_replication_setup(
async with ops_test.fast_forward(), fast_forward(second_model):
await gather(
first_model.wait_for_idle(
apps=[DATABASE_APP_NAME, APPLICATION_NAME],
apps=[DATABASE_APP_NAME, APPLICATION_NAME, DATA_INTEGRATOR_APP_NAME],
status="active",
timeout=TIMEOUT,
),
Expand Down Expand Up @@ -218,6 +228,19 @@ async def test_async_replication(
await check_writes(ops_test, extra_model=second_model)


@pytest.mark.group(1)
@markers.juju3
@pytest.mark.abort_on_fail
async def test_get_data_integrator_credentials(
ops_test: OpsTest,
):
unit = ops_test.model.applications[DATA_INTEGRATOR_APP_NAME].units[0]
action = await unit.run_action(action_name="get-credentials")
result = await action.wait()
global data_integrator_credentials
data_integrator_credentials = result.results


Comment on lines +231 to +243
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just gets the credentials from data integrator.

@pytest.mark.group(1)
@markers.juju3
@pytest.mark.abort_on_fail
Expand Down Expand Up @@ -273,6 +296,29 @@ async def test_switchover(
await are_writes_increasing(ops_test, extra_model=second_model)


@pytest.mark.group(1)
@markers.juju3
@pytest.mark.abort_on_fail
async def test_data_integrator_creds_keep_on_working(
ops_test: OpsTest,
second_model: Model,
) -> None:
user = data_integrator_credentials["postgresql"]["username"]
password = data_integrator_credentials["postgresql"]["password"]
database = data_integrator_credentials["postgresql"]["database"]

any_unit = second_model.applications[DATABASE_APP_NAME].units[0].name
primary = await get_primary(ops_test, any_unit, second_model)
address = second_model.units.get(primary).public_address

connstr = f"dbname='{database}' user='{user}' host='{address}' port='5432' password='{password}' connect_timeout=1"
try:
with psycopg2.connect(connstr) as connection:
pass
finally:
connection.close()


@pytest.mark.group(1)
@markers.juju3
@pytest.mark.abort_on_fail
Expand Down
7 changes: 5 additions & 2 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,17 +621,20 @@ async def get_password(ops_test: OpsTest, unit_name: str, username: str = "opera
stop=stop_after_attempt(10),
wait=wait_exponential(multiplier=1, min=2, max=30),
)
async def get_primary(ops_test: OpsTest, unit_name: str) -> str:
async def get_primary(ops_test: OpsTest, unit_name: str, model=None) -> str:
"""Get the primary unit.

Args:
ops_test: ops_test instance.
unit_name: the name of the unit.
model: Model to use.

Returns:
the current primary unit.
"""
action = await ops_test.model.units.get(unit_name).run_action("get-primary")
if not model:
model = ops_test.model
action = await model.units.get(unit_name).run_action("get-primary")
action = await action.wait()
return action.results["primary"]

Expand Down
Loading