From 054c5992a22795f59dd67d7c41777e95c765d62b Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 14:50:32 -0400 Subject: [PATCH 01/27] feat: multi-ext-versios-pgrcron --- nix/ext/pg_cron.nix | 128 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 109 insertions(+), 19 deletions(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 792db7676..7b8c00e72 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -1,31 +1,121 @@ { lib, stdenv, fetchFromGitHub, postgresql }: -stdenv.mkDerivation rec { - pname = "pg_cron"; - version = "1.6.4"; +let + allVersions = { + "1.3.1" = { + rev = "v1.3.1"; + hash = "sha256-rXotNOtQNmA55ErNxGoNSKZ0pP1uxEVlDGITFHuqGG4="; + postPatch = '' + # Add necessary includes + substituteInPlace src/pg_cron.c \ + --replace '#include "postgres.h"' '#include "postgres.h" + #include "commands/async.h" + #include "miscadmin.h"' - buildInputs = [ postgresql ]; + # Update function calls to use PostgreSQL 15 APIs + substituteInPlace src/pg_cron.c \ + --replace 'ProcessCompletedNotifies();' '/* ProcessCompletedNotifies removed */' \ + --replace 'pg_analyze_and_rewrite(parsetree, sql, NULL, 0,NULL);' 'pg_analyze_and_rewrite_fixedparams(parsetree, sql, NULL, 0, NULL);' + ''; + }; + "1.4.2" = { + rev = "v1.4.2"; + hash = "sha256-P0Fd10Q1p+KrExb35G6otHpc6pD61WnMll45H2jkevM="; + }; + "1.6.4" = { + rev = "v1.6.4"; + hash = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; + }; + "1.5.2" = { + rev = "v1.5.2"; + hash = "sha256-+quVWbKJy6wXpL/zwTk5FF7sYwHA7I97WhWmPO/HSZ4="; + }; + }; + + mkPgCron = pgCronVersion: { rev, hash, postPatch ? "" }: stdenv.mkDerivation { + pname = "pg_cron"; + version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; + + buildInputs = [ postgresql ]; + inherit postPatch; + + src = fetchFromGitHub { + owner = "citusdata"; + repo = "pg_cron"; + inherit rev hash; + }; + + buildPhase = '' + make PG_CONFIG=${postgresql}/bin/pg_config + + # Create version-specific SQL file + cp pg_cron.sql pg_cron--${pgCronVersion}.sql + + # Create versioned control file with modified module path + sed -e "/^default_version =/d" \ + -e "s|^module_pathname = .*|module_pathname = '\$libdir/pg_cron'|" \ + pg_cron.control > pg_cron--${pgCronVersion}.control + ''; - src = fetchFromGitHub { - owner = "citusdata"; - repo = pname; - rev = "v${version}"; - hash = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; + installPhase = '' + mkdir -p $out/{lib,share/postgresql/extension} + + # Install versioned library + install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${pgCronVersion}${postgresql.dlSuffix} + + # Install version-specific files + install -Dm644 pg_cron--${pgCronVersion}.sql $out/share/postgresql/extension/ + install -Dm644 pg_cron--${pgCronVersion}.control $out/share/postgresql/extension/ + + # Install upgrade scripts + find . -name 'pg_cron--*--*.sql' -exec install -Dm644 {} $out/share/postgresql/extension/ \; + ''; }; + getVersions = pg: + if lib.versionAtLeast pg.version "17" + then { "1.6.4" = allVersions."1.6.4"; } + else allVersions; + + allVersionsForPg = lib.mapAttrs mkPgCron (getVersions postgresql); + +in +stdenv.mkDerivation { + pname = "pg_cron-all"; + version = "multi"; + + buildInputs = lib.attrValues allVersionsForPg; + + dontUnpack = true; + dontConfigure = true; + dontBuild = true; + installPhase = '' mkdir -p $out/{lib,share/postgresql/extension} - - cp *${postgresql.dlSuffix} $out/lib - cp *.sql $out/share/postgresql/extension - cp *.control $out/share/postgresql/extension + + # Install all versions + for drv in ${lib.concatStringsSep " " (lib.attrValues allVersionsForPg)}; do + ln -sv $drv/lib/* $out/lib/ + cp -v --no-clobber $drv/share/postgresql/extension/* $out/share/postgresql/extension/ || true + done + + # Create default symlinks + latest_control=$(ls -v $out/share/postgresql/extension/pg_cron--*.control | tail -n1) + latest_version=$(basename "$latest_control" | sed -E 's/pg_cron--([0-9.]+).control/\1/') + + # Create main control file with default_version + echo "default_version = '$latest_version'" > $out/share/postgresql/extension/pg_cron.control + cat "$latest_control" >> $out/share/postgresql/extension/pg_cron.control + + # Library symlink + ln -sfnv pg_cron-$latest_version${postgresql.dlSuffix} $out/lib/pg_cron${postgresql.dlSuffix} ''; meta = with lib; { - description = "Run Cron jobs through PostgreSQL"; - homepage = "https://github.com/citusdata/pg_cron"; - changelog = "https://github.com/citusdata/pg_cron/raw/v${version}/CHANGELOG.md"; - platforms = postgresql.meta.platforms; - license = licenses.postgresql; + description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; + homepage = "https://github.com/citusdata/pg_cron"; + maintainers = with maintainers; [ samrose ]; + platforms = postgresql.meta.platforms; + license = licenses.postgresql; }; -} +} \ No newline at end of file From c8aefa81496907b47b22b282a3e8e57cceb048bd Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:33:34 -0400 Subject: [PATCH 02/27] feat: add version to drv and patch instead of postPatch rewrite --- nix/ext/pg_cron-1.3.1-pg15.patch | 31 +++++++++++++++++++++++++++++++ nix/ext/pg_cron.nix | 19 ++++--------------- 2 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 nix/ext/pg_cron-1.3.1-pg15.patch diff --git a/nix/ext/pg_cron-1.3.1-pg15.patch b/nix/ext/pg_cron-1.3.1-pg15.patch new file mode 100644 index 000000000..d3b6cd702 --- /dev/null +++ b/nix/ext/pg_cron-1.3.1-pg15.patch @@ -0,0 +1,31 @@ +diff --git a/src/pg_cron.c b/src/pg_cron.c +index e0ca973..4d51b2c 100644 +--- a/src/pg_cron.c ++++ b/src/pg_cron.c +@@ -14,6 +14,8 @@ + #include + + #include "postgres.h" ++#include "commands/async.h" ++#include "miscadmin.h" + #include "fmgr.h" + + /* these are always necessary for a bgworker */ +@@ -1908,7 +1910,7 @@ CronBackgroundWorker(Datum main_arg) + /* Post-execution cleanup. */ + disable_timeout(STATEMENT_TIMEOUT, false); + CommitTransactionCommand(); +- ProcessCompletedNotifies(); ++ /* ProcessCompletedNotifies removed */ + pgstat_report_activity(STATE_IDLE, command); + pgstat_report_stat(true); + +@@ -2025,7 +2027,7 @@ ExecuteSqlString(const char *sql) + */ + oldcontext = MemoryContextSwitchTo(parsecontext); + #if PG_VERSION_NUM >= 100000 +- querytree_list = pg_analyze_and_rewrite(parsetree, sql, NULL, 0,NULL); ++ querytree_list = pg_analyze_and_rewrite_fixedparams(parsetree, sql, NULL, 0, NULL); + #else + querytree_list = pg_analyze_and_rewrite(parsetree, sql, NULL, 0); + #endif diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 7b8c00e72..a59cb5ae5 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -5,18 +5,7 @@ let "1.3.1" = { rev = "v1.3.1"; hash = "sha256-rXotNOtQNmA55ErNxGoNSKZ0pP1uxEVlDGITFHuqGG4="; - postPatch = '' - # Add necessary includes - substituteInPlace src/pg_cron.c \ - --replace '#include "postgres.h"' '#include "postgres.h" - #include "commands/async.h" - #include "miscadmin.h"' - - # Update function calls to use PostgreSQL 15 APIs - substituteInPlace src/pg_cron.c \ - --replace 'ProcessCompletedNotifies();' '/* ProcessCompletedNotifies removed */' \ - --replace 'pg_analyze_and_rewrite(parsetree, sql, NULL, 0,NULL);' 'pg_analyze_and_rewrite_fixedparams(parsetree, sql, NULL, 0, NULL);' - ''; + patches = [ ./pg_cron-1.3.1-pg15.patch ]; }; "1.4.2" = { rev = "v1.4.2"; @@ -32,12 +21,12 @@ let }; }; - mkPgCron = pgCronVersion: { rev, hash, postPatch ? "" }: stdenv.mkDerivation { + mkPgCron = pgCronVersion: { rev, hash, patches ? [] }: stdenv.mkDerivation { pname = "pg_cron"; version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; buildInputs = [ postgresql ]; - inherit postPatch; + inherit patches; src = fetchFromGitHub { owner = "citusdata"; @@ -82,7 +71,7 @@ let in stdenv.mkDerivation { pname = "pg_cron-all"; - version = "multi"; + version = "multi-001"; #increment this if you change this package in any way buildInputs = lib.attrValues allVersionsForPg; From 45d5fceb8fff9e6d384cd4cac2b0a763f333c237 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:34:48 -0400 Subject: [PATCH 03/27] chore: newline --- nix/ext/pg_cron.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index a59cb5ae5..a134dfb5e 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -107,4 +107,4 @@ stdenv.mkDerivation { platforms = postgresql.meta.platforms; license = licenses.postgresql; }; -} \ No newline at end of file +} From c29cb2d2f6cf10775b7f78b5e6a3a39a7467d687 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:41:10 -0400 Subject: [PATCH 04/27] feat: auto create multi version --- nix/ext/pg_cron.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index a134dfb5e..b5e20732b 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -21,6 +21,9 @@ let }; }; + # Simple version string that concatenates all versions with dashes + versionString = "multi-" + lib.concatStringsSep "-" (map (v: lib.replaceStrings ["."] ["-"] v) (lib.attrNames allVersions)); + mkPgCron = pgCronVersion: { rev, hash, patches ? [] }: stdenv.mkDerivation { pname = "pg_cron"; version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; @@ -71,7 +74,7 @@ let in stdenv.mkDerivation { pname = "pg_cron-all"; - version = "multi-001"; #increment this if you change this package in any way + version = versionString; buildInputs = lib.attrValues allVersionsForPg; From 8d64f0104c1adb7971c2fa8b89ae6475828fc22b Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:42:27 -0400 Subject: [PATCH 05/27] chore: do not re-intro maintainers here not needed --- nix/ext/pg_cron.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index b5e20732b..d42cff367 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -106,7 +106,6 @@ stdenv.mkDerivation { meta = with lib; { description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; homepage = "https://github.com/citusdata/pg_cron"; - maintainers = with maintainers; [ samrose ]; platforms = postgresql.meta.platforms; license = licenses.postgresql; }; From 684420f6846e1f6a4fff1994a13cbf85a3e788f5 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 21:21:04 -0400 Subject: [PATCH 06/27] chore: bump version --- ansible/vars.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/vars.yml b/ansible/vars.yml index e57848646..3efdab2d9 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,9 +9,9 @@ postgres_major: # Full version strings for each major version postgres_release: - postgresorioledb-17: "17.0.1.072-orioledb" - postgres17: "17.4.1.022" - postgres15: "15.8.1.079" + postgresorioledb-17: "17.0.1.067-orioledb-pgcron-1" + postgres17: "17.4.1.017-pgcron-1" + postgres15: "15.8.1.074-pgcron-1" # Non Postgres Extensions pgbouncer_release: "1.19.0" From 7ea04ee3695b27038b1de7e5e2181247b376a82a Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 11:22:15 -0400 Subject: [PATCH 07/27] feat: pg_cron version switcher in pkg and prestart --- ansible/files/postgres_prestart.sh.j2 | 55 +++++++++++++++++++++++++ ansible/tasks/stage2-setup-postgres.yml | 6 +++ ansible/vars.yml | 6 +-- nix/ext/pg_cron.nix | 32 +++++++++++++- 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 3ffe54c85..a045f298f 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -26,7 +26,62 @@ update_orioledb_buffers() { fi } +check_extensions_file() { + local extensions_file="/root/pg_extensions.json" + if [ ! -f "$extensions_file" ]; then + echo "extensions: No extensions file found, skipping extensions versions check" + return 1 + fi + return 0 +} + +get_pg_cron_version() { + if ! check_extensions_file; then + return + fi + + local version + version=$(sudo -u postgres /home/postgres/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") + if [ -z "$version" ]; then + echo "pg_cron: Not specified in extensions file" + return + fi + + if ! [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "pg_cron: Invalid version format: $version" + return + fi + + echo "$version" +} + +switch_pg_cron_version() { + local version="$1" + local switch_script="/home/postgres/.nix-profile/bin/switch_pg_cron_version" + + if [ ! -x "$switch_script" ]; then + echo "pg_cron: No version switch script available" + return + fi + + echo "pg_cron: Switching to version $version" + sudo -u postgres "$switch_script" "$version" + echo "pg_cron: Version switch completed" +} + +handle_pg_cron_version() { + local version + version=$(get_pg_cron_version) + if [ -n "$version" ]; then + switch_pg_cron_version "$version" + fi +} + main() { + # 1. pg_cron version handling + handle_pg_cron_version + + # 2. orioledb handling local has_orioledb=$(check_orioledb_enabled) if [ "$has_orioledb" -lt 1 ]; then return 0 diff --git a/ansible/tasks/stage2-setup-postgres.yml b/ansible/tasks/stage2-setup-postgres.yml index 99b89d6d9..ed0d667ed 100644 --- a/ansible/tasks/stage2-setup-postgres.yml +++ b/ansible/tasks/stage2-setup-postgres.yml @@ -90,6 +90,12 @@ shell: | sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#{{postgresql_version}}_src" when: stage2_nix + +- name: Install jq from nix binary cache + become: yes + shell: | + sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#jq" + when: stage2_nix - name: Set ownership and permissions for /etc/ssl/private become: yes diff --git a/ansible/vars.yml b/ansible/vars.yml index 3efdab2d9..29256a9ce 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,9 +9,9 @@ postgres_major: # Full version strings for each major version postgres_release: - postgresorioledb-17: "17.0.1.067-orioledb-pgcron-1" - postgres17: "17.4.1.017-pgcron-1" - postgres15: "15.8.1.074-pgcron-1" + postgresorioledb-17: "17.0.1.067-orioledb-pgcron-2" + postgres17: "17.4.1.017-pgcron-2" + postgres15: "15.8.1.074-pgcron-2" # Non Postgres Extensions pgbouncer_release: "1.19.0" diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index d42cff367..3f438931e 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -50,7 +50,7 @@ let ''; installPhase = '' - mkdir -p $out/{lib,share/postgresql/extension} + mkdir -p $out/{lib,share/postgresql/extension,bin} # Install versioned library install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${pgCronVersion}${postgresql.dlSuffix} @@ -83,7 +83,7 @@ stdenv.mkDerivation { dontBuild = true; installPhase = '' - mkdir -p $out/{lib,share/postgresql/extension} + mkdir -p $out/{lib,share/postgresql/extension,bin} # Install all versions for drv in ${lib.concatStringsSep " " (lib.attrValues allVersionsForPg)}; do @@ -101,6 +101,34 @@ stdenv.mkDerivation { # Library symlink ln -sfnv pg_cron-$latest_version${postgresql.dlSuffix} $out/lib/pg_cron${postgresql.dlSuffix} + + # Create version switcher script + cat > $out/bin/switch_pg_cron_version <<'EOF' + #!/bin/sh + set -e + + if [ $# -ne 1 ]; then + echo "Usage: $0 " + echo "Example: $0 1.4.2" + exit 1 + fi + + VERSION=$1 + LIB_DIR=$(dirname "$0")/../lib + + # Check if version exists + if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then + echo "Error: Version $VERSION not found" + exit 1 + fi + + # Update library symlink + ln -sfnv "pg_cron-$VERSION${postgresql.dlSuffix}" "$LIB_DIR/pg_cron${postgresql.dlSuffix}" + + echo "Successfully switched pg_cron to version $VERSION" + EOF + + chmod +x $out/bin/switch_pg_cron_version ''; meta = with lib; { From 6248e883b03b425e13b7e9e308bd84a55417e186 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 12:42:21 -0400 Subject: [PATCH 08/27] test: a tmp test for this branch to test older versions --- .github/workflows/testinfra-nix.yml | 16 ++++++++++++++++ testinfra/test_ami_nix.py | 24 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/.github/workflows/testinfra-nix.yml b/.github/workflows/testinfra-nix.yml index 2b07e716f..9baa2e200 100644 --- a/.github/workflows/testinfra-nix.yml +++ b/.github/workflows/testinfra-nix.yml @@ -94,6 +94,22 @@ jobs: sudo rm -rf /tmp/* # Clean temporary files df -h / # Display available space + - name: Patch stage2-nix-psql.pkr.hcl to create pg_extensions.json + run: | + cat >> stage2-nix-psql.pkr.hcl << 'EOF' + # Add provisioner to create pg_extensions.json + provisioner "shell" { + inline = [ + "echo '{\"pg_cron\":\"1.3.1\"}' | sudo tee /root/pg_extensions.json", + "sudo chmod 644 /root/pg_extensions.json", + "echo 'Created pg_extensions.json with content:' && sudo cat /root/pg_extensions.json" + ] + } + EOF + # Display the modified file to verify + echo "Modified stage2-nix-psql.pkr.hcl:" + tail -n 10 stage2-nix-psql.pkr.hcl + - name: Build AMI stage 2 run: | packer init stage2-nix-psql.pkr.hcl diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 4d354fac3..e6c1b0da5 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -476,3 +476,27 @@ def test_postgrest_ending_empty_key_query_parameter_is_removed(host): }, ) assert res.ok + + +def test_pg_cron_extension(host): + # Connect as supabase_admin and create the extension + with host.sudo("postgres"): + result = host.run('psql -U supabase_admin -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') + assert result.rc == 0, f"Failed to create pg_cron extension: {result.stderr}" + + # Create test table + result = host.run('psql -U supabase_admin -d postgres -c "CREATE TABLE cron_test_log (id SERIAL PRIMARY KEY, message TEXT, log_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP);"') + assert result.rc == 0, f"Failed to create test table: {result.stderr}" + + # Schedule a job + result = host.run('psql -U supabase_admin -d postgres -c "SELECT cron.schedule(\'* * * * *\', \'INSERT INTO cron_test_log (message) VALUES (\\\'Hello from pg_cron!\\\');\');"') + assert result.rc == 0, f"Failed to schedule job: {result.stderr}" + assert "1" in result.stdout, "Expected schedule ID 1" + + # Verify job is scheduled + result = host.run('psql -U supabase_admin -d postgres -c "SELECT * FROM cron.job;"') + assert result.rc == 0, f"Failed to query cron.job: {result.stderr}" + assert "* * * * *" in result.stdout, "Expected cron schedule pattern" + assert "INSERT INTO cron_test_log" in result.stdout, "Expected cron command" + assert "postgres" in result.stdout, "Expected postgres username" + assert "postgres" in result.stdout, "Expected postgres database" From e26afc591cca0b96b2311346026ad59be3ac2200 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 13:25:34 -0400 Subject: [PATCH 09/27] test: temp test for ext handling --- .github/workflows/testinfra-nix.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/testinfra-nix.yml b/.github/workflows/testinfra-nix.yml index 9baa2e200..55f3ff3fa 100644 --- a/.github/workflows/testinfra-nix.yml +++ b/.github/workflows/testinfra-nix.yml @@ -96,7 +96,12 @@ jobs: - name: Patch stage2-nix-psql.pkr.hcl to create pg_extensions.json run: | - cat >> stage2-nix-psql.pkr.hcl << 'EOF' + # Get the line number of the last closing brace + LAST_BRACE_LINE=$(grep -n '}' stage2-nix-psql.pkr.hcl | tail -n 1 | cut -d: -f1) + + # Create a temporary file with the new content + head -n $((LAST_BRACE_LINE-1)) stage2-nix-psql.pkr.hcl > temp.pkr.hcl + cat >> temp.pkr.hcl << 'EOF' # Add provisioner to create pg_extensions.json provisioner "shell" { inline = [ @@ -105,7 +110,12 @@ jobs: "echo 'Created pg_extensions.json with content:' && sudo cat /root/pg_extensions.json" ] } + } EOF + + # Replace the original file + mv temp.pkr.hcl stage2-nix-psql.pkr.hcl + # Display the modified file to verify echo "Modified stage2-nix-psql.pkr.hcl:" tail -n 10 stage2-nix-psql.pkr.hcl From de12b8bf63c66b5da79859322e9b8ae778c07d01 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 13:37:00 -0400 Subject: [PATCH 10/27] test: instead of patch, add prior to start of machine in testinfra --- .github/workflows/testinfra-nix.yml | 26 -------------------------- testinfra/test_ami_nix.py | 1 + 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/.github/workflows/testinfra-nix.yml b/.github/workflows/testinfra-nix.yml index 55f3ff3fa..2b07e716f 100644 --- a/.github/workflows/testinfra-nix.yml +++ b/.github/workflows/testinfra-nix.yml @@ -94,32 +94,6 @@ jobs: sudo rm -rf /tmp/* # Clean temporary files df -h / # Display available space - - name: Patch stage2-nix-psql.pkr.hcl to create pg_extensions.json - run: | - # Get the line number of the last closing brace - LAST_BRACE_LINE=$(grep -n '}' stage2-nix-psql.pkr.hcl | tail -n 1 | cut -d: -f1) - - # Create a temporary file with the new content - head -n $((LAST_BRACE_LINE-1)) stage2-nix-psql.pkr.hcl > temp.pkr.hcl - cat >> temp.pkr.hcl << 'EOF' - # Add provisioner to create pg_extensions.json - provisioner "shell" { - inline = [ - "echo '{\"pg_cron\":\"1.3.1\"}' | sudo tee /root/pg_extensions.json", - "sudo chmod 644 /root/pg_extensions.json", - "echo 'Created pg_extensions.json with content:' && sudo cat /root/pg_extensions.json" - ] - } - } - EOF - - # Replace the original file - mv temp.pkr.hcl stage2-nix-psql.pkr.hcl - - # Display the modified file to verify - echo "Modified stage2-nix-psql.pkr.hcl:" - tail -n 10 stage2-nix-psql.pkr.hcl - - name: Build AMI stage 2 run: | packer init stage2-nix-psql.pkr.hcl diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index e6c1b0da5..98ea5e5ae 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -226,6 +226,7 @@ def gzip_then_base64_encode(s: str) -> str: - {{path: /etc/gotrue.env, content: {gzip_then_base64_encode(gotrue_env_content)}, permissions: '0664', encoding: gz+b64}} - {{path: /etc/wal-g/config.json, content: {gzip_then_base64_encode(walg_config_json_content)}, permissions: '0664', owner: 'wal-g:wal-g', encoding: gz+b64}} - {{path: /tmp/init.json, content: {gzip_then_base64_encode(init_json_content)}, permissions: '0600', encoding: gz+b64}} + - {{path: /root/pg_extensions.json, content: {gzip_then_base64_encode('{"pg_cron":"1.3.1"}')}, permissions: '0644', encoding: gz+b64}} runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' From e7b7245a991d95ef5d40e11ce966c0ea2fbf1115 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 14:10:16 -0400 Subject: [PATCH 11/27] test: only run on pg 15 for this test, as 1.3.1 limited to 15 --- testinfra/test_ami_nix.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 98ea5e5ae..dd5f54a0d 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -226,10 +226,10 @@ def gzip_then_base64_encode(s: str) -> str: - {{path: /etc/gotrue.env, content: {gzip_then_base64_encode(gotrue_env_content)}, permissions: '0664', encoding: gz+b64}} - {{path: /etc/wal-g/config.json, content: {gzip_then_base64_encode(walg_config_json_content)}, permissions: '0664', owner: 'wal-g:wal-g', encoding: gz+b64}} - {{path: /tmp/init.json, content: {gzip_then_base64_encode(init_json_content)}, permissions: '0600', encoding: gz+b64}} - - {{path: /root/pg_extensions.json, content: {gzip_then_base64_encode('{"pg_cron":"1.3.1"}')}, permissions: '0644', encoding: gz+b64}} runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' + - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{"pg_cron":"1.3.1"}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' - 'bash init.sh "staging"' - 'rm -rf /tmp/*' """, @@ -480,6 +480,11 @@ def test_postgrest_ending_empty_key_query_parameter_is_removed(host): def test_pg_cron_extension(host): + # Only run this test for PostgreSQL 15 + postgres_version = os.environ.get("POSTGRES_MAJOR_VERSION") + if postgres_version != "15": + pytest.skip(f"Skipping pg_cron test for PostgreSQL version {postgres_version}") + # Connect as supabase_admin and create the extension with host.sudo("postgres"): result = host.run('psql -U supabase_admin -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') From 5af1724283e6a0e9b7244bc726e6838ad4d7a8c8 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 14:46:36 -0400 Subject: [PATCH 12/27] feat: use jq from nixpkgs --- ansible/tasks/stage2-setup-postgres.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/tasks/stage2-setup-postgres.yml b/ansible/tasks/stage2-setup-postgres.yml index ed0d667ed..8b67eabe5 100644 --- a/ansible/tasks/stage2-setup-postgres.yml +++ b/ansible/tasks/stage2-setup-postgres.yml @@ -94,7 +94,7 @@ - name: Install jq from nix binary cache become: yes shell: | - sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#jq" + sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install nixpkgs#jq" when: stage2_nix - name: Set ownership and permissions for /etc/ssl/private From fb3fee281d9d2cdd84ac4af569cd33cbaa8a8706 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 16:21:13 -0400 Subject: [PATCH 13/27] test: handle braces properly --- testinfra/test_ami_nix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index dd5f54a0d..a3d489224 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -229,7 +229,7 @@ def gzip_then_base64_encode(s: str) -> str: runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' - - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{"pg_cron":"1.3.1"}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' + - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{{"pg_cron":"1.3.1"}}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' - 'bash init.sh "staging"' - 'rm -rf /tmp/*' """, From 67a80761c1106e351edf8d84f93969b2ab159a89 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 08:33:33 -0400 Subject: [PATCH 14/27] test: propagate errors from any failure --- testinfra/test_ami_nix.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index a3d489224..9044e64c5 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -325,11 +325,25 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: cmd = check(host) if cmd.failed is True: logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + + # For PostgreSQL, also check the logs and systemd status + if service == "postgres": + logger.error("PostgreSQL logs:") + host.run("sudo cat /var/log/postgresql/postgresql-*.log") + logger.error("PostgreSQL systemd status:") + host.run("sudo systemctl status postgresql") + logger.error("PostgreSQL journal logs:") + host.run("sudo journalctl -u postgresql --no-pager") + return False - except Exception: + except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." ) + logger.error(f"Error details: {str(e)}") host = get_ssh_connection(instance_ip, ssh_identity_file) return False From 2bd7b6d95d5d8bfa60492e7ff6c13bc6938db257 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 09:52:36 -0400 Subject: [PATCH 15/27] test: more logging for healthcheck --- testinfra/test_ami_nix.py | 72 ++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 9044e64c5..5a37eff83 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -296,7 +296,14 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: health_checks = [ ( "postgres", - lambda h: h.run("sudo -u postgres /usr/bin/pg_isready -U postgres"), + lambda h: ( + # First check if PostgreSQL is running + h.run("sudo systemctl is-active postgresql"), + # Then check if the socket directory exists and has correct permissions + h.run("sudo ls -la /run/postgresql"), + # Then try pg_isready + h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") + ), ), ( "adminapi", @@ -322,23 +329,56 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: for service, check in health_checks: try: - cmd = check(host) - if cmd.failed is True: - logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") + if service == "postgres": + # For PostgreSQL, we need to check multiple things + systemd_status, socket_check, pg_isready = check(host) + + if systemd_status.failed: + logger.error("PostgreSQL systemd service is not active") + logger.error(f"systemd status: {systemd_status.stdout}") + logger.error(f"systemd error: {systemd_status.stderr}") + + # Check init script logs + logger.error("Init script logs:") + host.run("sudo journalctl -u cloud-init --no-pager") + + # Check cloud-init logs + logger.error("Cloud-init logs:") + host.run("sudo cat /var/log/cloud-init-output.log") + + # Check if init script exists and its contents + logger.error("Init script status:") + host.run("ls -la /tmp/init.sh") + host.run("cat /tmp/init.sh") + + if socket_check.failed: + logger.error("PostgreSQL socket directory check failed") + logger.error(f"socket check: {socket_check.stdout}") + logger.error(f"socket error: {socket_check.stderr}") + + if pg_isready.failed: + logger.error("pg_isready check failed") + logger.error(f"pg_isready output: {pg_isready.stdout}") + logger.error(f"pg_isready error: {pg_isready.stderr}") - # For PostgreSQL, also check the logs and systemd status - if service == "postgres": - logger.error("PostgreSQL logs:") - host.run("sudo cat /var/log/postgresql/postgresql-*.log") - logger.error("PostgreSQL systemd status:") - host.run("sudo systemctl status postgresql") - logger.error("PostgreSQL journal logs:") - host.run("sudo journalctl -u postgresql --no-pager") + # Check PostgreSQL logs for startup issues + logger.error("PostgreSQL logs:") + host.run("sudo cat /var/log/postgresql/postgresql-*.log") + logger.error("PostgreSQL systemd status:") + host.run("sudo systemctl status postgresql") + logger.error("PostgreSQL journal logs:") + host.run("sudo journalctl -u postgresql --no-pager") - return False + if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): + return False + else: + cmd = check(host) + if cmd.failed is True: + logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + return False except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." From 4535b50e68c50e0613e511fe48ec04ccccc165d1 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 10:32:02 -0400 Subject: [PATCH 16/27] chore: bump version --- ansible/vars.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/vars.yml b/ansible/vars.yml index 29256a9ce..1af539d07 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,9 +9,9 @@ postgres_major: # Full version strings for each major version postgres_release: - postgresorioledb-17: "17.0.1.067-orioledb-pgcron-2" - postgres17: "17.4.1.017-pgcron-2" - postgres15: "15.8.1.074-pgcron-2" + postgresorioledb-17: "17.0.1.067-orioledb-pgcron-3" + postgres17: "17.4.1.017-pgcron-3" + postgres15: "15.8.1.074-pgcron-3" # Non Postgres Extensions pgbouncer_release: "1.19.0" From 66515f848c088c9bff677712ed3cca2e5bb7d06c Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 11:33:48 -0400 Subject: [PATCH 17/27] test: adding even more logging --- testinfra/test_ami_nix.py | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 5a37eff83..7befb11c8 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -338,6 +338,10 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") + # Get detailed systemd status + logger.error("Detailed systemd status:") + host.run("sudo systemctl status postgresql -l --no-pager") + # Check init script logs logger.error("Init script logs:") host.run("sudo journalctl -u cloud-init --no-pager") @@ -350,6 +354,44 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: logger.error("Init script status:") host.run("ls -la /tmp/init.sh") host.run("cat /tmp/init.sh") + + # Check PostgreSQL configuration + logger.error("PostgreSQL configuration:") + host.run("sudo cat /etc/postgresql/*/main/postgresql.conf") + host.run("sudo cat /etc/postgresql/*/main/pg_hba.conf") + + # Check PostgreSQL data directory permissions + logger.error("PostgreSQL data directory permissions:") + host.run("sudo ls -la /var/lib/postgresql/*/main/") + + # Check PostgreSQL startup logs + logger.error("PostgreSQL startup logs:") + host.run("sudo cat /var/log/postgresql/postgresql-*.log") + + # Check systemd journal for PostgreSQL + logger.error("Systemd journal for PostgreSQL:") + host.run("sudo journalctl -u postgresql -n 100 --no-pager") + + # Check for any PostgreSQL-related errors in system logs + logger.error("System logs with PostgreSQL errors:") + host.run("sudo journalctl | grep -i postgres | tail -n 100") + + # Check for any disk space issues + logger.error("Disk space information:") + host.run("df -h") + host.run("sudo du -sh /var/lib/postgresql/*") + + # Check for any memory issues + logger.error("Memory information:") + host.run("free -h") + + # Check for any process conflicts + logger.error("Running processes:") + host.run("ps aux | grep postgres") + + # Check for any port conflicts + logger.error("Port usage:") + host.run("sudo netstat -tulpn | grep 5432") if socket_check.failed: logger.error("PostgreSQL socket directory check failed") From 7c00cd2cfd12118b9faab3027a48ca1794a2be27 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 14:49:27 -0400 Subject: [PATCH 18/27] test: extend logging more to see what happens when pg starts --- testinfra/test_ami_nix.py | 148 ++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 71 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 7befb11c8..c57acedae 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -338,89 +338,95 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") - # Get detailed systemd status - logger.error("Detailed systemd status:") - host.run("sudo systemctl status postgresql -l --no-pager") + # Check systemd service unit file + logger.error("PostgreSQL systemd service unit file:") + result = host.run("sudo systemctl cat postgresql") + logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}") - # Check init script logs - logger.error("Init script logs:") - host.run("sudo journalctl -u cloud-init --no-pager") + # Check systemd service environment + logger.error("PostgreSQL systemd service environment:") + result = host.run("sudo systemctl show postgresql") + logger.error(f"service environment:\n{result.stdout}\n{result.stderr}") - # Check cloud-init logs - logger.error("Cloud-init logs:") - host.run("sudo cat /var/log/cloud-init-output.log") + # Check systemd service dependencies + logger.error("PostgreSQL systemd service dependencies:") + result = host.run("sudo systemctl list-dependencies postgresql") + logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}") - # Check if init script exists and its contents - logger.error("Init script status:") - host.run("ls -la /tmp/init.sh") - host.run("cat /tmp/init.sh") + # Check if service is enabled + logger.error("PostgreSQL service enabled status:") + result = host.run("sudo systemctl is-enabled postgresql") + logger.error(f"service enabled status:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL configuration - logger.error("PostgreSQL configuration:") - host.run("sudo cat /etc/postgresql/*/main/postgresql.conf") - host.run("sudo cat /etc/postgresql/*/main/pg_hba.conf") + # Check systemd journal for service execution logs + logger.error("Systemd journal entries for PostgreSQL service execution:") + result = host.run("sudo journalctl -u postgresql -n 100 --no-pager") + logger.error(f"systemd journal:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL data directory permissions - logger.error("PostgreSQL data directory permissions:") - host.run("sudo ls -la /var/lib/postgresql/*/main/") + # Check systemd journal specifically for ExecStartPre and ExecStart + logger.error("Systemd journal entries for ExecStartPre and ExecStart:") + result = host.run("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'") + logger.error(f"execution logs:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL startup logs - logger.error("PostgreSQL startup logs:") - host.run("sudo cat /var/log/postgresql/postgresql-*.log") + # Check systemd journal for any errors + logger.error("Systemd journal entries with error level:") + result = host.run("sudo journalctl -u postgresql -n 100 --no-pager -p err") + logger.error(f"error logs:\n{result.stdout}\n{result.stderr}") - # Check systemd journal for PostgreSQL - logger.error("Systemd journal for PostgreSQL:") - host.run("sudo journalctl -u postgresql -n 100 --no-pager") + # Check pre-start script output + logger.error("Checking pre-start script output:") + result = host.run("sudo -u postgres /usr/local/bin/postgres_prestart.sh") + logger.error(f"pre-start script output:\n{result.stdout}\n{result.stderr}") - # Check for any PostgreSQL-related errors in system logs - logger.error("System logs with PostgreSQL errors:") - host.run("sudo journalctl | grep -i postgres | tail -n 100") + # Check PostgreSQL logs directory + logger.error("Checking PostgreSQL logs directory:") + result = host.run("sudo ls -la /var/log/postgresql/") + logger.error(f"log directory contents:\n{result.stdout}\n{result.stderr}") - # Check for any disk space issues - logger.error("Disk space information:") - host.run("df -h") - host.run("sudo du -sh /var/lib/postgresql/*") + # Check any existing PostgreSQL logs + logger.error("Checking existing PostgreSQL logs:") + result = host.run("sudo cat /var/log/postgresql/*.log") + logger.error(f"postgresql logs:\n{result.stdout}\n{result.stderr}") - # Check for any memory issues - logger.error("Memory information:") - host.run("free -h") + # Try starting PostgreSQL directly with pg_ctl and capture output + logger.error("Attempting to start PostgreSQL directly with pg_ctl:") + startup_log = "/tmp/postgres-start.log" + result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}") + logger.error(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}") - # Check for any process conflicts - logger.error("Running processes:") - host.run("ps aux | grep postgres") + # Check the startup log + logger.error("PostgreSQL startup log:") + result = host.run(f"sudo cat {startup_log}") + logger.error(f"startup log contents:\n{result.stdout}\n{result.stderr}") - # Check for any port conflicts - logger.error("Port usage:") - host.run("sudo netstat -tulpn | grep 5432") - - if socket_check.failed: - logger.error("PostgreSQL socket directory check failed") - logger.error(f"socket check: {socket_check.stdout}") - logger.error(f"socket error: {socket_check.stderr}") - - if pg_isready.failed: - logger.error("pg_isready check failed") - logger.error(f"pg_isready output: {pg_isready.stdout}") - logger.error(f"pg_isready error: {pg_isready.stderr}") - - # Check PostgreSQL logs for startup issues - logger.error("PostgreSQL logs:") - host.run("sudo cat /var/log/postgresql/postgresql-*.log") - logger.error("PostgreSQL systemd status:") - host.run("sudo systemctl status postgresql") - logger.error("PostgreSQL journal logs:") - host.run("sudo journalctl -u postgresql --no-pager") - - if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): - return False - else: - cmd = check(host) - if cmd.failed is True: - logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") - return False + # Clean up the startup log + result = host.run(f"sudo rm -f {startup_log}") + + # Check PostgreSQL configuration + logger.error("PostgreSQL configuration:") + result = host.run("sudo cat /etc/postgresql/postgresql.conf") + logger.error(f"postgresql.conf:\n{result.stdout}\n{result.stderr}") + + # Check PostgreSQL authentication configuration + logger.error("PostgreSQL authentication configuration:") + result = host.run("sudo cat /etc/postgresql/pg_hba.conf") + logger.error(f"pg_hba.conf:\n{result.stdout}\n{result.stderr}") + + # Check PostgreSQL environment + logger.error("PostgreSQL environment:") + result = host.run("sudo -u postgres env | grep POSTGRES") + logger.error(f"postgres environment:\n{result.stdout}\n{result.stderr}") + + if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): + return False + else: + cmd = check(host) + if cmd.failed is True: + logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + return False except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." From 7990c3cae2e0268ef7240d8a12250390af430877 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 13:12:08 -0400 Subject: [PATCH 19/27] test: handle lib extension names per system --- nix/ext/pg_cron.nix | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 3f438931e..25121fb8c 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -116,14 +116,21 @@ stdenv.mkDerivation { VERSION=$1 LIB_DIR=$(dirname "$0")/../lib + # Use platform-specific extension + if [ "$(uname)" = "Darwin" ]; then + EXT=".dylib" + else + EXT=".so" + fi + # Check if version exists - if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then + if [ ! -f "$LIB_DIR/pg_cron-$VERSION$EXT" ]; then echo "Error: Version $VERSION not found" exit 1 fi # Update library symlink - ln -sfnv "pg_cron-$VERSION${postgresql.dlSuffix}" "$LIB_DIR/pg_cron${postgresql.dlSuffix}" + ln -sfnv "pg_cron-$VERSION$EXT" "$LIB_DIR/pg_cron$EXT" echo "Successfully switched pg_cron to version $VERSION" EOF From 90d79b4ddc6e1236843a7dd19bcf7fd20064dbcd Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 13:25:28 -0400 Subject: [PATCH 20/27] chore: bump versions --- ansible/vars.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/vars.yml b/ansible/vars.yml index 1af539d07..bce0fcd12 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,9 +9,9 @@ postgres_major: # Full version strings for each major version postgres_release: - postgresorioledb-17: "17.0.1.067-orioledb-pgcron-3" - postgres17: "17.4.1.017-pgcron-3" - postgres15: "15.8.1.074-pgcron-3" + postgresorioledb-17: "17.0.1.067-orioledb-pgcron-4" + postgres17: "17.4.1.022-pgcron-4" + postgres15: "15.8.1.079-pgcron-4" # Non Postgres Extensions pgbouncer_release: "1.19.0" From 1f4f065278192aa7a077ffce5b4aa2e358ad41ac Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 14:45:33 -0400 Subject: [PATCH 21/27] test: more debugging --- testinfra/test_ami_nix.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index c57acedae..d73f6b5e4 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -302,7 +302,13 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: # Then check if the socket directory exists and has correct permissions h.run("sudo ls -la /run/postgresql"), # Then try pg_isready - h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") + h.run("sudo -u postgres /usr/bin/pg_isready -U postgres"), + # Check Nix profile setup + h.run("echo 'Check Nix profile setup'"), + h.run("sudo -u postgres ls -la /home/postgres/.nix-profile"), + h.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin"), + h.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version"), + h.run("sudo -u postgres cat /home/postgres/.nix-profile/bin/switch_pg_cron_version") ), ), ( From 4e5c3f4e1959d6c964d9f35d2c92bc4eadb62beb Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 16:00:19 -0400 Subject: [PATCH 22/27] test: move logging here --- testinfra/test_ami_nix.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index d73f6b5e4..d26acb3d6 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -302,13 +302,7 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: # Then check if the socket directory exists and has correct permissions h.run("sudo ls -la /run/postgresql"), # Then try pg_isready - h.run("sudo -u postgres /usr/bin/pg_isready -U postgres"), - # Check Nix profile setup - h.run("echo 'Check Nix profile setup'"), - h.run("sudo -u postgres ls -la /home/postgres/.nix-profile"), - h.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin"), - h.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version"), - h.run("sudo -u postgres cat /home/postgres/.nix-profile/bin/switch_pg_cron_version") + h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") ), ), ( @@ -339,6 +333,20 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: # For PostgreSQL, we need to check multiple things systemd_status, socket_check, pg_isready = check(host) + # Log Nix profile setup checks + logger.info("Checking Nix profile setup:") + nix_profile_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile") + logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") + + nix_bin_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin") + logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") + + nix_script_result = host.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") + logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}") + + nix_script_output = host.run("sudo -u postgres /home/postgres/.nix-profile/bin/switch_pg_cron_version") + logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}") + if systemd_status.failed: logger.error("PostgreSQL systemd service is not active") logger.error(f"systemd status: {systemd_status.stdout}") From 4ab837e634c34e82076be67883dfbb2e69644dc3 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 17:38:55 -0400 Subject: [PATCH 23/27] test: try direct --- testinfra/test_ami_nix.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index d26acb3d6..a399eb7dc 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -335,38 +335,38 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: # Log Nix profile setup checks logger.info("Checking Nix profile setup:") - nix_profile_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile") + nix_profile_result = host.run("ls -la /home/postgres/.nix-profile") logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") - nix_bin_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin") + nix_bin_result = host.run("ls -la /home/postgres/.nix-profile/bin") logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") - nix_script_result = host.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") + nix_script_result = host.run("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}") - nix_script_output = host.run("sudo -u postgres /home/postgres/.nix-profile/bin/switch_pg_cron_version") + nix_script_output = host.run("/home/postgres/.nix-profile/bin/switch_pg_cron_version") logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}") if systemd_status.failed: logger.error("PostgreSQL systemd service is not active") logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") - + # Check systemd service unit file logger.error("PostgreSQL systemd service unit file:") result = host.run("sudo systemctl cat postgresql") logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}") - + # Check systemd service environment logger.error("PostgreSQL systemd service environment:") result = host.run("sudo systemctl show postgresql") logger.error(f"service environment:\n{result.stdout}\n{result.stderr}") - + # Check systemd service dependencies logger.error("PostgreSQL systemd service dependencies:") result = host.run("sudo systemctl list-dependencies postgresql") logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}") - + # Check if service is enabled logger.error("PostgreSQL service enabled status:") result = host.run("sudo systemctl is-enabled postgresql") From b8c9e3175cfcec80736312533a6aab22fc2c9b92 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 18:19:07 -0400 Subject: [PATCH 24/27] test: use the right alias on machine --- .github/workflows/testinfra-nix.yml | 4 +- ansible/files/postgres_prestart.sh.j2 | 4 +- testinfra/test_ami_nix.py | 152 ++++++++------------------ 3 files changed, 51 insertions(+), 109 deletions(-) diff --git a/.github/workflows/testinfra-nix.yml b/.github/workflows/testinfra-nix.yml index 2b07e716f..8845e954a 100644 --- a/.github/workflows/testinfra-nix.yml +++ b/.github/workflows/testinfra-nix.yml @@ -108,13 +108,13 @@ jobs: df -h / # Display available space - name: Run tests - timeout-minutes: 10 + timeout-minutes: 30 env: AMI_NAME: "supabase-postgres-${{ steps.random.outputs.random_string }}" run: | # TODO: use poetry for pkg mgmt pip3 install boto3 boto3-stubs[essential] docker ec2instanceconnectcli pytest pytest-testinfra[paramiko,docker] requests - pytest -vv -s testinfra/test_ami_nix.py + pytest -vvvv -s testinfra/test_ami_nix.py - name: Cleanup resources on build cancellation if: ${{ cancelled() }} diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index a045f298f..40e8debd7 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -41,7 +41,7 @@ get_pg_cron_version() { fi local version - version=$(sudo -u postgres /home/postgres/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") + version=$(sudo -u postgres /var/lib/postgresql/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") if [ -z "$version" ]; then echo "pg_cron: Not specified in extensions file" return @@ -57,7 +57,7 @@ get_pg_cron_version() { switch_pg_cron_version() { local version="$1" - local switch_script="/home/postgres/.nix-profile/bin/switch_pg_cron_version" + local switch_script="/var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version" if [ ! -x "$switch_script" ]; then echo "pg_cron: No version switch script available" diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index a399eb7dc..7d7f73ea8 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -292,16 +292,48 @@ def get_ssh_connection(instance_ip, ssh_identity_file, max_retries=10): temp_key.get_priv_key_file(), ) + def run_detailed_checks(host): + logger.info("Running detailed system checks...") + + # Log Nix profile setup checks + logger.info("Checking Nix profile setup:") + nix_profile_result = host.run("ls -la /var/lib/postgresql/.nix-profile") + logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") + + nix_bin_result = host.run("ls -la /var/lib/postgresql/.nix-profile/bin") + logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") + + # Check PostgreSQL logs directory + logger.info("Checking PostgreSQL logs directory:") + result = host.run("sudo ls -la /var/log/postgresql/") + logger.info(f"log directory contents:\n{result.stdout}\n{result.stderr}") + + # Check any existing PostgreSQL logs + logger.info("Checking existing PostgreSQL logs:") + result = host.run("sudo cat /var/log/postgresql/*.log") + logger.info(f"postgresql logs:\n{result.stdout}\n{result.stderr}") + + # Try starting PostgreSQL directly with pg_ctl and capture output + logger.info("Attempting to start PostgreSQL directly with pg_ctl:") + startup_log = "/tmp/postgres-start.log" + result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}") + logger.info(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}") + + # Check the startup log + logger.info("PostgreSQL startup log:") + result = host.run(f"sudo cat {startup_log}") + logger.info(f"startup log contents:\n{result.stdout}\n{result.stderr}") + + # Check PostgreSQL environment + logger.info("PostgreSQL environment:") + result = host.run("sudo -u postgres env | grep POSTGRES") + logger.info(f"postgres environment:\n{result.stdout}\n{result.stderr}") + def is_healthy(host, instance_ip, ssh_identity_file) -> bool: health_checks = [ ( "postgres", lambda h: ( - # First check if PostgreSQL is running - h.run("sudo systemctl is-active postgresql"), - # Then check if the socket directory exists and has correct permissions - h.run("sudo ls -la /run/postgresql"), - # Then try pg_isready h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") ), ), @@ -333,114 +365,24 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: # For PostgreSQL, we need to check multiple things systemd_status, socket_check, pg_isready = check(host) - # Log Nix profile setup checks - logger.info("Checking Nix profile setup:") - nix_profile_result = host.run("ls -la /home/postgres/.nix-profile") - logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") - - nix_bin_result = host.run("ls -la /home/postgres/.nix-profile/bin") - logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") - - nix_script_result = host.run("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") - logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}") - - nix_script_output = host.run("/home/postgres/.nix-profile/bin/switch_pg_cron_version") - logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}") - if systemd_status.failed: logger.error("PostgreSQL systemd service is not active") logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") - - # Check systemd service unit file - logger.error("PostgreSQL systemd service unit file:") - result = host.run("sudo systemctl cat postgresql") - logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}") - - # Check systemd service environment - logger.error("PostgreSQL systemd service environment:") - result = host.run("sudo systemctl show postgresql") - logger.error(f"service environment:\n{result.stdout}\n{result.stderr}") - - # Check systemd service dependencies - logger.error("PostgreSQL systemd service dependencies:") - result = host.run("sudo systemctl list-dependencies postgresql") - logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}") - - # Check if service is enabled - logger.error("PostgreSQL service enabled status:") - result = host.run("sudo systemctl is-enabled postgresql") - logger.error(f"service enabled status:\n{result.stdout}\n{result.stderr}") - - # Check systemd journal for service execution logs - logger.error("Systemd journal entries for PostgreSQL service execution:") - result = host.run("sudo journalctl -u postgresql -n 100 --no-pager") - logger.error(f"systemd journal:\n{result.stdout}\n{result.stderr}") - - # Check systemd journal specifically for ExecStartPre and ExecStart - logger.error("Systemd journal entries for ExecStartPre and ExecStart:") - result = host.run("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'") - logger.error(f"execution logs:\n{result.stdout}\n{result.stderr}") - - # Check systemd journal for any errors - logger.error("Systemd journal entries with error level:") - result = host.run("sudo journalctl -u postgresql -n 100 --no-pager -p err") - logger.error(f"error logs:\n{result.stdout}\n{result.stderr}") - # Check pre-start script output - logger.error("Checking pre-start script output:") - result = host.run("sudo -u postgres /usr/local/bin/postgres_prestart.sh") - logger.error(f"pre-start script output:\n{result.stdout}\n{result.stderr}") - - # Check PostgreSQL logs directory - logger.error("Checking PostgreSQL logs directory:") - result = host.run("sudo ls -la /var/log/postgresql/") - logger.error(f"log directory contents:\n{result.stdout}\n{result.stderr}") - - # Check any existing PostgreSQL logs - logger.error("Checking existing PostgreSQL logs:") - result = host.run("sudo cat /var/log/postgresql/*.log") - logger.error(f"postgresql logs:\n{result.stdout}\n{result.stderr}") - - # Try starting PostgreSQL directly with pg_ctl and capture output - logger.error("Attempting to start PostgreSQL directly with pg_ctl:") - startup_log = "/tmp/postgres-start.log" - result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}") - logger.error(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}") - - # Check the startup log - logger.error("PostgreSQL startup log:") - result = host.run(f"sudo cat {startup_log}") - logger.error(f"startup log contents:\n{result.stdout}\n{result.stderr}") - - # Clean up the startup log - result = host.run(f"sudo rm -f {startup_log}") - - # Check PostgreSQL configuration - logger.error("PostgreSQL configuration:") - result = host.run("sudo cat /etc/postgresql/postgresql.conf") - logger.error(f"postgresql.conf:\n{result.stdout}\n{result.stderr}") - - # Check PostgreSQL authentication configuration - logger.error("PostgreSQL authentication configuration:") - result = host.run("sudo cat /etc/postgresql/pg_hba.conf") - logger.error(f"pg_hba.conf:\n{result.stdout}\n{result.stderr}") - - # Check PostgreSQL environment - logger.error("PostgreSQL environment:") - result = host.run("sudo -u postgres env | grep POSTGRES") - logger.error(f"postgres environment:\n{result.stdout}\n{result.stderr}") + # Run detailed checks since we know we have a working connection + run_detailed_checks(host) if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): return False - else: - cmd = check(host) - if cmd.failed is True: - logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") - return False + else: + cmd = check(host) + if cmd.failed is True: + logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + return False except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." From 65ef0692547b2ef4ac76f3251bbf90bfe40bebea Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 21:34:12 -0400 Subject: [PATCH 25/27] test: do not unpack result --- testinfra/test_ami_nix.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 7d7f73ea8..fce6aa83f 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -363,18 +363,16 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: try: if service == "postgres": # For PostgreSQL, we need to check multiple things - systemd_status, socket_check, pg_isready = check(host) + pg_isready = check(host) - if systemd_status.failed: - logger.error("PostgreSQL systemd service is not active") - logger.error(f"systemd status: {systemd_status.stdout}") - logger.error(f"systemd error: {systemd_status.stderr}") + if pg_isready.failed: + logger.error("PostgreSQL is not ready") + logger.error(f"pg_isready stdout: {pg_isready.stdout}") + logger.error(f"pg_isready stderr: {pg_isready.stderr}") # Run detailed checks since we know we have a working connection run_detailed_checks(host) - - if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): - return False + return False else: cmd = check(host) if cmd.failed is True: From c2631e8ca1b9f28374b49c664297ba0b9cccce47 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 29 Apr 2025 10:42:39 -0400 Subject: [PATCH 26/27] test: reorg and print logs while waiting continue on other checks when ready --- testinfra/test_ami_nix.py | 40 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index fce6aa83f..9a319474f 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -313,12 +313,6 @@ def run_detailed_checks(host): result = host.run("sudo cat /var/log/postgresql/*.log") logger.info(f"postgresql logs:\n{result.stdout}\n{result.stderr}") - # Try starting PostgreSQL directly with pg_ctl and capture output - logger.info("Attempting to start PostgreSQL directly with pg_ctl:") - startup_log = "/tmp/postgres-start.log" - result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}") - logger.info(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}") - # Check the startup log logger.info("PostgreSQL startup log:") result = host.run(f"sudo cat {startup_log}") @@ -362,17 +356,35 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: for service, check in health_checks: try: if service == "postgres": - # For PostgreSQL, we need to check multiple things pg_isready = check(host) - if pg_isready.failed: - logger.error("PostgreSQL is not ready") - logger.error(f"pg_isready stdout: {pg_isready.stdout}") - logger.error(f"pg_isready stderr: {pg_isready.stderr}") + # Always read and log the PostgreSQL logs first + logger.warning("PostgreSQL status check:") + try: + # Read both .log and .csv files + log_files = [ + "/var/log/postgresql/*.log", + "/var/log/postgresql/*.csv" + ] - # Run detailed checks since we know we have a working connection - run_detailed_checks(host) - return False + for log_pattern in log_files: + log_result = host.run(f"sudo cat {log_pattern}") + if not log_result.failed: + logger.error(f"PostgreSQL logs from {log_pattern}:") + logger.error(log_result.stdout) + if log_result.stderr: + logger.error(f"Log read errors: {log_result.stderr}") + else: + logger.error(f"Failed to read PostgreSQL logs from {log_pattern}: {log_result.stderr}") + except Exception as e: + logger.error(f"Error reading PostgreSQL logs: {str(e)}") + + # Then check the status and return + if not pg_isready.failed: + continue + # Wait before next attempt + sleep(5) + return False else: cmd = check(host) if cmd.failed is True: From 164752bd8be38e7c1378b45b4dfb77fb05b63ea3 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 29 Apr 2025 12:15:43 -0400 Subject: [PATCH 27/27] test: restructure checks to avoid race --- testinfra/test_ami_nix.py | 48 ++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 9a319474f..66546da60 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -323,7 +323,9 @@ def run_detailed_checks(host): result = host.run("sudo -u postgres env | grep POSTGRES") logger.info(f"postgres environment:\n{result.stdout}\n{result.stderr}") - def is_healthy(host, instance_ip, ssh_identity_file) -> bool: + def is_healthy(host, instance_ip, ssh_identity_file) -> tuple[bool, dict]: + service_status = {} # Track status of each service + health_checks = [ ( "postgres", @@ -358,10 +360,9 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: if service == "postgres": pg_isready = check(host) - # Always read and log the PostgreSQL logs first + # Always read and log the PostgreSQL logs logger.warning("PostgreSQL status check:") try: - # Read both .log and .csv files log_files = [ "/var/log/postgresql/*.log", "/var/log/postgresql/*.csv" @@ -379,37 +380,48 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: except Exception as e: logger.error(f"Error reading PostgreSQL logs: {str(e)}") - # Then check the status and return - if not pg_isready.failed: - continue - # Wait before next attempt - sleep(5) - return False + service_status[service] = not pg_isready.failed + else: cmd = check(host) - if cmd.failed is True: + service_status[service] = not cmd.failed + if cmd.failed: logger.warning(f"{service} not ready") logger.error(f"{service} command failed with rc={cmd.rc}") logger.error(f"{service} stdout: {cmd.stdout}") logger.error(f"{service} stderr: {cmd.stderr}") - return False + except Exception as e: - logger.warning( - f"Connection failed during {service} check, attempting reconnect..." - ) + logger.warning(f"Connection failed during {service} check, attempting reconnect...") logger.error(f"Error details: {str(e)}") host = get_ssh_connection(instance_ip, ssh_identity_file) - return False + service_status[service] = False + + # Log overall status of all services + logger.info("Service health status:") + for service, healthy in service_status.items(): + logger.info(f"{service}: {'healthy' if healthy else 'unhealthy'}") - return True + # If any service is unhealthy, wait and return False with status + if not all(service_status.values()): + if service_status.get("postgres", False): # If postgres is healthy but others aren't + sleep(5) # Only wait if postgres is up but other services aren't + logger.warning("Some services are not healthy, will retry...") + return False, service_status + + logger.info("All services are healthy, proceeding to tests...") + return True, service_status while True: - if is_healthy( + healthy, status = is_healthy( host=host, instance_ip=instance.public_ip_address, ssh_identity_file=temp_key.get_priv_key_file(), - ): + ) + if healthy: + logger.info("Health check passed, starting tests...") break + logger.warning(f"Health check failed, service status: {status}") sleep(1) # return a testinfra connection to the instance