From dc7b8cf9b86cb0fb78dedab2080eed059c542adf Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 14:50:32 -0400 Subject: [PATCH 01/42] feat: multi-ext-versios-pgrcron --- nix/ext/pg_cron.nix | 128 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 109 insertions(+), 19 deletions(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 792db7676..7b8c00e72 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -1,31 +1,121 @@ { lib, stdenv, fetchFromGitHub, postgresql }: -stdenv.mkDerivation rec { - pname = "pg_cron"; - version = "1.6.4"; +let + allVersions = { + "1.3.1" = { + rev = "v1.3.1"; + hash = "sha256-rXotNOtQNmA55ErNxGoNSKZ0pP1uxEVlDGITFHuqGG4="; + postPatch = '' + # Add necessary includes + substituteInPlace src/pg_cron.c \ + --replace '#include "postgres.h"' '#include "postgres.h" + #include "commands/async.h" + #include "miscadmin.h"' - buildInputs = [ postgresql ]; + # Update function calls to use PostgreSQL 15 APIs + substituteInPlace src/pg_cron.c \ + --replace 'ProcessCompletedNotifies();' '/* ProcessCompletedNotifies removed */' \ + --replace 'pg_analyze_and_rewrite(parsetree, sql, NULL, 0,NULL);' 'pg_analyze_and_rewrite_fixedparams(parsetree, sql, NULL, 0, NULL);' + ''; + }; + "1.4.2" = { + rev = "v1.4.2"; + hash = "sha256-P0Fd10Q1p+KrExb35G6otHpc6pD61WnMll45H2jkevM="; + }; + "1.6.4" = { + rev = "v1.6.4"; + hash = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; + }; + "1.5.2" = { + rev = "v1.5.2"; + hash = "sha256-+quVWbKJy6wXpL/zwTk5FF7sYwHA7I97WhWmPO/HSZ4="; + }; + }; + + mkPgCron = pgCronVersion: { rev, hash, postPatch ? "" }: stdenv.mkDerivation { + pname = "pg_cron"; + version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; + + buildInputs = [ postgresql ]; + inherit postPatch; + + src = fetchFromGitHub { + owner = "citusdata"; + repo = "pg_cron"; + inherit rev hash; + }; + + buildPhase = '' + make PG_CONFIG=${postgresql}/bin/pg_config + + # Create version-specific SQL file + cp pg_cron.sql pg_cron--${pgCronVersion}.sql + + # Create versioned control file with modified module path + sed -e "/^default_version =/d" \ + -e "s|^module_pathname = .*|module_pathname = '\$libdir/pg_cron'|" \ + pg_cron.control > pg_cron--${pgCronVersion}.control + ''; - src = fetchFromGitHub { - owner = "citusdata"; - repo = pname; - rev = "v${version}"; - hash = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; + installPhase = '' + mkdir -p $out/{lib,share/postgresql/extension} + + # Install versioned library + install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${pgCronVersion}${postgresql.dlSuffix} + + # Install version-specific files + install -Dm644 pg_cron--${pgCronVersion}.sql $out/share/postgresql/extension/ + install -Dm644 pg_cron--${pgCronVersion}.control $out/share/postgresql/extension/ + + # Install upgrade scripts + find . -name 'pg_cron--*--*.sql' -exec install -Dm644 {} $out/share/postgresql/extension/ \; + ''; }; + getVersions = pg: + if lib.versionAtLeast pg.version "17" + then { "1.6.4" = allVersions."1.6.4"; } + else allVersions; + + allVersionsForPg = lib.mapAttrs mkPgCron (getVersions postgresql); + +in +stdenv.mkDerivation { + pname = "pg_cron-all"; + version = "multi"; + + buildInputs = lib.attrValues allVersionsForPg; + + dontUnpack = true; + dontConfigure = true; + dontBuild = true; + installPhase = '' mkdir -p $out/{lib,share/postgresql/extension} - - cp *${postgresql.dlSuffix} $out/lib - cp *.sql $out/share/postgresql/extension - cp *.control $out/share/postgresql/extension + + # Install all versions + for drv in ${lib.concatStringsSep " " (lib.attrValues allVersionsForPg)}; do + ln -sv $drv/lib/* $out/lib/ + cp -v --no-clobber $drv/share/postgresql/extension/* $out/share/postgresql/extension/ || true + done + + # Create default symlinks + latest_control=$(ls -v $out/share/postgresql/extension/pg_cron--*.control | tail -n1) + latest_version=$(basename "$latest_control" | sed -E 's/pg_cron--([0-9.]+).control/\1/') + + # Create main control file with default_version + echo "default_version = '$latest_version'" > $out/share/postgresql/extension/pg_cron.control + cat "$latest_control" >> $out/share/postgresql/extension/pg_cron.control + + # Library symlink + ln -sfnv pg_cron-$latest_version${postgresql.dlSuffix} $out/lib/pg_cron${postgresql.dlSuffix} ''; meta = with lib; { - description = "Run Cron jobs through PostgreSQL"; - homepage = "https://github.com/citusdata/pg_cron"; - changelog = "https://github.com/citusdata/pg_cron/raw/v${version}/CHANGELOG.md"; - platforms = postgresql.meta.platforms; - license = licenses.postgresql; + description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; + homepage = "https://github.com/citusdata/pg_cron"; + maintainers = with maintainers; [ samrose ]; + platforms = postgresql.meta.platforms; + license = licenses.postgresql; }; -} +} \ No newline at end of file From 278f67b06d0bcc2bdaad79c0703edc3cf77f6198 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:33:34 -0400 Subject: [PATCH 02/42] feat: add version to drv and patch instead of postPatch rewrite --- nix/ext/pg_cron-1.3.1-pg15.patch | 31 +++++++++++++++++++++++++++++++ nix/ext/pg_cron.nix | 19 ++++--------------- 2 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 nix/ext/pg_cron-1.3.1-pg15.patch diff --git a/nix/ext/pg_cron-1.3.1-pg15.patch b/nix/ext/pg_cron-1.3.1-pg15.patch new file mode 100644 index 000000000..d3b6cd702 --- /dev/null +++ b/nix/ext/pg_cron-1.3.1-pg15.patch @@ -0,0 +1,31 @@ +diff --git a/src/pg_cron.c b/src/pg_cron.c +index e0ca973..4d51b2c 100644 +--- a/src/pg_cron.c ++++ b/src/pg_cron.c +@@ -14,6 +14,8 @@ + #include + + #include "postgres.h" ++#include "commands/async.h" ++#include "miscadmin.h" + #include "fmgr.h" + + /* these are always necessary for a bgworker */ +@@ -1908,7 +1910,7 @@ CronBackgroundWorker(Datum main_arg) + /* Post-execution cleanup. */ + disable_timeout(STATEMENT_TIMEOUT, false); + CommitTransactionCommand(); +- ProcessCompletedNotifies(); ++ /* ProcessCompletedNotifies removed */ + pgstat_report_activity(STATE_IDLE, command); + pgstat_report_stat(true); + +@@ -2025,7 +2027,7 @@ ExecuteSqlString(const char *sql) + */ + oldcontext = MemoryContextSwitchTo(parsecontext); + #if PG_VERSION_NUM >= 100000 +- querytree_list = pg_analyze_and_rewrite(parsetree, sql, NULL, 0,NULL); ++ querytree_list = pg_analyze_and_rewrite_fixedparams(parsetree, sql, NULL, 0, NULL); + #else + querytree_list = pg_analyze_and_rewrite(parsetree, sql, NULL, 0); + #endif diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 7b8c00e72..a59cb5ae5 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -5,18 +5,7 @@ let "1.3.1" = { rev = "v1.3.1"; hash = "sha256-rXotNOtQNmA55ErNxGoNSKZ0pP1uxEVlDGITFHuqGG4="; - postPatch = '' - # Add necessary includes - substituteInPlace src/pg_cron.c \ - --replace '#include "postgres.h"' '#include "postgres.h" - #include "commands/async.h" - #include "miscadmin.h"' - - # Update function calls to use PostgreSQL 15 APIs - substituteInPlace src/pg_cron.c \ - --replace 'ProcessCompletedNotifies();' '/* ProcessCompletedNotifies removed */' \ - --replace 'pg_analyze_and_rewrite(parsetree, sql, NULL, 0,NULL);' 'pg_analyze_and_rewrite_fixedparams(parsetree, sql, NULL, 0, NULL);' - ''; + patches = [ ./pg_cron-1.3.1-pg15.patch ]; }; "1.4.2" = { rev = "v1.4.2"; @@ -32,12 +21,12 @@ let }; }; - mkPgCron = pgCronVersion: { rev, hash, postPatch ? "" }: stdenv.mkDerivation { + mkPgCron = pgCronVersion: { rev, hash, patches ? [] }: stdenv.mkDerivation { pname = "pg_cron"; version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; buildInputs = [ postgresql ]; - inherit postPatch; + inherit patches; src = fetchFromGitHub { owner = "citusdata"; @@ -82,7 +71,7 @@ let in stdenv.mkDerivation { pname = "pg_cron-all"; - version = "multi"; + version = "multi-001"; #increment this if you change this package in any way buildInputs = lib.attrValues allVersionsForPg; From b53ab41a6c2c45ecc56284478236538050ee22a8 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:34:48 -0400 Subject: [PATCH 03/42] chore: newline --- nix/ext/pg_cron.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index a59cb5ae5..a134dfb5e 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -107,4 +107,4 @@ stdenv.mkDerivation { platforms = postgresql.meta.platforms; license = licenses.postgresql; }; -} \ No newline at end of file +} From 3e565a7141cbaac304033a8d47d62d9667b26acf Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:41:10 -0400 Subject: [PATCH 04/42] feat: auto create multi version --- nix/ext/pg_cron.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index a134dfb5e..b5e20732b 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -21,6 +21,9 @@ let }; }; + # Simple version string that concatenates all versions with dashes + versionString = "multi-" + lib.concatStringsSep "-" (map (v: lib.replaceStrings ["."] ["-"] v) (lib.attrNames allVersions)); + mkPgCron = pgCronVersion: { rev, hash, patches ? [] }: stdenv.mkDerivation { pname = "pg_cron"; version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; @@ -71,7 +74,7 @@ let in stdenv.mkDerivation { pname = "pg_cron-all"; - version = "multi-001"; #increment this if you change this package in any way + version = versionString; buildInputs = lib.attrValues allVersionsForPg; From d48f7b7b2c0f52945d6311cde8715b1220e81453 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 15 Apr 2025 15:42:27 -0400 Subject: [PATCH 05/42] chore: do not re-intro maintainers here not needed --- nix/ext/pg_cron.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index b5e20732b..d42cff367 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -106,7 +106,6 @@ stdenv.mkDerivation { meta = with lib; { description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; homepage = "https://github.com/citusdata/pg_cron"; - maintainers = with maintainers; [ samrose ]; platforms = postgresql.meta.platforms; license = licenses.postgresql; }; From dbac6325164e54244ecf88ad28be71939412c821 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 11:22:15 -0400 Subject: [PATCH 06/42] feat: pg_cron version switcher in pkg and prestart --- ansible/files/postgres_prestart.sh.j2 | 55 +++++++++++++++++++++++++ ansible/tasks/stage2-setup-postgres.yml | 6 +++ nix/ext/pg_cron.nix | 32 +++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 3ffe54c85..a045f298f 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -26,7 +26,62 @@ update_orioledb_buffers() { fi } +check_extensions_file() { + local extensions_file="/root/pg_extensions.json" + if [ ! -f "$extensions_file" ]; then + echo "extensions: No extensions file found, skipping extensions versions check" + return 1 + fi + return 0 +} + +get_pg_cron_version() { + if ! check_extensions_file; then + return + fi + + local version + version=$(sudo -u postgres /home/postgres/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") + if [ -z "$version" ]; then + echo "pg_cron: Not specified in extensions file" + return + fi + + if ! [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "pg_cron: Invalid version format: $version" + return + fi + + echo "$version" +} + +switch_pg_cron_version() { + local version="$1" + local switch_script="/home/postgres/.nix-profile/bin/switch_pg_cron_version" + + if [ ! -x "$switch_script" ]; then + echo "pg_cron: No version switch script available" + return + fi + + echo "pg_cron: Switching to version $version" + sudo -u postgres "$switch_script" "$version" + echo "pg_cron: Version switch completed" +} + +handle_pg_cron_version() { + local version + version=$(get_pg_cron_version) + if [ -n "$version" ]; then + switch_pg_cron_version "$version" + fi +} + main() { + # 1. pg_cron version handling + handle_pg_cron_version + + # 2. orioledb handling local has_orioledb=$(check_orioledb_enabled) if [ "$has_orioledb" -lt 1 ]; then return 0 diff --git a/ansible/tasks/stage2-setup-postgres.yml b/ansible/tasks/stage2-setup-postgres.yml index d3209fc04..77803a0a3 100644 --- a/ansible/tasks/stage2-setup-postgres.yml +++ b/ansible/tasks/stage2-setup-postgres.yml @@ -94,6 +94,12 @@ shell: | sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#{{postgresql_version}}_src" when: stage2_nix + +- name: Install jq from nix binary cache + become: yes + shell: | + sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#jq" + when: stage2_nix - name: Set ownership and permissions for /etc/ssl/private become: yes diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index d42cff367..3f438931e 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -50,7 +50,7 @@ let ''; installPhase = '' - mkdir -p $out/{lib,share/postgresql/extension} + mkdir -p $out/{lib,share/postgresql/extension,bin} # Install versioned library install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${pgCronVersion}${postgresql.dlSuffix} @@ -83,7 +83,7 @@ stdenv.mkDerivation { dontBuild = true; installPhase = '' - mkdir -p $out/{lib,share/postgresql/extension} + mkdir -p $out/{lib,share/postgresql/extension,bin} # Install all versions for drv in ${lib.concatStringsSep " " (lib.attrValues allVersionsForPg)}; do @@ -101,6 +101,34 @@ stdenv.mkDerivation { # Library symlink ln -sfnv pg_cron-$latest_version${postgresql.dlSuffix} $out/lib/pg_cron${postgresql.dlSuffix} + + # Create version switcher script + cat > $out/bin/switch_pg_cron_version <<'EOF' + #!/bin/sh + set -e + + if [ $# -ne 1 ]; then + echo "Usage: $0 " + echo "Example: $0 1.4.2" + exit 1 + fi + + VERSION=$1 + LIB_DIR=$(dirname "$0")/../lib + + # Check if version exists + if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then + echo "Error: Version $VERSION not found" + exit 1 + fi + + # Update library symlink + ln -sfnv "pg_cron-$VERSION${postgresql.dlSuffix}" "$LIB_DIR/pg_cron${postgresql.dlSuffix}" + + echo "Successfully switched pg_cron to version $VERSION" + EOF + + chmod +x $out/bin/switch_pg_cron_version ''; meta = with lib; { From adcc2e7a3d1ea793e412e1ebd31f7381acfee129 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 12:42:21 -0400 Subject: [PATCH 07/42] test: a tmp test for this branch to test older versions --- .github/workflows/testinfra-ami-build.yml | 16 +++++++++++++++ testinfra/test_ami_nix.py | 24 +++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/.github/workflows/testinfra-ami-build.yml b/.github/workflows/testinfra-ami-build.yml index 2b07e716f..9baa2e200 100644 --- a/.github/workflows/testinfra-ami-build.yml +++ b/.github/workflows/testinfra-ami-build.yml @@ -94,6 +94,22 @@ jobs: sudo rm -rf /tmp/* # Clean temporary files df -h / # Display available space + - name: Patch stage2-nix-psql.pkr.hcl to create pg_extensions.json + run: | + cat >> stage2-nix-psql.pkr.hcl << 'EOF' + # Add provisioner to create pg_extensions.json + provisioner "shell" { + inline = [ + "echo '{\"pg_cron\":\"1.3.1\"}' | sudo tee /root/pg_extensions.json", + "sudo chmod 644 /root/pg_extensions.json", + "echo 'Created pg_extensions.json with content:' && sudo cat /root/pg_extensions.json" + ] + } + EOF + # Display the modified file to verify + echo "Modified stage2-nix-psql.pkr.hcl:" + tail -n 10 stage2-nix-psql.pkr.hcl + - name: Build AMI stage 2 run: | packer init stage2-nix-psql.pkr.hcl diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 1975818d6..832cf51cd 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -507,3 +507,27 @@ def test_postgrest_ending_empty_key_query_parameter_is_removed(host): }, ) assert res.ok + + +def test_pg_cron_extension(host): + # Connect as supabase_admin and create the extension + with host.sudo("postgres"): + result = host.run('psql -U supabase_admin -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') + assert result.rc == 0, f"Failed to create pg_cron extension: {result.stderr}" + + # Create test table + result = host.run('psql -U supabase_admin -d postgres -c "CREATE TABLE cron_test_log (id SERIAL PRIMARY KEY, message TEXT, log_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP);"') + assert result.rc == 0, f"Failed to create test table: {result.stderr}" + + # Schedule a job + result = host.run('psql -U supabase_admin -d postgres -c "SELECT cron.schedule(\'* * * * *\', \'INSERT INTO cron_test_log (message) VALUES (\\\'Hello from pg_cron!\\\');\');"') + assert result.rc == 0, f"Failed to schedule job: {result.stderr}" + assert "1" in result.stdout, "Expected schedule ID 1" + + # Verify job is scheduled + result = host.run('psql -U supabase_admin -d postgres -c "SELECT * FROM cron.job;"') + assert result.rc == 0, f"Failed to query cron.job: {result.stderr}" + assert "* * * * *" in result.stdout, "Expected cron schedule pattern" + assert "INSERT INTO cron_test_log" in result.stdout, "Expected cron command" + assert "postgres" in result.stdout, "Expected postgres username" + assert "postgres" in result.stdout, "Expected postgres database" From 7f4163198bb84a316e4c93f3c04b9ab7342fe090 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 13:25:34 -0400 Subject: [PATCH 08/42] test: temp test for ext handling --- .github/workflows/testinfra-ami-build.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/testinfra-ami-build.yml b/.github/workflows/testinfra-ami-build.yml index 9baa2e200..55f3ff3fa 100644 --- a/.github/workflows/testinfra-ami-build.yml +++ b/.github/workflows/testinfra-ami-build.yml @@ -96,7 +96,12 @@ jobs: - name: Patch stage2-nix-psql.pkr.hcl to create pg_extensions.json run: | - cat >> stage2-nix-psql.pkr.hcl << 'EOF' + # Get the line number of the last closing brace + LAST_BRACE_LINE=$(grep -n '}' stage2-nix-psql.pkr.hcl | tail -n 1 | cut -d: -f1) + + # Create a temporary file with the new content + head -n $((LAST_BRACE_LINE-1)) stage2-nix-psql.pkr.hcl > temp.pkr.hcl + cat >> temp.pkr.hcl << 'EOF' # Add provisioner to create pg_extensions.json provisioner "shell" { inline = [ @@ -105,7 +110,12 @@ jobs: "echo 'Created pg_extensions.json with content:' && sudo cat /root/pg_extensions.json" ] } + } EOF + + # Replace the original file + mv temp.pkr.hcl stage2-nix-psql.pkr.hcl + # Display the modified file to verify echo "Modified stage2-nix-psql.pkr.hcl:" tail -n 10 stage2-nix-psql.pkr.hcl From 75b6e53c13737ca0ff7fa546882391a61c2e8cf8 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 13:37:00 -0400 Subject: [PATCH 09/42] test: instead of patch, add prior to start of machine in testinfra --- .github/workflows/testinfra-ami-build.yml | 26 ----------------------- testinfra/test_ami_nix.py | 1 + 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/.github/workflows/testinfra-ami-build.yml b/.github/workflows/testinfra-ami-build.yml index 55f3ff3fa..2b07e716f 100644 --- a/.github/workflows/testinfra-ami-build.yml +++ b/.github/workflows/testinfra-ami-build.yml @@ -94,32 +94,6 @@ jobs: sudo rm -rf /tmp/* # Clean temporary files df -h / # Display available space - - name: Patch stage2-nix-psql.pkr.hcl to create pg_extensions.json - run: | - # Get the line number of the last closing brace - LAST_BRACE_LINE=$(grep -n '}' stage2-nix-psql.pkr.hcl | tail -n 1 | cut -d: -f1) - - # Create a temporary file with the new content - head -n $((LAST_BRACE_LINE-1)) stage2-nix-psql.pkr.hcl > temp.pkr.hcl - cat >> temp.pkr.hcl << 'EOF' - # Add provisioner to create pg_extensions.json - provisioner "shell" { - inline = [ - "echo '{\"pg_cron\":\"1.3.1\"}' | sudo tee /root/pg_extensions.json", - "sudo chmod 644 /root/pg_extensions.json", - "echo 'Created pg_extensions.json with content:' && sudo cat /root/pg_extensions.json" - ] - } - } - EOF - - # Replace the original file - mv temp.pkr.hcl stage2-nix-psql.pkr.hcl - - # Display the modified file to verify - echo "Modified stage2-nix-psql.pkr.hcl:" - tail -n 10 stage2-nix-psql.pkr.hcl - - name: Build AMI stage 2 run: | packer init stage2-nix-psql.pkr.hcl diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 832cf51cd..6d7ddcc7b 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -272,6 +272,7 @@ def gzip_then_base64_encode(s: str) -> str: - {{path: /etc/gotrue.env, content: {gzip_then_base64_encode(gotrue_env_content)}, permissions: '0664', encoding: gz+b64}} - {{path: /etc/wal-g/config.json, content: {gzip_then_base64_encode(walg_config_json_content)}, permissions: '0664', owner: 'wal-g:wal-g', encoding: gz+b64}} - {{path: /tmp/init.json, content: {gzip_then_base64_encode(init_json_content)}, permissions: '0600', encoding: gz+b64}} + - {{path: /root/pg_extensions.json, content: {gzip_then_base64_encode('{"pg_cron":"1.3.1"}')}, permissions: '0644', encoding: gz+b64}} runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' From 2d1aa55b0ce1d016c7dde31781111e9f74215fb7 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 14:10:16 -0400 Subject: [PATCH 10/42] test: only run on pg 15 for this test, as 1.3.1 limited to 15 --- testinfra/test_ami_nix.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 6d7ddcc7b..cfe2801fb 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -272,10 +272,10 @@ def gzip_then_base64_encode(s: str) -> str: - {{path: /etc/gotrue.env, content: {gzip_then_base64_encode(gotrue_env_content)}, permissions: '0664', encoding: gz+b64}} - {{path: /etc/wal-g/config.json, content: {gzip_then_base64_encode(walg_config_json_content)}, permissions: '0664', owner: 'wal-g:wal-g', encoding: gz+b64}} - {{path: /tmp/init.json, content: {gzip_then_base64_encode(init_json_content)}, permissions: '0600', encoding: gz+b64}} - - {{path: /root/pg_extensions.json, content: {gzip_then_base64_encode('{"pg_cron":"1.3.1"}')}, permissions: '0644', encoding: gz+b64}} runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' + - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{"pg_cron":"1.3.1"}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' - 'bash init.sh "staging"' - 'touch /var/lib/init-complete' - 'rm -rf /tmp/*' @@ -511,6 +511,11 @@ def test_postgrest_ending_empty_key_query_parameter_is_removed(host): def test_pg_cron_extension(host): + # Only run this test for PostgreSQL 15 + postgres_version = os.environ.get("POSTGRES_MAJOR_VERSION") + if postgres_version != "15": + pytest.skip(f"Skipping pg_cron test for PostgreSQL version {postgres_version}") + # Connect as supabase_admin and create the extension with host.sudo("postgres"): result = host.run('psql -U supabase_admin -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') From 7b63c9f0af95611e5e7d75a81f476b79c7eb1a67 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 14:46:36 -0400 Subject: [PATCH 11/42] feat: use jq from nixpkgs --- ansible/tasks/stage2-setup-postgres.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/tasks/stage2-setup-postgres.yml b/ansible/tasks/stage2-setup-postgres.yml index 77803a0a3..a8310f5cf 100644 --- a/ansible/tasks/stage2-setup-postgres.yml +++ b/ansible/tasks/stage2-setup-postgres.yml @@ -98,7 +98,7 @@ - name: Install jq from nix binary cache become: yes shell: | - sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#jq" + sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install nixpkgs#jq" when: stage2_nix - name: Set ownership and permissions for /etc/ssl/private From dd924e8196dfc92d40446cf6613d6688460391f0 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 24 Apr 2025 16:21:13 -0400 Subject: [PATCH 12/42] test: handle braces properly --- testinfra/test_ami_nix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index cfe2801fb..ea66f7106 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -275,7 +275,7 @@ def gzip_then_base64_encode(s: str) -> str: runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' - - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{"pg_cron":"1.3.1"}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' + - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{{"pg_cron":"1.3.1"}}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' - 'bash init.sh "staging"' - 'touch /var/lib/init-complete' - 'rm -rf /tmp/*' From d136e4f66a2069b3355be30bb2f86001704d0397 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 08:33:33 -0400 Subject: [PATCH 13/42] test: propagate errors from any failure --- testinfra/test_ami_nix.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index ea66f7106..49035e554 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -361,9 +361,26 @@ def is_healthy(ssh) -> bool: result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + + # For PostgreSQL, also check the logs and systemd status + if service == "postgres": + logger.error("PostgreSQL logs:") + host.run("sudo cat /var/log/postgresql/postgresql-*.log") + logger.error("PostgreSQL systemd status:") + host.run("sudo systemctl status postgresql") + logger.error("PostgreSQL journal logs:") + host.run("sudo journalctl -u postgresql --no-pager") + return False - except Exception: - logger.warning(f"Connection failed during {service} check") + except Exception as e: + logger.warning( + f"Connection failed during {service} check, attempting reconnect..." + ) + logger.error(f"Error details: {str(e)}") + host = get_ssh_connection(instance_ip, ssh_identity_file) return False return True From e7c403c89f3bdea04f9d3f71d3a443c30528fd99 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 09:52:36 -0400 Subject: [PATCH 14/42] test: more logging for healthcheck --- testinfra/test_ami_nix.py | 94 +++++++++++++++++++++++++++++++++++---- 1 file changed, 85 insertions(+), 9 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 49035e554..8bc9c5b3a 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -348,33 +348,109 @@ def gzip_then_base64_encode(s: str) -> str: def is_healthy(ssh) -> bool: health_checks = [ +<<<<<<< HEAD ("postgres", "sudo -u postgres /usr/bin/pg_isready -U postgres"), ("adminapi", f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: {supabase_admin_key}'"), ("postgrest", "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready"), ("gotrue", "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health"), ("kong", "sudo kong health"), ("fail2ban", "sudo fail2ban-client status"), +======= + ( + "postgres", + lambda h: ( + # First check if PostgreSQL is running + h.run("sudo systemctl is-active postgresql"), + # Then check if the socket directory exists and has correct permissions + h.run("sudo ls -la /run/postgresql"), + # Then try pg_isready + h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") + ), + ), + ( + "adminapi", + lambda h: h.run( + f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: {supabase_admin_key}'" + ), + ), + ( + "postgrest", + lambda h: h.run( + "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready" + ), + ), + ( + "gotrue", + lambda h: h.run( + "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health" + ), + ), + ("kong", lambda h: h.run("sudo kong health")), + ("fail2ban", lambda h: h.run("sudo fail2ban-client status")), +>>>>>>> 2bd7b6d9 (test: more logging for healthcheck) ] for service, command in health_checks: try: +<<<<<<< HEAD result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") logger.error(f"{service} command failed with rc={cmd.rc}") logger.error(f"{service} stdout: {cmd.stdout}") logger.error(f"{service} stderr: {cmd.stderr}") +======= + if service == "postgres": + # For PostgreSQL, we need to check multiple things + systemd_status, socket_check, pg_isready = check(host) +>>>>>>> 2bd7b6d9 (test: more logging for healthcheck) - # For PostgreSQL, also check the logs and systemd status - if service == "postgres": - logger.error("PostgreSQL logs:") - host.run("sudo cat /var/log/postgresql/postgresql-*.log") - logger.error("PostgreSQL systemd status:") - host.run("sudo systemctl status postgresql") - logger.error("PostgreSQL journal logs:") - host.run("sudo journalctl -u postgresql --no-pager") + if systemd_status.failed: + logger.error("PostgreSQL systemd service is not active") + logger.error(f"systemd status: {systemd_status.stdout}") + logger.error(f"systemd error: {systemd_status.stderr}") + + # Check init script logs + logger.error("Init script logs:") + host.run("sudo journalctl -u cloud-init --no-pager") + + # Check cloud-init logs + logger.error("Cloud-init logs:") + host.run("sudo cat /var/log/cloud-init-output.log") + + # Check if init script exists and its contents + logger.error("Init script status:") + host.run("ls -la /tmp/init.sh") + host.run("cat /tmp/init.sh") - return False + if socket_check.failed: + logger.error("PostgreSQL socket directory check failed") + logger.error(f"socket check: {socket_check.stdout}") + logger.error(f"socket error: {socket_check.stderr}") + + if pg_isready.failed: + logger.error("pg_isready check failed") + logger.error(f"pg_isready output: {pg_isready.stdout}") + logger.error(f"pg_isready error: {pg_isready.stderr}") + + # Check PostgreSQL logs for startup issues + logger.error("PostgreSQL logs:") + host.run("sudo cat /var/log/postgresql/postgresql-*.log") + logger.error("PostgreSQL systemd status:") + host.run("sudo systemctl status postgresql") + logger.error("PostgreSQL journal logs:") + host.run("sudo journalctl -u postgresql --no-pager") + + if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): + return False + else: + cmd = check(host) + if cmd.failed is True: + logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + return False except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." From 5e8c008e6a71669ba027b09e1f6d3f379c4f1c85 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 11:33:48 -0400 Subject: [PATCH 15/42] test: adding even more logging --- testinfra/test_ami_nix.py | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 8bc9c5b3a..61f4d1b56 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -410,6 +410,10 @@ def is_healthy(ssh) -> bool: logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") + # Get detailed systemd status + logger.error("Detailed systemd status:") + host.run("sudo systemctl status postgresql -l --no-pager") + # Check init script logs logger.error("Init script logs:") host.run("sudo journalctl -u cloud-init --no-pager") @@ -422,6 +426,44 @@ def is_healthy(ssh) -> bool: logger.error("Init script status:") host.run("ls -la /tmp/init.sh") host.run("cat /tmp/init.sh") + + # Check PostgreSQL configuration + logger.error("PostgreSQL configuration:") + host.run("sudo cat /etc/postgresql/*/main/postgresql.conf") + host.run("sudo cat /etc/postgresql/*/main/pg_hba.conf") + + # Check PostgreSQL data directory permissions + logger.error("PostgreSQL data directory permissions:") + host.run("sudo ls -la /var/lib/postgresql/*/main/") + + # Check PostgreSQL startup logs + logger.error("PostgreSQL startup logs:") + host.run("sudo cat /var/log/postgresql/postgresql-*.log") + + # Check systemd journal for PostgreSQL + logger.error("Systemd journal for PostgreSQL:") + host.run("sudo journalctl -u postgresql -n 100 --no-pager") + + # Check for any PostgreSQL-related errors in system logs + logger.error("System logs with PostgreSQL errors:") + host.run("sudo journalctl | grep -i postgres | tail -n 100") + + # Check for any disk space issues + logger.error("Disk space information:") + host.run("df -h") + host.run("sudo du -sh /var/lib/postgresql/*") + + # Check for any memory issues + logger.error("Memory information:") + host.run("free -h") + + # Check for any process conflicts + logger.error("Running processes:") + host.run("ps aux | grep postgres") + + # Check for any port conflicts + logger.error("Port usage:") + host.run("sudo netstat -tulpn | grep 5432") if socket_check.failed: logger.error("PostgreSQL socket directory check failed") From be713ac37e0de522d5f2557dae2bee099b0b7ed2 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 25 Apr 2025 14:49:27 -0400 Subject: [PATCH 16/42] test: extend logging more to see what happens when pg starts --- testinfra/test_ami_nix.py | 148 ++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 71 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 61f4d1b56..e55a89b61 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -410,89 +410,95 @@ def is_healthy(ssh) -> bool: logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") - # Get detailed systemd status - logger.error("Detailed systemd status:") - host.run("sudo systemctl status postgresql -l --no-pager") + # Check systemd service unit file + logger.error("PostgreSQL systemd service unit file:") + result = host.run("sudo systemctl cat postgresql") + logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}") - # Check init script logs - logger.error("Init script logs:") - host.run("sudo journalctl -u cloud-init --no-pager") + # Check systemd service environment + logger.error("PostgreSQL systemd service environment:") + result = host.run("sudo systemctl show postgresql") + logger.error(f"service environment:\n{result.stdout}\n{result.stderr}") - # Check cloud-init logs - logger.error("Cloud-init logs:") - host.run("sudo cat /var/log/cloud-init-output.log") + # Check systemd service dependencies + logger.error("PostgreSQL systemd service dependencies:") + result = host.run("sudo systemctl list-dependencies postgresql") + logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}") - # Check if init script exists and its contents - logger.error("Init script status:") - host.run("ls -la /tmp/init.sh") - host.run("cat /tmp/init.sh") + # Check if service is enabled + logger.error("PostgreSQL service enabled status:") + result = host.run("sudo systemctl is-enabled postgresql") + logger.error(f"service enabled status:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL configuration - logger.error("PostgreSQL configuration:") - host.run("sudo cat /etc/postgresql/*/main/postgresql.conf") - host.run("sudo cat /etc/postgresql/*/main/pg_hba.conf") + # Check systemd journal for service execution logs + logger.error("Systemd journal entries for PostgreSQL service execution:") + result = host.run("sudo journalctl -u postgresql -n 100 --no-pager") + logger.error(f"systemd journal:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL data directory permissions - logger.error("PostgreSQL data directory permissions:") - host.run("sudo ls -la /var/lib/postgresql/*/main/") + # Check systemd journal specifically for ExecStartPre and ExecStart + logger.error("Systemd journal entries for ExecStartPre and ExecStart:") + result = host.run("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'") + logger.error(f"execution logs:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL startup logs - logger.error("PostgreSQL startup logs:") - host.run("sudo cat /var/log/postgresql/postgresql-*.log") + # Check systemd journal for any errors + logger.error("Systemd journal entries with error level:") + result = host.run("sudo journalctl -u postgresql -n 100 --no-pager -p err") + logger.error(f"error logs:\n{result.stdout}\n{result.stderr}") - # Check systemd journal for PostgreSQL - logger.error("Systemd journal for PostgreSQL:") - host.run("sudo journalctl -u postgresql -n 100 --no-pager") + # Check pre-start script output + logger.error("Checking pre-start script output:") + result = host.run("sudo -u postgres /usr/local/bin/postgres_prestart.sh") + logger.error(f"pre-start script output:\n{result.stdout}\n{result.stderr}") - # Check for any PostgreSQL-related errors in system logs - logger.error("System logs with PostgreSQL errors:") - host.run("sudo journalctl | grep -i postgres | tail -n 100") + # Check PostgreSQL logs directory + logger.error("Checking PostgreSQL logs directory:") + result = host.run("sudo ls -la /var/log/postgresql/") + logger.error(f"log directory contents:\n{result.stdout}\n{result.stderr}") - # Check for any disk space issues - logger.error("Disk space information:") - host.run("df -h") - host.run("sudo du -sh /var/lib/postgresql/*") + # Check any existing PostgreSQL logs + logger.error("Checking existing PostgreSQL logs:") + result = host.run("sudo cat /var/log/postgresql/*.log") + logger.error(f"postgresql logs:\n{result.stdout}\n{result.stderr}") - # Check for any memory issues - logger.error("Memory information:") - host.run("free -h") + # Try starting PostgreSQL directly with pg_ctl and capture output + logger.error("Attempting to start PostgreSQL directly with pg_ctl:") + startup_log = "/tmp/postgres-start.log" + result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}") + logger.error(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}") - # Check for any process conflicts - logger.error("Running processes:") - host.run("ps aux | grep postgres") + # Check the startup log + logger.error("PostgreSQL startup log:") + result = host.run(f"sudo cat {startup_log}") + logger.error(f"startup log contents:\n{result.stdout}\n{result.stderr}") - # Check for any port conflicts - logger.error("Port usage:") - host.run("sudo netstat -tulpn | grep 5432") - - if socket_check.failed: - logger.error("PostgreSQL socket directory check failed") - logger.error(f"socket check: {socket_check.stdout}") - logger.error(f"socket error: {socket_check.stderr}") - - if pg_isready.failed: - logger.error("pg_isready check failed") - logger.error(f"pg_isready output: {pg_isready.stdout}") - logger.error(f"pg_isready error: {pg_isready.stderr}") - - # Check PostgreSQL logs for startup issues - logger.error("PostgreSQL logs:") - host.run("sudo cat /var/log/postgresql/postgresql-*.log") - logger.error("PostgreSQL systemd status:") - host.run("sudo systemctl status postgresql") - logger.error("PostgreSQL journal logs:") - host.run("sudo journalctl -u postgresql --no-pager") - - if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): - return False - else: - cmd = check(host) - if cmd.failed is True: - logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") - return False + # Clean up the startup log + result = host.run(f"sudo rm -f {startup_log}") + + # Check PostgreSQL configuration + logger.error("PostgreSQL configuration:") + result = host.run("sudo cat /etc/postgresql/postgresql.conf") + logger.error(f"postgresql.conf:\n{result.stdout}\n{result.stderr}") + + # Check PostgreSQL authentication configuration + logger.error("PostgreSQL authentication configuration:") + result = host.run("sudo cat /etc/postgresql/pg_hba.conf") + logger.error(f"pg_hba.conf:\n{result.stdout}\n{result.stderr}") + + # Check PostgreSQL environment + logger.error("PostgreSQL environment:") + result = host.run("sudo -u postgres env | grep POSTGRES") + logger.error(f"postgres environment:\n{result.stdout}\n{result.stderr}") + + if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): + return False + else: + cmd = check(host) + if cmd.failed is True: + logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + return False except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." From 3e01f39f498b75ce5b56f18fc1253ccbdeb931a7 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 13:12:08 -0400 Subject: [PATCH 17/42] test: handle lib extension names per system --- nix/ext/pg_cron.nix | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 3f438931e..25121fb8c 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -116,14 +116,21 @@ stdenv.mkDerivation { VERSION=$1 LIB_DIR=$(dirname "$0")/../lib + # Use platform-specific extension + if [ "$(uname)" = "Darwin" ]; then + EXT=".dylib" + else + EXT=".so" + fi + # Check if version exists - if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then + if [ ! -f "$LIB_DIR/pg_cron-$VERSION$EXT" ]; then echo "Error: Version $VERSION not found" exit 1 fi # Update library symlink - ln -sfnv "pg_cron-$VERSION${postgresql.dlSuffix}" "$LIB_DIR/pg_cron${postgresql.dlSuffix}" + ln -sfnv "pg_cron-$VERSION$EXT" "$LIB_DIR/pg_cron$EXT" echo "Successfully switched pg_cron to version $VERSION" EOF From 6aa337654653bf974f2eb38c251c84d4eea20c78 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 14:45:33 -0400 Subject: [PATCH 18/42] test: more debugging --- testinfra/test_ami_nix.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index e55a89b61..d04a0a001 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -364,7 +364,13 @@ def is_healthy(ssh) -> bool: # Then check if the socket directory exists and has correct permissions h.run("sudo ls -la /run/postgresql"), # Then try pg_isready - h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") + h.run("sudo -u postgres /usr/bin/pg_isready -U postgres"), + # Check Nix profile setup + h.run("echo 'Check Nix profile setup'"), + h.run("sudo -u postgres ls -la /home/postgres/.nix-profile"), + h.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin"), + h.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version"), + h.run("sudo -u postgres cat /home/postgres/.nix-profile/bin/switch_pg_cron_version") ), ), ( From b6322fddb0d3aaf8f422764d7677c46e6a3b7e1a Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 16:00:19 -0400 Subject: [PATCH 19/42] test: move logging here --- testinfra/test_ami_nix.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index d04a0a001..56d4ad790 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -364,13 +364,7 @@ def is_healthy(ssh) -> bool: # Then check if the socket directory exists and has correct permissions h.run("sudo ls -la /run/postgresql"), # Then try pg_isready - h.run("sudo -u postgres /usr/bin/pg_isready -U postgres"), - # Check Nix profile setup - h.run("echo 'Check Nix profile setup'"), - h.run("sudo -u postgres ls -la /home/postgres/.nix-profile"), - h.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin"), - h.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version"), - h.run("sudo -u postgres cat /home/postgres/.nix-profile/bin/switch_pg_cron_version") + h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") ), ), ( @@ -411,6 +405,20 @@ def is_healthy(ssh) -> bool: systemd_status, socket_check, pg_isready = check(host) >>>>>>> 2bd7b6d9 (test: more logging for healthcheck) + # Log Nix profile setup checks + logger.info("Checking Nix profile setup:") + nix_profile_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile") + logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") + + nix_bin_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin") + logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") + + nix_script_result = host.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") + logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}") + + nix_script_output = host.run("sudo -u postgres /home/postgres/.nix-profile/bin/switch_pg_cron_version") + logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}") + if systemd_status.failed: logger.error("PostgreSQL systemd service is not active") logger.error(f"systemd status: {systemd_status.stdout}") From 138ab0738b4ba3697a5b984de3cd00a0f7f800ce Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 17:38:55 -0400 Subject: [PATCH 20/42] test: try direct --- testinfra/test_ami_nix.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 56d4ad790..43690e3db 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -407,38 +407,38 @@ def is_healthy(ssh) -> bool: # Log Nix profile setup checks logger.info("Checking Nix profile setup:") - nix_profile_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile") + nix_profile_result = host.run("ls -la /home/postgres/.nix-profile") logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") - nix_bin_result = host.run("sudo -u postgres ls -la /home/postgres/.nix-profile/bin") + nix_bin_result = host.run("ls -la /home/postgres/.nix-profile/bin") logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") - nix_script_result = host.run("sudo -u postgres test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") + nix_script_result = host.run("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}") - nix_script_output = host.run("sudo -u postgres /home/postgres/.nix-profile/bin/switch_pg_cron_version") + nix_script_output = host.run("/home/postgres/.nix-profile/bin/switch_pg_cron_version") logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}") if systemd_status.failed: logger.error("PostgreSQL systemd service is not active") logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") - + # Check systemd service unit file logger.error("PostgreSQL systemd service unit file:") result = host.run("sudo systemctl cat postgresql") logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}") - + # Check systemd service environment logger.error("PostgreSQL systemd service environment:") result = host.run("sudo systemctl show postgresql") logger.error(f"service environment:\n{result.stdout}\n{result.stderr}") - + # Check systemd service dependencies logger.error("PostgreSQL systemd service dependencies:") result = host.run("sudo systemctl list-dependencies postgresql") logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}") - + # Check if service is enabled logger.error("PostgreSQL service enabled status:") result = host.run("sudo systemctl is-enabled postgresql") From 141c3a0346b0cc0acd860d9ef1fddf21b5c9a5dd Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 18:19:07 -0400 Subject: [PATCH 21/42] test: use the right alias on machine --- .github/workflows/testinfra-ami-build.yml | 4 +- ansible/files/postgres_prestart.sh.j2 | 4 +- testinfra/test_ami_nix.py | 150 ++-------------------- 3 files changed, 14 insertions(+), 144 deletions(-) diff --git a/.github/workflows/testinfra-ami-build.yml b/.github/workflows/testinfra-ami-build.yml index 2b07e716f..8845e954a 100644 --- a/.github/workflows/testinfra-ami-build.yml +++ b/.github/workflows/testinfra-ami-build.yml @@ -108,13 +108,13 @@ jobs: df -h / # Display available space - name: Run tests - timeout-minutes: 10 + timeout-minutes: 30 env: AMI_NAME: "supabase-postgres-${{ steps.random.outputs.random_string }}" run: | # TODO: use poetry for pkg mgmt pip3 install boto3 boto3-stubs[essential] docker ec2instanceconnectcli pytest pytest-testinfra[paramiko,docker] requests - pytest -vv -s testinfra/test_ami_nix.py + pytest -vvvv -s testinfra/test_ami_nix.py - name: Cleanup resources on build cancellation if: ${{ cancelled() }} diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index a045f298f..40e8debd7 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -41,7 +41,7 @@ get_pg_cron_version() { fi local version - version=$(sudo -u postgres /home/postgres/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") + version=$(sudo -u postgres /var/lib/postgresql/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") if [ -z "$version" ]; then echo "pg_cron: Not specified in extensions file" return @@ -57,7 +57,7 @@ get_pg_cron_version() { switch_pg_cron_version() { local version="$1" - local switch_script="/home/postgres/.nix-profile/bin/switch_pg_cron_version" + local switch_script="/var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version" if [ ! -x "$switch_script" ]; then echo "pg_cron: No version switch script available" diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 43690e3db..4a77dd723 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -348,171 +348,41 @@ def gzip_then_base64_encode(s: str) -> str: def is_healthy(ssh) -> bool: health_checks = [ -<<<<<<< HEAD ("postgres", "sudo -u postgres /usr/bin/pg_isready -U postgres"), ("adminapi", f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: {supabase_admin_key}'"), ("postgrest", "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready"), ("gotrue", "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health"), ("kong", "sudo kong health"), ("fail2ban", "sudo fail2ban-client status"), -======= - ( - "postgres", - lambda h: ( - # First check if PostgreSQL is running - h.run("sudo systemctl is-active postgresql"), - # Then check if the socket directory exists and has correct permissions - h.run("sudo ls -la /run/postgresql"), - # Then try pg_isready - h.run("sudo -u postgres /usr/bin/pg_isready -U postgres") - ), - ), - ( - "adminapi", - lambda h: h.run( - f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: {supabase_admin_key}'" - ), - ), - ( - "postgrest", - lambda h: h.run( - "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready" - ), - ), - ( - "gotrue", - lambda h: h.run( - "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health" - ), - ), - ("kong", lambda h: h.run("sudo kong health")), - ("fail2ban", lambda h: h.run("sudo fail2ban-client status")), ->>>>>>> 2bd7b6d9 (test: more logging for healthcheck) ] for service, command in health_checks: try: -<<<<<<< HEAD result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") logger.error(f"{service} command failed with rc={cmd.rc}") logger.error(f"{service} stdout: {cmd.stdout}") logger.error(f"{service} stderr: {cmd.stderr}") -======= - if service == "postgres": - # For PostgreSQL, we need to check multiple things - systemd_status, socket_check, pg_isready = check(host) ->>>>>>> 2bd7b6d9 (test: more logging for healthcheck) - # Log Nix profile setup checks - logger.info("Checking Nix profile setup:") - nix_profile_result = host.run("ls -la /home/postgres/.nix-profile") - logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}") - - nix_bin_result = host.run("ls -la /home/postgres/.nix-profile/bin") - logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}") - - nix_script_result = host.run("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version") - logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}") - - nix_script_output = host.run("/home/postgres/.nix-profile/bin/switch_pg_cron_version") - logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}") - if systemd_status.failed: logger.error("PostgreSQL systemd service is not active") logger.error(f"systemd status: {systemd_status.stdout}") logger.error(f"systemd error: {systemd_status.stderr}") - - # Check systemd service unit file - logger.error("PostgreSQL systemd service unit file:") - result = host.run("sudo systemctl cat postgresql") - logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}") - - # Check systemd service environment - logger.error("PostgreSQL systemd service environment:") - result = host.run("sudo systemctl show postgresql") - logger.error(f"service environment:\n{result.stdout}\n{result.stderr}") - - # Check systemd service dependencies - logger.error("PostgreSQL systemd service dependencies:") - result = host.run("sudo systemctl list-dependencies postgresql") - logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}") - - # Check if service is enabled - logger.error("PostgreSQL service enabled status:") - result = host.run("sudo systemctl is-enabled postgresql") - logger.error(f"service enabled status:\n{result.stdout}\n{result.stderr}") - - # Check systemd journal for service execution logs - logger.error("Systemd journal entries for PostgreSQL service execution:") - result = host.run("sudo journalctl -u postgresql -n 100 --no-pager") - logger.error(f"systemd journal:\n{result.stdout}\n{result.stderr}") - - # Check systemd journal specifically for ExecStartPre and ExecStart - logger.error("Systemd journal entries for ExecStartPre and ExecStart:") - result = host.run("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'") - logger.error(f"execution logs:\n{result.stdout}\n{result.stderr}") - - # Check systemd journal for any errors - logger.error("Systemd journal entries with error level:") - result = host.run("sudo journalctl -u postgresql -n 100 --no-pager -p err") - logger.error(f"error logs:\n{result.stdout}\n{result.stderr}") - - # Check pre-start script output - logger.error("Checking pre-start script output:") - result = host.run("sudo -u postgres /usr/local/bin/postgres_prestart.sh") - logger.error(f"pre-start script output:\n{result.stdout}\n{result.stderr}") - # Check PostgreSQL logs directory - logger.error("Checking PostgreSQL logs directory:") - result = host.run("sudo ls -la /var/log/postgresql/") - logger.error(f"log directory contents:\n{result.stdout}\n{result.stderr}") - - # Check any existing PostgreSQL logs - logger.error("Checking existing PostgreSQL logs:") - result = host.run("sudo cat /var/log/postgresql/*.log") - logger.error(f"postgresql logs:\n{result.stdout}\n{result.stderr}") - - # Try starting PostgreSQL directly with pg_ctl and capture output - logger.error("Attempting to start PostgreSQL directly with pg_ctl:") - startup_log = "/tmp/postgres-start.log" - result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}") - logger.error(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}") - - # Check the startup log - logger.error("PostgreSQL startup log:") - result = host.run(f"sudo cat {startup_log}") - logger.error(f"startup log contents:\n{result.stdout}\n{result.stderr}") - - # Clean up the startup log - result = host.run(f"sudo rm -f {startup_log}") - - # Check PostgreSQL configuration - logger.error("PostgreSQL configuration:") - result = host.run("sudo cat /etc/postgresql/postgresql.conf") - logger.error(f"postgresql.conf:\n{result.stdout}\n{result.stderr}") - - # Check PostgreSQL authentication configuration - logger.error("PostgreSQL authentication configuration:") - result = host.run("sudo cat /etc/postgresql/pg_hba.conf") - logger.error(f"pg_hba.conf:\n{result.stdout}\n{result.stderr}") - - # Check PostgreSQL environment - logger.error("PostgreSQL environment:") - result = host.run("sudo -u postgres env | grep POSTGRES") - logger.error(f"postgres environment:\n{result.stdout}\n{result.stderr}") + # Run detailed checks since we know we have a working connection + run_detailed_checks(host) if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): return False - else: - cmd = check(host) - if cmd.failed is True: - logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") - return False + else: + cmd = check(host) + if cmd.failed is True: + logger.warning(f"{service} not ready") + logger.error(f"{service} command failed with rc={cmd.rc}") + logger.error(f"{service} stdout: {cmd.stdout}") + logger.error(f"{service} stderr: {cmd.stderr}") + return False except Exception as e: logger.warning( f"Connection failed during {service} check, attempting reconnect..." From ac4983adde4f2bd1dfae907da74ed931cc5d00d4 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 28 Apr 2025 21:34:12 -0400 Subject: [PATCH 22/42] test: do not unpack result --- testinfra/test_ami_nix.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 4a77dd723..694ddbc1b 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -358,23 +358,27 @@ def is_healthy(ssh) -> bool: for service, command in health_checks: try: +<<<<<<< HEAD result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") logger.error(f"{service} command failed with rc={cmd.rc}") logger.error(f"{service} stdout: {cmd.stdout}") logger.error(f"{service} stderr: {cmd.stderr}") +======= + if service == "postgres": + # For PostgreSQL, we need to check multiple things + pg_isready = check(host) +>>>>>>> 65ef0692 (test: do not unpack result) - if systemd_status.failed: - logger.error("PostgreSQL systemd service is not active") - logger.error(f"systemd status: {systemd_status.stdout}") - logger.error(f"systemd error: {systemd_status.stderr}") + if pg_isready.failed: + logger.error("PostgreSQL is not ready") + logger.error(f"pg_isready stdout: {pg_isready.stdout}") + logger.error(f"pg_isready stderr: {pg_isready.stderr}") # Run detailed checks since we know we have a working connection run_detailed_checks(host) - - if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]): - return False + return False else: cmd = check(host) if cmd.failed is True: From de37a769e7c59824c8b3eb25f77285eb4ad36115 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 29 Apr 2025 10:42:39 -0400 Subject: [PATCH 23/42] test: reorg and print logs while waiting continue on other checks when ready --- testinfra/test_ami_nix.py | 58 +++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 694ddbc1b..a4fa990d4 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -346,7 +346,31 @@ def gzip_then_base64_encode(s: str) -> str: instance.terminate() raise TimeoutError("init.sh failed to complete within the timeout period") +<<<<<<< HEAD def is_healthy(ssh) -> bool: +======= + # Check PostgreSQL logs directory + logger.info("Checking PostgreSQL logs directory:") + result = host.run("sudo ls -la /var/log/postgresql/") + logger.info(f"log directory contents:\n{result.stdout}\n{result.stderr}") + + # Check any existing PostgreSQL logs + logger.info("Checking existing PostgreSQL logs:") + result = host.run("sudo cat /var/log/postgresql/*.log") + logger.info(f"postgresql logs:\n{result.stdout}\n{result.stderr}") + + # Check the startup log + logger.info("PostgreSQL startup log:") + result = host.run(f"sudo cat {startup_log}") + logger.info(f"startup log contents:\n{result.stdout}\n{result.stderr}") + + # Check PostgreSQL environment + logger.info("PostgreSQL environment:") + result = host.run("sudo -u postgres env | grep POSTGRES") + logger.info(f"postgres environment:\n{result.stdout}\n{result.stderr}") + + def is_healthy(host, instance_ip, ssh_identity_file) -> bool: +>>>>>>> c2631e8c (test: reorg and print logs while waiting continue on other checks when ready) health_checks = [ ("postgres", "sudo -u postgres /usr/bin/pg_isready -U postgres"), ("adminapi", f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: {supabase_admin_key}'"), @@ -367,18 +391,36 @@ def is_healthy(ssh) -> bool: logger.error(f"{service} stderr: {cmd.stderr}") ======= if service == "postgres": - # For PostgreSQL, we need to check multiple things pg_isready = check(host) >>>>>>> 65ef0692 (test: do not unpack result) - if pg_isready.failed: - logger.error("PostgreSQL is not ready") - logger.error(f"pg_isready stdout: {pg_isready.stdout}") - logger.error(f"pg_isready stderr: {pg_isready.stderr}") + # Always read and log the PostgreSQL logs first + logger.warning("PostgreSQL status check:") + try: + # Read both .log and .csv files + log_files = [ + "/var/log/postgresql/*.log", + "/var/log/postgresql/*.csv" + ] - # Run detailed checks since we know we have a working connection - run_detailed_checks(host) - return False + for log_pattern in log_files: + log_result = host.run(f"sudo cat {log_pattern}") + if not log_result.failed: + logger.error(f"PostgreSQL logs from {log_pattern}:") + logger.error(log_result.stdout) + if log_result.stderr: + logger.error(f"Log read errors: {log_result.stderr}") + else: + logger.error(f"Failed to read PostgreSQL logs from {log_pattern}: {log_result.stderr}") + except Exception as e: + logger.error(f"Error reading PostgreSQL logs: {str(e)}") + + # Then check the status and return + if not pg_isready.failed: + continue + # Wait before next attempt + sleep(5) + return False else: cmd = check(host) if cmd.failed is True: From e85e36cf508fd5ff40083b7177d8209a9f00bbe1 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 29 Apr 2025 12:15:43 -0400 Subject: [PATCH 24/42] test: restructure checks to avoid race --- testinfra/test_ami_nix.py | 67 ++++++++++++++------------------------- 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index a4fa990d4..b73264c38 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -346,31 +346,7 @@ def gzip_then_base64_encode(s: str) -> str: instance.terminate() raise TimeoutError("init.sh failed to complete within the timeout period") -<<<<<<< HEAD def is_healthy(ssh) -> bool: -======= - # Check PostgreSQL logs directory - logger.info("Checking PostgreSQL logs directory:") - result = host.run("sudo ls -la /var/log/postgresql/") - logger.info(f"log directory contents:\n{result.stdout}\n{result.stderr}") - - # Check any existing PostgreSQL logs - logger.info("Checking existing PostgreSQL logs:") - result = host.run("sudo cat /var/log/postgresql/*.log") - logger.info(f"postgresql logs:\n{result.stdout}\n{result.stderr}") - - # Check the startup log - logger.info("PostgreSQL startup log:") - result = host.run(f"sudo cat {startup_log}") - logger.info(f"startup log contents:\n{result.stdout}\n{result.stderr}") - - # Check PostgreSQL environment - logger.info("PostgreSQL environment:") - result = host.run("sudo -u postgres env | grep POSTGRES") - logger.info(f"postgres environment:\n{result.stdout}\n{result.stderr}") - - def is_healthy(host, instance_ip, ssh_identity_file) -> bool: ->>>>>>> c2631e8c (test: reorg and print logs while waiting continue on other checks when ready) health_checks = [ ("postgres", "sudo -u postgres /usr/bin/pg_isready -U postgres"), ("adminapi", f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: {supabase_admin_key}'"), @@ -382,22 +358,16 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: for service, command in health_checks: try: -<<<<<<< HEAD result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") logger.error(f"{service} command failed with rc={cmd.rc}") logger.error(f"{service} stdout: {cmd.stdout}") logger.error(f"{service} stderr: {cmd.stderr}") -======= - if service == "postgres": - pg_isready = check(host) ->>>>>>> 65ef0692 (test: do not unpack result) - # Always read and log the PostgreSQL logs first + # Always read and log the PostgreSQL logs logger.warning("PostgreSQL status check:") try: - # Read both .log and .csv files log_files = [ "/var/log/postgresql/*.log", "/var/log/postgresql/*.csv" @@ -415,33 +385,42 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool: except Exception as e: logger.error(f"Error reading PostgreSQL logs: {str(e)}") - # Then check the status and return - if not pg_isready.failed: - continue - # Wait before next attempt - sleep(5) - return False + service_status[service] = not pg_isready.failed + else: cmd = check(host) - if cmd.failed is True: + service_status[service] = not cmd.failed + if cmd.failed: logger.warning(f"{service} not ready") logger.error(f"{service} command failed with rc={cmd.rc}") logger.error(f"{service} stdout: {cmd.stdout}") logger.error(f"{service} stderr: {cmd.stderr}") - return False + except Exception as e: - logger.warning( - f"Connection failed during {service} check, attempting reconnect..." - ) + logger.warning(f"Connection failed during {service} check, attempting reconnect...") logger.error(f"Error details: {str(e)}") host = get_ssh_connection(instance_ip, ssh_identity_file) - return False + service_status[service] = False + + # Log overall status of all services + logger.info("Service health status:") + for service, healthy in service_status.items(): + logger.info(f"{service}: {'healthy' if healthy else 'unhealthy'}") + + # If any service is unhealthy, wait and return False with status + if not all(service_status.values()): + if service_status.get("postgres", False): # If postgres is healthy but others aren't + sleep(5) # Only wait if postgres is up but other services aren't + logger.warning("Some services are not healthy, will retry...") + return False, service_status - return True + logger.info("All services are healthy, proceeding to tests...") + return True, service_status while True: if is_healthy(ssh): break + logger.warning(f"Health check failed, service status: {status}") sleep(1) # Return both the SSH connection and instance IP for use in tests From 7f1c020bb2ee0ce63bccc55c86ee90f4593c0321 Mon Sep 17 00:00:00 2001 From: samrose Date: Thu, 1 May 2025 19:08:48 -0400 Subject: [PATCH 25/42] Update nix/ext/pg_cron.nix Co-authored-by: Tristan Ross --- nix/ext/pg_cron.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 25121fb8c..29901606c 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -141,7 +141,7 @@ stdenv.mkDerivation { meta = with lib; { description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; homepage = "https://github.com/citusdata/pg_cron"; - platforms = postgresql.meta.platforms; + inherit (postgresql.meta) platforms; license = licenses.postgresql; }; } From 5cc0b2c2e7ea5267840924d5a453dc077f5dc71c Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Fri, 2 May 2025 14:46:21 -0400 Subject: [PATCH 26/42] chore: refactor based on review --- nix/ext/pg_cron.nix | 156 +++++++++++++++++++++----------------------- 1 file changed, 76 insertions(+), 80 deletions(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 29901606c..99669dd7b 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -1,67 +1,25 @@ -{ lib, stdenv, fetchFromGitHub, postgresql }: +{ + lib, + stdenv, + fetchFromGitHub, + postgresql, + runCommand +}: let - allVersions = { - "1.3.1" = { - rev = "v1.3.1"; - hash = "sha256-rXotNOtQNmA55ErNxGoNSKZ0pP1uxEVlDGITFHuqGG4="; - patches = [ ./pg_cron-1.3.1-pg15.patch ]; - }; - "1.4.2" = { - rev = "v1.4.2"; - hash = "sha256-P0Fd10Q1p+KrExb35G6otHpc6pD61WnMll45H2jkevM="; - }; - "1.6.4" = { - rev = "v1.6.4"; - hash = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; - }; - "1.5.2" = { - rev = "v1.5.2"; - hash = "sha256-+quVWbKJy6wXpL/zwTk5FF7sYwHA7I97WhWmPO/HSZ4="; - }; - }; - - # Simple version string that concatenates all versions with dashes - versionString = "multi-" + lib.concatStringsSep "-" (map (v: lib.replaceStrings ["."] ["-"] v) (lib.attrNames allVersions)); - - mkPgCron = pgCronVersion: { rev, hash, patches ? [] }: stdenv.mkDerivation { - pname = "pg_cron"; - version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; + pname = "pg_cron"; - buildInputs = [ postgresql ]; - inherit patches; - - src = fetchFromGitHub { - owner = "citusdata"; - repo = "pg_cron"; - inherit rev hash; - }; + meta = with lib; { + description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; + homepage = "https://github.com/citusdata/${pname}"; + inherit (postgresql.meta) platforms; + license = licenses.postgresql; + }; - buildPhase = '' - make PG_CONFIG=${postgresql}/bin/pg_config - - # Create version-specific SQL file - cp pg_cron.sql pg_cron--${pgCronVersion}.sql - - # Create versioned control file with modified module path - sed -e "/^default_version =/d" \ - -e "s|^module_pathname = .*|module_pathname = '\$libdir/pg_cron'|" \ - pg_cron.control > pg_cron--${pgCronVersion}.control - ''; - - installPhase = '' - mkdir -p $out/{lib,share/postgresql/extension,bin} - - # Install versioned library - install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${pgCronVersion}${postgresql.dlSuffix} - - # Install version-specific files - install -Dm644 pg_cron--${pgCronVersion}.sql $out/share/postgresql/extension/ - install -Dm644 pg_cron--${pgCronVersion}.control $out/share/postgresql/extension/ - - # Install upgrade scripts - find . -name 'pg_cron--*--*.sql' -exec install -Dm644 {} $out/share/postgresql/extension/ \; - ''; + allVersions = { + "1.6.4" = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; + "1.5.2" = "sha256-+quVWbKJy6wXpL/zwTk5FF7sYwHA7I97WhWmPO/HSZ4="; + "1.4.2" = "sha256-P0Fd10Q1p+KrExb35G6otHpc6pD61WnMll45H2jkevM="; }; getVersions = pg: @@ -69,24 +27,70 @@ let then { "1.6.4" = allVersions."1.6.4"; } else allVersions; - allVersionsForPg = lib.mapAttrs mkPgCron (getVersions postgresql); + mkPackage = version: hash: + stdenv.mkDerivation (finalAttrs: { + inherit pname meta; + version = "${version}-pg${lib.versions.major postgresql.version}"; -in -stdenv.mkDerivation { - pname = "pg_cron-all"; - version = versionString; + src = fetchFromGitHub { + owner = "citusdata"; + repo = pname; + rev = "refs/tags/v${version}"; + inherit hash; + }; + + buildInputs = [ postgresql ]; - buildInputs = lib.attrValues allVersionsForPg; + buildPhase = '' + make PG_CONFIG=${postgresql}/bin/pg_config + + # Create version-specific SQL file + cp pg_cron.sql pg_cron--${version}.sql + + # Create versioned control file with modified module path + sed -e "/^default_version =/d" \ + -e "s|^module_pathname = .*|module_pathname = '\$libdir/pg_cron'|" \ + pg_cron.control > pg_cron--${version}.control + ''; + + installPhase = '' + mkdir -p $out/{lib,share/postgresql/extension} + + # Install versioned library + install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${version}${postgresql.dlSuffix} + + # Install version-specific files + install -Dm644 pg_cron--${version}.sql $out/share/postgresql/extension/ + install -Dm644 pg_cron--${version}.control $out/share/postgresql/extension/ + + # Install upgrade scripts + find . -name 'pg_cron--*--*.sql' -exec install -Dm644 {} $out/share/postgresql/extension/ \; + ''; + }); + + packages = lib.listToAttrs ( + lib.attrValues ( + lib.mapAttrs (version: hash: lib.nameValuePair "v${version}" (mkPackage version hash)) (getVersions postgresql) + ) + ); + +in +runCommand "${pname}-all" + { + inherit pname meta; + version = "multi-" + lib.concatStringsSep "-" (map (v: lib.replaceStrings ["."] ["-"] v) (lib.attrNames (getVersions postgresql))); - dontUnpack = true; - dontConfigure = true; - dontBuild = true; + buildInputs = lib.attrValues packages; - installPhase = '' + passthru = { + inherit packages; + }; + } + '' mkdir -p $out/{lib,share/postgresql/extension,bin} # Install all versions - for drv in ${lib.concatStringsSep " " (lib.attrValues allVersionsForPg)}; do + for drv in ''${buildInputs[@]}; do ln -sv $drv/lib/* $out/lib/ cp -v --no-clobber $drv/share/postgresql/extension/* $out/share/postgresql/extension/ || true done @@ -136,12 +140,4 @@ stdenv.mkDerivation { EOF chmod +x $out/bin/switch_pg_cron_version - ''; - - meta = with lib; { - description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; - homepage = "https://github.com/citusdata/pg_cron"; - inherit (postgresql.meta) platforms; - license = licenses.postgresql; - }; -} + '' From ab1f69476dd3f3c988308ba4d8eb0e9c2c110b1f Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 5 May 2025 18:46:43 -0400 Subject: [PATCH 27/42] fix: fixing broken elements of script --- flake.nix | 241 ++++++++++++++++---------------------- nix/ext/pg_cron.nix | 167 +++++++++++++------------- testinfra/test_ami_nix.py | 69 ++++++----- 3 files changed, 227 insertions(+), 250 deletions(-) diff --git a/flake.nix b/flake.nix index d49c9e3fa..6302a6894 100644 --- a/flake.nix +++ b/flake.nix @@ -637,157 +637,124 @@ --prefix PATH : ${pkgs.nushell}/bin ''; # Script to run the AMI build and tests locally - build-test-ami = pkgs.runCommand "build-test-ami" - { - buildInputs = with pkgs; [ - packer - awscli2 - yq - jq - openssl - git - coreutils - aws-vault - ]; - } '' - mkdir -p $out/bin - cat > $out/bin/build-test-ami << 'EOL' - #!/usr/bin/env bash - set -euo pipefail + build-test-ami = pkgs.writeScriptBin "build-test-ami" '' + #!/usr/bin/env bash + set -euo pipefail - show_help() { - cat << EOF - Usage: build-test-ami [--help] + show_help() { + cat << EOF + Usage: build-test-ami [--help] - Build AMI images for PostgreSQL testing. + Build AMI images for PostgreSQL testing. - This script will: - 1. Check for required tools and AWS authentication - 2. Build two AMI stages using Packer - 3. Clean up any temporary instances - 4. Output the final AMI name for use with run-testinfra + This script will: + 1. Check for required tools and AWS authentication + 2. Build two AMI stages using Packer + 3. Clean up any temporary instances + 4. Output the final AMI name for use with run-testinfra - Arguments: - postgres-version PostgreSQL major version to build (required) + Arguments: + postgres-version PostgreSQL major version to build (required) - Options: - --help Show this help message and exit + Options: + --help Show this help message and exit - Requirements: - - AWS Vault profile must be set in AWS_VAULT environment variable - - Packer, AWS CLI, yq, jq, and OpenSSL must be installed - - Must be run from a git repository + Requirements: + - AWS Vault profile must be set in AWS_VAULT environment variable + - Packer, AWS CLI, yq, jq, and OpenSSL must be installed + - Must be run from a git repository - Example: - aws-vault exec -- nix run .#build-test-ami 15 - EOF - } - - # Handle help flag - if [[ "$#" -gt 0 && "$1" == "--help" ]]; then - show_help - exit 0 - fi + Example: + aws-vault exec -- nix run .#build-test-ami 15 + EOF + } - export PATH="${pkgs.lib.makeBinPath (with pkgs; [ - packer - awscli2 - yq - jq - openssl - git - coreutils - aws-vault - ])}:$PATH" + # Handle help flag + if [[ "$#" -gt 0 && "$1" == "--help" ]]; then + show_help + exit 0 + fi - # Check for required tools - for cmd in packer aws-vault yq jq openssl; do - if ! command -v $cmd &> /dev/null; then - echo "Error: $cmd is required but not found" - exit 1 - fi - done + export PATH="${pkgs.lib.makeBinPath (with pkgs; [ + packer + awscli2 + yq + jq + openssl + git + coreutils + aws-vault + ])}:$PATH" - # Check AWS Vault profile - if [ -z "''${AWS_VAULT:-}" ]; then - echo "Error: AWS_VAULT environment variable must be set with the profile name" - echo "Usage: aws-vault exec -- nix run .#build-test-ami " + # Check for required tools + for cmd in packer aws-vault yq jq openssl; do + if ! command -v $cmd &> /dev/null; then + echo "Error: $cmd is required but not found" exit 1 fi + done - # Set values - REGION="ap-southeast-1" - POSTGRES_VERSION="$1" - RANDOM_STRING=$(openssl rand -hex 8) - GIT_SHA=$(git rev-parse HEAD) - RUN_ID=$(date +%s) - - # Generate common-nix.vars.pkr.hcl - PG_VERSION=$(yq -r ".postgres_release[\"postgres$POSTGRES_VERSION\"]" ansible/vars.yml) - echo "postgres-version = \"$PG_VERSION\"" > common-nix.vars.pkr.hcl - - # Build AMI Stage 1 - packer init amazon-arm64-nix.pkr.hcl - packer build \ - -var "git-head-version=$GIT_SHA" \ - -var "packer-execution-id=$RUN_ID" \ - -var-file="development-arm.vars.pkr.hcl" \ - -var-file="common-nix.vars.pkr.hcl" \ - -var "ansible_arguments=" \ - -var "postgres-version=$RANDOM_STRING" \ - -var "region=$REGION" \ - -var 'ami_regions=["'"$REGION"'"]' \ - -var "force-deregister=true" \ - -var "ansible_arguments=-e postgresql_major=$POSTGRES_VERSION" \ - amazon-arm64-nix.pkr.hcl - - # Build AMI Stage 2 - packer init stage2-nix-psql.pkr.hcl - packer build \ - -var "git-head-version=$GIT_SHA" \ - -var "packer-execution-id=$RUN_ID" \ - -var "postgres_major_version=$POSTGRES_VERSION" \ - -var-file="development-arm.vars.pkr.hcl" \ - -var-file="common-nix.vars.pkr.hcl" \ - -var "postgres-version=$RANDOM_STRING" \ - -var "region=$REGION" \ - -var 'ami_regions=["'"$REGION"'"]' \ - -var "force-deregister=true" \ - -var "git_sha=$GIT_SHA" \ - stage2-nix-psql.pkr.hcl - - # Cleanup instances from AMI builds - cleanup_instances() { - echo "Terminating EC2 instances with tag testinfra-run-id=$RUN_ID..." - aws ec2 --region $REGION describe-instances \ - --filters "Name=tag:testinfra-run-id,Values=$RUN_ID" \ - --query "Reservations[].Instances[].InstanceId" \ - --output text | xargs -r aws ec2 terminate-instances \ - --region $REGION --instance-ids || true - } - - # Set up traps for various signals to ensure cleanup - trap cleanup_instances EXIT HUP INT QUIT TERM - - # Create and activate virtual environment - VENV_DIR=$(mktemp -d) - trap 'rm -rf "$VENV_DIR"' EXIT HUP INT QUIT TERM - python3 -m venv "$VENV_DIR" - source "$VENV_DIR/bin/activate" - - # Install required Python packages - echo "Installing required Python packages..." - pip install boto3 boto3-stubs[essential] docker ec2instanceconnectcli pytest paramiko requests - - # Run the tests with aws-vault - echo "Running tests for AMI: $RANDOM_STRING using AWS Vault profile: $AWS_VAULT_PROFILE" - aws-vault exec $AWS_VAULT_PROFILE -- pytest -vv -s testinfra/test_ami_nix.py + # Check AWS Vault profile + if [ -z "''${AWS_VAULT:-}" ]; then + echo "Error: AWS_VAULT environment variable must be set with the profile name" + echo "Usage: aws-vault exec -- nix run .#build-test-ami " + exit 1 + fi - # Deactivate virtual environment (cleanup is handled by trap) - deactivate - EOL - chmod +x $out/bin/build-test-ami - ''; + # Set values + REGION="ap-southeast-1" + POSTGRES_VERSION="$1" + RANDOM_STRING=$(openssl rand -hex 8) + GIT_SHA=$(git rev-parse HEAD) + RUN_ID=$(date +%s) + + # Generate common-nix.vars.pkr.hcl + PG_VERSION=$(yq -r ".postgres_release[\"postgres$POSTGRES_VERSION\"]" ansible/vars.yml) + echo "postgres-version = \"$PG_VERSION\"" > common-nix.vars.pkr.hcl + + # Build AMI Stage 1 + packer init amazon-arm64-nix.pkr.hcl + packer build \ + -var "git-head-version=$GIT_SHA" \ + -var "packer-execution-id=$RUN_ID" \ + -var-file="development-arm.vars.pkr.hcl" \ + -var-file="common-nix.vars.pkr.hcl" \ + -var "ansible_arguments=" \ + -var "postgres-version=$RANDOM_STRING" \ + -var "region=$REGION" \ + -var 'ami_regions=["'"$REGION"'"]' \ + -var "force-deregister=true" \ + -var "ansible_arguments=-e postgresql_major=$POSTGRES_VERSION" \ + amazon-arm64-nix.pkr.hcl + + # Build AMI Stage 2 + packer init stage2-nix-psql.pkr.hcl + packer build \ + -var "git-head-version=$GIT_SHA" \ + -var "packer-execution-id=$RUN_ID" \ + -var "postgres_major_version=$POSTGRES_VERSION" \ + -var-file="development-arm.vars.pkr.hcl" \ + -var-file="common-nix.vars.pkr.hcl" \ + -var "postgres-version=$RANDOM_STRING" \ + -var "region=$REGION" \ + -var 'ami_regions=["'"$REGION"'"]' \ + -var "force-deregister=true" \ + -var "git_sha=$GIT_SHA" \ + stage2-nix-psql.pkr.hcl + + # Cleanup instances from AMI builds + cleanup_instances() { + echo "Terminating EC2 instances with tag testinfra-run-id=$RUN_ID..." + aws ec2 --region $REGION describe-instances \ + --filters "Name=tag:testinfra-run-id,Values=$RUN_ID" \ + --query "Reservations[].Instances[].InstanceId" \ + --output text | xargs -r aws ec2 terminate-instances \ + --region $REGION --instance-ids || true + } + + # Set up traps for various signals to ensure cleanup + trap cleanup_instances EXIT HUP INT QUIT TERM + ''; run-testinfra = pkgs.runCommand "run-testinfra" { diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 99669dd7b..3f438931e 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -1,25 +1,67 @@ -{ - lib, - stdenv, - fetchFromGitHub, - postgresql, - runCommand -}: +{ lib, stdenv, fetchFromGitHub, postgresql }: let - pname = "pg_cron"; - - meta = with lib; { - description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; - homepage = "https://github.com/citusdata/${pname}"; - inherit (postgresql.meta) platforms; - license = licenses.postgresql; + allVersions = { + "1.3.1" = { + rev = "v1.3.1"; + hash = "sha256-rXotNOtQNmA55ErNxGoNSKZ0pP1uxEVlDGITFHuqGG4="; + patches = [ ./pg_cron-1.3.1-pg15.patch ]; + }; + "1.4.2" = { + rev = "v1.4.2"; + hash = "sha256-P0Fd10Q1p+KrExb35G6otHpc6pD61WnMll45H2jkevM="; + }; + "1.6.4" = { + rev = "v1.6.4"; + hash = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; + }; + "1.5.2" = { + rev = "v1.5.2"; + hash = "sha256-+quVWbKJy6wXpL/zwTk5FF7sYwHA7I97WhWmPO/HSZ4="; + }; }; - allVersions = { - "1.6.4" = "sha256-t1DpFkPiSfdoGG2NgNT7g1lkvSooZoRoUrix6cBID40="; - "1.5.2" = "sha256-+quVWbKJy6wXpL/zwTk5FF7sYwHA7I97WhWmPO/HSZ4="; - "1.4.2" = "sha256-P0Fd10Q1p+KrExb35G6otHpc6pD61WnMll45H2jkevM="; + # Simple version string that concatenates all versions with dashes + versionString = "multi-" + lib.concatStringsSep "-" (map (v: lib.replaceStrings ["."] ["-"] v) (lib.attrNames allVersions)); + + mkPgCron = pgCronVersion: { rev, hash, patches ? [] }: stdenv.mkDerivation { + pname = "pg_cron"; + version = "${pgCronVersion}-pg${lib.versions.major postgresql.version}"; + + buildInputs = [ postgresql ]; + inherit patches; + + src = fetchFromGitHub { + owner = "citusdata"; + repo = "pg_cron"; + inherit rev hash; + }; + + buildPhase = '' + make PG_CONFIG=${postgresql}/bin/pg_config + + # Create version-specific SQL file + cp pg_cron.sql pg_cron--${pgCronVersion}.sql + + # Create versioned control file with modified module path + sed -e "/^default_version =/d" \ + -e "s|^module_pathname = .*|module_pathname = '\$libdir/pg_cron'|" \ + pg_cron.control > pg_cron--${pgCronVersion}.control + ''; + + installPhase = '' + mkdir -p $out/{lib,share/postgresql/extension,bin} + + # Install versioned library + install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${pgCronVersion}${postgresql.dlSuffix} + + # Install version-specific files + install -Dm644 pg_cron--${pgCronVersion}.sql $out/share/postgresql/extension/ + install -Dm644 pg_cron--${pgCronVersion}.control $out/share/postgresql/extension/ + + # Install upgrade scripts + find . -name 'pg_cron--*--*.sql' -exec install -Dm644 {} $out/share/postgresql/extension/ \; + ''; }; getVersions = pg: @@ -27,70 +69,24 @@ let then { "1.6.4" = allVersions."1.6.4"; } else allVersions; - mkPackage = version: hash: - stdenv.mkDerivation (finalAttrs: { - inherit pname meta; - version = "${version}-pg${lib.versions.major postgresql.version}"; - - src = fetchFromGitHub { - owner = "citusdata"; - repo = pname; - rev = "refs/tags/v${version}"; - inherit hash; - }; - - buildInputs = [ postgresql ]; - - buildPhase = '' - make PG_CONFIG=${postgresql}/bin/pg_config - - # Create version-specific SQL file - cp pg_cron.sql pg_cron--${version}.sql - - # Create versioned control file with modified module path - sed -e "/^default_version =/d" \ - -e "s|^module_pathname = .*|module_pathname = '\$libdir/pg_cron'|" \ - pg_cron.control > pg_cron--${version}.control - ''; - - installPhase = '' - mkdir -p $out/{lib,share/postgresql/extension} - - # Install versioned library - install -Dm755 pg_cron${postgresql.dlSuffix} $out/lib/pg_cron-${version}${postgresql.dlSuffix} - - # Install version-specific files - install -Dm644 pg_cron--${version}.sql $out/share/postgresql/extension/ - install -Dm644 pg_cron--${version}.control $out/share/postgresql/extension/ - - # Install upgrade scripts - find . -name 'pg_cron--*--*.sql' -exec install -Dm644 {} $out/share/postgresql/extension/ \; - ''; - }); - - packages = lib.listToAttrs ( - lib.attrValues ( - lib.mapAttrs (version: hash: lib.nameValuePair "v${version}" (mkPackage version hash)) (getVersions postgresql) - ) - ); + allVersionsForPg = lib.mapAttrs mkPgCron (getVersions postgresql); in -runCommand "${pname}-all" - { - inherit pname meta; - version = "multi-" + lib.concatStringsSep "-" (map (v: lib.replaceStrings ["."] ["-"] v) (lib.attrNames (getVersions postgresql))); +stdenv.mkDerivation { + pname = "pg_cron-all"; + version = versionString; - buildInputs = lib.attrValues packages; + buildInputs = lib.attrValues allVersionsForPg; - passthru = { - inherit packages; - }; - } - '' + dontUnpack = true; + dontConfigure = true; + dontBuild = true; + + installPhase = '' mkdir -p $out/{lib,share/postgresql/extension,bin} # Install all versions - for drv in ''${buildInputs[@]}; do + for drv in ${lib.concatStringsSep " " (lib.attrValues allVersionsForPg)}; do ln -sv $drv/lib/* $out/lib/ cp -v --no-clobber $drv/share/postgresql/extension/* $out/share/postgresql/extension/ || true done @@ -120,24 +116,25 @@ runCommand "${pname}-all" VERSION=$1 LIB_DIR=$(dirname "$0")/../lib - # Use platform-specific extension - if [ "$(uname)" = "Darwin" ]; then - EXT=".dylib" - else - EXT=".so" - fi - # Check if version exists - if [ ! -f "$LIB_DIR/pg_cron-$VERSION$EXT" ]; then + if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then echo "Error: Version $VERSION not found" exit 1 fi # Update library symlink - ln -sfnv "pg_cron-$VERSION$EXT" "$LIB_DIR/pg_cron$EXT" + ln -sfnv "pg_cron-$VERSION${postgresql.dlSuffix}" "$LIB_DIR/pg_cron${postgresql.dlSuffix}" echo "Successfully switched pg_cron to version $VERSION" EOF chmod +x $out/bin/switch_pg_cron_version - '' + ''; + + meta = with lib; { + description = "Run Cron jobs through PostgreSQL (multi-version compatible)"; + homepage = "https://github.com/citusdata/pg_cron"; + platforms = postgresql.meta.platforms; + license = licenses.postgresql; + }; +} diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index b73264c38..51278dfa4 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -162,6 +162,7 @@ "init_database_only": false }} """ +pg_cron_json = '{"pg_cron": "1.3.1"}' logger = logging.getLogger("ami-tests") handler = logging.StreamHandler() @@ -272,11 +273,11 @@ def gzip_then_base64_encode(s: str) -> str: - {{path: /etc/gotrue.env, content: {gzip_then_base64_encode(gotrue_env_content)}, permissions: '0664', encoding: gz+b64}} - {{path: /etc/wal-g/config.json, content: {gzip_then_base64_encode(walg_config_json_content)}, permissions: '0664', owner: 'wal-g:wal-g', encoding: gz+b64}} - {{path: /tmp/init.json, content: {gzip_then_base64_encode(init_json_content)}, permissions: '0600', encoding: gz+b64}} + - {{path: /root/pg_extensions.json, content: {gzip_then_base64_encode('{"pg_cron": "1.3.1"}')}, permissions: '0644', encoding: gz+b64}} runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' - - 'if [ "$POSTGRES_MAJOR_VERSION" = "15" ]; then echo \'{{"pg_cron":"1.3.1"}}\' | sudo tee /root/pg_extensions.json && sudo chmod 644 /root/pg_extensions.json; fi' - - 'bash init.sh "staging"' + - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh . 2>&1 | tee /var/log/init-download.log' + - 'bash init.sh "staging" 2>&1 | tee /var/log/init-script.log' - 'touch /var/lib/init-complete' - 'rm -rf /tmp/*' """, @@ -343,6 +344,25 @@ def gzip_then_base64_encode(s: str) -> str: if attempt >= max_attempts: logger.error("init.sh failed to complete within the timeout period") + + # Check init script logs before terminating + try: + download_log = run_ssh_command(ssh, "sudo cat /var/log/init-download.log") + if download_log['succeeded']: + logger.error("Init script download log:") + logger.error(download_log['stdout']) + else: + logger.error(f"Failed to read download log: {download_log['stderr']}") + + init_log = run_ssh_command(ssh, "sudo cat /var/log/init-script.log") + if init_log['succeeded']: + logger.error("Init script execution log:") + logger.error(init_log['stdout']) + else: + logger.error(f"Failed to read init script log: {init_log['stderr']}") + except Exception as e: + logger.error(f"Error reading logs: {str(e)}") + instance.terminate() raise TimeoutError("init.sh failed to complete within the timeout period") @@ -356,15 +376,16 @@ def is_healthy(ssh) -> bool: ("fail2ban", "sudo fail2ban-client status"), ] + service_status = {} for service, command in health_checks: try: result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") - + logger.error(f"{service} command failed") + logger.error(f"{service} stdout: {result['stdout']}") + logger.error(f"{service} stderr: {result['stderr']}") + # Always read and log the PostgreSQL logs logger.warning("PostgreSQL status check:") try: @@ -372,34 +393,26 @@ def is_healthy(ssh) -> bool: "/var/log/postgresql/*.log", "/var/log/postgresql/*.csv" ] - + for log_pattern in log_files: - log_result = host.run(f"sudo cat {log_pattern}") - if not log_result.failed: + log_result = run_ssh_command(ssh, f"sudo cat {log_pattern}") + if log_result['succeeded']: logger.error(f"PostgreSQL logs from {log_pattern}:") - logger.error(log_result.stdout) - if log_result.stderr: - logger.error(f"Log read errors: {log_result.stderr}") + logger.error(log_result['stdout']) + if log_result['stderr']: + logger.error(f"Log read errors: {log_result['stderr']}") else: - logger.error(f"Failed to read PostgreSQL logs from {log_pattern}: {log_result.stderr}") + logger.error(f"Failed to read PostgreSQL logs from {log_pattern}: {log_result['stderr']}") except Exception as e: logger.error(f"Error reading PostgreSQL logs: {str(e)}") - service_status[service] = not pg_isready.failed - + service_status[service] = False else: - cmd = check(host) - service_status[service] = not cmd.failed - if cmd.failed: - logger.warning(f"{service} not ready") - logger.error(f"{service} command failed with rc={cmd.rc}") - logger.error(f"{service} stdout: {cmd.stdout}") - logger.error(f"{service} stderr: {cmd.stderr}") + service_status[service] = True except Exception as e: logger.warning(f"Connection failed during {service} check, attempting reconnect...") logger.error(f"Error details: {str(e)}") - host = get_ssh_connection(instance_ip, ssh_identity_file) service_status[service] = False # Log overall status of all services @@ -412,16 +425,16 @@ def is_healthy(ssh) -> bool: if service_status.get("postgres", False): # If postgres is healthy but others aren't sleep(5) # Only wait if postgres is up but other services aren't logger.warning("Some services are not healthy, will retry...") - return False, service_status + return False logger.info("All services are healthy, proceeding to tests...") - return True, service_status + return True while True: if is_healthy(ssh): break - logger.warning(f"Health check failed, service status: {status}") - sleep(1) + logger.warning("Health check failed, retrying...") + sleep(5) # Return both the SSH connection and instance IP for use in tests yield { From ad6bffb710dad7b0c439850f70d41fec6ed84221 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 13 May 2025 18:09:03 -0400 Subject: [PATCH 28/42] test: new flag and other fixes to testing --- flake.nix | 10 ++++- nix/ext/pg_cron.nix | 5 +++ testinfra/test_ami_nix.py | 85 +++++++++++++++++++++++++++++---------- 3 files changed, 78 insertions(+), 22 deletions(-) diff --git a/flake.nix b/flake.nix index 6302a6894..3bf24afbd 100644 --- a/flake.nix +++ b/flake.nix @@ -772,7 +772,7 @@ show_help() { cat << EOF - Usage: run-testinfra --ami-name NAME [--aws-vault-profile PROFILE] + Usage: run-testinfra --ami-name NAME [--aws-vault-profile PROFILE] [--postgres-version VERSION] Run the testinfra tests locally against a specific AMI. @@ -789,6 +789,7 @@ Optional flags: --aws-vault-profile PROFILE AWS Vault profile to use (default: staging) + --postgres-version VERSION PostgreSQL major version to test (default: 15) --help Show this help message and exit Requirements: @@ -799,12 +800,14 @@ Examples: run-testinfra --ami-name supabase-postgres-abc123 run-testinfra --ami-name supabase-postgres-abc123 --aws-vault-profile production + run-testinfra --ami-name supabase-postgres-abc123 --postgres-version 15 EOF } # Default values AWS_VAULT_PROFILE="staging" AMI_NAME="" + POSTGRES_MAJOR_VERSION="15" # Parse arguments while [[ $# -gt 0 ]]; do @@ -817,6 +820,10 @@ AMI_NAME="$2" shift 2 ;; + --postgres-version) + POSTGRES_MAJOR_VERSION="$2" + shift 2 + ;; --help) show_help exit 0 @@ -847,6 +854,7 @@ export AWS_DEFAULT_REGION="ap-southeast-1" export AMI_NAME="$AMI_NAME" # Export AMI_NAME for pytest export RUN_ID="local-$(date +%s)" # Generate a unique RUN_ID + export POSTGRES_MAJOR_VERSION="$POSTGRES_MAJOR_VERSION" # Export PostgreSQL version for pytest # Function to terminate EC2 instances terminate_instances() { diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 3f438931e..612fa3465 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -115,6 +115,7 @@ stdenv.mkDerivation { VERSION=$1 LIB_DIR=$(dirname "$0")/../lib + EXTENSION_DIR=$(dirname "$0")/../share/postgresql/extension # Check if version exists if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then @@ -125,6 +126,10 @@ stdenv.mkDerivation { # Update library symlink ln -sfnv "pg_cron-$VERSION${postgresql.dlSuffix}" "$LIB_DIR/pg_cron${postgresql.dlSuffix}" + # Update control file + echo "default_version = '$VERSION'" > "$EXTENSION_DIR/pg_cron.control" + cat "$EXTENSION_DIR/pg_cron--$VERSION.control" >> "$EXTENSION_DIR/pg_cron.control" + echo "Successfully switched pg_cron to version $VERSION" EOF diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 51278dfa4..cde1207e6 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -579,24 +579,67 @@ def test_pg_cron_extension(host): if postgres_version != "15": pytest.skip(f"Skipping pg_cron test for PostgreSQL version {postgres_version}") - # Connect as supabase_admin and create the extension - with host.sudo("postgres"): - result = host.run('psql -U supabase_admin -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') - assert result.rc == 0, f"Failed to create pg_cron extension: {result.stderr}" - - # Create test table - result = host.run('psql -U supabase_admin -d postgres -c "CREATE TABLE cron_test_log (id SERIAL PRIMARY KEY, message TEXT, log_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP);"') - assert result.rc == 0, f"Failed to create test table: {result.stderr}" - - # Schedule a job - result = host.run('psql -U supabase_admin -d postgres -c "SELECT cron.schedule(\'* * * * *\', \'INSERT INTO cron_test_log (message) VALUES (\\\'Hello from pg_cron!\\\');\');"') - assert result.rc == 0, f"Failed to schedule job: {result.stderr}" - assert "1" in result.stdout, "Expected schedule ID 1" - - # Verify job is scheduled - result = host.run('psql -U supabase_admin -d postgres -c "SELECT * FROM cron.job;"') - assert result.rc == 0, f"Failed to query cron.job: {result.stderr}" - assert "* * * * *" in result.stdout, "Expected cron schedule pattern" - assert "INSERT INTO cron_test_log" in result.stdout, "Expected cron command" - assert "postgres" in result.stdout, "Expected postgres username" - assert "postgres" in result.stdout, "Expected postgres database" + # Use the SSH connection to run commands as postgres user + ssh = host['ssh'] + + # Check prestart script + result = run_ssh_command(ssh, 'ls -l /etc/postgresql/prestart.d/postgres_prestart.sh') + assert result['succeeded'], f"Failed to find prestart script: {result['stderr']}" + logger.info(f"Prestart script details: {result['stdout']}") + + # Check if extensions file exists + result = run_ssh_command(ssh, 'cat /root/pg_extensions.json') + assert result['succeeded'], f"Failed to read extensions file: {result['stderr']}" + logger.info(f"Extensions file contents: {result['stdout']}") + + # Check if version switcher exists + result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version') + assert result['succeeded'], f"Failed to find version switcher: {result['stderr']}" + logger.info(f"Version switcher details: {result['stdout']}") + + # Create the extension + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') + assert result['succeeded'], f"Failed to create pg_cron extension: {result['stderr']}" + + # Verify the extension version + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "SELECT extversion FROM pg_extension WHERE extname = \'pg_cron\';"') + assert result['succeeded'], f"Failed to get pg_cron version: {result['stderr']}" + assert "1.3.1" in result['stdout'], f"Expected pg_cron version 1.3.1, but got: {result['stdout']}" + logger.info(f"pg_cron version: {result['stdout']}") + + # Check the actual function definition + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\sf cron.schedule"') + assert result['succeeded'], f"Failed to get cron.schedule function definition: {result['stderr']}" + logger.info(f"cron.schedule function definition: {result['stdout']}") + + # Check extension details + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "SELECT * FROM pg_extension WHERE extname = \'pg_cron\';"') + assert result['succeeded'], f"Failed to get pg_cron extension details: {result['stderr']}" + logger.info(f"pg_cron extension details: {result['stdout']}") + + # Create test table + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE TABLE cron_test_log (id SERIAL PRIMARY KEY, message TEXT, log_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP);"') + assert result['succeeded'], f"Failed to create test table: {result['stderr']}" + + # Check the schema of cron.job table + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\d cron.job"') + assert result['succeeded'], f"Failed to get cron.job schema: {result['stderr']}" + logger.info(f"cron.job schema: {result['stdout']}") + + # Check available cron functions + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\df cron.*"') + assert result['succeeded'], f"Failed to get cron functions: {result['stderr']}" + logger.info(f"Available cron functions: {result['stdout']}") + + # Schedule a job using the basic schedule function + result = run_ssh_command(ssh, '''sudo -u postgres psql -d postgres -c "SELECT cron.schedule('* * * * *'::text, 'INSERT INTO cron_test_log (message) VALUES (''Hello from pg_cron!'');'::text);"''') + assert result['succeeded'], f"Failed to schedule job: {result['stderr']}" + assert "1" in result['stdout'], "Expected schedule ID 1" + + # Verify job is scheduled + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "SELECT * FROM cron.job;"') + assert result['succeeded'], f"Failed to query cron.job: {result['stderr']}" + assert "* * * * *" in result['stdout'], "Expected cron schedule pattern" + assert "INSERT INTO cron_test_log" in result['stdout'], "Expected cron command" + assert "postgres" in result['stdout'], "Expected postgres username" + assert "postgres" in result['stdout'], "Expected postgres database" From 8f1b895a12afa4acb414a30ff943f70e921eab71 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 19 May 2025 16:49:47 -0400 Subject: [PATCH 29/42] test: pushing commit to register for nix build and testing --- ansible/files/postgres_prestart.sh.j2 | 38 +++++++++-- nix/ext/pg_cron.nix | 5 +- testinfra/test_ami_nix.py | 97 +++++++++++++++++++++++++-- 3 files changed, 126 insertions(+), 14 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 40e8debd7..ddc5d58e8 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -1,5 +1,12 @@ #!/bin/bash +set -e # Exit on error +set -x # Print commands + +log() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" +} + check_orioledb_enabled() { local pg_conf="/etc/postgresql/postgresql.conf" if [ ! -f "$pg_conf" ]; then @@ -29,7 +36,11 @@ update_orioledb_buffers() { check_extensions_file() { local extensions_file="/root/pg_extensions.json" if [ ! -f "$extensions_file" ]; then - echo "extensions: No extensions file found, skipping extensions versions check" + log "extensions: No extensions file found, skipping extensions versions check" + return 1 + fi + if [ ! -r "$extensions_file" ]; then + log "extensions: Cannot read extensions file" return 1 fi return 0 @@ -40,18 +51,20 @@ get_pg_cron_version() { return fi + # Run jq as postgres user since it's in their nix profile local version version=$(sudo -u postgres /var/lib/postgresql/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") if [ -z "$version" ]; then - echo "pg_cron: Not specified in extensions file" + log "pg_cron: Not specified in extensions file" return fi if ! [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - echo "pg_cron: Invalid version format: $version" + log "pg_cron: Invalid version format: $version" return fi + log "pg_cron: Found version $version in extensions file" echo "$version" } @@ -60,13 +73,20 @@ switch_pg_cron_version() { local switch_script="/var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version" if [ ! -x "$switch_script" ]; then - echo "pg_cron: No version switch script available" - return + log "pg_cron: No version switch script available at $switch_script" + return 1 fi - echo "pg_cron: Switching to version $version" + log "pg_cron: Switching to version $version" + # Run as postgres user since we're modifying their nix profile sudo -u postgres "$switch_script" "$version" - echo "pg_cron: Version switch completed" + local exit_code=$? + if [ $exit_code -eq 0 ]; then + log "pg_cron: Version switch completed successfully" + else + log "pg_cron: Version switch failed with exit code $exit_code" + fi + return $exit_code } handle_pg_cron_version() { @@ -78,6 +98,8 @@ handle_pg_cron_version() { } main() { + log "Starting prestart script" + # 1. pg_cron version handling handle_pg_cron_version @@ -90,6 +112,8 @@ main() { if [ ! -z "$shared_buffers_value" ]; then update_orioledb_buffers "$shared_buffers_value" fi + + log "Prestart script completed" } # Initial locale setup diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 612fa3465..2233df41f 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -114,8 +114,9 @@ stdenv.mkDerivation { fi VERSION=$1 - LIB_DIR=$(dirname "$0")/../lib - EXTENSION_DIR=$(dirname "$0")/../share/postgresql/extension + NIX_PROFILE="/var/lib/postgresql/.nix-profile" + LIB_DIR="$NIX_PROFILE/lib" + EXTENSION_DIR="$NIX_PROFILE/share/postgresql/extension" # Check if version exists if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index cde1207e6..10ae5f356 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -338,6 +338,16 @@ def gzip_then_base64_encode(s: str) -> str: except Exception as e: logger.warning(f"Error checking init.sh status: {str(e)}") + # Capture logs during initialization + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 --no-pager') + logger.info(f"PostgreSQL service logs during initialization:\n{result['stdout']}") + + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "prestart"') + logger.info(f"Prestart script execution logs during initialization:\n{result['stdout']}") + + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "switch_pg_cron_version"') + logger.info(f"Version switcher execution logs during initialization:\n{result['stdout']}") + attempt += 1 logger.warning(f"Waiting for init.sh to complete (attempt {attempt}/{max_attempts})") sleep(5) @@ -376,6 +386,18 @@ def is_healthy(ssh) -> bool: ("fail2ban", "sudo fail2ban-client status"), ] + # Capture systemd logs during startup + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 --no-pager') + logger.info(f"PostgreSQL service logs during startup:\n{result['stdout']}") + + # Check prestart script execution + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "prestart"') + logger.info(f"Prestart script execution logs:\n{result['stdout']}") + + # Check version switcher execution + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "switch_pg_cron_version"') + logger.info(f"Version switcher execution logs:\n{result['stdout']}") + service_status = {} for service, command in health_checks: try: @@ -583,12 +605,12 @@ def test_pg_cron_extension(host): ssh = host['ssh'] # Check prestart script - result = run_ssh_command(ssh, 'ls -l /etc/postgresql/prestart.d/postgres_prestart.sh') + result = run_ssh_command(ssh, 'ls -l /usr/local/bin/postgres_prestart.sh') assert result['succeeded'], f"Failed to find prestart script: {result['stderr']}" logger.info(f"Prestart script details: {result['stdout']}") # Check if extensions file exists - result = run_ssh_command(ssh, 'cat /root/pg_extensions.json') + result = run_ssh_command(ssh, 'sudo cat /root/pg_extensions.json') assert result['succeeded'], f"Failed to read extensions file: {result['stderr']}" logger.info(f"Extensions file contents: {result['stdout']}") @@ -597,6 +619,71 @@ def test_pg_cron_extension(host): assert result['succeeded'], f"Failed to find version switcher: {result['stderr']}" logger.info(f"Version switcher details: {result['stdout']}") + # Check if version switching worked correctly + result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron.so') + logger.info(f"Current pg_cron library symlink: {result['stdout']}") + assert "pg_cron-1.3.1" in result['stdout'], "pg_cron library not pointing to version 1.3.1" + + # Check the actual symlink target + result = run_ssh_command(ssh, 'readlink -f /var/lib/postgresql/.nix-profile/lib/pg_cron.so') + logger.info(f"Actual pg_cron library symlink target: {result['stdout']}") + + # List all available pg_cron versions + result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron-*') + logger.info(f"Available pg_cron versions: {result['stdout']}") + + # Check if the target version exists + result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron-1.3.1.so') + logger.info(f"Target version exists: {result['stdout']}") + + result = run_ssh_command(ssh, 'cat /var/lib/postgresql/.nix-profile/share/postgresql/extension/pg_cron.control') + logger.info(f"pg_cron control file contents: {result['stdout']}") + assert "default_version = '1.3.1'" in result['stdout'], "pg_cron control file not set to version 1.3.1" + + # Check prestart script execution + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 50 | grep -i "prestart"') + logger.info(f"Prestart script execution logs: {result['stdout']}") + + # Check systemd service status to see if prestart was attempted + result = run_ssh_command(ssh, 'sudo systemctl status postgresql@main') + logger.info(f"PostgreSQL service status: {result['stdout']}") + + # Check if prestart script exists and is executable + result = run_ssh_command(ssh, 'ls -l /usr/local/bin/postgres_prestart.sh') + logger.info(f"Prestart script permissions: {result['stdout']}") + + # Check prestart script contents + result = run_ssh_command(ssh, 'sudo cat /usr/local/bin/postgres_prestart.sh') + logger.info(f"Prestart script contents: {result['stdout']}") + + # Check pg_cron worker process + result = run_ssh_command(ssh, 'ps aux | grep -i "pg_cron"') + logger.info(f"pg_cron worker processes: {result['stdout']}") + + # Check PostgreSQL logs for any errors + result = run_ssh_command(ssh, 'sudo tail -n 100 /var/log/postgresql/postgresql.csv') + logger.info(f"PostgreSQL logs: {result['stdout']}") + + # Check if version switcher was executed + result = run_ssh_command(ssh, 'sudo ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron-*') + logger.info(f"Available pg_cron versions: {result['stdout']}") + + # Check if version switcher has execute permissions + result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version') + logger.info(f"Version switcher permissions: {result['stdout']}") + + # Check if version switcher was called by prestart + result = run_ssh_command(ssh, 'sudo grep -r "switch_pg_cron_version" /var/log/postgresql/') + logger.info(f"Version switcher execution logs: {result['stdout']}") + + # Check systemd service file to verify prestart configuration + result = run_ssh_command(ssh, 'sudo cat /etc/systemd/system/postgresql@.service') + logger.info(f"PostgreSQL service file: {result['stdout']}") + + # Check if prestart script was called by systemd + result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "executing"') + logger.info(f"Systemd execution logs: {result['stdout']}") + # Create the extension result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') assert result['succeeded'], f"Failed to create pg_cron extension: {result['stderr']}" @@ -608,7 +695,7 @@ def test_pg_cron_extension(host): logger.info(f"pg_cron version: {result['stdout']}") # Check the actual function definition - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\sf cron.schedule"') + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\\sf cron.schedule"') assert result['succeeded'], f"Failed to get cron.schedule function definition: {result['stderr']}" logger.info(f"cron.schedule function definition: {result['stdout']}") @@ -622,12 +709,12 @@ def test_pg_cron_extension(host): assert result['succeeded'], f"Failed to create test table: {result['stderr']}" # Check the schema of cron.job table - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\d cron.job"') + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\\d cron.job"') assert result['succeeded'], f"Failed to get cron.job schema: {result['stderr']}" logger.info(f"cron.job schema: {result['stdout']}") # Check available cron functions - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\df cron.*"') + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\\df cron.*"') assert result['succeeded'], f"Failed to get cron functions: {result['stderr']}" logger.info(f"Available cron functions: {result['stdout']}") From 9552777fee28ca9f234545a071524236c8c4121a Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 19 May 2025 18:19:53 -0400 Subject: [PATCH 30/42] feat: prestart detect version --- ansible/files/postgres_prestart.sh.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index ddc5d58e8..7f9af9ff7 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -51,9 +51,9 @@ get_pg_cron_version() { return fi - # Run jq as postgres user since it's in their nix profile + # Run jq from nixpkgs local version - version=$(sudo -u postgres /var/lib/postgresql/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json") + version=$(nix run nixpkgs#jq -- -r '.pg_cron // empty' "/root/pg_extensions.json") if [ -z "$version" ]; then log "pg_cron: Not specified in extensions file" return From 9c370c36a7999648bbe0d40469eeb2eb5a2b2236 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Mon, 19 May 2025 18:34:30 -0400 Subject: [PATCH 31/42] feat: provide jq correct --- ansible/files/postgres_prestart.sh.j2 | 3 +-- ansible/tasks/stage2-setup-postgres.yml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 7f9af9ff7..ece2bceec 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -51,9 +51,8 @@ get_pg_cron_version() { return fi - # Run jq from nixpkgs local version - version=$(nix run nixpkgs#jq -- -r '.pg_cron // empty' "/root/pg_extensions.json") + version=$(jq -r '.pg_cron // empty' "/root/pg_extensions.json") if [ -z "$version" ]; then log "pg_cron: Not specified in extensions file" return diff --git a/ansible/tasks/stage2-setup-postgres.yml b/ansible/tasks/stage2-setup-postgres.yml index a8310f5cf..e205a5dc8 100644 --- a/ansible/tasks/stage2-setup-postgres.yml +++ b/ansible/tasks/stage2-setup-postgres.yml @@ -98,7 +98,7 @@ - name: Install jq from nix binary cache become: yes shell: | - sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install nixpkgs#jq" + . /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install nixpkgs#jq --profile /nix/var/nix/profiles/default when: stage2_nix - name: Set ownership and permissions for /etc/ssl/private From 2e9bd6f7f39c95171f1cfd7b4c88b8bf5981df00 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 09:18:22 -0400 Subject: [PATCH 32/42] test: rollback logging on test --- ansible/files/postgres_prestart.sh.j2 | 4 +- .../postgresql_config/postgresql.service.j2 | 2 + testinfra/test_ami_nix.py | 98 ++----------------- 3 files changed, 10 insertions(+), 94 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index ece2bceec..0254e7a7c 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -1,6 +1,5 @@ #!/bin/bash -set -e # Exit on error set -x # Print commands log() { @@ -92,7 +91,8 @@ handle_pg_cron_version() { local version version=$(get_pg_cron_version) if [ -n "$version" ]; then - switch_pg_cron_version "$version" + # Don't fail if version switch fails + switch_pg_cron_version "$version" || log "pg_cron: Version switch failed but continuing" fi } diff --git a/ansible/files/postgresql_config/postgresql.service.j2 b/ansible/files/postgresql_config/postgresql.service.j2 index 4cc138ec7..844a2b400 100644 --- a/ansible/files/postgresql_config/postgresql.service.j2 +++ b/ansible/files/postgresql_config/postgresql.service.j2 @@ -18,6 +18,8 @@ TimeoutStopSec=90 TimeoutStartSec=86400 Restart=always RestartSec=5 +StandardOutput=journal +StandardError=journal OOMScoreAdjust=-1000 EnvironmentFile=-/etc/environment.d/postgresql.env LimitNOFILE=16384 diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 10ae5f356..6e34d7935 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -338,16 +338,6 @@ def gzip_then_base64_encode(s: str) -> str: except Exception as e: logger.warning(f"Error checking init.sh status: {str(e)}") - # Capture logs during initialization - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 --no-pager') - logger.info(f"PostgreSQL service logs during initialization:\n{result['stdout']}") - - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "prestart"') - logger.info(f"Prestart script execution logs during initialization:\n{result['stdout']}") - - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "switch_pg_cron_version"') - logger.info(f"Version switcher execution logs during initialization:\n{result['stdout']}") - attempt += 1 logger.warning(f"Waiting for init.sh to complete (attempt {attempt}/{max_attempts})") sleep(5) @@ -386,18 +376,6 @@ def is_healthy(ssh) -> bool: ("fail2ban", "sudo fail2ban-client status"), ] - # Capture systemd logs during startup - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 --no-pager') - logger.info(f"PostgreSQL service logs during startup:\n{result['stdout']}") - - # Check prestart script execution - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "prestart"') - logger.info(f"Prestart script execution logs:\n{result['stdout']}") - - # Check version switcher execution - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "switch_pg_cron_version"') - logger.info(f"Version switcher execution logs:\n{result['stdout']}") - service_status = {} for service, command in health_checks: try: @@ -605,12 +583,12 @@ def test_pg_cron_extension(host): ssh = host['ssh'] # Check prestart script - result = run_ssh_command(ssh, 'ls -l /usr/local/bin/postgres_prestart.sh') + result = run_ssh_command(ssh, 'ls -l /etc/postgresql/prestart.d/postgres_prestart.sh') assert result['succeeded'], f"Failed to find prestart script: {result['stderr']}" logger.info(f"Prestart script details: {result['stdout']}") # Check if extensions file exists - result = run_ssh_command(ssh, 'sudo cat /root/pg_extensions.json') + result = run_ssh_command(ssh, 'cat /root/pg_extensions.json') assert result['succeeded'], f"Failed to read extensions file: {result['stderr']}" logger.info(f"Extensions file contents: {result['stdout']}") @@ -619,71 +597,6 @@ def test_pg_cron_extension(host): assert result['succeeded'], f"Failed to find version switcher: {result['stderr']}" logger.info(f"Version switcher details: {result['stdout']}") - # Check if version switching worked correctly - result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron.so') - logger.info(f"Current pg_cron library symlink: {result['stdout']}") - assert "pg_cron-1.3.1" in result['stdout'], "pg_cron library not pointing to version 1.3.1" - - # Check the actual symlink target - result = run_ssh_command(ssh, 'readlink -f /var/lib/postgresql/.nix-profile/lib/pg_cron.so') - logger.info(f"Actual pg_cron library symlink target: {result['stdout']}") - - # List all available pg_cron versions - result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron-*') - logger.info(f"Available pg_cron versions: {result['stdout']}") - - # Check if the target version exists - result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron-1.3.1.so') - logger.info(f"Target version exists: {result['stdout']}") - - result = run_ssh_command(ssh, 'cat /var/lib/postgresql/.nix-profile/share/postgresql/extension/pg_cron.control') - logger.info(f"pg_cron control file contents: {result['stdout']}") - assert "default_version = '1.3.1'" in result['stdout'], "pg_cron control file not set to version 1.3.1" - - # Check prestart script execution - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 50 | grep -i "prestart"') - logger.info(f"Prestart script execution logs: {result['stdout']}") - - # Check systemd service status to see if prestart was attempted - result = run_ssh_command(ssh, 'sudo systemctl status postgresql@main') - logger.info(f"PostgreSQL service status: {result['stdout']}") - - # Check if prestart script exists and is executable - result = run_ssh_command(ssh, 'ls -l /usr/local/bin/postgres_prestart.sh') - logger.info(f"Prestart script permissions: {result['stdout']}") - - # Check prestart script contents - result = run_ssh_command(ssh, 'sudo cat /usr/local/bin/postgres_prestart.sh') - logger.info(f"Prestart script contents: {result['stdout']}") - - # Check pg_cron worker process - result = run_ssh_command(ssh, 'ps aux | grep -i "pg_cron"') - logger.info(f"pg_cron worker processes: {result['stdout']}") - - # Check PostgreSQL logs for any errors - result = run_ssh_command(ssh, 'sudo tail -n 100 /var/log/postgresql/postgresql.csv') - logger.info(f"PostgreSQL logs: {result['stdout']}") - - # Check if version switcher was executed - result = run_ssh_command(ssh, 'sudo ls -l /var/lib/postgresql/.nix-profile/lib/pg_cron-*') - logger.info(f"Available pg_cron versions: {result['stdout']}") - - # Check if version switcher has execute permissions - result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version') - logger.info(f"Version switcher permissions: {result['stdout']}") - - # Check if version switcher was called by prestart - result = run_ssh_command(ssh, 'sudo grep -r "switch_pg_cron_version" /var/log/postgresql/') - logger.info(f"Version switcher execution logs: {result['stdout']}") - - # Check systemd service file to verify prestart configuration - result = run_ssh_command(ssh, 'sudo cat /etc/systemd/system/postgresql@.service') - logger.info(f"PostgreSQL service file: {result['stdout']}") - - # Check if prestart script was called by systemd - result = run_ssh_command(ssh, 'sudo journalctl -u postgresql@main -n 100 | grep -i "executing"') - logger.info(f"Systemd execution logs: {result['stdout']}") - # Create the extension result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') assert result['succeeded'], f"Failed to create pg_cron extension: {result['stderr']}" @@ -695,7 +608,7 @@ def test_pg_cron_extension(host): logger.info(f"pg_cron version: {result['stdout']}") # Check the actual function definition - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\\sf cron.schedule"') + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\sf cron.schedule"') assert result['succeeded'], f"Failed to get cron.schedule function definition: {result['stderr']}" logger.info(f"cron.schedule function definition: {result['stdout']}") @@ -709,12 +622,12 @@ def test_pg_cron_extension(host): assert result['succeeded'], f"Failed to create test table: {result['stderr']}" # Check the schema of cron.job table - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\\d cron.job"') + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\d cron.job"') assert result['succeeded'], f"Failed to get cron.job schema: {result['stderr']}" logger.info(f"cron.job schema: {result['stdout']}") # Check available cron functions - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\\df cron.*"') + result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\df cron.*"') assert result['succeeded'], f"Failed to get cron functions: {result['stderr']}" logger.info(f"Available cron functions: {result['stdout']}") @@ -730,3 +643,4 @@ def test_pg_cron_extension(host): assert "INSERT INTO cron_test_log" in result['stdout'], "Expected cron command" assert "postgres" in result['stdout'], "Expected postgres username" assert "postgres" in result['stdout'], "Expected postgres database" + \ No newline at end of file From 2e9c94a6cdcc14fbffb37889f8707ad255966740 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 16:06:40 -0400 Subject: [PATCH 33/42] test: trying with systemd restart --- testinfra/test_ami_nix.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 6e34d7935..93d29170f 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -583,12 +583,12 @@ def test_pg_cron_extension(host): ssh = host['ssh'] # Check prestart script - result = run_ssh_command(ssh, 'ls -l /etc/postgresql/prestart.d/postgres_prestart.sh') + result = run_ssh_command(ssh, 'ls -l /usr/local/bin/postgres_prestart.sh') assert result['succeeded'], f"Failed to find prestart script: {result['stderr']}" logger.info(f"Prestart script details: {result['stdout']}") # Check if extensions file exists - result = run_ssh_command(ssh, 'cat /root/pg_extensions.json') + result = run_ssh_command(ssh, 'sudo cat /root/pg_extensions.json') assert result['succeeded'], f"Failed to read extensions file: {result['stderr']}" logger.info(f"Extensions file contents: {result['stdout']}") @@ -597,6 +597,33 @@ def test_pg_cron_extension(host): assert result['succeeded'], f"Failed to find version switcher: {result['stderr']}" logger.info(f"Version switcher details: {result['stdout']}") + # Check systemd service status + logger.info("Checking systemd service status...") + result = run_ssh_command(ssh, 'sudo systemctl list-units --type=service | grep postgres') + logger.info(f"PostgreSQL services: {result['stdout']}") + result = run_ssh_command(ssh, 'sudo systemctl status postgresql') + logger.info(f"PostgreSQL service status: {result['stdout']}") + + # Restart PostgreSQL through systemd + logger.info("Restarting PostgreSQL through systemd...") + result = run_ssh_command(ssh, 'sudo systemctl stop postgresql') + logger.info(f"Stop result: {result['stdout']}") + result = run_ssh_command(ssh, 'sudo systemctl start postgresql') + logger.info(f"Start result: {result['stdout']}") + + # Wait for PostgreSQL to be ready + logger.info("Waiting for PostgreSQL to be ready...") + max_attempts = 30 + for attempt in range(max_attempts): + result = run_ssh_command(ssh, 'sudo -u postgres /usr/bin/pg_isready -U postgres') + if result['succeeded']: + logger.info("PostgreSQL is ready") + break + logger.warning(f"PostgreSQL not ready yet (attempt {attempt + 1}/{max_attempts})") + sleep(2) + else: + raise Exception("PostgreSQL failed to start through systemd") + # Create the extension result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') assert result['succeeded'], f"Failed to create pg_cron extension: {result['stderr']}" From 475ca99c59a0dbb61a9f9578fc0b41aadbab733d Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 19:33:06 -0400 Subject: [PATCH 34/42] feat: fixing symlink issues, prestart bugs --- ansible/files/postgres_prestart.sh.j2 | 9 +++++---- nix/ext/pg_cron.nix | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 0254e7a7c..1d84edc8e 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -62,8 +62,9 @@ get_pg_cron_version() { return fi - log "pg_cron: Found version $version in extensions file" - echo "$version" + # Log the version but don't include it in the output + log "pg_cron: Found version $version in extensions file" >&2 + printf "%s" "$version" } switch_pg_cron_version() { @@ -76,8 +77,8 @@ switch_pg_cron_version() { fi log "pg_cron: Switching to version $version" - # Run as postgres user since we're modifying their nix profile - sudo -u postgres "$switch_script" "$version" + # Run directly as root since we're already running as root + "$switch_script" "$version" local exit_code=$? if [ $exit_code -eq 0 ]; then log "pg_cron: Version switch completed successfully" diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index 2233df41f..c2e9f59da 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -115,7 +115,7 @@ stdenv.mkDerivation { VERSION=$1 NIX_PROFILE="/var/lib/postgresql/.nix-profile" - LIB_DIR="$NIX_PROFILE/lib" + LIB_DIR="$out/lib" EXTENSION_DIR="$NIX_PROFILE/share/postgresql/extension" # Check if version exists From bb39e7f0f3839f9eb1bdb905a35f454aeae13f02 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 20:40:16 -0400 Subject: [PATCH 35/42] fix: drill down in aliases to change core alias --- nix/ext/pg_cron.nix | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index c2e9f59da..c30f2326d 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -115,9 +115,31 @@ stdenv.mkDerivation { VERSION=$1 NIX_PROFILE="/var/lib/postgresql/.nix-profile" - LIB_DIR="$out/lib" + + # Follow the complete chain of symlinks to find the multi-version directory + CURRENT_LINK="$NIX_PROFILE/lib/pg_cron-$VERSION.so" + echo "Starting with link: $CURRENT_LINK" + + while [ -L "$CURRENT_LINK" ]; do + NEXT_LINK=$(readlink "$CURRENT_LINK") + echo "Following link: $NEXT_LINK" + if echo "$NEXT_LINK" | grep -q '^/'; then + CURRENT_LINK="$NEXT_LINK" + else + CURRENT_LINK="$(dirname "$CURRENT_LINK")/$NEXT_LINK" + fi + echo "Current link is now: $CURRENT_LINK" + done + + # The final link should be in the multi-version directory + MULTI_VERSION_DIR=$(dirname "$CURRENT_LINK") + echo "Found multi-version directory: $MULTI_VERSION_DIR" + LIB_DIR="$MULTI_VERSION_DIR" EXTENSION_DIR="$NIX_PROFILE/share/postgresql/extension" + echo "Looking for file: $LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" + ls -la "$LIB_DIR" || true + # Check if version exists if [ ! -f "$LIB_DIR/pg_cron-$VERSION${postgresql.dlSuffix}" ]; then echo "Error: Version $VERSION not found" From 41e3fc511ebd335199d737ad91203e8a5d2a0af7 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 20:41:06 -0400 Subject: [PATCH 36/42] chore: bump test vers. --- ansible/vars.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/vars.yml b/ansible/vars.yml index c33f69a76..feb08c1fd 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,9 +9,15 @@ postgres_major: # Full version strings for each major version postgres_release: +<<<<<<< HEAD postgresorioledb-17: "17.0.1.086-orioledb" postgres17: "17.4.1.036" postgres15: "15.8.1.093" +======= + postgresorioledb-17: "17.0.1.067-orioledb-pgcron-7" + postgres17: "17.4.1.017-pgcron-7" + postgres15: "15.8.1.074-pgcron-7" +>>>>>>> 5eb9ef5d (chore: bump test vers.) # Non Postgres Extensions pgbouncer_release: "1.19.0" From 408b6ec3308ad23cac562fa64b2b0d9f1faa53bc Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 23:11:48 -0400 Subject: [PATCH 37/42] feat: handling symlinks --- nix/ext/pg_cron.nix | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/nix/ext/pg_cron.nix b/nix/ext/pg_cron.nix index c30f2326d..cee4f1069 100644 --- a/nix/ext/pg_cron.nix +++ b/nix/ext/pg_cron.nix @@ -117,21 +117,24 @@ stdenv.mkDerivation { NIX_PROFILE="/var/lib/postgresql/.nix-profile" # Follow the complete chain of symlinks to find the multi-version directory - CURRENT_LINK="$NIX_PROFILE/lib/pg_cron-$VERSION.so" + CURRENT_LINK="$NIX_PROFILE/lib/pg_cron-$VERSION${postgresql.dlSuffix}" echo "Starting with link: $CURRENT_LINK" - while [ -L "$CURRENT_LINK" ]; do - NEXT_LINK=$(readlink "$CURRENT_LINK") - echo "Following link: $NEXT_LINK" - if echo "$NEXT_LINK" | grep -q '^/'; then - CURRENT_LINK="$NEXT_LINK" - else - CURRENT_LINK="$(dirname "$CURRENT_LINK")/$NEXT_LINK" + # Follow first two symlinks to get to the multi-version directory + for i in 1 2; do + if [ -L "$CURRENT_LINK" ]; then + NEXT_LINK=$(readlink "$CURRENT_LINK") + echo "Following link: $NEXT_LINK" + if echo "$NEXT_LINK" | grep -q '^/'; then + CURRENT_LINK="$NEXT_LINK" + else + CURRENT_LINK="$(dirname "$CURRENT_LINK")/$NEXT_LINK" + fi + echo "Current link is now: $CURRENT_LINK" fi - echo "Current link is now: $CURRENT_LINK" done - # The final link should be in the multi-version directory + # The multi-version directory should be the parent of the current link MULTI_VERSION_DIR=$(dirname "$CURRENT_LINK") echo "Found multi-version directory: $MULTI_VERSION_DIR" LIB_DIR="$MULTI_VERSION_DIR" From 064fbf0f6fcd12d2b04296de7c2c9ce52c78467d Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 20 May 2025 23:12:23 -0400 Subject: [PATCH 38/42] chore: bump vers. --- ansible/vars.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/vars.yml b/ansible/vars.yml index feb08c1fd..66f12684c 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,6 +9,7 @@ postgres_major: # Full version strings for each major version postgres_release: +<<<<<<< HEAD <<<<<<< HEAD postgresorioledb-17: "17.0.1.086-orioledb" postgres17: "17.4.1.036" @@ -18,6 +19,11 @@ postgres_release: postgres17: "17.4.1.017-pgcron-7" postgres15: "15.8.1.074-pgcron-7" >>>>>>> 5eb9ef5d (chore: bump test vers.) +======= + postgresorioledb-17: "17.0.1.067-orioledb-pgcron-8" + postgres17: "17.4.1.017-pgcron-8" + postgres15: "15.8.1.074-pgcron-8" +>>>>>>> 3646bff4 (chore: bump vers.) # Non Postgres Extensions pgbouncer_release: "1.19.0" From 2b09ce9ac444e7f6e23f4b71739ef0bde15b70dc Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Wed, 21 May 2025 07:39:51 -0400 Subject: [PATCH 39/42] test: restore test to version from develop branch --- testinfra/test_ami_nix.py | 180 ++------------------------------------ 1 file changed, 8 insertions(+), 172 deletions(-) diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 93d29170f..1975818d6 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -162,7 +162,6 @@ "init_database_only": false }} """ -pg_cron_json = '{"pg_cron": "1.3.1"}' logger = logging.getLogger("ami-tests") handler = logging.StreamHandler() @@ -273,11 +272,10 @@ def gzip_then_base64_encode(s: str) -> str: - {{path: /etc/gotrue.env, content: {gzip_then_base64_encode(gotrue_env_content)}, permissions: '0664', encoding: gz+b64}} - {{path: /etc/wal-g/config.json, content: {gzip_then_base64_encode(walg_config_json_content)}, permissions: '0664', owner: 'wal-g:wal-g', encoding: gz+b64}} - {{path: /tmp/init.json, content: {gzip_then_base64_encode(init_json_content)}, permissions: '0600', encoding: gz+b64}} - - {{path: /root/pg_extensions.json, content: {gzip_then_base64_encode('{"pg_cron": "1.3.1"}')}, permissions: '0644', encoding: gz+b64}} runcmd: - 'sudo echo \"pgbouncer\" \"postgres\" >> /etc/pgbouncer/userlist.txt' - - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh . 2>&1 | tee /var/log/init-download.log' - - 'bash init.sh "staging" 2>&1 | tee /var/log/init-script.log' + - 'cd /tmp && aws s3 cp --region ap-southeast-1 s3://init-scripts-staging/project/init.sh .' + - 'bash init.sh "staging"' - 'touch /var/lib/init-complete' - 'rm -rf /tmp/*' """, @@ -344,25 +342,6 @@ def gzip_then_base64_encode(s: str) -> str: if attempt >= max_attempts: logger.error("init.sh failed to complete within the timeout period") - - # Check init script logs before terminating - try: - download_log = run_ssh_command(ssh, "sudo cat /var/log/init-download.log") - if download_log['succeeded']: - logger.error("Init script download log:") - logger.error(download_log['stdout']) - else: - logger.error(f"Failed to read download log: {download_log['stderr']}") - - init_log = run_ssh_command(ssh, "sudo cat /var/log/init-script.log") - if init_log['succeeded']: - logger.error("Init script execution log:") - logger.error(init_log['stdout']) - else: - logger.error(f"Failed to read init script log: {init_log['stderr']}") - except Exception as e: - logger.error(f"Error reading logs: {str(e)}") - instance.terminate() raise TimeoutError("init.sh failed to complete within the timeout period") @@ -376,65 +355,22 @@ def is_healthy(ssh) -> bool: ("fail2ban", "sudo fail2ban-client status"), ] - service_status = {} for service, command in health_checks: try: result = run_ssh_command(ssh, command) if not result['succeeded']: logger.warning(f"{service} not ready") - logger.error(f"{service} command failed") - logger.error(f"{service} stdout: {result['stdout']}") - logger.error(f"{service} stderr: {result['stderr']}") - - # Always read and log the PostgreSQL logs - logger.warning("PostgreSQL status check:") - try: - log_files = [ - "/var/log/postgresql/*.log", - "/var/log/postgresql/*.csv" - ] - - for log_pattern in log_files: - log_result = run_ssh_command(ssh, f"sudo cat {log_pattern}") - if log_result['succeeded']: - logger.error(f"PostgreSQL logs from {log_pattern}:") - logger.error(log_result['stdout']) - if log_result['stderr']: - logger.error(f"Log read errors: {log_result['stderr']}") - else: - logger.error(f"Failed to read PostgreSQL logs from {log_pattern}: {log_result['stderr']}") - except Exception as e: - logger.error(f"Error reading PostgreSQL logs: {str(e)}") - - service_status[service] = False - else: - service_status[service] = True - - except Exception as e: - logger.warning(f"Connection failed during {service} check, attempting reconnect...") - logger.error(f"Error details: {str(e)}") - service_status[service] = False - - # Log overall status of all services - logger.info("Service health status:") - for service, healthy in service_status.items(): - logger.info(f"{service}: {'healthy' if healthy else 'unhealthy'}") - - # If any service is unhealthy, wait and return False with status - if not all(service_status.values()): - if service_status.get("postgres", False): # If postgres is healthy but others aren't - sleep(5) # Only wait if postgres is up but other services aren't - logger.warning("Some services are not healthy, will retry...") - return False - - logger.info("All services are healthy, proceeding to tests...") + return False + except Exception: + logger.warning(f"Connection failed during {service} check") + return False + return True while True: if is_healthy(ssh): break - logger.warning("Health check failed, retrying...") - sleep(5) + sleep(1) # Return both the SSH connection and instance IP for use in tests yield { @@ -571,103 +507,3 @@ def test_postgrest_ending_empty_key_query_parameter_is_removed(host): }, ) assert res.ok - - -def test_pg_cron_extension(host): - # Only run this test for PostgreSQL 15 - postgres_version = os.environ.get("POSTGRES_MAJOR_VERSION") - if postgres_version != "15": - pytest.skip(f"Skipping pg_cron test for PostgreSQL version {postgres_version}") - - # Use the SSH connection to run commands as postgres user - ssh = host['ssh'] - - # Check prestart script - result = run_ssh_command(ssh, 'ls -l /usr/local/bin/postgres_prestart.sh') - assert result['succeeded'], f"Failed to find prestart script: {result['stderr']}" - logger.info(f"Prestart script details: {result['stdout']}") - - # Check if extensions file exists - result = run_ssh_command(ssh, 'sudo cat /root/pg_extensions.json') - assert result['succeeded'], f"Failed to read extensions file: {result['stderr']}" - logger.info(f"Extensions file contents: {result['stdout']}") - - # Check if version switcher exists - result = run_ssh_command(ssh, 'ls -l /var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version') - assert result['succeeded'], f"Failed to find version switcher: {result['stderr']}" - logger.info(f"Version switcher details: {result['stdout']}") - - # Check systemd service status - logger.info("Checking systemd service status...") - result = run_ssh_command(ssh, 'sudo systemctl list-units --type=service | grep postgres') - logger.info(f"PostgreSQL services: {result['stdout']}") - result = run_ssh_command(ssh, 'sudo systemctl status postgresql') - logger.info(f"PostgreSQL service status: {result['stdout']}") - - # Restart PostgreSQL through systemd - logger.info("Restarting PostgreSQL through systemd...") - result = run_ssh_command(ssh, 'sudo systemctl stop postgresql') - logger.info(f"Stop result: {result['stdout']}") - result = run_ssh_command(ssh, 'sudo systemctl start postgresql') - logger.info(f"Start result: {result['stdout']}") - - # Wait for PostgreSQL to be ready - logger.info("Waiting for PostgreSQL to be ready...") - max_attempts = 30 - for attempt in range(max_attempts): - result = run_ssh_command(ssh, 'sudo -u postgres /usr/bin/pg_isready -U postgres') - if result['succeeded']: - logger.info("PostgreSQL is ready") - break - logger.warning(f"PostgreSQL not ready yet (attempt {attempt + 1}/{max_attempts})") - sleep(2) - else: - raise Exception("PostgreSQL failed to start through systemd") - - # Create the extension - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE EXTENSION pg_cron WITH SCHEMA pg_catalog VERSION \'1.3.1\';"') - assert result['succeeded'], f"Failed to create pg_cron extension: {result['stderr']}" - - # Verify the extension version - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "SELECT extversion FROM pg_extension WHERE extname = \'pg_cron\';"') - assert result['succeeded'], f"Failed to get pg_cron version: {result['stderr']}" - assert "1.3.1" in result['stdout'], f"Expected pg_cron version 1.3.1, but got: {result['stdout']}" - logger.info(f"pg_cron version: {result['stdout']}") - - # Check the actual function definition - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\sf cron.schedule"') - assert result['succeeded'], f"Failed to get cron.schedule function definition: {result['stderr']}" - logger.info(f"cron.schedule function definition: {result['stdout']}") - - # Check extension details - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "SELECT * FROM pg_extension WHERE extname = \'pg_cron\';"') - assert result['succeeded'], f"Failed to get pg_cron extension details: {result['stderr']}" - logger.info(f"pg_cron extension details: {result['stdout']}") - - # Create test table - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "CREATE TABLE cron_test_log (id SERIAL PRIMARY KEY, message TEXT, log_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP);"') - assert result['succeeded'], f"Failed to create test table: {result['stderr']}" - - # Check the schema of cron.job table - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\d cron.job"') - assert result['succeeded'], f"Failed to get cron.job schema: {result['stderr']}" - logger.info(f"cron.job schema: {result['stdout']}") - - # Check available cron functions - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "\df cron.*"') - assert result['succeeded'], f"Failed to get cron functions: {result['stderr']}" - logger.info(f"Available cron functions: {result['stdout']}") - - # Schedule a job using the basic schedule function - result = run_ssh_command(ssh, '''sudo -u postgres psql -d postgres -c "SELECT cron.schedule('* * * * *'::text, 'INSERT INTO cron_test_log (message) VALUES (''Hello from pg_cron!'');'::text);"''') - assert result['succeeded'], f"Failed to schedule job: {result['stderr']}" - assert "1" in result['stdout'], "Expected schedule ID 1" - - # Verify job is scheduled - result = run_ssh_command(ssh, 'sudo -u postgres psql -d postgres -c "SELECT * FROM cron.job;"') - assert result['succeeded'], f"Failed to query cron.job: {result['stderr']}" - assert "* * * * *" in result['stdout'], "Expected cron schedule pattern" - assert "INSERT INTO cron_test_log" in result['stdout'], "Expected cron command" - assert "postgres" in result['stdout'], "Expected postgres username" - assert "postgres" in result['stdout'], "Expected postgres database" - \ No newline at end of file From 14d68b13c02374c6b5df3c1f412f83f5a43e957a Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Wed, 21 May 2025 10:56:13 -0400 Subject: [PATCH 40/42] fix: dash instead of underscore --- ansible/files/postgres_prestart.sh.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 1d84edc8e..854c27b68 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -33,7 +33,7 @@ update_orioledb_buffers() { } check_extensions_file() { - local extensions_file="/root/pg_extensions.json" + local extensions_file="/root/pg-extensions.json" if [ ! -f "$extensions_file" ]; then log "extensions: No extensions file found, skipping extensions versions check" return 1 @@ -51,7 +51,7 @@ get_pg_cron_version() { fi local version - version=$(jq -r '.pg_cron // empty' "/root/pg_extensions.json") + version=$(jq -r '.pg_cron // empty' "/root/pg-extensions.json") if [ -z "$version" ]; then log "pg_cron: Not specified in extensions file" return From 4ee63ef7cb5c40fb81136bf485f9f707cf0495fd Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Wed, 21 May 2025 10:57:00 -0400 Subject: [PATCH 41/42] chore: bump dev version --- ansible/vars.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/ansible/vars.yml b/ansible/vars.yml index 66f12684c..c33f69a76 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,21 +9,9 @@ postgres_major: # Full version strings for each major version postgres_release: -<<<<<<< HEAD -<<<<<<< HEAD postgresorioledb-17: "17.0.1.086-orioledb" postgres17: "17.4.1.036" postgres15: "15.8.1.093" -======= - postgresorioledb-17: "17.0.1.067-orioledb-pgcron-7" - postgres17: "17.4.1.017-pgcron-7" - postgres15: "15.8.1.074-pgcron-7" ->>>>>>> 5eb9ef5d (chore: bump test vers.) -======= - postgresorioledb-17: "17.0.1.067-orioledb-pgcron-8" - postgres17: "17.4.1.017-pgcron-8" - postgres15: "15.8.1.074-pgcron-8" ->>>>>>> 3646bff4 (chore: bump vers.) # Non Postgres Extensions pgbouncer_release: "1.19.0" From 1c51a28964335c98f7612a2496ffce40165762f1 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Thu, 22 May 2025 09:43:59 -0400 Subject: [PATCH 42/42] fix: use correct path for pg-extensions file --- ansible/files/postgres_prestart.sh.j2 | 4 ++-- ansible/vars.yml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ansible/files/postgres_prestart.sh.j2 b/ansible/files/postgres_prestart.sh.j2 index 854c27b68..b8344f634 100644 --- a/ansible/files/postgres_prestart.sh.j2 +++ b/ansible/files/postgres_prestart.sh.j2 @@ -33,7 +33,7 @@ update_orioledb_buffers() { } check_extensions_file() { - local extensions_file="/root/pg-extensions.json" + local extensions_file="/etc/adminapi/pg-extensions.json" if [ ! -f "$extensions_file" ]; then log "extensions: No extensions file found, skipping extensions versions check" return 1 @@ -51,7 +51,7 @@ get_pg_cron_version() { fi local version - version=$(jq -r '.pg_cron // empty' "/root/pg-extensions.json") + version=$(jq -r '.pg_cron // empty' "/etc/adminapi/pg-extensions.json") if [ -z "$version" ]; then log "pg_cron: Not specified in extensions file" return diff --git a/ansible/vars.yml b/ansible/vars.yml index c33f69a76..b8df02733 100644 --- a/ansible/vars.yml +++ b/ansible/vars.yml @@ -9,9 +9,9 @@ postgres_major: # Full version strings for each major version postgres_release: - postgresorioledb-17: "17.0.1.086-orioledb" - postgres17: "17.4.1.036" - postgres15: "15.8.1.093" + postgresorioledb-17: "17.0.1.086-orioledb-cron-1" + postgres17: "17.4.1.036-cron-1" + postgres15: "15.8.1.093-cron-1" # Non Postgres Extensions pgbouncer_release: "1.19.0"