Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion bindings/python/fdb/tuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,14 @@ def _decode(v, pos):
ret.append(val)
return tuple(ret), end_pos + 1
else:
raise ValueError("Unknown data type in DB: " + repr(v))
# Enhanced error reporting for debugging upgrade issues
error_context = {
'unknown_code': hex(code) if code < 256 else 'invalid',
'position': pos,
'data_length': len(v),
'surrounding_bytes': v[max(0, pos-5):pos+10].hex() if pos < len(v) else 'N/A'
}
raise ValueError(f"Unknown data type in DB at position {pos}: code={hex(code)} context={error_context} data={repr(v)}")


def _reduce_children(child_values):
Expand Down
153 changes: 153 additions & 0 deletions fdbbackup/tests/test_1k_parallel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env bash

# 1K s3_backup_test.sh with LIMITED parallelism (2 concurrent instances)
# Based on proven simple_s3backup_test.sh pattern that works
#
# Environment overrides (defaults preserve the original hard-coded paths):
#   SOURCE_DIR - FoundationDB source checkout
#   BUILD_DIR  - FoundationDB build output directory

set -euo pipefail

readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly TOTAL_TESTS=1000
readonly MAX_CONCURRENT=2 # CRITICAL: Only 2 concurrent safe! 3+ causes failures!
# Previously hard-coded personal paths; now overridable via the environment
# so the script is portable across machines (backward compatible defaults).
readonly SOURCE_DIR="${SOURCE_DIR:-/Users/stack/checkouts/fdb/foundationdb}"
readonly BUILD_DIR="${BUILD_DIR:-/Users/stack/build_output}"
readonly LOG_PREFIX="/tmp/s3backup_1k_parallel"

START_TIME=$(date)
START_TIMESTAMP=$(date +%s) # Store as epoch seconds for macOS compatibility (no GNU date -d)
COMPLETED_TESTS=0
declare -a RUNNING_PIDS=() # PIDs of in-flight background test runs
declare -a RUNNING_IDS=()  # test ids parallel to RUNNING_PIDS

# Emit a message to stdout prefixed with a "[YYYY-MM-DD HH:MM:SS]" timestamp.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}

# Function to run a single test in background
# Launch a single s3_backup_test.sh run in the background and register it
# in the RUNNING_PIDS / RUNNING_IDS tracking arrays.
# Arguments: $1 - numeric test id (used in scratch dir and log file names)
start_test() {
  local id="$1"
  local workdir log_path child

  workdir=$(mktemp -d "/tmp/s3backup_parallel_${id}.XXXXXX")
  log_path="${LOG_PREFIX}_${id}.log"

  # Run the whole test inside a subshell. Failures are deliberately
  # tolerated here ("|| true"); pass/fail is judged later by scanning
  # the log file for the PASSED marker.
  (
    "${SCRIPT_DIR}/s3_backup_test.sh" "${SOURCE_DIR}" "${BUILD_DIR}" "${workdir}" &> "${log_path}" || true
    rm -rf "${workdir}" 2>/dev/null || true
  ) &

  child=$!
  RUNNING_PIDS+=("$child")
  RUNNING_IDS+=("$id")

  log "Started test ${id}/${TOTAL_TESTS} (PID: $child)"
}

# Function to wait for one test to complete
# Reap one finished background test, judge its result from its log file,
# and update the RUNNING_PIDS / RUNNING_IDS bookkeeping.
# On the first failed test: kills all remaining children and exits 1.
# Globals read:    RUNNING_PIDS, RUNNING_IDS, LOG_PREFIX, TOTAL_TESTS, START_TIMESTAMP
# Globals written: RUNNING_PIDS, RUNNING_IDS, COMPLETED_TESTS
wait_for_completion() {
    # Block until some background job finishes. The job subshells always
    # exit 0 (they end in "|| true"), so success is detected from the log
    # below, not from the wait status. The "|| true" guard keeps "set -e"
    # from aborting the script if a child was killed by a signal.
    wait -n || true

    # Find which tracked process completed; kill -0 fails for a reaped child.
    local completed_idx=-1
    local i
    for i in "${!RUNNING_PIDS[@]}"; do
        if ! kill -0 "${RUNNING_PIDS[$i]}" 2>/dev/null; then
            completed_idx=$i
            break
        fi
    done

    if [[ $completed_idx -ge 0 ]]; then
        local test_id="${RUNNING_IDS[$completed_idx]}"
        local logfile="${LOG_PREFIX}_${test_id}.log"

        # Check if test passed by looking for "PASSED" in log
        if grep -q "PASSED test_s3_backup_and_restore" "${logfile}"; then
            COMPLETED_TESTS=$((COMPLETED_TESTS + 1))
            log "✅ Test ${test_id} PASSED (${COMPLETED_TESTS}/${TOTAL_TESTS})"
        else
            log "❌ Test ${test_id} FAILED - stopping execution"
            log "Check log: ${logfile}"

            # Kill remaining background processes before bailing out.
            local pid
            for pid in "${RUNNING_PIDS[@]}"; do
                kill "$pid" 2>/dev/null || true
            done
            exit 1
        fi

        # Remove the finished entry and reindex. Subscripts are quoted
        # (shell globbing safety), and the ${arr[@]+...} expansion keeps
        # the reindex safe under "set -u" when the arrays become empty
        # (bash < 4.4 errors on "${empty[@]}" with nounset).
        unset 'RUNNING_PIDS[completed_idx]'
        unset 'RUNNING_IDS[completed_idx]'
        RUNNING_PIDS=(${RUNNING_PIDS[@]+"${RUNNING_PIDS[@]}"})
        RUNNING_IDS=(${RUNNING_IDS[@]+"${RUNNING_IDS[@]}"})

        # Progress update every 50 tests. Guard against division by zero
        # when the first milestone lands in the same second we started.
        if (( COMPLETED_TESTS % 50 == 0 )); then
            local elapsed=$(($(date +%s) - START_TIMESTAMP))
            (( elapsed > 0 )) || elapsed=1
            local rate=$(( COMPLETED_TESTS * 60 / elapsed ))
            log "Progress: ${COMPLETED_TESTS}/${TOTAL_TESTS} completed (${rate} tests/minute)"
        fi
    fi
}

log "=== S3 Backup Test 1K Parallel Runner ==="
log "Total tests to run: ${TOTAL_TESTS}"
log "Maximum concurrency: ${MAX_CONCURRENT} (CRITICAL LIMIT - DO NOT INCREASE)"
log "⚠️ WARNING: Each s3_backup_test.sh runs full FDB cluster (very resource intensive)"
log "⚠️ WARNING: 3+ concurrent instances cause failures due to resource exhaustion"
log "Start time: ${START_TIME}"

# Clean up any existing log files
rm -f "${LOG_PREFIX}"_*.log 2>/dev/null || true

# Run tests with limited parallelism. Drain to below capacity BEFORE
# starting each test: the previous start-then-wait shape silently skipped
# a test id whenever the capacity guard happened to be false at loop entry.
for (( test_id = 1; test_id <= TOTAL_TESTS; test_id++ )); do
    while [[ ${#RUNNING_PIDS[@]} -ge $MAX_CONCURRENT ]]; do
        wait_for_completion
    done

    start_test "$test_id"

    # Small delay to stagger startups
    sleep 3
done

# Wait for remaining tests to complete
log "Waiting for final ${#RUNNING_PIDS[@]} tests to complete..."
while [[ ${#RUNNING_PIDS[@]} -gt 0 ]]; do
    wait_for_completion
done

# Final report
end_time=$(date)
total_elapsed=$(($(date +%s) - START_TIMESTAMP))
(( total_elapsed > 0 )) || total_elapsed=1 # guard the divisions below

log "🎉 ALL ${TOTAL_TESTS} TESTS COMPLETED SUCCESSFULLY! 🎉"
log "Total time: $((total_elapsed / 3600))h $(((total_elapsed % 3600) / 60))m $((total_elapsed % 60))s"
log "Success rate: 100%"
log "Parallelism: ${MAX_CONCURRENT} concurrent tests"

# Generate proof file. Count logs via a glob instead of parsing ls output;
# every completed test leaves a log, so the pattern always matches here.
proof_file="/tmp/s3backup_1k_parallel_proof.txt"
log_files=( "${LOG_PREFIX}"_*.log )
{
    echo "=== S3 Backup Test 1K Parallel Execution Proof ==="
    echo "Start Time: ${START_TIME}"
    echo "End Time: ${end_time}"
    echo "Total Duration: $((total_elapsed / 3600))h $(((total_elapsed % 3600) / 60))m $((total_elapsed % 60))s"
    echo "Tests Completed: ${COMPLETED_TESTS}/${TOTAL_TESTS}"
    echo "Concurrency Level: ${MAX_CONCURRENT}"
    echo "Success Rate: 100%"
    echo "Log Files: ${#log_files[@]}"
    echo "Average Rate: $(( COMPLETED_TESTS * 3600 / total_elapsed )) tests/hour"
    echo "Completion: $(date)"
} > "${proof_file}"

log "Proof written to: ${proof_file}"
exit 0
153 changes: 153 additions & 0 deletions fdbclient/tests/README_concurrent_s3client_test.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Concurrent S3Client Test Script

## Overview

The `concurrent_s3client_test.sh` script stress-tests the robustness of `s3client_test.sh` by running multiple instances concurrently. It specifically verifies:

1. **Port conflict handling** - Ensures that when multiple MockS3Server instances try to bind to the same port, the retry logic works correctly
2. **Process cleanup** - Verifies that all spawned processes are properly cleaned up, preventing orphaned MockS3Server processes
3. **Concurrent execution** - Tests that multiple s3client_test.sh instances can run simultaneously without interfering with each other
4. **Signal handling** - Ensures proper cleanup when tests are interrupted

## Usage

```bash
./concurrent_s3client_test.sh <build_dir> [max_concurrent] [test_duration]
```

### Parameters

- `build_dir` (required): Path to your FoundationDB build output directory (e.g., `~/build_output`)
- `max_concurrent` (optional): Maximum number of concurrent test instances to run (default: 5)
- `test_duration` (optional): Maximum time in seconds to wait for tests to complete (default: 60)

### Examples

```bash
# Basic usage with default settings (5 concurrent tests, 60s timeout)
./concurrent_s3client_test.sh ~/build_output

# Run 10 concurrent tests with 2-minute timeout
./concurrent_s3client_test.sh ~/build_output 10 120

# Run 3 concurrent tests with 30-second timeout
./concurrent_s3client_test.sh ~/build_output 3 30
```

## Test Scenarios

The script runs three main test scenarios:

### 1. Basic Concurrent Execution
- Starts multiple s3client_test.sh instances with staggered delays
- Tests normal concurrent operation
- Verifies all instances complete successfully

### 2. Port Occupation Simulation
- Pre-occupies some ports in the range that MockS3Server uses (8080-8090)
- Starts test instances that must find alternative ports
- Verifies the port retry logic works correctly

### 3. Signal Handling Test
- Starts test instances and then sends SIGTERM signals
- Verifies that cleanup happens properly when tests are interrupted
- Checks for orphaned processes

## What It Tests

### Port Conflict Resolution
The script verifies that the [`mocks3_fixture.sh`](mocks3_fixture.sh:75-141) port retry logic works correctly:
- When port 8080 is occupied, MockS3Server should try 8081, 8082, etc.
- Multiple instances should be able to find available ports
- Port conflicts should not cause test failures

### Process Cleanup
The script monitors for:
- All MockS3Server processes are terminated when tests complete
- No orphaned processes remain after test completion
- Proper cleanup happens even when tests are killed with signals

### Concurrent Safety
The script tests:
- Multiple s3client_test.sh instances can run simultaneously
- Tests don't interfere with each other's scratch directories
- Log files are properly isolated per instance

## Output and Reporting

The script provides:

### Real-time Logging
- Timestamped log messages showing test progress
- Port usage monitoring
- Process lifecycle tracking

### Final Report
- Summary of all test results
- Count of tests started, completed, failed, and killed
- Number of port conflicts detected
- Detection of any orphaned processes

### Detailed Report File
A detailed report is written to `/tmp/concurrent_s3client_test_report.txt` containing:
- Individual test instance details
- Port usage analysis
- Log file locations and sizes
- Complete test timeline

## Exit Codes

- `0`: All tests passed successfully with proper cleanup
- `1`: Some tests completed successfully but with issues
- `2`: Tests failed or cleanup problems detected

## Requirements

The script requires:
- Bash 4.0 or later
- Standard Unix utilities (`netstat` or `ss`, `pgrep`, `pkill`)
- The s3client_test.sh script must be executable
- A valid FoundationDB build directory with fdbserver binary

## Port Range

The script uses ports 8080-8090 by default (configurable via `BASE_PORT` and `MAX_PORT_RETRIES` constants).

## Troubleshooting

### "Port already in use" errors
This is expected behavior when testing port conflict resolution. The script should handle these automatically.

### Orphaned processes
If the script detects orphaned MockS3Server processes, it will:
1. Report them in the logs
2. Attempt to kill them automatically
3. Mark the test as failed

### Permission issues
Ensure the script has execute permissions:
```bash
chmod +x concurrent_s3client_test.sh
```

### Missing dependencies
The script will fail if required binaries are missing. Ensure:
- `fdbserver` exists in `<build_dir>/bin/fdbserver`
- Standard Unix utilities are available
- The system supports process monitoring commands

## Integration with CI/CD

This script can be integrated into continuous integration pipelines to:
- Verify port handling robustness under load
- Catch process cleanup regressions
- Test concurrent execution scenarios
- Validate signal handling behavior

Example CI usage:
```bash
# Run quick concurrent test
./concurrent_s3client_test.sh "$BUILD_DIR" 3 30

# Run stress test
./concurrent_s3client_test.sh "$BUILD_DIR" 10 180
Loading