
handle hourly files in the download script #120


Merged: 3 commits, May 6, 2025
scripts/download_db.sh (44 changes: 38 additions & 6 deletions)
@@ -30,26 +30,58 @@ cd "$DATA_DIR"
 
 # get date as YYYY-MM-DD
 get_date() {
+    local offset=${1:-0}
     # macOS
     if [[ "$OSTYPE" == "darwin"* ]]; then
-        date -v -"$1"d '+%Y-%m-%d'
+        date -u -v -"$offset"d '+%Y-%m-%d'
     # linux
     else
-        date -d "-$1 days" '+%Y-%m-%d'
+        date -u -d "-$offset days" '+%Y-%m-%d'
     fi
 }
 
+# get most recent GMT hour as HHMM
+get_hour() {
+    local offset=${1:-0}
+    # macOS
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        date -u -v -"$offset"H '+%H00'
+    # linux
+    else
+        date -u -d "-$offset hours" '+%H00'
+    fi
+}
 
+
+# Look for the most recent archive node DB dump from the last 3 hours
+for i in $(seq 0 2); do
+    DATE=$(get_date)
+    HOUR=$(get_hour "$i")
+    FILE="${NETWORK}-archive-dump-${DATE}_${HOUR}.sql.tar.gz"
+    URL="${BASE_URL}/${FILE}"
+
+    echo "Attempting to download archive node DB dump from: $URL"
+
+    # abort download if the file is an XML error page
+    if curl -# -O "$URL" && ! grep -q "<Error>" "$FILE"; then
+        tar -xf "$FILE"
+        mv "${FILE%.tar.gz}" "$PG_DUMP"
+        rm "$FILE"
+        echo "Downloaded and extracted to $DATA_DIR/$PG_DUMP"
+        exit 0
+    fi
+done
+
-# look for most recent db dump up to 10 days old
-for i in $(seq 0 9); do
+# If not found, try the last 3 days at 00:00
+for i in $(seq 0 2); do
     DATE=$(get_date "$i")
     FILE="${NETWORK}-archive-dump-${DATE}_0000.sql.tar.gz"
     URL="${BASE_URL}/${FILE}"
 
     echo "Attempting to download archive node DB dump from: $URL"
 
     # abort download if the file is an XML error page
-    if curl -sf -O "$URL" && ! grep -q "<Error>" "$FILE"; then
+    if curl -# -O "$URL" && ! grep -q "<Error>" "$FILE"; then
         tar -xf "$FILE"
         mv "${FILE%.tar.gz}" "$PG_DUMP"
         rm "$FILE"
@@ -58,5 +90,5 @@ for i in $(seq 0 9); do
     fi
 done
 
-echo "No valid dump found for network=$NETWORK in the last 10 days"
+echo "No valid dump found for network=$NETWORK in the last 3 days"
 exit 1
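
As a quick illustration of the retry order introduced here, the sketch below prints the candidate URLs the script will probe: three hourly files for the current UTC date, then three daily 00:00 files. NETWORK and BASE_URL are placeholder values (the real ones are set earlier in scripts/download_db.sh, outside this diff), and the Linux date syntax is shown with the macOS equivalent in comments:

    #!/usr/bin/env bash
    # Sketch only: list the dump URLs in the order download_db.sh tries them.
    NETWORK="mainnet"                      # placeholder value
    BASE_URL="https://example.com/dumps"   # placeholder value

    # Pass 1: the last 3 hours, always paired with today's UTC date
    for i in 0 1 2; do
        DATE=$(date -u '+%Y-%m-%d')
        HOUR=$(date -u -d "-$i hours" '+%H00')   # macOS: date -u -v -"$i"H '+%H00'
        echo "${BASE_URL}/${NETWORK}-archive-dump-${DATE}_${HOUR}.sql.tar.gz"
    done

    # Pass 2: fall back to the 00:00 dump from the last 3 days
    for i in 0 1 2; do
        DATE=$(date -u -d "-$i days" '+%Y-%m-%d')   # macOS: date -u -v -"$i"d '+%Y-%m-%d'
        echo "${BASE_URL}/${NETWORK}-archive-dump-${DATE}_0000.sql.tar.gz"
    done

Two behavioral details worth noting: the hourly pass pairs each rolled-back hour with today's date, so shortly after midnight UTC those probes miss and the script falls through to the daily pass; and because the new curl invocation drops the -f flag, an HTTP error response is saved to disk as if it were the file, which is why the grep for <Error> is what actually rejects missing dumps (assuming the server returns S3-style XML error documents).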