# Daily Job Data Update — run #205
#
# Workflow file for this run:

# Workflow name as displayed in the GitHub Actions UI.
name: Daily Job Data Update

# Triggers: pushes to master, pull requests targeting master, and a
# once-a-day schedule.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  schedule:
    # Minute 0, hour 0, every day (GitHub evaluates schedules in UTC).
    - cron: '0 0 * * *'
# One job that runs the pipeline end to end: scrape, upload the results to
# Google Sheets, format the sheet, then clean up.
jobs:
  update-job-data:
    runs-on: ubuntu-latest
    steps:
      # Get our code from GitHub
      - name: Checkout code
        uses: actions/checkout@v4

      # Set up Python 3.12 with pip's download cache enabled
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: "pip"

      # Print the interpreter version so it is visible in the run log
      - name: Check Python version
        run: python --version

      # Install required packages
      - name: Install packages
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Install the Playwright browsers (and their OS-level dependencies).
      # NOTE(review): the `playwright` CLI is presumably provided by
      # requirements.txt — confirm.
      - name: Install Playwright
        run: playwright install --with-deps chromium firefox webkit

      # Fail early if a required secret is missing.
      # The secrets are handed to the shell through `env:` rather than being
      # expanded with ${{ }} inside the script: inline expansion pastes the raw
      # secret into the script text, where the quotes/newlines of the JSON
      # credential break the shell quoting.
      # NOTE: pull_request runs from forks do not receive repository secrets,
      # so this step is expected to fail for them.
      - name: Check secret info
        env:
          GCP_JSON: ${{ secrets.GCP_JSON }}
          GOOGLE_SHEETS_ID: ${{ secrets.GOOGLE_SHEETS_ID }}
        run: |
          if [ -z "$GCP_JSON" ]; then echo "Missing GCP_JSON"; exit 1; fi
          if [ -z "$GOOGLE_SHEETS_ID" ]; then echo "Missing GOOGLE_SHEETS_ID"; exit 1; fi

      # Run our web scraping script (no secrets are exposed to this step)
      - name: Run scraping
        env:
          # Unbuffered output so Python logs appear in the run log immediately
          PYTHONUNBUFFERED: "1"
        run: |
          chmod +x ./pipeline/1_scrape.sh
          ./pipeline/1_scrape.sh

      # Upload data to Google Sheets.
      # PYTHONPATH falls back to the current working directory when it is
      # unset or empty.
      - name: Upload to Google Sheets
        env:
          GCP_JSON: ${{ secrets.GCP_JSON }}
          GOOGLE_SHEETS_ID: ${{ secrets.GOOGLE_SHEETS_ID }}
          PYTHONUNBUFFERED: "1"
        run: PYTHONPATH="${PYTHONPATH:-$(pwd)}" python -m pipeline.2_upload_to_sheets

      # Format the Google Sheet
      - name: Format Google Sheet
        env:
          GCP_JSON: ${{ secrets.GCP_JSON }}
          GOOGLE_SHEETS_ID: ${{ secrets.GOOGLE_SHEETS_ID }}
          PYTHONUNBUFFERED: "1"
        run: PYTHONPATH="${PYTHONPATH:-$(pwd)}" python -m pipeline.3_sheet_updater

      # Clean up temporary files; always() makes this run even when an
      # earlier step failed.
      - name: Clean up
        if: always()
        run: rm -rf output

      # Print success message (only reached when every step above succeeded)
      - name: Success message
        run: echo "Job data update complete!"