Skip to content

Commit

Permalink
Merge pull request #192 from tgxn/develop
Browse files Browse the repository at this point in the history
Crawler Improvements
  • Loading branch information
tgxn authored Jan 4, 2025
2 parents 87d5167 + ec00e80 commit 270ce17
Show file tree
Hide file tree
Showing 63 changed files with 5,815 additions and 6,359 deletions.
56 changes: 25 additions & 31 deletions .github/workflows/aws-deploy-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,34 @@ on:
# schedule is disabled here: GitHub only triggers scheduled workflows from the default branch
# schedule:
# - cron: "0 */6 * * *"

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

concurrency:
concurrency:
group: develop
cancel-in-progress: false

jobs:
aws_deploy_dev:
runs-on: ubuntu-latest

environment:
environment:
name: develop
url: https://develop.lemmyverse.net

steps:

# https://github.com/actions/toolkit/issues/946#issuecomment-1590016041
- name: root suid tar
run: sudo chown root:root /bin/tar && sudo chmod u+s /bin/tar

- uses: actions/checkout@v3

- name: Use Node.js 18.15.0
- name: Use Node.js 22.12.0
uses: actions/setup-node@v3
with:
node-version: 18.15.0
node-version: 22.12.0

# download latest redis database
- name: get current hour for cache busting
id: cache-hour
Expand All @@ -49,7 +48,7 @@ jobs:
env:
cache-name: cache-redis
with:
path: ./.redis/
path: ./.redis/dump.rdb
key: cache-redis-${{ steps.cache-hour.outputs.hour }}

# download redis db dump from s3
Expand All @@ -64,6 +63,9 @@ jobs:
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws_region: ap-southeast-2

# log the SHA-256 hash of the redis dump
- name: Log Redis Dump Hash
run: sha256sum ./.redis/dump.rdb

# start redis & check if it is running
- name: Start Redis
Expand All @@ -74,33 +76,32 @@ jobs:
- working-directory: ./crawler
run: docker compose -f "docker-compose.github.yaml" logs redis


# install npm dependencies
- name: Cache NPM Modules
# Install + Cache Crawler Dependencies
- name: Cache Node Modules | Crawler
id: cache-crawler-npm
uses: actions/cache@v3
env:
cache-name: cache-crawler-npm
with:
path: ./crawler/node_modules/
key: cache-crawler-npm-${{ hashFiles('crawler/package-lock.json') }}
key: cache-crawler-yarn-${{ hashFiles('crawler/yarn.lock') }}

- name: Install Dependencies
- name: Install Node Modules | Crawler
if: steps.cache-crawler-npm.outputs.cache-hit != 'true'
run: npm ci
run: yarn --frozen-lockfile
working-directory: ./crawler

# Run Crawler Health + Output Scripts
- name: Run Health Script
run: node index.js --health
run: yarn health
working-directory: ./crawler

- name: Run Output Script
run: node index.js --out
run: yarn output
working-directory: ./crawler


# install npm dependencies
- name: Cache NPM Modules
# Install + Cache Frontend Dependencies
- name: Cache Node Modules | Frontend
id: cache-frontend-npm
uses: actions/cache@v3
env:
Expand All @@ -109,7 +110,7 @@ jobs:
path: ./frontend/node_modules/
key: cache-frontend-npm-${{ hashFiles('frontend/package-lock.json') }}

- name: Install Dependencies
- name: Install Node Modules | Frontend
if: steps.cache-frontend-npm.outputs.cache-hit != 'true'
run: npm ci
working-directory: ./frontend
Expand All @@ -118,23 +119,16 @@ jobs:
run: npm run build
working-directory: ./frontend

# - name: archive frontend bundle
# uses: actions/upload-artifact@v3
# with:
# name: dist-frontend-bundle
# path: |
# ./frontend/dist/

- name: create-json
- name: Create CDK Config JSON
id: create-json
uses: jsdaniell/[email protected]
with:
dir: ./cdk
name: "config.json"
json: ${{ vars.CONFIG_JSON }}

# install npm dependencies
- name: Cache NPM Modules
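# Install + Cache CDK Dependencies
# NOTE(review): the cache key for this step hashes frontend/package-lock.json while caching ./cdk/node_modules/ - presumably it should hash cdk/package-lock.json; confirm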
- name: Cache Node Modules | CDK
id: cache-cdk-npm
uses: actions/cache@v3
env:
Expand All @@ -143,7 +137,7 @@ jobs:
path: ./cdk/node_modules/
key: cache-cdk-npm-${{ hashFiles('frontend/package-lock.json') }}

- name: Install CDK Dependencies
- name: Install Node Modules | CDK
if: steps.cache-cdk-npm.outputs.cache-hit != 'true'
run: npm ci
working-directory: ./cdk
Expand Down
54 changes: 24 additions & 30 deletions .github/workflows/aws-deploy-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,30 @@ on:
schedule:
- cron: "0 */6 * * *"

concurrency:
concurrency:
group: production
cancel-in-progress: false

jobs:
aws_deploy_prod:
runs-on: ubuntu-latest

environment:
environment:
name: production
url: https://lemmyverse.net

steps:

# https://github.com/actions/toolkit/issues/946#issuecomment-1590016041
- name: root suid tar
run: sudo chown root:root /bin/tar && sudo chmod u+s /bin/tar

- uses: actions/checkout@v3

- name: Use Node.js 18.15.0
- name: Use Node.js 22.12.0
uses: actions/setup-node@v3
with:
node-version: 18.15.0
node-version: 22.12.0

# download latest redis database
- name: get current hour for cache busting
id: cache-hour
Expand All @@ -48,7 +47,7 @@ jobs:
env:
cache-name: cache-redis
with:
path: ./.redis/
path: ./.redis/dump.rdb
key: cache-redis-${{ steps.cache-hour.outputs.hour }}

# download redis db dump from s3
Expand All @@ -63,6 +62,9 @@ jobs:
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws_region: ap-southeast-2

# log the SHA-256 hash of the redis dump
- name: Log Redis Dump Hash
run: sha256sum ./.redis/dump.rdb

# start redis & check if it is running
- name: Start Redis
Expand All @@ -73,33 +75,32 @@ jobs:
- working-directory: ./crawler
run: docker compose -f "docker-compose.github.yaml" logs redis


# install npm dependencies
- name: Cache NPM Modules
# Install + Cache Crawler Dependencies
- name: Cache Node Modules | Crawler
id: cache-crawler-npm
uses: actions/cache@v3
env:
cache-name: cache-crawler-npm
with:
path: ./crawler/node_modules/
key: cache-crawler-npm-${{ hashFiles('crawler/package-lock.json') }}
key: cache-crawler-yarn-${{ hashFiles('crawler/yarn.lock') }}

- name: Install Dependencies
- name: Install Node Modules | Crawler
if: steps.cache-crawler-npm.outputs.cache-hit != 'true'
run: npm ci
run: yarn --frozen-lockfile
working-directory: ./crawler

# Run Crawler Health + Output Scripts
- name: Run Health Script
run: node index.js --health
run: yarn health
working-directory: ./crawler

- name: Run Output Script
run: node index.js --out
run: yarn output
working-directory: ./crawler


# install npm dependencies
- name: Cache NPM Modules
# Install + Cache Frontend Dependencies
- name: Cache Node Modules | Frontend
id: cache-frontend-npm
uses: actions/cache@v3
env:
Expand All @@ -108,7 +109,7 @@ jobs:
path: ./frontend/node_modules/
key: cache-frontend-npm-${{ hashFiles('frontend/package-lock.json') }}

- name: Install Dependencies
- name: Install Node Modules | Frontend
if: steps.cache-frontend-npm.outputs.cache-hit != 'true'
run: npm ci
working-directory: ./frontend
Expand All @@ -117,23 +118,16 @@ jobs:
run: npm run build
working-directory: ./frontend

# - name: archive frontend bundle
# uses: actions/upload-artifact@v3
# with:
# name: dist-frontend-bundle
# path: |
# ./frontend/dist/

- name: create-json
- name: Create CDK Config JSON
id: create-json
uses: jsdaniell/[email protected]
with:
dir: ./cdk
name: "config.json"
json: ${{ vars.CONFIG_JSON }}

# install npm dependencies
- name: Cache NPM Modules
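# Install + Cache CDK Dependencies
# NOTE(review): the cache key for this step hashes frontend/package-lock.json while caching ./cdk/node_modules/ - presumably it should hash cdk/package-lock.json; confirm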
- name: Cache Node Modules | CDK
id: cache-cdk-npm
uses: actions/cache@v3
env:
Expand All @@ -142,7 +136,7 @@ jobs:
path: ./cdk/node_modules/
key: cache-cdk-npm-${{ hashFiles('frontend/package-lock.json') }}

- name: Install CDK Dependencies
- name: Install Node Modules | CDK
if: steps.cache-cdk-npm.outputs.cache-hit != 'true'
run: npm ci
working-directory: ./cdk
Expand Down
Loading

0 comments on commit 270ce17

Please sign in to comment.