forked from sharmavaibhav31/arachnode
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
210 lines (196 loc) · 7.64 KB
/
Copy pathdocker-compose.yml
File metadata and controls
210 lines (196 loc) · 7.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# NOTE(review): under the Compose Specification the top-level `version` key is
# informational only, and current Docker Compose releases warn that it is
# obsolete. Confirm no legacy docker-compose tooling relies on it before
# removing it.
version: "3.9"
# ─────────────────────────────────────────────────────────────────────────────
# Job Discovery System — Full Stack
#
# Services:
#   redis      — shared message broker
#   postgres   — shared database
#   crawler    — Scrapy job crawler (one-shot)
#   aggregator — FastAPI stream consumer + jobs REST API (internal :8000)
#   scraper    — Platform scraper (Naukri/LinkedIn/Internshala) (internal :8000)
#   contact    — Contact discovery pipeline (internal :8000)
#   email-gen  — Cold email generator (internal :8000)
#   gateway    — API gateway + dashboard (host :8080 by default)
#   scheduler  — APScheduler pipeline automation (background)
#
# Inside the Compose network every backend is addressed on container port 8000
# (see the gateway's *_URL settings and the per-service healthchecks); only the
# gateway, redis and postgres publish ports on the host.
# ─────────────────────────────────────────────────────────────────────────────
# Connection settings shared by the services that talk to Redis and/or
# PostgreSQL. Services pull them in with `<<: *common-env`.
# The database credentials fall back to jobuser / jobpass / jobsdb when the
# POSTGRES_* variables are not set.
x-common-env: &common-env
  REDIS_HOST: "redis"
  REDIS_PORT: "6379"
  DATABASE_URL: "postgresql://${POSTGRES_USER:-jobuser}:${POSTGRES_PASSWORD:-jobpass}@postgres:5432/${POSTGRES_DB:-jobsdb}"
services:
# ── Redis ──────────────────────────────────────────────────────────────────
# Redis server capped at 512mb with the allkeys-lru eviction policy.
# NOTE(review): allkeys-lru allows Redis to evict any key once the memory cap
# is reached; if queued job data must never be dropped, confirm this policy is
# intended.
# NOTE(review): 6379 is published on every host interface and no password is
# configured here; confirm that exposure is wanted outside local development.
redis:
  image: "redis:7-alpine"
  command:
    - "redis-server"
    - "--maxmemory"
    - "512mb"
    - "--maxmemory-policy"
    - "allkeys-lru"
  ports:
    - "6379:6379"
  networks:
    - jobnet
  healthcheck:
    # Healthy once `redis-cli ping` succeeds; probed every 5s, up to 5 retries.
    test:
      - "CMD"
      - "redis-cli"
      - "ping"
    interval: 5s
    retries: 5
# ── PostgreSQL ─────────────────────────────────────────────────────────────
# PostgreSQL 16. User, password and database name come from the POSTGRES_*
# variables and fall back to jobuser / jobpass / jobsdb.
# NOTE(review): the fallback password is a fixed default and the server is
# published on host port 5433; confirm this is only used for local development.
postgres:
  image: "postgres:16-alpine"
  environment:
    POSTGRES_DB: "${POSTGRES_DB:-jobsdb}"
    POSTGRES_USER: "${POSTGRES_USER:-jobuser}"
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-jobpass}"
  ports:
    # Host port 5433 maps to the container's 5432.
    - "5433:5432"
  volumes:
    # Named volume keeps the data directory across container re-creation.
    - "pgdata:/var/lib/postgresql/data"
  networks:
    - jobnet
  healthcheck:
    # Healthy once pg_isready reports the configured database as accepting
    # connections; probed every 5s, up to 10 retries.
    test:
      - "CMD-SHELL"
      - "pg_isready -U ${POSTGRES_USER:-jobuser} -d ${POSTGRES_DB:-jobsdb}"
    interval: 5s
    retries: 10
# ── Crawler (Scrapy, one-shot) ─────────────────────────────────────────────
# One-shot Scrapy run: executes `scrapy crawl remotive` once Redis reports
# healthy, and is not restarted automatically when it exits.
crawler:
  build:
    context: ./crawler-service
    dockerfile: Dockerfile
  command:
    - "scrapy"
    - "crawl"
    - "remotive"
  # Quoted so YAML does not read the value as the boolean false.
  restart: "no"
  environment:
    <<: *common-env
    JOBSEEKER_ROLE: "${JOBSEEKER_ROLE:-Backend Engineer}"
    JOBSEEKER_STACK: "${JOBSEEKER_STACK:-Python,FastAPI,PostgreSQL,Redis,Go}"
  depends_on:
    redis:
      condition: service_healthy
  networks:
    - jobnet
# ── Aggregator ─────────────────────────────────────────────────────────────
# Aggregator service. Starts only after both Redis and PostgreSQL are healthy
# and is restarted unless explicitly stopped.
aggregator:
  build:
    context: ./aggregator-service
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    <<: *common-env
  depends_on:
    postgres:
      condition: service_healthy
    redis:
      condition: service_healthy
  healthcheck:
    # Probes GET /health on container port 8000 every 10s (5 retries) after a
    # 15s start-up grace period.
    # NOTE(review): assumes curl is installed in the image; confirm against
    # the service's Dockerfile.
    test:
      - "CMD-SHELL"
      - "curl -sf http://localhost:8000/health || exit 1"
    start_period: 15s
    interval: 10s
    retries: 5
  networks:
    - jobnet
# ── Platform Scraper ───────────────────────────────────────────────────────
# Platform scraper service. Waits for Redis to be healthy and is restarted
# unless explicitly stopped.
scraper:
  build:
    context: ./scraper-service
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    <<: *common-env
  depends_on:
    redis:
      condition: service_healthy
  healthcheck:
    # Probes GET /health on container port 8000 every 10s (5 retries) after a
    # 20s start-up grace period.
    # NOTE(review): assumes curl is installed in the image; confirm against
    # the service's Dockerfile.
    test:
      - "CMD-SHELL"
      - "curl -sf http://localhost:8000/health || exit 1"
    start_period: 20s
    interval: 10s
    retries: 5
  networks:
    - jobnet
# ── Contact Discovery ──────────────────────────────────────────────────────
# Contact discovery service. Waits for PostgreSQL to be healthy and is
# restarted unless explicitly stopped.
contact:
  build:
    context: ./contact-discovery-service
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    <<: *common-env
    # Passed through from the host environment; empty when unset.
    GITHUB_TOKEN: "${GITHUB_TOKEN:-}"
  depends_on:
    postgres:
      condition: service_healthy
  healthcheck:
    # Probes GET /health on container port 8000 every 10s (5 retries) after a
    # 25s start-up grace period.
    # NOTE(review): assumes curl is installed in the image; confirm against
    # the service's Dockerfile.
    test:
      - "CMD-SHELL"
      - "curl -sf http://localhost:8000/health || exit 1"
    start_period: 25s
    interval: 10s
    retries: 5
  networks:
    - jobnet
# ── Email Generator ────────────────────────────────────────────────────────
# Email generator service. Waits for PostgreSQL to be healthy and is restarted
# unless explicitly stopped.
email-gen:
  build:
    context: ./email-generator-service
    dockerfile: Dockerfile
  environment:
    <<: *common-env
    # Defaults to an Ollama server listening on the Docker host.
    OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-http://host.docker.internal:11434}"
    # The remaining values are passed through from the host environment;
    # those with an empty fallback are empty strings when unset.
    GMAIL_ADDRESS: "${GMAIL_ADDRESS:-}"
    GMAIL_APP_PASSWORD: "${GMAIL_APP_PASSWORD:-}"
    YOUR_NAME: "${YOUR_NAME:-Applicant}"
    YOUR_GITHUB_URL: "${YOUR_GITHUB_URL:-}"
  # `host.docker.internal` is only predefined by Docker Desktop. On a plain
  # Linux Docker Engine the name does not resolve, so the default
  # OLLAMA_BASE_URL above would be unreachable. Mapping it to the special
  # `host-gateway` value (Docker Engine 20.10+) makes the default work on
  # Linux too and is harmless on Docker Desktop.
  extra_hosts:
    - "host.docker.internal:host-gateway"
  depends_on:
    postgres:
      condition: service_healthy
  networks:
    - jobnet
  restart: unless-stopped
  healthcheck:
    # Probes GET /health on container port 8000 every 10s (5 retries) after a
    # 15s start-up grace period.
    # NOTE(review): assumes curl is installed in the image; confirm against
    # the service's Dockerfile.
    test:
      - "CMD-SHELL"
      - "curl -sf http://localhost:8000/health || exit 1"
    interval: 10s
    retries: 5
    start_period: 15s
# ── API Gateway + Dashboard ────────────────────────────────────────────────
# API gateway. Starts only after all four backends report healthy, reaches
# each of them on container port 8000, and is the only application service
# that publishes a host port.
gateway:
  build:
    context: ./gateway
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    AGGREGATOR_URL: "http://aggregator:8000"
    SCRAPER_URL: "http://scraper:8000"
    CONTACT_URL: "http://contact:8000"
    EMAIL_GEN_URL: "http://email-gen:8000"
    # Location of the run summary inside the shared /data volume.
    SUMMARY_PATH: "/data/run_summary.json"
  ports:
    # Host port is configurable via GATEWAY_PORT (default 8080).
    - "${GATEWAY_PORT:-8080}:8080"
  volumes:
    # Same named volume the scheduler mounts at /data.
    - "scheduler_data:/data"
  depends_on:
    aggregator:
      condition: service_healthy
    contact:
      condition: service_healthy
    email-gen:
      condition: service_healthy
    scraper:
      condition: service_healthy
  healthcheck:
    # Probes GET /api/health on container port 8080 every 15s (5 retries)
    # after a 30s start-up grace period.
    # NOTE(review): assumes curl is installed in the image; confirm against
    # the gateway's Dockerfile.
    test:
      - "CMD-SHELL"
      - "curl -sf http://localhost:8080/api/health || exit 1"
    start_period: 30s
    interval: 15s
    retries: 5
  networks:
    - jobnet
# ── Scheduler (APScheduler background process) ─────────────────────────────
# Scheduler service. Starts once the gateway is healthy, talks to it at
# http://gateway:8080, and is restarted unless explicitly stopped.
scheduler:
  build:
    context: ./scheduler
    dockerfile: Dockerfile
  restart: unless-stopped
  environment:
    GATEWAY_URL: "http://gateway:8080"
    JOBSEEKER_ROLE: "${JOBSEEKER_ROLE:-Backend Engineer}"
    JOBSEEKER_STACK: "${JOBSEEKER_STACK:-Python,FastAPI,PostgreSQL,Redis,Go}"
    # Timing knobs, each overridable from the host environment.
    CRAWL_INTERVAL_HOURS: "${CRAWL_INTERVAL_HOURS:-8}"
    DISCOVER_INTERVAL_HOURS: "${DISCOVER_INTERVAL_HOURS:-24}"
    SCRAPER_WAIT_SECS: "${SCRAPER_WAIT_SECS:-60}"
    DISCOVER_DELAY_SECS: "${DISCOVER_DELAY_SECS:-30}"
    # Location of the run summary inside the shared /data volume.
    SUMMARY_PATH: "/data/run_summary.json"
  volumes:
    # Shared with gateway for GET /api/summary.
    - "scheduler_data:/data"
  depends_on:
    gateway:
      condition: service_healthy
  networks:
    - jobnet
# Named volumes, all created with the default driver and options.
volumes:
  # Mounted by postgres at /var/lib/postgresql/data.
  pgdata: {}
  # Shared between scheduler (writer) and gateway (reader); both mount it at
  # /data.
  scheduler_data: {}
# Single user-defined bridge network that every service in this file joins.
networks:
  jobnet:
    driver: "bridge"