From 9d7661eeda9bf3bc0c7cb94cd2ea427c44791147 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Fri, 26 Sep 2025 12:38:21 -0400 Subject: [PATCH 01/12] fix(webapp): move prisma db push from runner to db-init docker stage --- apps/webapp/db-init.sh | 3 +++ apps/webapp/init.sh | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/apps/webapp/db-init.sh b/apps/webapp/db-init.sh index 0f9fdd8d7..db54edba0 100644 --- a/apps/webapp/db-init.sh +++ b/apps/webapp/db-init.sh @@ -5,6 +5,9 @@ set -a source .env set +a +echo "Running Prisma db push..." +npx prisma db push + echo "Running database migrations..." npx prisma migrate deploy diff --git a/apps/webapp/init.sh b/apps/webapp/init.sh index 5777691d1..f092c6613 100755 --- a/apps/webapp/init.sh +++ b/apps/webapp/init.sh @@ -4,10 +4,6 @@ set -a source .env set +a -# Wait for database to be ready and run database operations at runtime -echo "Waiting for database and running Prisma db push..." -./node_modules/.bin/prisma db push - -# Start the Next.js application +# Start the Next.js application (database operations handled by db-init container) echo "Starting Next.js application..." node server.js \ No newline at end of file From 36a4cde72a2dca3c3c70aab513679953695f8711 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Fri, 26 Sep 2025 12:49:11 -0400 Subject: [PATCH 02/12] fix(webapp): use prisma from node_modules instead of npx to avoid version mismatch --- apps/webapp/db-init.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/webapp/db-init.sh b/apps/webapp/db-init.sh index db54edba0..56f358c5b 100644 --- a/apps/webapp/db-init.sh +++ b/apps/webapp/db-init.sh @@ -6,12 +6,12 @@ source .env set +a echo "Running Prisma db push..." -npx prisma db push +./node_modules/.bin/prisma db push echo "Running database migrations..." 
-npx prisma migrate deploy +./node_modules/.bin/prisma migrate deploy echo "Running database seed..." -npx prisma db seed +./node_modules/.bin/prisma db seed echo "Database initialization completed successfully!" \ No newline at end of file From 46110e5b84cc24c03fb26b7096e7774ebb24e588 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:11:53 -0400 Subject: [PATCH 03/12] fix(webapp): restore support for the webapp docker dev workflow --- Makefile | 27 +++++++++++++++++++++++---- apps/webapp/package.json | 3 ++- docker/compose.webapp.dev.yaml | 8 ++++++-- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index c4de7a69e..e1169f376 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,9 @@ # Default to nocuda BUILD_TYPE ?= nocuda +# optional build argument, useful for zero trust environments +CUSTOM_CA_BUNDLE ?= .nocustomca + help: ## Show available commands @echo "\n\033[1;35mThe pattern for commands is generally 'make [app]-[environment]-[action]''.\nFor example, 'make webapp-demo-build' will _build_ the _webapp for the demo environment.\033[0m" @awk 'BEGIN {FS = ":.*## "; printf "\n"} /^[a-zA-Z_-]+:.*## / { printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) @@ -54,8 +57,7 @@ webapp-demo-check: ## Webapp: Public Demo Environment - Check Config @echo "Printing the webapp configuration - this is useful to see if your environment variables are set correctly." ENV_FILE=../.env.demo docker compose -f docker/compose.yaml config webapp -CUSTOM_CA_BUNDLE ?= .nocustomca -webapp-localhost-build: ## Webapp: Localhost Environment - Build (Production Build) +webapp-localhost-prod-build: ## Webapp: Localhost Environment - Build (Production Build) @echo "Building the webapp for connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. 
Please install Docker first."; \ @@ -67,7 +69,7 @@ webapp-localhost-build: ## Webapp: Localhost Environment - Build (Production Bui CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) ENV_FILE=../.env.localhost \ docker compose -f docker/compose.yaml build webapp db-init postgres -webapp-localhost-run: ## Webapp: Localhost Environment - Run (Production Build) +webapp-localhost-prod-run: ## Webapp: Localhost Environment - Run (Production Run) @echo "Bringing up the webapp and connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. Please install Docker first."; \ @@ -90,7 +92,24 @@ webapp-localhost-install: ## Webapp: Localhost Environment - Install Dependencie cd apps/webapp && \ npm install -webapp-localhost-dev: ## Webapp: Localhost Environment - Run (Development Build) +webapp-localhost-dev-build: ## Webapp: Localhost Environment - Run (Development Build) + @echo "Bringing up the webapp for development and connecting to the localhost database..." + @if ! which docker > /dev/null 2>&1; then \ + echo "Error: Docker is not installed. Please install Docker first."; \ + exit 1; \ + fi + @if [ "$(CUSTOM_CA_BUNDLE)" != ".nocustomca" ]; then \ + echo "Using custom CA bundle: $(CUSTOM_CA_BUNDLE)"; \ + fi + CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) \ + ENV_FILE=../.env.localhost docker compose \ + -f docker/compose.yaml \ + -f docker/compose.webapp.dev.yaml \ + --env-file .env.localhost \ + --env-file .env \ + build webapp db-init postgres + +webapp-localhost-dev-run: ## Webapp: Localhost Environment - Run (Development Run) @echo "Bringing up the webapp for development and connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. 
Please install Docker first."; \ diff --git a/apps/webapp/package.json b/apps/webapp/package.json index a818a29e2..a5bd653fd 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -3,7 +3,8 @@ "version": "1.0.0", "private": true, "scripts": { - "dev": "prisma generate && next dev", + "dev": "next dev", + "dev:generate": "prisma generate && next dev", "dev:localhost": "env-cmd -f .env.localhost --use-shell \"prisma generate && next dev\"", "dev:remote": "env-cmd -f .env.remote --use-shell \"prisma generate && next dev\"", "dev:demo": "env-cmd -f .env.demo --use-shell \"prisma generate && next dev\"", diff --git a/docker/compose.webapp.dev.yaml b/docker/compose.webapp.dev.yaml index f554628c7..55725c3fd 100644 --- a/docker/compose.webapp.dev.yaml +++ b/docker/compose.webapp.dev.yaml @@ -1,10 +1,14 @@ services: webapp: + build: + context: .. + dockerfile: apps/webapp/Dockerfile + # the builder stage includes the non production optimized build, e.g., node_modules + target: builder environment: - NODE_ENV=development - WATCHPACK_POLLING=true volumes: - ../apps/webapp:/app/apps/webapp - ../apps/webapp/node_modules:/app/apps/webapp/node_modules - command: > - sh -c "npx prisma migrate dev && npm run dev" + command: npm run dev # all prisma commands are handled in the db-init stage From 7035e2ec5a0bd6a33cbc8ff2211b3fb498cbf105 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:23:58 -0500 Subject: [PATCH 04/12] build: clean up Makefile --- Makefile | 129 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index e1169f376..4d97adb8a 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,17 @@ BUILD_TYPE ?= nocuda # optional build argument, useful for zero trust environments CUSTOM_CA_BUNDLE ?= .nocustomca +# Environment files to load (can be overridden, e.g., make webapp-prod-build ENV_FILES=".env.custom .env") +# 
Multiple files can be specified, later files take precedence +ENV_FILES ?= .env.localhost .env + +# Helper function to generate --env-file flags for docker compose +# Usage: $(call env_file_flags,$(ENV_FILES)) +# Note: Only includes files that exist to avoid errors +define env_file_flags +$(foreach file,$(1),--env-file $(file) ) +endef + help: ## Show available commands @echo "\n\033[1;35mThe pattern for commands is generally 'make [app]-[environment]-[action]''.\nFor example, 'make webapp-demo-build' will _build_ the _webapp for the demo environment.\033[0m" @awk 'BEGIN {FS = ":.*## "; printf "\n"} /^[a-zA-Z_-]+:.*## / { printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) @@ -37,27 +48,36 @@ init-env: ## Initialize the environment fi @echo "Environment initialized successfully." -webapp-demo-build: ## Webapp: Public Demo Environment - Build +webapp-demo-build: ## Webapp: Public Demo Environment - Build. Usage: make webapp-demo-build [ENV_FILES=".env.demo .env"] @echo "Building the webapp for connecting to the public demo database and servers..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. Please install Docker first."; \ exit 1; \ fi - ENV_FILE=../.env.demo docker compose -f docker/compose.yaml build webapp + @echo "Using environment files: $(or $(ENV_FILES_DEMO),.env.demo .env)" + ENV_FILE=../.env.demo docker compose -f docker/compose.yaml \ + $(call env_file_flags,$(or $(ENV_FILES_DEMO),.env.demo .env)) \ + build webapp -webapp-demo-run: ## Webapp: Public Demo Environment - Run +webapp-demo-run: ## Webapp: Public Demo Environment - Run. Usage: make webapp-demo-run [ENV_FILES=".env.demo .env"] @echo "Bringing up the webapp and connecting to the demo database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. 
Please install Docker first."; \ exit 1; \ fi - ENV_FILE=../.env.demo docker compose -f docker/compose.yaml --env-file .env.demo --env-file .env up webapp + @echo "Using environment files: $(or $(ENV_FILES_DEMO),.env.demo .env)" + docker compose -f docker/compose.yaml \ + $(call env_file_flags,$(or $(ENV_FILES_DEMO),.env.demo .env)) \ + up webapp -webapp-demo-check: ## Webapp: Public Demo Environment - Check Config +webapp-demo-check: ## Webapp: Public Demo Environment - Check Config. Usage: make webapp-demo-check [ENV_FILES=".env.demo .env"] @echo "Printing the webapp configuration - this is useful to see if your environment variables are set correctly." - ENV_FILE=../.env.demo docker compose -f docker/compose.yaml config webapp + @echo "Using environment files: $(or $(ENV_FILES_DEMO),.env.demo .env)" + docker compose -f docker/compose.yaml \ + $(call env_file_flags,$(or $(ENV_FILES_DEMO),.env.demo .env)) \ + config webapp -webapp-localhost-prod-build: ## Webapp: Localhost Environment - Build (Production Build) +webapp-prod-build: ## Webapp: Localhost Environment - Build (Production Build). Usage: make webapp-prod-build [ENV_FILES=".env.localhost .env"] @echo "Building the webapp for connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. 
Please install Docker first."; \ @@ -66,23 +86,28 @@ webapp-localhost-prod-build: ## Webapp: Localhost Environment - Build (Productio @if [ "$(CUSTOM_CA_BUNDLE)" != ".nocustomca" ]; then \ echo "Using custom CA bundle: $(CUSTOM_CA_BUNDLE)"; \ fi - CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) ENV_FILE=../.env.localhost \ - docker compose -f docker/compose.yaml build webapp db-init postgres + @echo "Using environment files: $(ENV_FILES)" + CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) \ + docker compose -f docker/compose.yaml \ + $(call env_file_flags,$(ENV_FILES)) \ + build --no-cache webapp db-init postgres -webapp-localhost-prod-run: ## Webapp: Localhost Environment - Run (Production Run) +webapp-prod-run: ## Webapp: Localhost Environment - Run (Production Run) @echo "Bringing up the webapp and connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. Please install Docker first."; \ exit 1; \ fi - docker compose -f docker/compose.yaml --env-file .env.localhost --env-file .env up webapp db-init postgres + docker compose -f docker/compose.yaml \ + $(call env_file_flags,$(ENV_FILES)) \ + up webapp db-init postgres install-nodejs: # Install Node.js for Webapp curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # Need to source NVM in the same shell . ${HOME}/.nvm/nvm.sh && nvm install 22 -webapp-localhost-install: ## Webapp: Localhost Environment - Install Dependencies (Development Build) +webapp-install: ## Webapp: Localhost Environment - Install Dependencies (Development Build) @echo "Installing the webapp dependencies for development in the localhost environment..." # check if npm exists if ! 
which npm > /dev/null 2>&1; then \ @@ -92,7 +117,7 @@ webapp-localhost-install: ## Webapp: Localhost Environment - Install Dependencie cd apps/webapp && \ npm install -webapp-localhost-dev-build: ## Webapp: Localhost Environment - Run (Development Build) +webapp-dev-build: ## Webapp: Localhost Environment - Run (Development Build). Usage: make webapp-dev-build [ENV_FILES=".env.localhost .env"] @echo "Bringing up the webapp for development and connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. Please install Docker first."; \ @@ -101,15 +126,15 @@ webapp-localhost-dev-build: ## Webapp: Localhost Environment - Run (Development @if [ "$(CUSTOM_CA_BUNDLE)" != ".nocustomca" ]; then \ echo "Using custom CA bundle: $(CUSTOM_CA_BUNDLE)"; \ fi + @echo "Using environment files: $(ENV_FILES)" CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) \ - ENV_FILE=../.env.localhost docker compose \ + docker compose \ -f docker/compose.yaml \ -f docker/compose.webapp.dev.yaml \ - --env-file .env.localhost \ - --env-file .env \ + $(call env_file_flags,$(ENV_FILES)) \ build webapp db-init postgres -webapp-localhost-dev-run: ## Webapp: Localhost Environment - Run (Development Run) +webapp-dev-run: ## Webapp: Localhost Environment - Run (Development Run) @echo "Bringing up the webapp for development and connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. Please install Docker first."; \ @@ -122,7 +147,7 @@ webapp-localhost-dev-run: ## Webapp: Localhost Environment - Run (Development Ru --env-file .env \ up webapp db-init postgres -webapp-localhost-test: ## Webapp: Localhost Environment - Run (Playwright) +webapp-test: ## Webapp: Localhost Environment - Run (Playwright) @echo "Bringing up the webapp for development and connecting to the localhost database..." @if ! which docker > /dev/null 2>&1; then \ echo "Error: Docker is not installed. 
Please install Docker first."; \ @@ -135,14 +160,30 @@ webapp-localhost-test: ## Webapp: Localhost Environment - Run (Playwright) --env-file .env \ up webapp db-init postgres -inference-localhost-install: ## Inference: Localhost Environment - Install Dependencies (Development Build) +db-run: ## Database: Localhost Environment - Run + @echo "Bringing up the database..." + @if ! which docker > /dev/null 2>&1; then \ + echo "Error: Docker is not installed. Please install Docker first."; \ + exit 1; \ + fi + docker compose -f docker/compose.yaml --env-file .env.localhost --env-file .env up db-init postgres -d + +db-down: ## Database: Localhost Environment - Down + @echo "Bringing down the database..." + @if ! which docker > /dev/null 2>&1; then \ + echo "Error: Docker is not installed. Please install Docker first."; \ + exit 1; \ + fi + docker compose -f docker/compose.yaml --env-file .env.localhost --env-file .env down postgres + +inference-install: ## Inference: Localhost Environment - Install Dependencies (Development Build) @echo "Installing the inference dependencies for development in the localhost environment..." cd apps/inference && \ poetry remove neuronpedia-inference-client || true && \ poetry add ../../packages/python/neuronpedia-inference-client && \ poetry lock && poetry install -inference-localhost-build: ## Inference: Localhost Environment - Build +inference-build: ## Inference: Localhost Environment - Build @echo "Building the inference server for the localhost environment..." CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) \ ENV_FILE=../.env.localhost \ @@ -152,10 +193,10 @@ inference-localhost-build: ## Inference: Localhost Environment - Build $(if $(USE_LOCAL_HF_CACHE),-f docker/compose.hf-cache.yaml,) \ build inference -inference-localhost-build-gpu: ## Inference: Localhost Environment - Build (CUDA). 
Usage: make inference-localhost-build-gpu [USE_LOCAL_HF_CACHE=1] - $(MAKE) inference-localhost-build BUILD_TYPE=cuda CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) +inference-build-gpu: ## Inference: Localhost Environment - Build (CUDA). Usage: make inference-build-gpu [USE_LOCAL_HF_CACHE=1] + $(MAKE) inference-build BUILD_TYPE=cuda CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) -inference-localhost-dev: ## Inference: Localhost Environment - Run (Development Build). Usage: make inference-localhost-dev [MODEL_SOURCESET=gpt2-small.res-jb] [AUTORELOAD=1] +inference-dev: ## Inference: Localhost Environment - Run (Development Build). Usage: make inference-dev [MODEL_SOURCESET=gpt2-small.res-jb] [AUTORELOAD=1] @echo "Bringing up the inference server for development in the localhost environment..." @if [ "$(MODEL_SOURCESET)" != "" ]; then \ if [ ! -f ".env.inference.$(MODEL_SOURCESET)" ]; then \ @@ -176,13 +217,13 @@ inference-localhost-dev: ## Inference: Localhost Environment - Run (Development --env-file .env \ up inference; \ else \ - echo "Error: MODEL_SOURCESET not specified. Please specify a model+source configuration, e.g. to load .env.inference.gpt2-small.res-jb, run: make inference-localhost-dev MODEL_SOURCESET=gpt2-small.res-jb"; \ + echo "Error: MODEL_SOURCESET not specified. Please specify a model+source configuration, e.g. to load .env.inference.gpt2-small.res-jb, run: make inference-dev MODEL_SOURCESET=gpt2-small.res-jb"; \ echo "Please run 'make inference-list-configs' to see available configurations."; \ exit 1; \ fi -inference-localhost-dev-gpu: ## Inference: Localhost Environment - Run (Development Build with CUDA). Usage: make inference-localhost-dev-gpu [MODEL_SOURCESET=gpt2-small.res-jb] [AUTORELOAD=1] [USE_LOCAL_HF_CACHE=1] - $(MAKE) inference-localhost-dev ENABLE_GPU=1 MODEL_SOURCESET=$(MODEL_SOURCESET) AUTORELOAD=$(AUTORELOAD) +inference-dev-gpu: ## Inference: Localhost Environment - Run (Development Build with CUDA). 
Usage: make inference-dev-gpu [MODEL_SOURCESET=gpt2-small.res-jb] [AUTORELOAD=1] [USE_LOCAL_HF_CACHE=1] + $(MAKE) inference-dev ENABLE_GPU=1 MODEL_SOURCESET=$(MODEL_SOURCESET) AUTORELOAD=$(AUTORELOAD) inference-list-configs: ## Inference: List Configurations (possible values for MODEL_SOURCESET) @echo "\nAvailable Inference Configurations (.env.inference.*)\n================================================\n" @@ -193,20 +234,24 @@ inference-list-configs: ## Inference: List Configurations (possible values for M sae_sets=$$(grep "^SAE_SETS=" $$config | cut -d'=' -f2); \ echo " Model: \033[33m$$model_id\033[0m"; \ echo " Source/SAE Sets: \033[32m$$sae_sets\033[0m"; \ - echo " \033[1;35mmake inference-localhost-dev MODEL_SOURCESET=$$name\033[0m"; \ - echo " \033[1;35mmake inference-localhost-dev-gpu MODEL_SOURCESET=$$name\033[0m"; \ + echo " \033[1;35mmake inference-dev MODEL_SOURCESET=$$name\033[0m"; \ + echo " \033[1;35mmake inference-dev-gpu MODEL_SOURCESET=$$name\033[0m"; \ echo ""; \ done -autointerp-localhost-install: ## Autointerp: Localhost Environment - Install Dependencies (Development Build) +autointerp-install: ## Autointerp: Localhost Environment - Install Dependencies (Development Build) @echo "Installing the autointerp dependencies for development in the localhost environment..." cd apps/autointerp && \ poetry remove neuronpedia-autointerp-client || true && \ poetry add ../../packages/python/neuronpedia-autointerp-client && \ poetry lock && poetry install -autointerp-localhost-build: ## Autointerp: Localhost Environment - Build +autointerp-build: ## Autointerp: Localhost Environment - Build @echo "Building the autointerp server for the localhost environment..." 
+ @if [ "$(CUSTOM_CA_BUNDLE)" != ".nocustomca" ]; then \ + echo "Using custom CA bundle: $(CUSTOM_CA_BUNDLE)"; \ + fi + CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) \ ENV_FILE=../.env.localhost \ BUILD_TYPE=$(BUILD_TYPE) \ docker compose \ @@ -214,10 +259,10 @@ autointerp-localhost-build: ## Autointerp: Localhost Environment - Build $(if $(USE_LOCAL_HF_CACHE),-f docker/compose.hf-cache.yaml,) \ build autointerp -autointerp-localhost-build-gpu: ## Autointerp: Localhost Environment - Build (CUDA). Usage: make autointerp-localhost-build-gpu [USE_LOCAL_HF_CACHE=1] - $(MAKE) autointerp-localhost-build BUILD_TYPE=cuda +autointerp-build-gpu: ## Autointerp: Localhost Environment - Build (CUDA). Usage: make autointerp-build-gpu [USE_LOCAL_HF_CACHE=1] + $(MAKE) autointerp-build BUILD_TYPE=cuda CUSTOM_CA_BUNDLE=$(CUSTOM_CA_BUNDLE) -autointerp-localhost-dev: ## Autointerp: Localhost Environment - Run (Development Build). Usage: make autointerp-localhost-dev [AUTORELOAD=1] +autointerp-dev: ## Autointerp: Localhost Environment - Run (Development Build). Usage: make autointerp-dev [AUTORELOAD=1] @echo "Bringing up the autointerp server for development in the localhost environment..." RELOAD=$$([ "$(AUTORELOAD)" = "1" ] && echo "1" || echo "0") \ ENV_FILE=../.env.localhost \ @@ -230,8 +275,8 @@ autointerp-localhost-dev: ## Autointerp: Localhost Environment - Run (Developmen --env-file .env \ up autointerp -autointerp-localhost-dev-gpu: ## Autointerp: Localhost Environment - Run (Development Build with CUDA). Usage: make autointerp-localhost-dev-gpu [AUTORELOAD=1] [USE_LOCAL_HF_CACHE=1] - $(MAKE) autointerp-localhost-dev ENABLE_GPU=1 AUTORELOAD=$(AUTORELOAD) +autointerp-dev-gpu: ## Autointerp: Localhost Environment - Run (Development Build with CUDA). Usage: make autointerp-dev-gpu [AUTORELOAD=1] [USE_LOCAL_HF_CACHE=1] + $(MAKE) autointerp-dev ENABLE_GPU=1 AUTORELOAD=$(AUTORELOAD) reset-docker-data: ## Reset Docker Data - this deletes your local database! 
@echo "WARNING: This will delete all your local neuronpedia Docker data and databases!" @@ -243,13 +288,13 @@ reset-docker-data: ## Reset Docker Data - this deletes your local database! @echo "Resetting Docker data..." ENV_FILE=../.env.localhost docker compose -f docker/compose.yaml down -v -graph-localhost-install: ## Graph: Localhost Environment - Install Dependencies (Development Build) +graph-install: ## Graph: Localhost Environment - Install Dependencies (Development Build) @echo "Installing the graph server dependencies for development in the localhost environment..." cd apps/graph && \ poetry lock && poetry install -graph-localhost-build: ## Graph: Localhost Environment - Build +graph-build: ## Graph: Localhost Environment - Build @echo "Building the graph server for the localhost environment..." ENV_FILE=.env.localhost \ BUILD_TYPE=$(BUILD_TYPE) \ @@ -258,10 +303,10 @@ graph-localhost-build: ## Graph: Localhost Environment - Build $(if $(USE_LOCAL_HF_CACHE),-f docker/compose.hf-cache.yaml,) \ build graph -graph-localhost-build-gpu: ## Graph: Localhost Environment - Build (CUDA). Usage: make graph-localhost-build-gpu [USE_LOCAL_HF_CACHE=1] - $(MAKE) graph-localhost-build BUILD_TYPE=cuda +graph-build-gpu: ## Graph: Localhost Environment - Build (CUDA). Usage: make graph-build-gpu [USE_LOCAL_HF_CACHE=1] + $(MAKE) graph-build BUILD_TYPE=cuda -graph-localhost-dev: ## Graph: Localhost Environment - Run (Development Build). Usage: make graph-localhost-dev [AUTORELOAD=1] +graph-dev: ## Graph: Localhost Environment - Run (Development Build). Usage: make graph-dev [AUTORELOAD=1] @echo "Bringing up the graph server for development in the localhost environment..." RELOAD=$$([ "$(AUTORELOAD)" = "1" ] && echo "1" || echo "0") \ ENV_FILE=.env.localhost \ @@ -275,5 +320,5 @@ graph-localhost-dev: ## Graph: Localhost Environment - Run (Development Build). 
--env-file apps/graph/.env \ up graph -graph-localhost-dev-gpu: ## Graph: Localhost Environment - Run (Development Build with CUDA). Usage: make graph-localhost-dev-gpu [AUTORELOAD=1] [USE_LOCAL_HF_CACHE=1] - $(MAKE) graph-localhost-dev ENABLE_GPU=1 AUTORELOAD=$(AUTORELOAD) \ No newline at end of file +graph-dev-gpu: ## Graph: Localhost Environment - Run (Development Build with CUDA). Usage: make graph-dev-gpu [AUTORELOAD=1] [USE_LOCAL_HF_CACHE=1] + $(MAKE) graph-dev ENABLE_GPU=1 AUTORELOAD=$(AUTORELOAD) \ No newline at end of file From 2bf4c1e3cdd2fbf3ce1a13309ae043c264884679 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:24:35 -0500 Subject: [PATCH 05/12] feat: clean up .env.localhost --- .env.localhost | 88 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/.env.localhost b/.env.localhost index 458f13e48..0cb2911f2 100644 --- a/.env.localhost +++ b/.env.localhost @@ -1,35 +1,81 @@ -POSTGRES_PRISMA_URL="postgres://postgres:postgres@postgres:5432/postgres?pgbouncer=true&connect_timeout=15" -POSTGRES_URL_NON_POOLING="postgres://postgres:postgres@postgres:5432/postgres" -INFERENCE_SERVER_SECRET=localhost-secret -AUTOINTERP_SERVER_SECRET=localhost-secret -GRAPH_SERVER_SECRET=localhost-secret -USE_LOCALHOST_INFERENCE=true -USE_LOCALHOST_AUTOINTERP=false -OPENAI_API_KEY=${OPENAI_API_KEY} # this is required for explanation search to work! 
-HOSTNAME=0.0.0.0 -PORT=3000 +# ========================== Application Settings ========================== + +# Domain and Server Configuration NEXT_PUBLIC_URL=http://localhost:3000 NEXTAUTH_URL=http://localhost:3000 +HOSTNAME=0.0.0.0 +PORT=3000 +IS_DOCKER_COMPOSE=true + +# Authentication NEXTAUTH_SECRET=88888888888888888888888888888888 + +# Contact Information +NEXT_PUBLIC_CONTACT_EMAIL_ADDRESS=johnny@neuronpedia.org + +# ========================== Database Configuration ========================== + +# Postgres Connection Strings +POSTGRES_PRISMA_URL="postgres://postgres:postgres@postgres:5432/postgres?pgbouncer=true&connect_timeout=15" +POSTGRES_URL_NON_POOLING="postgres://postgres:postgres@postgres:5432/postgres" + +# Postgres Credentials +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres +POSTGRES_DB=postgres + +# ========================== Feature Flags ========================== + NEXT_PUBLIC_ENABLE_SIGNIN=false +NEXT_PUBLIC_DEMO_MODE=false + +# ========================== Default Model Configuration ========================== + +# Model Defaults +NEXT_PUBLIC_DEFAULT_MODELID=gemma-2-2b +NEXT_PUBLIC_DEFAULT_SOURCESET=gemmascope-res-16k +NEXT_PUBLIC_DEFAULT_SOURCE=20-gemmascope-res-16k +NEXT_PUBLIC_DEFAULT_RELEASE_NAME=gemma-scope + +# Steering Configuration +NEXT_PUBLIC_DEFAULT_STEER_MODEL=gemma-2-2b-it +NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS=gemma-2-2b-it + +# ========================== Default User IDs ========================== + DEFAULT_CREATOR_USER_ID=clkht01d40000jv08hvalcvly PUBLIC_ACTIVATIONS_USER_IDS=clsxqq2xd0000vvp2k5itlhqj,clkht01d40000jv08hvalcvly,cljqfoqm1000776wmbr1f5mux,cljj57d3c000076ei38vwnv35 INFERENCE_ACTIVATION_USER_ID=cljgamm90000076zdchicy6zj -NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS=gemma-2-2b-it -IS_DOCKER_COMPOSE=true +# ========================== External Services Configuration ========================== -# NEXT_PUBLIC_DEFAULT_STEER_MODEL=gemma-2-2b-it -# NEXT_PUBLIC_DEFAULT_MODELID=gemma-2-2b -# 
NEXT_PUBLIC_DEFAULT_SOURCESET=gemmascope-res-16k -# NEXT_PUBLIC_DEFAULT_SOURCE=20-gemmascope-res-16k -# NEXT_PUBLIC_DEFAULT_RELEASE_NAME=gemma-scope -NEXT_PUBLIC_DEMO_MODE=false +# Inference Server +USE_LOCALHOST_INFERENCE=true +INFERENCE_SERVER_SECRET=localhost-secret +# Autointerp Server +USE_LOCALHOST_AUTOINTERP=false +AUTOINTERP_SERVER_SECRET=localhost-secret -POSTGRES_USER=postgres -POSTGRES_PASSWORD=postgres -POSTGRES_DB=postgres +# Graph Server +GRAPH_SERVER_SECRET=localhost-secret + +# ========================== AI API Keys ========================== +# NOTE: Sensitive API keys are defined in .env (gitignored) and referenced here +# To set your keys, edit .env file in the root directory + +# Hugging Face (defined in .env) +HF_TOKEN=${HF_TOKEN} +# OpenAI (required for explanation search, defined in .env) +OPENAI_API_KEY=${OPENAI_API_KEY} +# Azure OpenAI (for embeddings, defined in .env) +AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY} +AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT} +EMBEDDING_PROVIDER=azure +# Optional API Keys (define in .env to use) +# ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} +# GEMINI_API_KEY=${GEMINI_API_KEY} +# OPENROUTER_API_KEY=${OPENROUTER_API_KEY} From 4b1391790dbdd3822c5bd1af02ed61c757a00852 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:25:24 -0500 Subject: [PATCH 06/12] feat(autointerp): add custom ca bundle support to Dockerfile --- apps/autointerp/Dockerfile | 89 ++++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 8 deletions(-) diff --git a/apps/autointerp/Dockerfile b/apps/autointerp/Dockerfile index d9baec929..9c405ccc2 100644 --- a/apps/autointerp/Dockerfile +++ b/apps/autointerp/Dockerfile @@ -2,20 +2,76 @@ ARG BUILD_TYPE ARG CUDA_VERSION=12.1.0 ARG UBUNTU_VERSION=22.04 +# Optional custom CA bundle file support +ARG CUSTOM_CA_BUNDLE + # NON-CUDA base FROM python:3.10-slim AS base-nocuda +# Re-declare ARG after FROM (ARGs don't persist across 
FROM statements) +ARG CUSTOM_CA_BUNDLE + +# Copy the CA bundle file if provided, otherwise copy nothing (using .nocustomca as a no-op) +COPY ${CUSTOM_CA_BUNDLE:-.nocustomca} /tmp/ca-bundle-temp + +# Set up CA certificates and environment if bundle was provided +RUN if [ -f /tmp/ca-bundle-temp ] && [ "${CUSTOM_CA_BUNDLE}" != ".nocustomca" ]; then \ + apt-get update && apt-get install -y ca-certificates && \ + mkdir -p /usr/local/share/ca-certificates && \ + mv /tmp/ca-bundle-temp /usr/local/share/ca-certificates/custom-ca.crt && \ + cat /usr/local/share/ca-certificates/custom-ca.crt >> /etc/ssl/certs/ca-certificates.crt && \ + update-ca-certificates && \ + rm -rf /var/lib/apt/lists/*; \ + else \ + rm -f /tmp/ca-bundle-temp; \ + fi + +# Set SSL environment variables if CA bundle was provided +ENV SSL_CERT_FILE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV SSL_CERT_DIR=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs} +ENV REQUESTS_CA_BUNDLE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV CURL_CA_BUNDLE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV GIT_SSL_CAINFO=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} + # CUDA base FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS base-cuda -# Nvidia container toolkit -RUN apt-get update && apt-get install -y \ - curl gpg -RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ - && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ - sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ - tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + +# Re-declare ARG after FROM (ARGs don't persist across FROM statements) +ARG CUSTOM_CA_BUNDLE + +# Copy the CA bundle file if provided, otherwise copy nothing (using .nocustomca as a no-op) +COPY ${CUSTOM_CA_BUNDLE:-.nocustomca} 
/tmp/ca-bundle-temp + +# Install dependencies and set up CA certificates, then download NVIDIA toolkit +RUN apt-get update && apt-get install -y curl gpg ca-certificates && \ + if [ -f /tmp/ca-bundle-temp ] && [ "${CUSTOM_CA_BUNDLE}" != ".nocustomca" ]; then \ + mkdir -p /usr/local/share/ca-certificates && \ + mv /tmp/ca-bundle-temp /usr/local/share/ca-certificates/custom-ca.crt && \ + cat /usr/local/share/ca-certificates/custom-ca.crt >> /etc/ssl/certs/ca-certificates.crt && \ + update-ca-certificates && \ + export CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt && \ + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \ + curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + tee /etc/apt/sources.list.d/nvidia-container-toolkit.list; \ + else \ + rm -f /tmp/ca-bundle-temp && \ + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \ + curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + tee /etc/apt/sources.list.d/nvidia-container-toolkit.list; \ + fi && \ + rm -rf /var/lib/apt/lists/* + +# Set SSL environment variables if CA bundle was provided +ENV SSL_CERT_FILE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV SSL_CERT_DIR=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs} +ENV REQUESTS_CA_BUNDLE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV CURL_CA_BUNDLE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV GIT_SSL_CAINFO=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} RUN apt-get update && apt-get install -y \ - nvidia-container-toolkit + 
nvidia-container-toolkit \ + && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y \ python3.10 \ python3-pip \ @@ -31,6 +87,9 @@ WORKDIR /app ENV HOST=0.0.0.0 +# Optional custom CA bundle file support (re-declare for final stage) +ARG CUSTOM_CA_BUNDLE + # Ignore hash sum mismatch for apt-get RUN echo "Acquire::http::Pipeline-Depth 0;" > /etc/apt/apt.conf.d/99custom && \ echo "Acquire::http::No-Cache true;" >> /etc/apt/apt.conf.d/99custom && \ @@ -42,6 +101,7 @@ RUN apt-get update && apt-get install -y \ gcc \ g++ \ make \ + ca-certificates \ && rm -rf /var/lib/apt/lists/* # Install poetry @@ -50,6 +110,19 @@ RUN pip install poetry ENV POETRY_VIRTUALENVS_CREATE=false RUN poetry config virtualenvs.create false +# Set SSL environment variables if CA bundle was provided (for final stage) +ENV SSL_CERT_FILE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV SSL_CERT_DIR=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs} +ENV REQUESTS_CA_BUNDLE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV CURL_CA_BUNDLE=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} +ENV GIT_SSL_CAINFO=${CUSTOM_CA_BUNDLE:+/etc/ssl/certs/ca-certificates.crt} + +# Configure poetry/pip to use custom CA if provided +RUN if [ -n "${CUSTOM_CA_BUNDLE}" ] && [ "${CUSTOM_CA_BUNDLE}" != ".nocustomca" ]; then \ + pip config set global.cert /etc/ssl/certs/ca-certificates.crt && \ + poetry config certificates.default.cert /etc/ssl/certs/ca-certificates.crt; \ + fi + # Copy the client package first COPY packages/python/neuronpedia-autointerp-client /app/packages/python/neuronpedia-autointerp-client/ From 88a3fb55f84352fab967872ef2c2765106929880 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:27:23 -0500 Subject: [PATCH 07/12] fix(autointerp): pin hf-xet to specific version to avoid network issues --- apps/autointerp/poetry.lock | 23 +++++++++++------------ apps/autointerp/pyproject.toml | 1 + 2 files changed, 12 
insertions(+), 12 deletions(-) diff --git a/apps/autointerp/poetry.lock b/apps/autointerp/poetry.lock index c843a2293..da626c67d 100644 --- a/apps/autointerp/poetry.lock +++ b/apps/autointerp/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "accelerate" @@ -1652,21 +1652,20 @@ files = [ [[package]] name = "hf-xet" -version = "1.1.4" +version = "1.1.3" description = "Fast transfer of large files with the Hugging Face Hub." optional = false python-versions = ">=3.8" groups = ["main"] -markers = "(platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\") and python_version <= \"3.12\"" files = [ - {file = "hf_xet-1.1.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6591ab9f61ea82d261107ed90237e2ece972f6a7577d96f5f071208bbf255d1c"}, - {file = "hf_xet-1.1.4-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:071b0b4d4698990f746edd666c7cc42555833d22035d88db0df936677fb57d29"}, - {file = "hf_xet-1.1.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5b610831e92e41182d4c028653978b844d332d492cdcba1b920d3aca4a0207e"}, - {file = "hf_xet-1.1.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f6578bcd71393abfd60395279cc160ca808b61f5f9d535b922fcdcd3f77a708d"}, - {file = "hf_xet-1.1.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fb2bbfa2aae0e4f0baca988e7ba8d8c1a39a25adf5317461eb7069ad00505b3e"}, - {file = "hf_xet-1.1.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:73346ba3e2e15ea8909a26b0862b458f15b003e6277935e3fba5bf273508d698"}, - {file = "hf_xet-1.1.4-cp37-abi3-win_amd64.whl", hash = "sha256:52e8f8bc2029d8b911493f43cea131ac3fa1f0dc6a13c50b593c4516f02c6fc3"}, - {file = "hf_xet-1.1.4.tar.gz", hash = "sha256:875158df90cb13547752532ed73cad9dfaad3b29e203143838f67178418d08a4"}, + {file 
= "hf_xet-1.1.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c3b508b5f583a75641aebf732853deb058953370ce8184f5dabc49f803b0819b"}, + {file = "hf_xet-1.1.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b788a61977fbe6b5186e66239e2a329a3f0b7e7ff50dad38984c0c74f44aeca1"}, + {file = "hf_xet-1.1.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd2da210856444a34aad8ada2fc12f70dabed7cc20f37e90754d1d9b43bc0534"}, + {file = "hf_xet-1.1.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8203f52827e3df65981984936654a5b390566336956f65765a8aa58c362bb841"}, + {file = "hf_xet-1.1.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:30c575a5306f8e6fda37edb866762140a435037365eba7a17ce7bd0bc0216a8b"}, + {file = "hf_xet-1.1.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7c1a6aa6abed1f696f8099aa9796ca04c9ee778a58728a115607de9cc4638ff1"}, + {file = "hf_xet-1.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:b578ae5ac9c056296bb0df9d018e597c8dc6390c5266f35b5c44696003cde9f3"}, + {file = "hf_xet-1.1.3.tar.gz", hash = "sha256:a5f09b1dd24e6ff6bcedb4b0ddab2d81824098bb002cf8b4ffa780545fa348c3"}, ] [package.extras] @@ -7567,4 +7566,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "e6e4cfe213d8d29543d444a457cf9d8db6d42722dc09831e634f5ede3a74c417" +content-hash = "222726fe8b4dbf20142e179bf81ffed0df9cc410380914dffe6262f9f2efe58f" diff --git a/apps/autointerp/pyproject.toml b/apps/autointerp/pyproject.toml index 384cd4333..378b77a20 100644 --- a/apps/autointerp/pyproject.toml +++ b/apps/autointerp/pyproject.toml @@ -20,6 +20,7 @@ bitsandbytes = "==0.42.0" torchtyping = "^0.1.5" pytest = "^8.1.1" neuronpedia-autointerp-client = {path = "../../packages/python/neuronpedia-autointerp-client"} +hf-xet = "1.1.3" [tool.poetry.group.dev.dependencies] pytest = "^8.3.1" From 6f41fe1605d4ca81dc689b13dfe26517ffde1220 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: 
Wed, 3 Dec 2025 11:28:35 -0500 Subject: [PATCH 08/12] feat(webapp): improve env var handling in docker + overhaul db-init --- apps/webapp/Dockerfile | 54 +++++++++++++++++++++++++++++++----------- apps/webapp/db-init.sh | 45 ++++++++++++++++++++++++++++++----- apps/webapp/init.sh | 6 ++--- docker/compose.yaml | 26 ++++++++++++++++---- 4 files changed, 103 insertions(+), 28 deletions(-) diff --git a/apps/webapp/Dockerfile b/apps/webapp/Dockerfile index 8f98cfb42..dc27a5a23 100644 --- a/apps/webapp/Dockerfile +++ b/apps/webapp/Dockerfile @@ -35,6 +35,38 @@ RUN if [ -n "${CUSTOM_CA_BUNDLE}" ] && [ "${CUSTOM_CA_BUNDLE}" != ".nocustomca" # Install OpenSSL and bash RUN apk add --no-cache openssl bash +# Build-time environment variables (only NEXT_PUBLIC_* are inlined into JS bundle) +# Reference: https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables +# For local/Docker Compose: Uses env_file at runtime (simpler) +# For K8s/production: Pass these as build args in CI, inject others via ConfigMaps/Secrets at runtime +ARG NEXT_PUBLIC_URL +ARG NEXT_PUBLIC_CONTACT_EMAIL_ADDRESS +ARG NEXT_PUBLIC_DEFAULT_MODELID +ARG NEXT_PUBLIC_DEFAULT_SOURCESET +ARG NEXT_PUBLIC_DEFAULT_SOURCE +ARG NEXT_PUBLIC_DEFAULT_RELEASE_NAME +ARG NEXT_PUBLIC_DEFAULT_STEER_MODEL +ARG NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS +ARG NEXT_PUBLIC_ENABLE_SIGNIN +ARG NEXT_PUBLIC_DEMO_MODE +ARG NEXT_PUBLIC_SEARCH_TOPK_MAX_CHAR_LENGTH +ARG NEXT_PUBLIC_SITE_NAME_VERCEL_DEPLOY + +# Convert ARGs to ENVs so they're available in child stages during build +# Note: All other (non-NEXT_PUBLIC_*) variables should be injected at runtime +ENV NEXT_PUBLIC_URL=${NEXT_PUBLIC_URL} \ + NEXT_PUBLIC_CONTACT_EMAIL_ADDRESS=${NEXT_PUBLIC_CONTACT_EMAIL_ADDRESS} \ + NEXT_PUBLIC_DEFAULT_MODELID=${NEXT_PUBLIC_DEFAULT_MODELID} \ + NEXT_PUBLIC_DEFAULT_SOURCESET=${NEXT_PUBLIC_DEFAULT_SOURCESET} \ + NEXT_PUBLIC_DEFAULT_SOURCE=${NEXT_PUBLIC_DEFAULT_SOURCE} \ + 
NEXT_PUBLIC_DEFAULT_RELEASE_NAME=${NEXT_PUBLIC_DEFAULT_RELEASE_NAME} \ + NEXT_PUBLIC_DEFAULT_STEER_MODEL=${NEXT_PUBLIC_DEFAULT_STEER_MODEL} \ + NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS=${NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS} \ + NEXT_PUBLIC_ENABLE_SIGNIN=${NEXT_PUBLIC_ENABLE_SIGNIN} \ + NEXT_PUBLIC_DEMO_MODE=${NEXT_PUBLIC_DEMO_MODE} \ + NEXT_PUBLIC_SEARCH_TOPK_MAX_CHAR_LENGTH=${NEXT_PUBLIC_SEARCH_TOPK_MAX_CHAR_LENGTH} \ + NEXT_PUBLIC_SITE_NAME_VERCEL_DEPLOY=${NEXT_PUBLIC_SITE_NAME_VERCEL_DEPLOY} + ############################################################################################### # Install dependencies only when needed FROM base AS deps @@ -56,12 +88,9 @@ COPY apps/webapp ./ # Ensure startup script is executable RUN chmod +x ./init.sh -# Load environment variables from dotenv file -ARG ENV_FILE=.env.localhost -COPY ${ENV_FILE} .env - # Build without database operations - only generate Prisma client and build Next.js -RUN bash -c 'set -a && source .env && set +a && npm run build:simple' +# Environment variables are inherited from base stage +RUN npm run build:simple ############################################################################################### # Database initialization image (has access to ts-node and dev dependencies) @@ -75,16 +104,11 @@ COPY apps/webapp ./ # Install ts-node globally for seeding RUN npm install -g ts-node typescript -# Load environment variables from dotenv file -ARG ENV_FILE=.env.localhost -COPY ${ENV_FILE} .env - -# Generate Prisma client for db operations -RUN bash -c 'set -a && source .env && set +a && npx prisma generate' - # Make db-init script executable RUN chmod +x db-init.sh +# Note: Prisma client is already generated in builder stage (via npm run build:simple) +# This stage only runs db push, migrations, and seeding against live database CMD ["./db-init.sh"] ############################################################################################### @@ -108,8 +132,10 @@ COPY --from=builder 
--chown=nextjs:nodejs /app/prisma ./prisma # Copy startup script for runtime COPY --from=builder --chown=nextjs:nodejs /app/init.sh ./init.sh -# Copy environment file for runtime -COPY --from=builder --chown=nextjs:nodejs /app/.env ./.env +# NOTE: .env files are NOT copied to production image +# Environment variables are injected at runtime via: +# - Kubernetes: ConfigMaps/Secrets +# - Docker Compose: --env-file flag (e.g., docker compose --env-file .env.localhost up) USER nextjs diff --git a/apps/webapp/db-init.sh b/apps/webapp/db-init.sh index 56f358c5b..5c4bf9fc0 100644 --- a/apps/webapp/db-init.sh +++ b/apps/webapp/db-init.sh @@ -1,17 +1,50 @@ #!/bin/bash set -e # Exit on any error -set -a -source .env -set +a -echo "Running Prisma db push..." -./node_modules/.bin/prisma db push +echo "=== Environment Variables Debug ===" +echo "POSTGRES_PRISMA_URL: ${POSTGRES_PRISMA_URL:-NOT_SET}" +echo "POSTGRES_URL_NON_POOLING: ${POSTGRES_URL_NON_POOLING:-NOT_SET}" +echo "====================================" + +echo "Generating Prisma Client..." +./node_modules/.bin/prisma generate + +echo "Checking database migration status..." +# Check if _prisma_migrations table exists to determine if we need to baseline +if ! ./node_modules/.bin/prisma db execute --stdin <<< "SELECT 1 FROM _prisma_migrations LIMIT 1;" 2>/dev/null; then + echo "Migration history table missing - checking if database is empty..." + + # Check if any tables exist (excluding pg_* system tables) + TABLE_COUNT=$(./node_modules/.bin/prisma db execute --stdin <<< "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public' AND table_type = 'BASE TABLE';" 2>/dev/null | tail -n 1 | tr -d '[:space:]') + + if [ "$TABLE_COUNT" != "0" ] && [ -n "$TABLE_COUNT" ]; then + echo "ERROR: Database has $TABLE_COUNT tables but no migration history." + echo "This database was likely initialized with 'prisma db push' instead of migrations." + echo "" + echo "To fix this issue, you have two options:" + echo "1. 
Drop and recreate the database (recommended for dev/staging)" + echo "2. Baseline the existing database with: prisma migrate resolve --applied " + echo "" + echo "For a fresh start, run: kubectl exec -n superpod postgres-0 -- psql -U postgres -c 'DROP DATABASE IF EXISTS postgres; CREATE DATABASE postgres;'" + exit 1 + fi + + echo "Database is empty, proceeding with migrations..." +fi echo "Running database migrations..." ./node_modules/.bin/prisma migrate deploy echo "Running database seed..." -./node_modules/.bin/prisma db seed +# Allow seed to fail gracefully if already seeded (non-critical) +./node_modules/.bin/prisma db seed || { + echo "Seed failed or already seeded. Continuing..." + exit_code=$? + if [ $exit_code -ne 0 ] && [ $exit_code -ne 1 ]; then + echo "Seed command failed with unexpected error code: $exit_code" + exit $exit_code + fi +} echo "Database initialization completed successfully!" \ No newline at end of file diff --git a/apps/webapp/init.sh b/apps/webapp/init.sh index f092c6613..d2d515cf5 100755 --- a/apps/webapp/init.sh +++ b/apps/webapp/init.sh @@ -1,9 +1,7 @@ #!/bin/bash -set -a -source .env -set +a +# Environment variables are injected at runtime via docker compose --env-file or Kubernetes ConfigMaps/Secrets +# No .env file sourcing needed - all variables are already available in the container environment -# Start the Next.js application (database operations handled by db-init container) echo "Starting Next.js application..." 
node server.js \ No newline at end of file diff --git a/docker/compose.yaml b/docker/compose.yaml index ee98d9acb..0358ff85c 100644 --- a/docker/compose.yaml +++ b/docker/compose.yaml @@ -1,5 +1,23 @@ # compose.yaml name: neuronpedia + +# Extension fields (reusable configuration blocks) +x-shared-build-args: &shared-build-args + CUSTOM_CA_BUNDLE: ${CUSTOM_CA_BUNDLE:-.nocustomca} + # NEXT_PUBLIC_* variables are inlined into JS bundle at build time + NEXT_PUBLIC_URL: ${NEXT_PUBLIC_URL} + NEXT_PUBLIC_CONTACT_EMAIL_ADDRESS: ${NEXT_PUBLIC_CONTACT_EMAIL_ADDRESS} + NEXT_PUBLIC_DEFAULT_MODELID: ${NEXT_PUBLIC_DEFAULT_MODELID} + NEXT_PUBLIC_DEFAULT_SOURCESET: ${NEXT_PUBLIC_DEFAULT_SOURCESET} + NEXT_PUBLIC_DEFAULT_SOURCE: ${NEXT_PUBLIC_DEFAULT_SOURCE} + NEXT_PUBLIC_DEFAULT_RELEASE_NAME: ${NEXT_PUBLIC_DEFAULT_RELEASE_NAME} + NEXT_PUBLIC_DEFAULT_STEER_MODEL: ${NEXT_PUBLIC_DEFAULT_STEER_MODEL} + NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS: ${NEXT_PUBLIC_STEER_FORCE_ALLOW_INSTRUCT_MODELS} + NEXT_PUBLIC_ENABLE_SIGNIN: ${NEXT_PUBLIC_ENABLE_SIGNIN} + NEXT_PUBLIC_DEMO_MODE: ${NEXT_PUBLIC_DEMO_MODE} + NEXT_PUBLIC_SEARCH_TOPK_MAX_CHAR_LENGTH: ${NEXT_PUBLIC_SEARCH_TOPK_MAX_CHAR_LENGTH} + NEXT_PUBLIC_SITE_NAME_VERCEL_DEPLOY: ${NEXT_PUBLIC_SITE_NAME_VERCEL_DEPLOY} + services: webapp: image: neuronpedia-webapp:latest @@ -8,8 +26,7 @@ services: dockerfile: apps/webapp/Dockerfile target: runner args: - ENV_FILE: ${ENV_FILE:-.env.localhost} - CUSTOM_CA_BUNDLE: ${CUSTOM_CA_BUNDLE:-.nocustomca} + <<: *shared-build-args ports: - "3000:3000" env_file: @@ -25,9 +42,9 @@ services: dockerfile: apps/webapp/Dockerfile target: db-init args: - ENV_FILE: ${ENV_FILE:-.env.localhost} - CUSTOM_CA_BUNDLE: ${CUSTOM_CA_BUNDLE:-.nocustomca} + <<: *shared-build-args restart: "no" + # Environment variables injected via --env-file flag (see Makefile targets) environment: - POSTGRES_URL_NON_POOLING=${POSTGRES_URL_NON_POOLING} - POSTGRES_PRISMA_URL=${POSTGRES_PRISMA_URL} @@ -87,6 +104,7 @@ services: BUILD_TYPE: 
${BUILD_TYPE:-nocuda} CUDA_VERSION: "12.1.0" UBUNTU_VERSION: "22.04" + CUSTOM_CA_BUNDLE: ${CUSTOM_CA_BUNDLE:-.nocustomca} ports: - "5003:5003" env_file: From 1753128cc0b40494d20db55982c557042c731e6d Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:29:05 -0500 Subject: [PATCH 09/12] fix(webapp): make IS_ONE_CLICK_VERCEL_DEPLOY loader robust to empty string --- apps/webapp/lib/env.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/webapp/lib/env.ts b/apps/webapp/lib/env.ts index d5e4c29bd..01a7e1394 100644 --- a/apps/webapp/lib/env.ts +++ b/apps/webapp/lib/env.ts @@ -1,11 +1,11 @@ import { config } from 'dotenv'; import { z } from 'zod'; -// If it's not undefined, then it's a one click deploy. It doesn't matter what the value itself is. +// If it's not undefined and not an empty string, then it's a one click deploy. // Also, if it's one-click-deploy on Vercel, we always use the demo environment variables. 
export const SITE_NAME_VERCEL_DEPLOY = process.env.NEXT_PUBLIC_SITE_NAME_VERCEL_DEPLOY; -export const IS_ONE_CLICK_VERCEL_DEPLOY = SITE_NAME_VERCEL_DEPLOY !== undefined; -if (SITE_NAME_VERCEL_DEPLOY) { +export const IS_ONE_CLICK_VERCEL_DEPLOY = SITE_NAME_VERCEL_DEPLOY !== undefined && SITE_NAME_VERCEL_DEPLOY !== ''; +if (IS_ONE_CLICK_VERCEL_DEPLOY) { // @ts-ignore if (typeof EdgeRuntime !== 'string') { config({ path: '.env.demo', override: true }); From 7da4dc1c8b3079b6c9c0e3d15390796b1502a84e Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:32:04 -0500 Subject: [PATCH 10/12] build(k8s): lots of small improvements --- k8s/base/deployments/webapp.yaml | 48 ++++++++++++++++++- k8s/base/jobs/db-init.yaml | 48 ------------------- k8s/base/kustomization.yaml | 1 - .../corporate-ca-certs/deployment-patch.yaml | 13 +++++ .../corporate-ca-certs/job-patch.yaml | 18 ------- .../corporate-ca-certs/kustomization.yaml | 7 ++- .../inference-gpu-lite/patches/resources.yaml | 6 ++- .../patches/resources.yaml | 2 + 8 files changed, 71 insertions(+), 72 deletions(-) delete mode 100644 k8s/base/jobs/db-init.yaml create mode 100644 k8s/components/corporate-ca-certs/deployment-patch.yaml delete mode 100644 k8s/components/corporate-ca-certs/job-patch.yaml diff --git a/k8s/base/deployments/webapp.yaml b/k8s/base/deployments/webapp.yaml index 2b89b96a9..3005cf3dd 100644 --- a/k8s/base/deployments/webapp.yaml +++ b/k8s/base/deployments/webapp.yaml @@ -15,6 +15,48 @@ spec: labels: app: webapp spec: + initContainers: + - name: wait-for-postgres + image: alpine:3.14 + command: ['/bin/sh', '-c'] + args: + - | + apk add --no-cache netcat-openbsd + echo "Waiting for PostgreSQL to become ready..." + until nc -z postgres 5432; do + echo "PostgreSQL is not ready yet - waiting..." + sleep 2 + done + echo "PostgreSQL is ready!" 
+ - name: db-init + image: neuronpedia-db-init + imagePullPolicy: Always + resources: + requests: + cpu: "100m" + memory: "1Gi" + ephemeral-storage: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + ephemeral-storage: "4Gi" + env: + - name: NODE_EXTRA_CA_CERTS + value: /etc/ssl/certs/ca-certificates.crt + envFrom: + - secretRef: + name: db-credentials + # NOTE uncomment to mount custom CA certs + # volumeMounts: + # - name: etc-ssl-certs + # mountPath: /usr/local/share/ca-certificates + # readOnly: true + # NOTE uncomment to mount custom CA certs + # volumes: + # - name: etc-ssl-certs + # configMap: + # name: corporate-ca-certificates + # optional: false containers: - name: webapp image: neuronpedia-webapp @@ -31,15 +73,17 @@ spec: resources: limits: cpu: "1000m" - memory: "1Gi" + memory: "4Gi" requests: cpu: "500m" - memory: "512Mi" + memory: "2Gi" --- apiVersion: v1 kind: Service metadata: name: webapp + labels: + app: neuronpedia spec: selector: app: webapp diff --git a/k8s/base/jobs/db-init.yaml b/k8s/base/jobs/db-init.yaml deleted file mode 100644 index 5f760829a..000000000 --- a/k8s/base/jobs/db-init.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# k8s/base/jobs/db-init.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: db-init - # Don't include namespace here, as it's set in the overlay kustomization -spec: - backoffLimit: 4 - template: - spec: - # This container runs first and blocks until PostgreSQL is ready - initContainers: - - name: wait-for-postgres - image: alpine:3.14 - command: ['/bin/sh', '-c'] - args: - - | - apk add --no-cache netcat-openbsd - echo "Waiting for PostgreSQL to become ready..." - # Use fully qualified domain name - # TODO change the target address in the ``nc`` command below!!! The "neuronpedia" subdomain typically maps to the k8s namespace (i.e., namespace is assumed to be "neuronpedia") - until nc -z postgres.neuronpedia.svc.cluster.local 5432; do - echo "PostgreSQL is not ready yet - waiting..." 
- sleep 2 - done - echo "PostgreSQL is ready!" - - # Main container only runs after the initContainer succeeds - containers: - - name: db-init - image: neuronpedia-webapp - command: ["/bin/sh", "-c"] - args: ["npm run db:migrate:deploy && npm run db:seed"] - envFrom: - - secretRef: - name: db-credentials - # TODO test the new corporate-ca-certs patch component before removing - # volumeMounts: - # - name: etc-ssl-certs - # mountPath: /etc/ssl/certs - # readOnly: true - # TODO test the new corporate-ca-certs patch component before removing - # volumes: - # - name: etc-ssl-certs - # configMap: - # name: corporate-ca-certificates - # optional: false - restartPolicy: OnFailure \ No newline at end of file diff --git a/k8s/base/kustomization.yaml b/k8s/base/kustomization.yaml index 98ce68eef..95d6537d7 100644 --- a/k8s/base/kustomization.yaml +++ b/k8s/base/kustomization.yaml @@ -8,7 +8,6 @@ resources: - deployments/inference.yaml - deployments/autointerp.yaml - statefulsets/postgres.yaml - - jobs/db-init.yaml - networking/ingress.yaml - services/postgres.yaml diff --git a/k8s/components/corporate-ca-certs/deployment-patch.yaml b/k8s/components/corporate-ca-certs/deployment-patch.yaml new file mode 100644 index 000000000..93b99f3f3 --- /dev/null +++ b/k8s/components/corporate-ca-certs/deployment-patch.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: webapp +spec: + template: + spec: + volumes: + - name: etc-ssl-certs + configMap: + name: corporate-ca-certificates + optional: false diff --git a/k8s/components/corporate-ca-certs/job-patch.yaml b/k8s/components/corporate-ca-certs/job-patch.yaml deleted file mode 100644 index 5847e6dd3..000000000 --- a/k8s/components/corporate-ca-certs/job-patch.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: db-init -spec: - template: - spec: - containers: - - name: db-init - volumeMounts: - - name: corporate-ca-certs - mountPath: /usr/local/share/ca-certificates # 
TODO specify path to CA bundle here - readOnly: true - volumes: - - name: corporate-ca-certs - configMap: - name: corporate-ca-certificates - optional: false \ No newline at end of file diff --git a/k8s/components/corporate-ca-certs/kustomization.yaml b/k8s/components/corporate-ca-certs/kustomization.yaml index 4ebd63da2..79983b517 100644 --- a/k8s/components/corporate-ca-certs/kustomization.yaml +++ b/k8s/components/corporate-ca-certs/kustomization.yaml @@ -2,6 +2,11 @@ apiVersion: kustomize.config.k8s.io/v1alpha1 kind: Component +configMapGenerator: +- name: corporate-ca-certificates + files: + - ca-certificates.crt + patchesStrategicMerge: - statefulset-patch.yaml -- job-patch.yaml \ No newline at end of file +- deployment-patch.yaml \ No newline at end of file diff --git a/k8s/components/resources/inference-gpu-lite/patches/resources.yaml b/k8s/components/resources/inference-gpu-lite/patches/resources.yaml index 5b5dd1d99..695e89001 100644 --- a/k8s/components/resources/inference-gpu-lite/patches/resources.yaml +++ b/k8s/components/resources/inference-gpu-lite/patches/resources.yaml @@ -13,9 +13,11 @@ spec: requests: nvidia.com/gpu: 1 cpu: "7" - memory: "26Gi" + memory: "32Gi" + ephemeral-storage: "35Gi" limits: nvidia.com/gpu: 1 cpu: "8" - memory: "26Gi" + memory: "48Gi" + ephemeral-storage: "40Gi" --- \ No newline at end of file diff --git a/k8s/components/resources/inference-gpu-standard/patches/resources.yaml b/k8s/components/resources/inference-gpu-standard/patches/resources.yaml index 9824124ea..ea00dee4d 100644 --- a/k8s/components/resources/inference-gpu-standard/patches/resources.yaml +++ b/k8s/components/resources/inference-gpu-standard/patches/resources.yaml @@ -14,8 +14,10 @@ spec: nvidia.com/gpu: 1 cpu: "10" memory: "70Gi" + ephemeral-storage: "25Gi" limits: nvidia.com/gpu: 1 cpu: "12" memory: "80Gi" + ephemeral-storage: "30Gi" --- \ No newline at end of file From 46f683af2af83a11675a7a82d9ff80a66cb7ad01 Mon Sep 17 00:00:00 2001 From: Sean Sica 
<23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:33:15 -0500 Subject: [PATCH 11/12] build(k8s): increase resource alloc for postgres and disable istio injection --- k8s/base/statefulsets/postgres.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/k8s/base/statefulsets/postgres.yaml b/k8s/base/statefulsets/postgres.yaml index dddbcb560..c51ebb4c0 100644 --- a/k8s/base/statefulsets/postgres.yaml +++ b/k8s/base/statefulsets/postgres.yaml @@ -14,6 +14,8 @@ spec: metadata: labels: app: postgres + annotations: + sidecar.istio.io/inject: "false" spec: securityContext: fsGroup: 999 # postgres group ID @@ -46,11 +48,11 @@ spec: mountPath: /docker-entrypoint-initdb.d resources: limits: - cpu: "500m" - memory: "512Mi" + cpu: "2000m" + memory: "4Gi" requests: - cpu: "200m" - memory: "256Mi" + cpu: "500m" + memory: "2Gi" readinessProbe: exec: command: From eef23171a31e941ad2f960784f60179e20c1e57c Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:37:21 -0500 Subject: [PATCH 12/12] fix(webapp): overhaul db/import.ts - create singleton importPool for reuse across all imports - default to ssl false for internal postgres and add runtime ssl handling - reduce WORK_MEM from 2GB to 256MB - add 10 min timeout for queries - enhance logging Changes made as part of troubleshooting K8S deployment issue where model imports fail without reason --- apps/webapp/lib/db/import.ts | 95 ++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 21 deletions(-) diff --git a/apps/webapp/lib/db/import.ts b/apps/webapp/lib/db/import.ts index 51e6061ef..53f6c9d4a 100644 --- a/apps/webapp/lib/db/import.ts +++ b/apps/webapp/lib/db/import.ts @@ -5,7 +5,42 @@ import { prisma } from '@/lib/db'; import { CONFIG_BASE_PATH, downloadFileJsonlParsedLines } from '@/lib/utils/s3'; import { Pool } from 'pg'; -const WORK_MEM = '2GB'; +// Reduced from 2GB to 256MB to fit within Kubernetes
PostgreSQL memory limits (512Mi) +// If PostgreSQL has more memory, this can be increased for better performance +const WORK_MEM = '256MB'; + +// Singleton pool for import operations to avoid connection churn +let importPool: Pool | null = null; +let useSSL: boolean | null = null; // null = not determined yet, true/false = determined + +function getImportPool(): Pool { + if (!importPool) { + // For localhost/Docker Compose with internal Postgres, SSL is typically not needed + // For cloud/managed Postgres, SSL is usually required + // Default to no SSL for internal Kubernetes postgres + const shouldTrySSL = useSSL ?? false; + + importPool = new Pool({ + connectionString: process.env.POSTGRES_URL_NON_POOLING || '', + ssl: shouldTrySSL ? { rejectUnauthorized: false } : false, + max: 5, // Limit concurrent connections + idleTimeoutMillis: 30000, // Close idle connections after 30s + connectionTimeoutMillis: 10000, // Timeout after 10s if can't connect + // statement_timeout is set per-query instead + }); + + console.log(`Created import pool with SSL=${shouldTrySSL}`); + } + return importPool; +} + +// Helper to reset pool if SSL settings need to change +function resetImportPool() { + if (importPool) { + importPool.end(); + importPool = null; + } +} export async function importConfigFromS3() { const explanationModelTypeLines = await downloadFileJsonlParsedLines( @@ -61,46 +96,53 @@ export async function importConfigFromS3() { } export async function importJsonlString(tableName: string, jsonlData: string) { - let pool; let client; // replace all \u0000 with ' ' because it's not supported by postgres // eslint-disable-next-line no-param-reassign jsonlData = jsonlData.replaceAll('\\u0000', ' '); try { + const pool = getImportPool(); + console.log(`Connecting to database for table ${tableName}...`); + try { - pool = new Pool({ - connectionString: process.env.POSTGRES_URL_NON_POOLING || '', - ssl: { rejectUnauthorized: false }, - }); client = await pool.connect(); - // Set 
work_mem for this connection - await client.query(`SET work_mem = '${WORK_MEM}'`); - } catch (error) { - pool = new Pool({ - connectionString: process.env.POSTGRES_URL_NON_POOLING || '', - ssl: false, - }); - client = await pool.connect(); - // Set work_mem for this connection - await client.query(`SET work_mem = '${WORK_MEM}'`); + console.log(`Connected successfully. Setting work_mem=${WORK_MEM} and statement_timeout=600s`); + } catch (sslError: any) { + // Handle SSL connection errors specifically + if (sslError?.message?.includes('SSL') || sslError?.message?.includes('ssl')) { + console.log(`SSL connection failed: ${sslError.message}. Retrying without SSL...`); + resetImportPool(); + useSSL = false; + const newPool = getImportPool(); + client = await newPool.connect(); + console.log(`Connected successfully without SSL. Setting work_mem=${WORK_MEM} and statement_timeout=600s`); + } else { + throw sslError; + } } + // Set work_mem and statement_timeout for this connection + // statement_timeout prevents queries from running indefinitely + await client.query(`SET work_mem = '${WORK_MEM}'`); + await client.query(`SET statement_timeout = '600000'`); // 10 minutes per statement + console.log(`Database configuration applied for ${tableName}`); + // Parse first line to get available columns const firstLine = jsonlData.trim().split('\n')[0]; const availableColumns = Object.keys(JSON.parse(firstLine)); // Get column information only for columns that exist in the JSON const columnQuery = ` - SELECT + SELECT column_name, - CASE - WHEN data_type = 'ARRAY' THEN + CASE + WHEN data_type = 'ARRAY' THEN udt_name::regtype::text || '[]' WHEN data_type = 'USER-DEFINED' THEN ( SELECT t.typname::text FROM pg_type t WHERE t.typname = c.udt_name ) - ELSE + ELSE data_type END as data_type FROM information_schema.columns c @@ -122,18 +164,29 @@ export async function importJsonlString(tableName: string, jsonlData: string) { const lines = jsonlData.trim().split('\n'); const chunkSize = 
65000; // there's a limit of ~200MB per insert + const totalChunks = Math.ceil(lines.length / chunkSize); + console.log(`Importing ${lines.length} lines into ${tableName} in ${totalChunks} chunks`); for (let i = 0; i < lines.length; i += chunkSize) { const chunk = lines.slice(i, i + chunkSize); const jsonArray = `[${chunk.join(',')}]`; + const chunkNum = Math.floor(i / chunkSize) + 1; + console.log(`Executing insert for ${tableName} chunk ${chunkNum}/${totalChunks} (${chunk.length} rows)`); await client.query(query, [jsonArray]); + console.log(`Completed ${tableName} chunk ${chunkNum}/${totalChunks}`); } + console.log(`Successfully imported all data into ${tableName}`); } catch (err) { - console.error('Error importing data:', err); + console.error(`Error importing data into ${tableName}:`, err); + if (err instanceof Error) { + console.error('Error details:', { message: err.message, stack: err.stack, name: err.name }); + } throw err; } finally { if (client) { + console.log(`Releasing database connection for ${tableName}`); client.release(); } + // Don't end the pool here - it's a singleton that will be reused } }