hindsight/.env.example at main · vectorize-io/hindsight · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# Hindsight Environment Variables
# Copy this file to .env and fill in your values

# LLM Configuration (Required)
# The active values below select OpenAI as the primary LLM. The commented-out
# examples further down show settings for other providers; when enabling one,
# comment out the matching lines here so each key is defined only once.
# Supported providers: openai, groq, ollama, gemini, anthropic, lmstudio, vertexai, minimax, deepseek, zai, atlas, volcano
HINDSIGHT_API_LLM_PROVIDER=openai
# Placeholder value: replace it with your real key in .env, and keep .env out of version control.
HINDSIGHT_API_LLM_API_KEY=your-api-key-here
HINDSIGHT_API_LLM_MODEL=gpt-4o-mini
# NOTE(review): most provider examples in this file do not set BASE_URL; presumably this
# OpenAI URL should be removed or replaced when switching providers - confirm.
HINDSIGHT_API_LLM_BASE_URL=https://api.openai.com/v1
# Reasoning effort for providers/models that support it. Examples: low, medium, high, xhigh.
# HINDSIGHT_API_LLM_REASONING_EFFORT=low

# Example: Anthropic Claude configuration
# HINDSIGHT_API_LLM_PROVIDER=anthropic
# HINDSIGHT_API_LLM_API_KEY=your-anthropic-api-key
# HINDSIGHT_API_LLM_MODEL=claude-sonnet-4-20250514

# Example: Google Vertex AI configuration
# HINDSIGHT_API_LLM_PROVIDER=vertexai
# HINDSIGHT_API_LLM_MODEL=google/gemini-2.0-flash-001
# HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID=your-gcp-project-id
# HINDSIGHT_API_LLM_VERTEXAI_REGION=us-central1
# Optional; uses Application Default Credentials (ADC) if not set:
# HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY=/path/to/service-account-key.json

# Example: MiniMax configuration (1M context window)
# HINDSIGHT_API_LLM_PROVIDER=minimax
# HINDSIGHT_API_LLM_API_KEY=your-minimax-api-key
# Model: MiniMax-M3, or MiniMax-M2.7 for the previous generation.
# HINDSIGHT_API_LLM_MODEL=MiniMax-M3

# Example: DeepSeek configuration (https://api.deepseek.com)
# HINDSIGHT_API_LLM_PROVIDER=deepseek
# HINDSIGHT_API_LLM_API_KEY=your-deepseek-api-key
# Model: deepseek-v4-flash, or deepseek-v4-pro / deepseek-chat / deepseek-reasoner.
# HINDSIGHT_API_LLM_MODEL=deepseek-v4-flash

# Example: z.ai configuration (Zhipu GLM series, https://z.ai)
# HINDSIGHT_API_LLM_PROVIDER=zai
# HINDSIGHT_API_LLM_API_KEY=your-zai-api-key
# Model: glm-4.5-flash, or glm-4.5-air for the paid tier.
# HINDSIGHT_API_LLM_MODEL=glm-4.5-flash

# Example: Atlas Cloud configuration (OpenAI-compatible, https://www.atlascloud.ai)
# HINDSIGHT_API_LLM_PROVIDER=atlas
# HINDSIGHT_API_LLM_API_KEY=your-atlascloud-api-key
# Model: deepseek-ai/deepseek-v4-pro is a reasoning model; Qwen / GLM / Kimi / MiniMax models, etc. are also available.
# HINDSIGHT_API_LLM_MODEL=deepseek-ai/deepseek-v4-pro

# Example: LM Studio local configuration (Qwen 2.5 32B recommended)
# HINDSIGHT_API_LLM_PROVIDER=lmstudio
# HINDSIGHT_API_LLM_API_KEY=lmstudio
# HINDSIGHT_API_LLM_BASE_URL=http://localhost:1234/v1
# HINDSIGHT_API_LLM_MODEL=qwen2.5-32b-instruct

# Multi-LLM strategies: configure extra LLMs by index alongside the primary above,
# then pick a routing strategy. Unset = single primary LLM (default). Members are
# numbered from 1; indices must be contiguous. Each operation can override with a
# RETAIN_/REFLECT_/CONSOLIDATION_ prefix (e.g. HINDSIGHT_API_RETAIN_LLM_1_PROVIDER).
# HINDSIGHT_API_LLM_1_PROVIDER=groq
# HINDSIGHT_API_LLM_1_API_KEY=your-groq-api-key
# HINDSIGHT_API_LLM_1_MODEL=openai/gpt-oss-120b
# HINDSIGHT_API_LLM_2_PROVIDER=anthropic
# HINDSIGHT_API_LLM_2_API_KEY=your-anthropic-api-key
# Strategy JSON: {"mode": "failover"} or {"mode": "round-robin"}.
# Round-robin accepts optional positive-int "weights" (one per member, primary first).
# HINDSIGHT_API_LLM_STRATEGY={"mode": "failover"}

# API Configuration (Optional)
# NOTE(review): 0.0.0.0 conventionally means "listen on all network interfaces";
# presumably 127.0.0.1 would restrict the API to the local machine - confirm.
HINDSIGHT_API_HOST=0.0.0.0
# The Control Plane example in this file (HINDSIGHT_CP_DATAPLANE_API_URL=http://localhost:8888)
# uses this same port; if you change it here, update that URL to match.
HINDSIGHT_API_PORT=8888
HINDSIGHT_API_LOG_LEVEL=info
# Optional retain chunking override for structured logs/transcripts.
# When unset, HINDSIGHT_API_RETAIN_CHUNK_SIZE is used as the structured-chunk limit.
# HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE=

# Dry-run extraction preview endpoint (POST /memories/dry-run-extract). Enabled by default; it makes
# a real LLM call but stores nothing. Set to false to remove the endpoint (returns 404).
# HINDSIGHT_API_ENABLE_DRY_RUN_EXTRACT=true

# Base Path / Reverse Proxy Support (Optional)
# Set these when deploying behind a reverse proxy with path-based routing
# Example: To deploy at example.com/hindsight/, set both to "/hindsight"
# HINDSIGHT_API_BASE_PATH=/hindsight
# NEXT_PUBLIC_BASE_PATH=/hindsight

# Database (Optional - uses embedded pg0 by default)
# HINDSIGHT_API_DATABASE_URL=postgresql://user:pass@host:5432/db
# Optional read-replica URL. When set, recall queries (semantic, BM25, graph, temporal)
# flow through a separate pool against this URL, offloading the primary. Typically points
# to a read-only endpoint (CNPG's <cluster>-ro service or Aurora reader endpoint).
# HINDSIGHT_API_READ_DATABASE_URL=
# Direct PostgreSQL URL for migrations (bypasses PgBouncer). Falls back to
# HINDSIGHT_API_DATABASE_URL.
# HINDSIGHT_API_MIGRATION_DATABASE_URL=
# PostgreSQL schema name (default: public).
# HINDSIGHT_API_DATABASE_SCHEMA=public
# Tenant schemas to migrate concurrently (PG only, each in its own process; per-schema
# work stays sequential). Each worker has ~1-2s startup cost and uses ~3 DB connections,
# so it only pays off with many schemas (tens+) or slow migrations; keep
# concurrency*3 <= spare max_connections. Default: 1 (sequential).
# HINDSIGHT_API_MIGRATION_CONCURRENCY=1

# Vector Extension (Optional - uses pgvector by default)
# Options: "pgvector" (default), "vchord", "pgvectorscale" (DiskANN)
# HINDSIGHT_API_VECTOR_EXTENSION=pgvector
# For Azure PostgreSQL with DiskANN (auto-detects pg_diskann on Azure):
# HINDSIGHT_API_VECTOR_EXTENSION=pgvectorscale

# Text Search Extension (Optional - uses native PostgreSQL full-text search by default)
# Backend options: "native" (default), "vchord", "pg_textsearch", "pgroonga", "pg_search"
# HINDSIGHT_API_TEXT_SEARCH_EXTENSION=native
# Native backend dictionary (only used by HINDSIGHT_API_TEXT_SEARCH_EXTENSION=native)
# HINDSIGHT_API_TEXT_SEARCH_EXTENSION_NATIVE_LANGUAGE=english
# ParadeDB pg_search tokenizer (only used when creating pg_search BM25 indexes).
# Empty uses ParadeDB's default tokenizer: unicode_words.
# Supported values: unicode_words, simple, whitespace, literal, literal_normalized,
# chinese_compatible, icu, jieba, source_code,
# chinese_lindera/lindera(chinese), japanese_lindera/lindera(japanese),
# korean_lindera/lindera(korean), ngram(min,max), edge_ngram(min,max)
# HINDSIGHT_API_TEXT_SEARCH_EXTENSION_PG_SEARCH_TOKENIZER=

# File Parser (Optional - uses markitdown by default)
# HINDSIGHT_API_FILE_PARSER=markitdown
# Enable image OCR for MarkItDown using an OpenAI-compatible OCR/vision endpoint.
# These OCR settings are independent from HINDSIGHT_API_LLM_* because MarkItDown
# uses the OpenAI SDK directly and requires Chat Completions image input support.
# When OCR is enabled, API_KEY, BASE_URL, and MODEL are required.
# HINDSIGHT_API_FILE_PARSER_MARKITDOWN_OCR_ENABLED=false
# HINDSIGHT_API_FILE_PARSER_MARKITDOWN_OCR_API_KEY=
# HINDSIGHT_API_FILE_PARSER_MARKITDOWN_OCR_BASE_URL=
# HINDSIGHT_API_FILE_PARSER_MARKITDOWN_OCR_MODEL=
# HINDSIGHT_API_FILE_PARSER_MARKITDOWN_OCR_PROMPT=

# Embeddings Configuration (Optional - uses local by default)
# Provider: "local" (default), "onnx", "tei", "openai", "cohere", "google", "openrouter", "zeroentropy", "litellm", or "litellm-sdk"
# HINDSIGHT_API_EMBEDDINGS_PROVIDER=local
# For local provider:
# HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL=BAAI/bge-small-en-v1.5
# For ONNX provider (local CPU embeddings without an Ollama/TEI sidecar):
# HINDSIGHT_API_EMBEDDINGS_PROVIDER=onnx
# HINDSIGHT_API_EMBEDDINGS_ONNX_MODEL_ID=intfloat/multilingual-e5-small
# HINDSIGHT_API_EMBEDDINGS_ONNX_FILE=onnx/model.onnx
# HINDSIGHT_API_EMBEDDINGS_ONNX_DIMENSIONS=384
# HINDSIGHT_API_EMBEDDINGS_ONNX_MAX_TOKENS=512
# HINDSIGHT_API_EMBEDDINGS_ONNX_POOLING=mean
# HINDSIGHT_API_EMBEDDINGS_ONNX_NORMALIZE=true
# HINDSIGHT_API_EMBEDDINGS_ONNX_QUERY_PREFIX="query: "
# HINDSIGHT_API_EMBEDDINGS_ONNX_PASSAGE_PREFIX="passage: "
# Optional for local model paths or pre-downloaded artifacts:
# HINDSIGHT_API_EMBEDDINGS_ONNX_MODEL_PATH=/models/multilingual-e5-small/onnx/model.onnx
# HINDSIGHT_API_EMBEDDINGS_ONNX_TOKENIZER_NAME_OR_PATH=/models/multilingual-e5-small
# Optional for China network / restricted HF access:
# HF_ENDPOINT=https://hf-mirror.com
# For TEI provider:
# HINDSIGHT_API_EMBEDDINGS_TEI_URL=http://localhost:8080
# For OpenAI-compatible embeddings:
# HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY=sk-xxxx
# HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL=text-embedding-3-small
# HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL=https://api.openai.com/v1
# For ZeroEntropy zembed-1:
# HINDSIGHT_API_EMBEDDINGS_PROVIDER=zeroentropy
# HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_API_KEY=ze-xxxx
# HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_MODEL=zembed-1
# HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_DIMENSIONS=1280
# HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_ENCODING_FORMAT=float
# HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_LATENCY=fast
#
# IMPORTANT: Embedding keys require provider-specific names:
# HINDSIGHT_API_EMBEDDINGS_{PROVIDER}_{PARAMETER}
# (for example, HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL).
#
# DeepSeek note: DeepSeek is supported for LLM calls, but not for embeddings.
# If using DeepSeek as LLM provider, keep embeddings on local/openai/cohere/google/etc.

# Reranker Configuration (Optional - uses local by default)
# Provider: "local" (default) or "tei" (HuggingFace Text Embeddings Inference)
# HINDSIGHT_API_RERANKER_PROVIDER=local
# For local provider:
# HINDSIGHT_API_RERANKER_LOCAL_MODEL=cross-encoder/ms-marco-MiniLM-L-6-v2
# For TEI provider:
# HINDSIGHT_API_RERANKER_TEI_URL=http://localhost:8081

# Observability & Tracing (Optional - disabled by default)
# Enable OpenTelemetry tracing for LLM calls (GenAI semantic conventions)
# HINDSIGHT_API_OTEL_TRACES_ENABLED=true
#
# Local development with Grafana LGTM stack (recommended - see scripts/dev/grafana/README.md)
# HINDSIGHT_API_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
#
# Cloud backends (Grafana Cloud, Langfuse, DataDog, etc.)
# HINDSIGHT_API_OTEL_EXPORTER_OTLP_ENDPOINT=https://your-backend-url
# HINDSIGHT_API_OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer your-token"
#
# Custom service name and environment (optional, defaults: hindsight-api, development)
# HINDSIGHT_API_OTEL_SERVICE_NAME=hindsight-production
# HINDSIGHT_API_OTEL_DEPLOYMENT_ENVIRONMENT=production
#
# Expose async-operation queue + consolidation-backlog gauges on /metrics (disabled by default).
# When enabled, runs periodic per-schema COUNT queries on a background task.
# HINDSIGHT_API_METRICS_BACKLOG_ENABLED=true

# -----------------------------------------------------------------------------
# Control Plane (Optional)
# -----------------------------------------------------------------------------

# Dataplane API URL - where the CP proxies requests to
# HINDSIGHT_CP_DATAPLANE_API_URL=http://localhost:8888

# Optional: Bearer token the CP sends as `Authorization: Bearer <key>` to the
# dataplane API. Required when the API service is auth-protected; omit for a
# public/unauthenticated API.
# HINDSIGHT_CP_DATAPLANE_API_KEY=your-dataplane-bearer-token

# Optional: Require a shared access key to view the Control Plane UI.
# When set, visitors see a login page and must enter the key before
# accessing the dashboard or any /api/* routes (except /api/health).
# HINDSIGHT_CP_ACCESS_KEY=your-shared-secret-key