# config.py
"""
Partner OS — System Configuration
==================================
Single source of truth for all constants, paths, environment variables,
and enumerated states used across the entire Partner OS codebase.
Rules:
- Every configurable value lives here. Nothing is hardcoded elsewhere.
- All file paths use pathlib.Path — never raw strings.
- Secrets (API keys, credentials) are read from environment variables only.
They are never hardcoded here or committed to version control.
- Status enumerations are defined as classes with string constants so they
can be imported cleanly and used without magic strings anywhere in the code.
"""
import logging
import os
from pathlib import Path
# =============================================================================
# PROJECT ROOT & DIRECTORY LAYOUT
# =============================================================================
ROOT_DIR: Path = Path(__file__).parent.resolve()
"""Project root: the resolved, absolute directory that contains this file."""

# --- Knowledge Base (manually loaded by principals — read-only by agents) ----
KNOWLEDGE_DIR: Path = ROOT_DIR / "knowledge"

PINNEO_DIR: Path = KNOWLEDGE_DIR / "pinneo"
"""WISDOM: Greg Pinneo transcript .md files. Feeds ChromaDB pinneo_brain."""

CCIM_DIR: Path = KNOWLEDGE_DIR / "ccim"
"""WISDOM: CCIM course module .md files. Feeds ChromaDB pinneo_brain."""

REFERENCE_DIR: Path = KNOWLEDGE_DIR / "reference"
"""REFERENCE: zoning codes, laws, regulations. Feeds ChromaDB
reference_library."""

OUTCOMES_DIR: Path = KNOWLEDGE_DIR / "outcomes"
"""LEARNED: closed-deal summary .md files generated by the Learning Loop.
Principals review and approve them before they are embedded into ChromaDB."""

# --- Staging (all inbound files land here before Librarian processing) -------
STAGING_DIR: Path = ROOT_DIR / "staging"

STAGING_INBOX_DIR: Path = STAGING_DIR / "inbox"
"""Primary drop zone: rclone Drive syncs and UI uploads both land here."""

# Built from the inbox path so the two locations cannot drift apart.
STAGING_UNRESOLVED_DIR: Path = STAGING_INBOX_DIR / "unresolved"
"""Holding area for files the Librarian could not auto-assign to a deal.
They are flagged for principal review in the Streamlit UI."""

# --- Deal Jackets (Librarian creates and manages — isolated per deal) --------
DEALS_DIR: Path = ROOT_DIR / "deals"
"""Parent of every Deal Jacket directory; one subdirectory per deal.
Format: {DEAL_ID}_{address_slug}/ e.g. 0042_1234_main_st_vancouver/"""

# --- Persistent Data (auto-created on first run) -----------------------------
DATA_DIR: Path = ROOT_DIR / "data"

CHROMA_DB_PATH: Path = DATA_DIR / "chroma"
"""ChromaDB vector store. Created automatically by src/brain/embedder.py."""

DATABASE_PATH: Path = DATA_DIR / "partner_os.db"
"""SQLite operational database. Created automatically on first run."""

# --- Logs --------------------------------------------------------------------
LOG_DIR: Path = ROOT_DIR / "logs"
# =============================================================================
# GOOGLE GEMINI API
# =============================================================================
# Authentication: Google account OAuth (no API key).
# Gemini CLI has already configured Application Default Credentials (ADC)
# on this machine via 'gcloud auth application-default login' or equivalent.
# The google-genai SDK picks up these credentials automatically when
# GOOGLE_API_KEY is not set. Do not set GOOGLE_API_KEY unless switching
# to API key authentication.
#
# Import pattern in all agent files:
# from google import genai
# import google.auth
# credentials, _ = google.auth.default()
# client = genai.Client(credentials=credentials)
GEMINI_GENERATION_MODEL: str = os.getenv("PARTNER_OS_GEMINI_MODEL", "gemini-2.0-flash")
"""Model used for text generation. Set the PARTNER_OS_GEMINI_MODEL
environment variable to override the default."""

GEMINI_EMBEDDING_MODEL: str = "models/gemini-embedding-001"
"""Model that produces the embedding vectors stored in ChromaDB.
Verified for standard Gemini API keys."""
# =============================================================================
# CHROMADB — COLLECTION NAMES
# =============================================================================
COLLECTION_PINNEO_BRAIN: str = "pinneo_brain"
"""Name of the ChromaDB collection holding WISDOM (Pinneo transcripts,
CCIM modules) and LEARNED OUTCOMES (closed-deal summaries approved by
principals)."""

COLLECTION_REFERENCE_LIBRARY: str = "reference_library"
"""Name of the ChromaDB collection holding REFERENCE material (zoning
codes, laws, regulations, compliance documents)."""
# =============================================================================
# RAG (RETRIEVAL-AUGMENTED GENERATION) PARAMETERS
# =============================================================================
CHUNK_SIZE: int = 800
"""Upper bound, in characters, on each text chunk before it is embedded.
Tuned for Pinneo transcript structure. Adjust with care — larger chunks
reduce retrieval precision; smaller chunks lose contextual meaning."""

CHUNK_OVERLAP: int = 150
"""Number of characters shared by adjacent chunks, so context is not lost
at chunk boundaries."""

RAG_TOP_K: int = 5
"""How many of the most relevant chunks are retrieved per query.
These chunks are injected into the agent's Gemini prompt as context."""

LOW_CONFIDENCE_THRESHOLD: float = 0.40
"""Lowest acceptable relevance score for a retrieved chunk.
Retrievals below this threshold trigger LOW_CONFIDENCE: TRUE flag."""
# =============================================================================
# DEAL STATE MACHINE
# =============================================================================
class DealStatus:
    """Enumeration of all valid deal status values, as string constants.

    Deals move forward through this sequence. No backwards transitions.

    An agent is only authorized to act on a deal when the deal's status
    is the one that calls for that agent. Enforced at the start of each
    agent's main processing function.
    """

    INTAKE: str = "INTAKE"
    LIBRARIAN_PROCESSING: str = "LIBRARIAN_PROCESSING"
    AWAITING_VERIFICATION: str = "AWAITING_VERIFICATION"
    CFO_CALCULATING: str = "CFO_CALCULATING"
    SCOUT_RUNNING: str = "SCOUT_RUNNING"
    PROFILER_RUNNING: str = "PROFILER_RUNNING"
    MANAGER_SYNTHESIZING: str = "MANAGER_SYNTHESIZING"
    VERDICT_ISSUED: str = "VERDICT_ISSUED"
    PRINCIPAL_REVIEW: str = "PRINCIPAL_REVIEW"
    CLOSED: str = "CLOSED"
    DEAD: str = "DEAD"

    # Every status defined above, in declaration order.
    ALL: tuple = (
        INTAKE,
        LIBRARIAN_PROCESSING,
        AWAITING_VERIFICATION,
        CFO_CALCULATING,
        SCOUT_RUNNING,
        PROFILER_RUNNING,
        MANAGER_SYNTHESIZING,
        VERDICT_ISSUED,
        PRINCIPAL_REVIEW,
        CLOSED,
        DEAD,
    )
class Verdict:
    """The two and only two possible verdicts issued by the Manager agent.

    CONDITIONAL is not a valid verdict. Every KILL verdict must include
    a conditions_to_flip field listing what would be required to flip
    the verdict to APPROVE.
    """

    APPROVE: str = "APPROVE"
    KILL: str = "KILL"
class VerificationStatus:
    """Status of CFO financial extraction records in SQLite."""

    UNVERIFIED: str = "UNVERIFIED"
    """AI has extracted financials. Principal has not yet reviewed."""

    VERIFIED: str = "VERIFIED"
    """Principal has reviewed and approved the extracted numbers.
    CFO is now authorized to run Phase 3 deterministic calculations."""

    LOCKED: str = "LOCKED"
    """Calculations complete. Record is immutable."""
class FileStatus:
    """Status of file records tracked by the Librarian in SQLite."""

    PENDING: str = "PENDING"
    """File discovered in staging. Not yet processed."""

    PROCESSING: str = "PROCESSING"
    """Librarian is actively classifying and routing this file."""

    COMPLETE: str = "COMPLETE"
    """File successfully routed to its Deal Jacket subdirectory."""

    FAILED: str = "FAILED"
    """Processing failed. See error_detail column in SQLite for reason."""

    AWAITING_PRINCIPAL: str = "AWAITING_PRINCIPAL"
    """Librarian could not determine deal association. File moved to
    staging/inbox/unresolved/ and flagged in UI for principal assignment."""
class ContentClass:
    """Librarian content classification labels for ingested files."""

    SELLER_CORRESPONDENCE: str = "SELLER_CORRESPONDENCE"
    FINANCIAL_DOCUMENT: str = "FINANCIAL_DOCUMENT"
    TITLE_REPORT: str = "TITLE_REPORT"
    INSPECTION_REPORT: str = "INSPECTION_REPORT"
    FIELD_NOTES: str = "FIELD_NOTES"
    MUNICIPAL_RECORD: str = "MUNICIPAL_RECORD"
    OFFERING_MEMORANDUM: str = "OFFERING_MEMORANDUM"
    LEGAL_DOCUMENT: str = "LEGAL_DOCUMENT"
    # Catch-all label; when it applies is decided outside this module.
    OTHER: str = "OTHER"
# =============================================================================
# CLARK COUNTY — LOCAL INTELLIGENCE TARGETS
# =============================================================================
CLARK_COUNTY_ASSESSOR_URL: str = "https://www.clark.wa.gov/assessor"
"""Public records site that is the Scout agent's primary target."""

CLARK_COUNTY_NAME: str = "Clark"
CLARK_COUNTY_STATE: str = "WA"

MARKET_GEOGRAPHY: str = "Clark County, WA"
"""Geography label, in human-readable form, used in all agent outputs
and reports."""
# =============================================================================
# LOGGING
# =============================================================================
# The standard `logging` module matches level names case-sensitively, so the
# raw environment value is trimmed and upper-cased here. An unset, empty, or
# whitespace-only value falls back to "INFO".
LOG_LEVEL: str = (
    os.environ.get("PARTNER_OS_LOG_LEVEL", "").strip().upper() or "INFO"
)
"""Default log level. Override via PARTNER_OS_LOG_LEVEL env var.
Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL (case-insensitive;
surrounding whitespace is ignored). Other values are passed through
unchanged apart from upper-casing and are not validated here."""

LOG_FORMAT: str = "%(asctime)s | %(levelname)-8s | %(name)s | %(message)s"
"""Record layout: timestamp, padded level name, logger name, message."""

LOG_DATE_FORMAT: str = "%Y-%m-%d %H:%M:%S"
"""strftime pattern for the %(asctime)s field."""
# =============================================================================
# RCLONE SYNC
# =============================================================================
RCLONE_REMOTE_NAME: str = os.getenv("PARTNER_OS_RCLONE_REMOTE", "gdrive")
"""rclone remote configured on this machine for Google Drive.
Set the PARTNER_OS_RCLONE_REMOTE environment variable to override.
To list the remotes available, run: rclone listremotes"""

RCLONE_DRIVE_PATH: str = os.getenv("PARTNER_OS_RCLONE_PATH", "Business with Brother")
"""Path inside the Google Drive remote that gets synced.
Set the PARTNER_OS_RCLONE_PATH environment variable to override."""
# =============================================================================
# DEAL JACKET SUBDIRECTORY NAMES
# Centralised here so they are never magic strings in agent code.
# =============================================================================
class DealSubdir:
    """Subdirectory names within a Deal Jacket. Never hardcode these."""

    AUDIO: str = "audio"
    TRANSCRIPTS: str = "transcripts"
    DOCUMENTS: str = "documents"
    ANALYSIS: str = "analysis"
    DRAFTS: str = "drafts"

    # NOTE(review): by its name and value this looks like a file name rather
    # than a subdirectory — confirm how callers join it to a jacket path.
    JACKET_FILENAME: str = "jacket.json"