@@ -89,7 +89,6 @@ class DocumentDB(Base):
89
89
countries : Mapped [Optional [list [str ]]] = mapped_column (JSONB , nullable = True )
90
90
organizations : Mapped [Optional [list [str ]]] = mapped_column (JSONB , nullable = True )
91
91
regions : Mapped [Optional [list [str ]]] = mapped_column (JSONB , nullable = True )
92
- notes : Mapped [Optional [str ]] = mapped_column (Text , nullable = True )
93
92
drive_link : Mapped [Optional [str ]] = mapped_column (Text , nullable = True )
94
93
year : Mapped [Optional [int ]] = mapped_column (Integer , nullable = True )
95
94
date_added : Mapped [Optional [datetime ]] = mapped_column (
@@ -108,8 +107,8 @@ async def save_document_to_db(
108
107
asession : AsyncSession ,
109
108
metadata : dict ,
110
109
pdf_url : str ,
111
- summary : str = None ,
112
- title : str = None ,
110
+ summary : str ,
111
+ title : str ,
113
112
document_id : int ,
114
113
) -> None :
115
114
"""
@@ -127,7 +126,6 @@ async def save_document_to_db(
127
126
countries = metadata .get ("Countries" , [])
128
127
organizations = metadata .get ("Organization(s)" , [])
129
128
regions = metadata .get ("Region(s)" , [])
130
- notes = metadata .get ("Notes" , "" )
131
129
drive_link = metadata .get ("Drive link" , "" )
132
130
year = metadata .get ("Year" , None )
133
131
document_id = metadata .get ("ID" , None )
@@ -152,9 +150,8 @@ async def save_document_to_db(
152
150
countries = countries ,
153
151
organizations = organizations ,
154
152
regions = regions ,
155
- notes = notes ,
156
153
drive_link = drive_link ,
157
- year = metadata . get ( "Year" ) ,
154
+ year = year ,
158
155
date_added = date_added ,
159
156
document_id = document_id , # Using the document_id passed to the function
160
157
pdf_url = pdf_url ,
@@ -171,7 +168,7 @@ async def save_document_to_db(
171
168
extracted_qa_pairs = []
172
169
173
170
qa_pairs = []
174
- for qa_idx , qa_pair in enumerate (extracted_qa_pairs ):
171
+ for _ , qa_pair in enumerate (extracted_qa_pairs ):
175
172
try :
176
173
question = qa_pair .get ("question" , "" )
177
174
answers = qa_pair .get ("answers" , [])
@@ -220,13 +217,11 @@ async def save_single_document(metadata: Dict[str, Any]) -> bool:
220
217
file_name = metadata ["file_name" ],
221
218
processed_pages = metadata ["processed_pages" ],
222
219
asession = asession ,
223
- metadata = metadata .get ("fields" , {}),
224
- pdf_url = metadata .get ("pdf_url" , "" ), # Use empty string as fallback
225
- title = metadata .get ("survey_name" ),
226
- summary = metadata .get ("summary" ),
227
- document_id = metadata .get (
228
- "document_id"
229
- ), # Pass document_id from metadata
220
+ metadata = metadata ["fields" ],
221
+ pdf_url = metadata ["pdf_url" ],
222
+ title = metadata ["survey_name" ],
223
+ summary = metadata ["summary" ],
224
+ document_id = metadata ["document_id" ],
230
225
)
231
226
logger .info (
232
227
f"File '{ metadata ['file_name' ]} ' processed and saved successfully."
0 commit comments