Skip to content

Commit 919eb41

Browse files
committed
fix: refined bank statement parsing for CSV and PDF specifically
1 parent ca2f71c commit 919eb41

File tree

1 file changed

+36
-16
lines changed

1 file changed

+36
-16
lines changed

frontend/src/pages/BusinessDashboard.jsx

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,10 @@ export default function BusinessDashboard({ account, provider, chainId }) {
177177
addLog(`Initiating File Scan: ${file.name}`);
178178
let text = "";
179179

180-
if (file.type === "application/pdf") {
180+
// Check if it's a PDF by mime type OR file extension
181+
const isPDF = file.type === "application/pdf" || file.name.toLowerCase().endsWith(".pdf");
182+
183+
if (isPDF) {
181184
addLog("> File Type: PDF detected. Loading PDF.js...");
182185
const pdfLib = await loadPDFLib();
183186
const arrayBuffer = await file.arrayBuffer();
@@ -190,13 +193,17 @@ export default function BusinessDashboard({ account, provider, chainId }) {
190193
text += content.items.map(item => item.str).join(' ') + '\n';
191194
}
192195
} else {
193-
addLog("> File Type: CSV/Text detected.");
196+
addLog("> File Type: CSV detected.");
194197
text = await file.text();
195198
}
196199

197200
setExtractedText(text);
198201
const lines = text.split('\n').filter(l => l.trim());
199-
addLog(`> Analyzing ${lines.length} lines...`);
202+
addLog(`> Status: Extracted ${lines.length} lines of text.`);
203+
204+
if (text.length > 0) {
205+
addLog(`> Debug: First 50 chars: "${text.substring(0, 50).replace(/\n/g, ' ')}..."`);
206+
}
200207

201208
// Parse the file content
202209
const results = parseFileContent(lines);
@@ -271,30 +278,31 @@ export default function BusinessDashboard({ account, provider, chainId }) {
271278

272279
if (isCSV) {
273280
const headers = lines[0].split(',').map(h => h.trim().toLowerCase());
274-
const creditIdx = headers.findIndex(h => h.includes('credit'));
275-
const debitIdx = headers.findIndex(h => h.includes('debit'));
276-
const balanceIdx = headers.findIndex(h => h.includes('balance'));
281+
// Broader CSV keyword matching
282+
const creditIdx = headers.findIndex(h => h.includes('credit') || h.includes('deposit') || h.includes('inward') || h.includes('inout'));
283+
const debitIdx = headers.findIndex(h => h.includes('debit') || h.includes('withdraw') || h.includes('outward') || h.includes('expense'));
284+
const balanceIdx = headers.findIndex(h => h.includes('balance') || h.includes('bal'));
277285

278286
let totalCredits = 0, totalDebits = 0, lastBalance = 0, txCount = 0;
279287

280288
for (let i = 1; i < lines.length; i++) {
281289
const cols = lines[i].split(',').map(c => c.trim());
282290

283291
if (creditIdx >= 0 && cols[creditIdx]) {
284-
const val = parseFloat(cols[creditIdx].replace(/[,$]/g, ''));
292+
const val = parseFloat(cols[creditIdx].replace(/[,$#\s]/g, ''));
285293
if (!isNaN(val) && val > 0) { totalCredits += val; txCount++; }
286294
}
287295
if (debitIdx >= 0 && cols[debitIdx]) {
288-
const val = parseFloat(cols[debitIdx].replace(/[,$]/g, ''));
296+
const val = parseFloat(cols[debitIdx].replace(/[,$#\s]/g, ''));
289297
if (!isNaN(val) && val > 0) { totalDebits += val; txCount++; }
290298
}
291299
if (balanceIdx >= 0 && cols[balanceIdx]) {
292-
const val = parseFloat(cols[balanceIdx].replace(/[,$]/g, ''));
300+
const val = parseFloat(cols[balanceIdx].replace(/[,$#\s]/g, ''));
293301
if (!isNaN(val)) lastBalance = val;
294302
}
295303
}
296304

297-
addLog(`Parsed CSV: Credits=$${totalCredits.toLocaleString()}, Debits=$${totalDebits.toLocaleString()}, Balance=$${lastBalance.toLocaleString()}`);
305+
addLog(`Parsed CSV: Inflow=$${totalCredits.toLocaleString()}, Outflow=$${totalDebits.toLocaleString()}, Balance=$${lastBalance.toLocaleString()}`);
298306

299307
return {
300308
totalDeposits: totalCredits,
@@ -307,37 +315,49 @@ export default function BusinessDashboard({ account, provider, chainId }) {
307315

308316
// PDF/Text fallback parsing using regex and keywords
309317
let totalDeposits = 0, totalWithdrawals = 0, lastBalance = 0, txCount = 0;
310-
const amountRegex = /-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|-?\d+(?:\.\d+)?/g;
318+
// More robust amount regex: optional "$", one optional whitespace character, optional minus sign, comma-grouped thousands, optional decimal part
319+
const amountRegex = /(?:[$]?)\s?-?\d{1,3}(?:,\d{3})*(?:\.\d+)?/g;
320+
321+
// Expanded keyword lists
322+
const depositKeywords = ['deposit', 'credit', 'cr', 'inflow', 'received', 'incoming', 'total in', 'funding'];
323+
const withdrawalKeywords = ['withdraw', 'debit', 'dr', 'outflow', 'sent', 'outgoing', 'total out', 'expense', 'payment'];
324+
const balanceKeywords = ['balance', 'bal', 'closing', 'ending', 'current'];
311325

312326
for (const line of lines) {
313327
const lower = line.toLowerCase();
314328
const matches = line.match(amountRegex);
315329
if (!matches) continue;
316330

317331
for (const raw of matches) {
318-
const val = parseFloat(raw.replace(/,/g, ''));
332+
const val = parseFloat(raw.replace(/[,$#\s]/g, ''));
319333
if (isNaN(val) || Math.abs(val) === 0) continue;
320334

321-
if (lower.includes('deposit') || lower.includes('credit') || lower.includes('cr')) {
335+
if (depositKeywords.some(k => lower.includes(k))) {
322336
totalDeposits += Math.abs(val);
323337
txCount++;
324-
} else if (lower.includes('withdraw') || lower.includes('debit') || lower.includes('dr')) {
338+
} else if (withdrawalKeywords.some(k => lower.includes(k))) {
325339
totalWithdrawals += Math.abs(val);
326340
txCount++;
327341
}
328342

329-
if (lower.includes('balance')) {
343+
if (balanceKeywords.some(k => lower.includes(k))) {
344+
// Update last balance found in file
330345
lastBalance = Math.abs(val);
331346
}
332347
}
333348
}
334349

350+
// Diagnostic log when no deposit amounts were found (totalDeposits is still 0)
351+
if (totalDeposits === 0) {
352+
addLog("Search Debug: Scanned lines but found no 'Deposit' keywords near numbers.");
353+
}
354+
335355
// If balance missing, estimate from net flow
336356
if (lastBalance === 0 && totalDeposits > 0) {
337357
lastBalance = Math.max(0, totalDeposits - totalWithdrawals);
338358
}
339359

340-
addLog(`Parsed PDF/Text: Deposits=$${totalDeposits.toLocaleString()}, Withdrawals=$${totalWithdrawals.toLocaleString()}, Balance=$${lastBalance.toLocaleString()}, Tx=${txCount}`);
360+
addLog(`Parsed Statement: Inflow=$${totalDeposits.toLocaleString()}, Outflow=$${totalWithdrawals.toLocaleString()}, Balance=$${lastBalance.toLocaleString()}, Tx=${txCount}`);
341361

342362
return {
343363
totalDeposits,

0 commit comments

Comments
 (0)