From 814c59f3671c7c3b36c53de1fcc744df270f4e69 Mon Sep 17 00:00:00 2001 From: rpidanny Date: Tue, 2 Jul 2024 21:18:47 +0200 Subject: [PATCH] fix: handle captcha errors --- .../search/paper-search.service.spec.ts | 13 +++++++ src/services/search/paper-search.service.ts | 37 ++++++++++++------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/services/search/paper-search.service.spec.ts b/src/services/search/paper-search.service.spec.ts index ce516ab..6cb90d6 100644 --- a/src/services/search/paper-search.service.spec.ts +++ b/src/services/search/paper-search.service.spec.ts @@ -1,5 +1,6 @@ import { jest } from '@jest/globals' import { GoogleScholar } from '@rpidanny/google-scholar' +import { CaptchaError } from '@rpidanny/odysseus/dist/index.js' import { Quill } from '@rpidanny/quill' import { mock } from 'jest-mock-extended' @@ -125,6 +126,18 @@ describe('PaperSearchService', () => { ) }) + it('should skip paper when CaptchaError is thrown', async () => { + paperService.getTextContent.mockRejectedValue(new CaptchaError()) + + const entities = await service.search({ + keywords: 'some keywords', + minItemCount: 10, + filterPattern: 'cas9', + }) + + expect(entities).toHaveLength(0) + }) + it('should summarize papers if summarize is true', async () => { llmService.summarize.mockResolvedValue('This is a summary.') diff --git a/src/services/search/paper-search.service.ts b/src/services/search/paper-search.service.ts index e5895bf..2c41e2e 100644 --- a/src/services/search/paper-search.service.ts +++ b/src/services/search/paper-search.service.ts @@ -1,4 +1,5 @@ import { GoogleScholar, IPaperMetadata } from '@rpidanny/google-scholar/dist' +import { CaptchaError } from '@rpidanny/odysseus' import { Quill } from '@rpidanny/quill' import { join } from 'path' @@ -83,21 +84,29 @@ export class PaperSearchService { if (!filterPattern && !summarize) return entity - const textContent = await this.paperService.getTextContent(paper) - - if (filterPattern) { - const matches = await this.paperService.findInPaper(textContent, filterPattern) - if (matches.length === 0) return undefined - this.logMatches(matches) - entity.matches = matches - } - - if (summarize) { - const summary = await this.llmService.summarize(textContent) - entity.summary = summary + try { + const textContent = await this.paperService.getTextContent(paper) + + if (filterPattern) { + const matches = await this.paperService.findInPaper(textContent, filterPattern) + if (matches.length === 0) return undefined + this.logMatches(matches) + entity.matches = matches + } + + if (summarize) { + const summary = await this.llmService.summarize(textContent) + entity.summary = summary + } + + return entity + } catch (error) { + if (error instanceof CaptchaError) { + this.logger?.debug(`Failed processing paper due to Captcha: ${paper.url}`) + return + } + throw error } - - return entity } private logMatches(foundItems: ITextMatch[]): void {