Skip to content

Commit

Permalink
refactor: args and flags into reusable modules (#74)
Browse files Browse the repository at this point in the history
* refactor: args and flags into reusable modules

* chore: remove redundant flag
  • Loading branch information
rpidanny authored Jul 3, 2024
1 parent 1637c4d commit 64cea66
Show file tree
Hide file tree
Showing 20 changed files with 216 additions and 228 deletions.
2 changes: 2 additions & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// words - list of words that are always considered correct
"words": [
"camelcase",
"Caproiciproducens",
"Colidextribacter",
"commitlint",
"crispr",
Expand All @@ -25,6 +26,7 @@
"posttest",
"PRJNA",
"rpidanny",
"tcell",
"typedi",
"vectorstore",
"vectorstores"
Expand Down
5 changes: 4 additions & 1 deletion src/base.command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import prettyMilliseconds from 'pretty-ms'

import { CONFIG_FILE_NAME } from './config/constants.js'
import { ConfigSchema, TConfig } from './config/schema.js'
import { FlagChar } from './inputs/flags/char.js'
import { Metric } from './utils/analytics/metric.js'

export type Flags<T extends typeof Command> = Interfaces.InferredFlags<
Expand All @@ -25,10 +26,12 @@ export abstract class BaseCommand<T extends typeof Command> extends Command {
// define flags that can be inherited by any command that extends BaseCommand
static baseFlags = {
'log-level': Flags.option({
char: FlagChar.LogLevel,
default: LogLevel.INFO,
helpGroup: 'GLOBAL',
helpValue: Object.values(LogLevel).join('|'),
options: Object.values(LogLevel),
summary: 'Specify level for logging.',
summary: 'Specify logging level.',
})(),
}

Expand Down
60 changes: 11 additions & 49 deletions src/commands/chat/index.ts
Original file line number Diff line number Diff line change
@@ -1,78 +1,40 @@
import * as oclif from '@oclif/core'
import { Odysseus } from '@rpidanny/odysseus/dist/odysseus.js'
import { Container } from 'typedi'

import { BaseCommand } from '../../base.command.js'
import { LLMProvider } from '../../config/schema.js'
import { initChatContainer } from '../../containers/chat.container.js'
import concurrencyFlag from '../../inputs/flags/concurrency.flag.js'
import legacyFlag from '../../inputs/flags/legacy.flag.js'
import llmProviderFlag from '../../inputs/flags/llm-provider.flag.js'
import skipCaptchaFlag from '../../inputs/flags/skip-captcha.flag.js'
import { ChatService } from '../../services/chat/chat.service.js'

export default class Chat extends BaseCommand<typeof Chat> {
service!: ChatService
odysseus!: Odysseus

static summary =
'Chat with Darwin. Can be used to instruct Darwin to do things in natural language.'
static summary = 'Chat with Darwin using natural language.'

static examples = ['<%= config.bin %> <%= command.id %>']

static flags = {
concurrency: oclif.Flags.integer({
char: 'p',
summary: 'The number papers to process in parallel.',
required: false,
default: 10,
}),
logs: oclif.Flags.boolean({
char: 'l',
summary: 'Include application logs along with the chat conversations.',
required: false,
default: false,
}),
'skip-captcha': oclif.Flags.boolean({
char: 's',
summary: 'Skip captcha on paper URLs. Note: Google Scholar captcha still needs to be solved.',
required: false,
default: false,
}),
'legacy-processing': oclif.Flags.boolean({
summary:
'Enable legacy processing of papers that only extracts text from the main URL. The new method attempts to extract text from the source URLs (pdf or html) and falls back to the main URL.',
required: false,
default: false,
}),
'llm-provider': oclif.Flags.custom<LLMProvider>({
summary: 'The LLM provider to use for generating summaries.',
options: Object.values(LLMProvider) as string[],
default: LLMProvider.Ollama,
parse: async (input: string): Promise<LLMProvider> => {
if (Object.values(LLMProvider).includes(input as LLMProvider)) {
return input as LLMProvider
} else {
throw new Error(
`Invalid LLM provider: ${input}. Must be one of ${Object.values(LLMProvider).join(', ')}`,
)
}
},
})(),
concurrency: concurrencyFlag,
'skip-captcha': skipCaptchaFlag,
legacy: legacyFlag,
llm: llmProviderFlag,
}

async init() {
await super.init()

const {
concurrency,
'llm-provider': llmProvider,
'skip-captcha': skipCaptcha,
'legacy-processing': legacyProcessing,
} = this.flags
const { concurrency, llm: llmProvider, 'skip-captcha': skipCaptcha, legacy } = this.flags

initChatContainer(
{
concurrency,
llmProvider,
skipCaptcha,
legacyProcessing,
legacy,
},
this.localConfig,
this.logger,
Expand Down
29 changes: 9 additions & 20 deletions src/commands/download/papers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ import { Container } from 'typedi'

import { BaseCommand } from '../../base.command.js'
import { initDownloadContainer } from '../../containers/download.container.js'
import keywordsArg from '../../inputs/args/keywords.arg.js'
import { FlagChar } from '../../inputs/flags/char.js'
import countFlag from '../../inputs/flags/count.flag.js'
import headlessFlag from '../../inputs/flags/headless.flag.js'
import { PaperDownloadService } from '../../services/download/paper-download.service.js'

export default class DownloadPapers extends BaseCommand<typeof DownloadPapers> {
Expand All @@ -14,36 +18,21 @@ export default class DownloadPapers extends BaseCommand<typeof DownloadPapers> {

static examples = [
'<%= config.bin %> <%= command.id %> --help',
'<%= config.bin %> <%= command.id %> "crispr cas9" -o papers/ -c 100 --log-level debug',
'<%= config.bin %> <%= command.id %> "crispr cas9" --output papers/ --count 100 --log-level debug',
]

static args = {
keywords: oclif.Args.string({
name: 'keywords',
required: true,
description: 'The keywords to search for',
}),
keywords: keywordsArg,
}

static flags = {
count: oclif.Flags.integer({
char: 'c',
summary:
'The minimum number of papers to search for. (When running concurrently, the actual number of papers may be a bit higher)',
required: false,
default: 10,
}),
count: countFlag,
output: oclif.Flags.string({
char: 'o',
char: FlagChar.Output,
summary: 'The path to save the downloaded papers.',
required: true,
}),
headless: oclif.Flags.boolean({
char: 'h',
summary: 'Run the browser in headless mode (no UI).',
required: false,
default: false,
}),
headless: headlessFlag,
}

async init(): Promise<void> {
Expand Down
108 changes: 28 additions & 80 deletions src/commands/search/accession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,17 @@ import { Odysseus } from '@rpidanny/odysseus'
import { Container } from 'typedi'

import { BaseCommand } from '../../base.command.js'
import { LLMProvider } from '../../config/schema.js'
import { initSearchContainer } from '../../containers/search.container.js'
import { AccessionPattern } from '../../services/search/constants.js'
import keywordsArg from '../../inputs/args/keywords.arg.js'
import accessionNumberRegexFlag from '../../inputs/flags/accession-number-regex.flag.js'
import concurrencyFlag from '../../inputs/flags/concurrency.flag.js'
import countFlag from '../../inputs/flags/count.flag.js'
import headlessFlag from '../../inputs/flags/headless.flag.js'
import legacyFlag from '../../inputs/flags/legacy.flag.js'
import llmProviderFlag from '../../inputs/flags/llm-provider.flag.js'
import outputFlag from '../../inputs/flags/output.flag.js'
import skipCaptchaFlag from '../../inputs/flags/skip-captcha.flag.js'
import summaryFlag from '../../inputs/flags/summary.flag.js'
import { PaperSearchService } from '../../services/search/paper-search.service.js'

export default class SearchAccession extends BaseCommand<typeof SearchAccession> {
Expand All @@ -22,78 +30,23 @@ export default class SearchAccession extends BaseCommand<typeof SearchAccession>

static examples = [
'<%= config.bin %> <%= command.id %> --help',
'<%= config.bin %> <%= command.id %> "mocrobiome, nRNA" -o output.csv -n 5 -c 1 --log-level DEBUG',
'<%= config.bin %> <%= command.id %> "mocrobiome, nRNA" --output ./ --count 10 --log-level DEBUG',
]

static args = {
keywords: oclif.Args.string({
name: 'keywords',
required: true,
description: 'The keywords to search for',
}),
keywords: keywordsArg,
}

static flags = {
count: oclif.Flags.integer({
char: 'c',
summary:
'The minimum number of papers to search for. (When running concurrently, the actual number of papers may be a bit higher)',
default: 10,
}),
concurrency: oclif.Flags.integer({
char: 'p',
summary: 'The number papers to process in parallel.',
default: 10,
}),
output: oclif.Flags.string({
char: 'o',
summary:
'Specify the output destination for the CSV file. If a folder path is given, the filename is auto-generated; if a file path is given, it is used directly.',
default: '.',
}),
'accession-number-regex': oclif.Flags.string({
char: 'a',
summary:
'Regex to match accession numbers. Defaults to matching BioProject accession numbers.',
default: AccessionPattern.BioProject,
}),
'skip-captcha': oclif.Flags.boolean({
char: 's',
summary: 'Skip captcha on paper URLs. Note: Google Scholar captcha still needs to be solved.',
default: false,
}),
'legacy-processing': oclif.Flags.boolean({
summary:
'Enable legacy processing of papers that only extracts text from the main URL. The new method attempts to extract text from the source URLs (pdf or html) and falls back to the main URL.',
default: false,
}),
headless: oclif.Flags.boolean({
char: 'h',
summary: 'Run the browser in headless mode (no UI).',
default: false,
}),
'include-summary': oclif.Flags.boolean({
char: 'S',
summary:
'[LLM Required] Include the paper summary in the output CSV file. When enabled, concurrency is set to 1.',
description:
'Summaries are generated using LLM so make sure LLMs are configured by running `darwin config set`',
default: false,
}),
'llm-provider': oclif.Flags.custom<LLMProvider>({
summary: 'The LLM provider to use for generating summaries.',
options: Object.values(LLMProvider) as string[],
default: LLMProvider.Ollama,
parse: async (input: string): Promise<LLMProvider> => {
if (Object.values(LLMProvider).includes(input as LLMProvider)) {
return input as LLMProvider
} else {
throw new Error(
`Invalid LLM provider: ${input}. Must be one of ${Object.values(LLMProvider).join(', ')}`,
)
}
},
})(),
count: countFlag,
concurrency: concurrencyFlag,
output: outputFlag,
'accession-number-regex': accessionNumberRegexFlag,
'skip-captcha': skipCaptchaFlag,
legacy: legacyFlag,
headless: headlessFlag,
summary: summaryFlag,
llm: llmProviderFlag,
}

async init(): Promise<void> {
Expand All @@ -102,20 +55,20 @@ export default class SearchAccession extends BaseCommand<typeof SearchAccession>
const {
headless,
concurrency,
'include-summary': summarize,
'llm-provider': llmProvider,
summary,
llm: llmProvider,
'skip-captcha': skipCaptcha,
'legacy-processing': legacyProcessing,
legacy,
} = this.flags

initSearchContainer(
{
headless,
concurrency,
summarize,
summary,
llmProvider,
skipCaptcha,
legacyProcessing,
legacy,
},
this.localConfig,
this.logger,
Expand All @@ -133,12 +86,7 @@ export default class SearchAccession extends BaseCommand<typeof SearchAccession>
}

public async run(): Promise<void> {
const {
count,
output,
'accession-number-regex': filterPattern,
'include-summary': summarize,
} = this.flags
const { count, output, 'accession-number-regex': filterPattern, summary } = this.flags
const { keywords } = this.args

this.logger.info(`Searching papers with Accession Numbers (${filterPattern}) for: ${keywords}`)
Expand All @@ -147,7 +95,7 @@ export default class SearchAccession extends BaseCommand<typeof SearchAccession>
keywords,
minItemCount: count,
filterPattern,
summarize,
summarize: summary,
})

this.logger.info(`Exported papers list to: ${outputPath}`)
Expand Down
Loading

0 comments on commit 64cea66

Please sign in to comment.