diff --git a/Cargo.lock b/Cargo.lock
index 525eb06..79884d6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5570,7 +5570,7 @@ dependencies = [
 
 [[package]]
 name = "semtools"
-version = "1.4.0"
+version = "1.5.0"
 dependencies = [
  "anyhow",
  "arrow-array",
diff --git a/Cargo.toml b/Cargo.toml
index 01bc995..c00be7d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "semtools"
-version = "1.4.0"
+version = "1.5.0"
 edition = "2024"
 license = "MIT"
 description = "Semantic search and document parsing tools for the command line"
diff --git a/README.md b/README.md
index 5d486ca..534a80f 100644
--- a/README.md
+++ b/README.md
@@ -64,8 +64,14 @@ ask "What are the main findings?" papers/*.txt
 # Combine parsing and search
 parse my_docs/*.pdf | xargs search "API endpoints"
 
+# Ask a question to a set of files
+ask "Some question?" *.txt
+
 # Combine parsing with the ask agent
 parse research_papers/*.pdf | xargs ask "Summarize the key methodologies"
+
+# Ask based on stdin content
+cat README.md | ask "How do I install SemTools?"
 ```
 
 Advanced Usage:
@@ -74,13 +80,13 @@ Advanced Usage:
 # Combine with grep for exact-match pre-filtering and distance thresholding
 parse *.pdf | xargs cat | grep -i "error" | search "network error" --max-distance 0.3
 
-# Pipeline with content search (note the 'cat')
+# Pipeline with content search (note the 'xargs' on search to search files instead of stdin)
 find . -name "*.md" | xargs parse | xargs search "installation"
 
 # Combine with grep for filtering (grep could be before or after parse/search!)
 parse docs/*.pdf | xargs search "API" | grep -A5 "authentication"
 
-# Save search results
+# Save search results from stdin search
 parse report.pdf | xargs cat | search "summary" > results.txt
 ```
 
diff --git a/cli/ask.js b/cli/ask.js
new file mode 100644
index 0000000..502ea6b
--- /dev/null
+++ b/cli/ask.js
@@ -0,0 +1,16 @@
+#!/usr/bin/env node
+const { spawn } = require('node:child_process');
+const { join } = require('node:path');
+const { existsSync } = require('node:fs');
+
+const isWindows = process.platform === 'win32';
+const exe = isWindows ? '.exe' : '';
+const localPath = join(__dirname, '..', 'dist', 'bin', `ask${exe}`);
+
+const bin = existsSync(localPath) ? localPath : `ask${exe}`;
+
+const child = spawn(bin, process.argv.slice(2), { stdio: 'inherit', shell: isWindows });
+child.on('exit', (code, signal) => {
+  if (signal) process.kill(process.pid, signal);
+  process.exit(code ?? 1);
+});
diff --git a/cli/parse.js b/cli/parse.js
index 8bffa44..12350af 100644
--- a/cli/parse.js
+++ b/cli/parse.js
@@ -7,7 +7,7 @@ const isWindows = process.platform === 'win32';
 const exe = isWindows ? '.exe' : '';
 const localPath = join(__dirname, '..', 'dist', 'bin', `parse${exe}`);
 
-const bin = existsSync(localPath) ? localPath : 'parse';
+const bin = existsSync(localPath) ? localPath : `parse${exe}`;
 
 const child = spawn(bin, process.argv.slice(2), { stdio: 'inherit', shell: isWindows });
 child.on('exit', (code, signal) => {
diff --git a/cli/search.js b/cli/search.js
index 0c734ce..16a4cd1 100644
--- a/cli/search.js
+++ b/cli/search.js
@@ -7,7 +7,7 @@ const isWindows = process.platform === 'win32';
 const exe = isWindows ? '.exe' : '';
 const localPath = join(__dirname, '..', 'dist', 'bin', `search${exe}`);
 
-const bin = existsSync(localPath) ? localPath : 'search';
+const bin = existsSync(localPath) ? localPath : `search${exe}`;
 
 const child = spawn(bin, process.argv.slice(2), { stdio: 'inherit', shell: isWindows });
 child.on('exit', (code, signal) => {
diff --git a/package.json b/package.json
index 6e43076..fe5897d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@llamaindex/semtools",
-  "version": "1.4.0",
+  "version": "1.5.0",
   "description": "Semantic search and document parsing tools for the command line (Rust-backed, npm-distributed)",
   "license": "MIT",
   "author": "LlamaIndex",
@@ -15,7 +15,8 @@
   "bin": {
     "parse": "cli/parse.js",
     "search": "cli/search.js",
-    "workspace": "cli/workspace.js"
+    "workspace": "cli/workspace.js",
+    "ask": "cli/ask.js"
   },
   "main": "index.js",
   "scripts": {
diff --git a/src/ask/chat_agent.rs b/src/ask/chat_agent.rs
index 98571c9..9a2df6f 100644
--- a/src/ask/chat_agent.rs
+++ b/src/ask/chat_agent.rs
@@ -9,7 +9,7 @@ use async_openai::{Client, types::chat::CreateChatCompletionRequestArgs};
 use model2vec_rs::model::StaticModel;
 use serde_json::Value;
 
-use crate::ask::system_prompt::SYSTEM_PROMPT;
+use crate::ask::system_prompt::{STDIN_SYSTEM_PROMPT, SYSTEM_PROMPT};
 use crate::ask::tools::{AgentTool, GrepTool, ReadTool, SearchTool};
 use crate::search::SearchConfig;
 
@@ -262,3 +262,59 @@ fn print_tool_summary(response: &str) {
         println!("  → Returned {} lines", response.lines().count());
     }
 }
+
+/// Run an agent with stdin content injected directly (no tools available)
+///
+/// # Arguments
+/// * `stdin_content` - The content from stdin to include in the prompt
+/// * `user_message` - The user's query/message
+/// * `client` - OpenAI API client
+/// * `api_model` - The LLM model to use (e.g., "gpt-4o-mini")
+///
+/// # Returns
+/// The response from the agent as a String
+pub async fn ask_agent_with_stdin(
+    stdin_content: &str,
+    user_message: &str,
+    client: &Client<OpenAIConfig>,
+    api_model: &str,
+) -> Result<String> {
+    // Construct the user message with stdin content
+    let full_message = format!(
+        "<stdin>\n{}\n</stdin>\n\n{}",
+        stdin_content, user_message
+    );
+
+    // Initialize messages with system prompt and user message (no tools)
+    let messages: Vec<ChatCompletionRequestMessage> = vec![
+        ChatCompletionRequestSystemMessageArgs::default()
+            .content(STDIN_SYSTEM_PROMPT)
+            .build()?
+            .into(),
+        ChatCompletionRequestUserMessage::from(full_message.as_str()).into(),
+    ];
+
+    // Create request without tools
+    let request = CreateChatCompletionRequestArgs::default()
+        .model(api_model)
+        .messages(messages)
+        .build()?;
+
+    // Get response from LLM
+    let response_message = client
+        .chat()
+        .create(request)
+        .await?
+        .choices
+        .first()
+        .ok_or_else(|| anyhow::anyhow!("No choices in response"))?
+        .message
+        .clone();
+
+    // Return the content
+    if let Some(content) = response_message.content {
+        Ok(content)
+    } else {
+        Err(anyhow::anyhow!("No content in response"))
+    }
+}
diff --git a/src/ask/responses_agent.rs b/src/ask/responses_agent.rs
index 070c206..a481a64 100644
--- a/src/ask/responses_agent.rs
+++ b/src/ask/responses_agent.rs
@@ -8,7 +8,7 @@ use async_openai::types::responses::{
 use model2vec_rs::model::StaticModel;
 use serde_json::Value;
 
-use crate::ask::system_prompt::SYSTEM_PROMPT;
+use crate::ask::system_prompt::{STDIN_SYSTEM_PROMPT, SYSTEM_PROMPT};
 use crate::ask::tools::{AgentTool, GrepTool, ReadTool, SearchTool};
 use crate::search::SearchConfig;
 
@@ -278,3 +278,46 @@ fn output_item_to_item(output_item: &OutputItem) -> Result {
         )),
     }
 }
+
+/// Run an agent with stdin content injected directly using Responses API (no tools available)
+///
+/// # Arguments
+/// * `stdin_content` - The content from stdin to include in the prompt
+/// * `user_message` - The user's query/message
+/// * `client` - OpenAI API client
+/// * `api_model` - The LLM model to use (e.g., "gpt-4.1")
+///
+/// # Returns
+/// The response from the agent as a String
+pub async fn ask_agent_responses_with_stdin(
+    stdin_content: &str,
+    user_message: &str,
+    client: &Client<OpenAIConfig>,
+    api_model: &str,
+) -> Result<String> {
+    // Construct the user message with stdin content
+    let full_message = format!(
+        "<stdin>\n{}\n</stdin>\n\n{}",
+        stdin_content, user_message
+    );
+
+    // Initialize input items with user message (no tools)
+    let input_items: Vec<InputItem> = vec![InputItem::text_message(Role::User, &full_message)];
+
+    // Create request without tools
+    let request = CreateResponseArgs::default()
+        .max_output_tokens(4096u32)
+        .model(api_model)
+        .input(InputParam::Items(input_items))
+        .instructions(STDIN_SYSTEM_PROMPT)
+        .store(false)
+        .build()?;
+
+    // Get response from LLM
+    let response = client.responses().create(request).await?;
+
+    // Return the text output
+    Ok(response
+        .output_text()
+        .unwrap_or("".to_string()))
+}
diff --git a/src/ask/system_prompt.rs b/src/ask/system_prompt.rs
index 5e5161d..0b63081 100644
--- a/src/ask/system_prompt.rs
+++ b/src/ask/system_prompt.rs
@@ -33,3 +33,5 @@ Graph Convolutional Networks are powerful for node classification [1]. The archi
 [4] papers/graphsage.txt:67,234,891
 
 Remember: Every factual claim needs a citation with a specific file path and line number.";
+
+pub const STDIN_SYSTEM_PROMPT: &str = "You are a helpful assistant. The user has provided you with content via stdin, which will be included in their message. Please analyze and respond to their query based on this content.";
diff --git a/src/bin/ask.rs b/src/bin/ask.rs
index fae6c58..796cfd9 100644
--- a/src/bin/ask.rs
+++ b/src/bin/ask.rs
@@ -3,10 +3,11 @@ use async_openai::Client;
 use async_openai::config::OpenAIConfig;
 use clap::Parser;
 use model2vec_rs::model::StaticModel;
+use std::io::{self, BufRead, IsTerminal};
 
 use semtools::SemtoolsConfig;
-use semtools::ask::chat_agent::ask_agent;
-use semtools::ask::responses_agent::ask_agent_responses;
+use semtools::ask::chat_agent::{ask_agent, ask_agent_with_stdin};
+use semtools::ask::responses_agent::{ask_agent_responses, ask_agent_responses_with_stdin};
 use semtools::config::ApiMode;
 use semtools::search::MODEL_NAME;
 
@@ -41,6 +42,12 @@ struct Args {
     api_mode: Option<ApiMode>,
 }
 
+fn read_from_stdin() -> Result<Vec<String>> {
+    let stdin = io::stdin();
+    let lines: Result<Vec<String>, _> = stdin.lock().lines().collect();
+    Ok(lines?)
+}
+
 #[tokio::main]
 async fn main() -> Result<()> {
     let args = Args::parse();
@@ -91,14 +98,6 @@
         ask_config.api_mode
     };
 
-    // Load embedding model
-    let model = StaticModel::from_pretrained(
-        MODEL_NAME, // "minishlab/potion-multilingual-128M",
-        None, // Optional: Hugging Face API token for private models
-        None, // Optional: bool to override model's default normalization. `None` uses model's config.
-        None, // Optional: subfolder if model files are not at the root of the repo/path
-    )?;
-
     // Create OpenAI client
     let mut openai_config = OpenAIConfig::default().with_api_key(api_key);
     if let Some(url) = base_url {
@@ -106,6 +105,49 @@
     }
     let client = Client::with_config(openai_config);
 
+    // Check if we have stdin input (no files and stdin is not a terminal)
+    if args.files.is_empty() && !io::stdin().is_terminal() {
+        let stdin_lines = read_from_stdin()?;
+        if !stdin_lines.is_empty() {
+            let stdin_content = stdin_lines.join("\n");
+
+            // Run the appropriate agent with stdin content (no tools)
+            let response = match api_mode {
+                ApiMode::Chat => {
+                    ask_agent_with_stdin(&stdin_content, &args.query, &client, &model_name).await?
+                }
+                ApiMode::Responses => {
+                    ask_agent_responses_with_stdin(
+                        &stdin_content,
+                        &args.query,
+                        &client,
+                        &model_name,
+                    )
+                    .await?
+                }
+            };
+
+            println!("\n{}", response);
+            return Ok(());
+        }
+    }
+
+    // If no stdin, we need files to search through
+    if args.files.is_empty() {
+        eprintln!(
+            "Error: No input provided. Either specify files as arguments or pipe input to stdin."
+        );
+        std::process::exit(1);
+    }
+
+    // Load embedding model (only needed for file-based search)
+    let model = StaticModel::from_pretrained(
+        MODEL_NAME, // "minishlab/potion-multilingual-128M",
+        None, // Optional: Hugging Face API token for private models
+        None, // Optional: bool to override model's default normalization. `None` uses model's config.
+        None, // Optional: subfolder if model files are not at the root of the repo/path
+    )?;
+
     // Run the appropriate agent based on API mode
     let response = match api_mode {
         ApiMode::Chat => {