Merged
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "semtools"
version = "1.4.0"
version = "1.5.0"
edition = "2024"
license = "MIT"
description = "Semantic search and document parsing tools for the command line"
10 changes: 8 additions & 2 deletions README.md
@@ -64,8 +64,14 @@ ask "What are the main findings?" papers/*.txt
# Combine parsing and search
parse my_docs/*.pdf | xargs search "API endpoints"

# Ask a question to a set of files
ask "Some question?" *.txt

# Combine parsing with the ask agent
parse research_papers/*.pdf | xargs ask "Summarize the key methodologies"

# Ask based on stdin content
cat README.md | ask "How do I install SemTools?"
```

Advanced Usage:
@@ -74,13 +80,13 @@ Advanced Usage:
# Combine with grep for exact-match pre-filtering and distance thresholding
parse *.pdf | xargs cat | grep -i "error" | search "network error" --max-distance 0.3

# Pipeline with content search (note the 'cat')
# Pipeline with content search (note: 'xargs' makes search read the listed files instead of stdin)
find . -name "*.md" | xargs parse | xargs search "installation"

# Combine with grep for filtering (grep could be before or after parse/search!)
parse docs/*.pdf | xargs search "API" | grep -A5 "authentication"

# Save search results
# Save results from a stdin-based search
parse report.pdf | xargs cat | search "summary" > results.txt
```

16 changes: 16 additions & 0 deletions cli/ask.js
@@ -0,0 +1,16 @@
#!/usr/bin/env node
const { spawn } = require('node:child_process');
const { join } = require('node:path');
const { existsSync } = require('node:fs');

const isWindows = process.platform === 'win32';
const exe = isWindows ? '.exe' : '';
const localPath = join(__dirname, '..', 'dist', 'bin', `ask${exe}`);

const bin = existsSync(localPath) ? localPath : `ask${exe}`;

const child = spawn(bin, process.argv.slice(2), { stdio: 'inherit', shell: isWindows });
child.on('exit', (code, signal) => {
  if (signal) process.kill(process.pid, signal);
  process.exit(code ?? 1);
});
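
For reference, the wrapper above runs a locally built binary from `dist/bin` when present and otherwise falls back to an `ask` executable on `PATH`. A minimal usage sketch once the package is installed; the install command and the piped file name are illustrative, not part of this diff:

```
npm install -g @llamaindex/semtools

# The new ask bin accepts piped stdin content
cat meeting_notes.md | ask "What decisions were made?"
```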
2 changes: 1 addition & 1 deletion cli/parse.js
@@ -7,7 +7,7 @@ const isWindows = process.platform === 'win32';
const exe = isWindows ? '.exe' : '';
const localPath = join(__dirname, '..', 'dist', 'bin', `parse${exe}`);

const bin = existsSync(localPath) ? localPath : 'parse';
const bin = existsSync(localPath) ? localPath : `parse${exe}`;

const child = spawn(bin, process.argv.slice(2), { stdio: 'inherit', shell: isWindows });
child.on('exit', (code, signal) => {
2 changes: 1 addition & 1 deletion cli/search.js
@@ -7,7 +7,7 @@ const isWindows = process.platform === 'win32';
const exe = isWindows ? '.exe' : '';
const localPath = join(__dirname, '..', 'dist', 'bin', `search${exe}`);

const bin = existsSync(localPath) ? localPath : 'search';
const bin = existsSync(localPath) ? localPath : `search${exe}`;

const child = spawn(bin, process.argv.slice(2), { stdio: 'inherit', shell: isWindows });
child.on('exit', (code, signal) => {
5 changes: 3 additions & 2 deletions package.json
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/semtools",
"version": "1.4.0",
"version": "1.5.0",
"description": "Semantic search and document parsing tools for the command line (Rust-backed, npm-distributed)",
"license": "MIT",
"author": "LlamaIndex",
@@ -15,7 +15,8 @@
"bin": {
"parse": "cli/parse.js",
"search": "cli/search.js",
"workspace": "cli/workspace.js"
"workspace": "cli/workspace.js",
"ask": "cli/ask.js"
},
"main": "index.js",
"scripts": {
58 changes: 57 additions & 1 deletion src/ask/chat_agent.rs
@@ -9,7 +9,7 @@ use async_openai::{Client, types::chat::CreateChatCompletionRequestArgs};
use model2vec_rs::model::StaticModel;
use serde_json::Value;

use crate::ask::system_prompt::SYSTEM_PROMPT;
use crate::ask::system_prompt::{STDIN_SYSTEM_PROMPT, SYSTEM_PROMPT};
use crate::ask::tools::{AgentTool, GrepTool, ReadTool, SearchTool};
use crate::search::SearchConfig;

@@ -262,3 +262,59 @@ fn print_tool_summary(response: &str) {
println!(" → Returned {} lines", response.lines().count());
}
}

/// Run an agent with stdin content injected directly (no tools available)
///
/// # Arguments
/// * `stdin_content` - The content from stdin to include in the prompt
/// * `user_message` - The user's query/message
/// * `client` - OpenAI API client
/// * `api_model` - The LLM model to use (e.g., "gpt-4o-mini")
///
/// # Returns
/// The response from the agent as a String
pub async fn ask_agent_with_stdin(
    stdin_content: &str,
    user_message: &str,
    client: &Client<OpenAIConfig>,
    api_model: &str,
) -> Result<String> {
    // Construct the user message with stdin content
    let full_message = format!(
        "<stdin_content>\n{}\n</stdin_content>\n\n{}",
        stdin_content, user_message
    );

    // Initialize messages with system prompt and user message (no tools)
    let messages: Vec<ChatCompletionRequestMessage> = vec![
        ChatCompletionRequestSystemMessageArgs::default()
            .content(STDIN_SYSTEM_PROMPT)
            .build()?
            .into(),
        ChatCompletionRequestUserMessage::from(full_message.as_str()).into(),
    ];

    // Create request without tools
    let request = CreateChatCompletionRequestArgs::default()
        .model(api_model)
        .messages(messages)
        .build()?;

    // Get response from LLM
    let response_message = client
        .chat()
        .create(request)
        .await?
        .choices
        .first()
        .ok_or_else(|| anyhow::anyhow!("No choices in response"))?
        .message
        .clone();

    // Return the content
    if let Some(content) = response_message.content {
        Ok(content)
    } else {
        Err(anyhow::anyhow!("No content in response"))
    }
}
45 changes: 44 additions & 1 deletion src/ask/responses_agent.rs
@@ -8,7 +8,7 @@ use async_openai::types::responses::{
use model2vec_rs::model::StaticModel;
use serde_json::Value;

use crate::ask::system_prompt::SYSTEM_PROMPT;
use crate::ask::system_prompt::{STDIN_SYSTEM_PROMPT, SYSTEM_PROMPT};
use crate::ask::tools::{AgentTool, GrepTool, ReadTool, SearchTool};
use crate::search::SearchConfig;

@@ -278,3 +278,46 @@ fn output_item_to_item(output_item: &OutputItem) -> Result<Item> {
)),
}
}

/// Run an agent with stdin content injected directly using Responses API (no tools available)
///
/// # Arguments
/// * `stdin_content` - The content from stdin to include in the prompt
/// * `user_message` - The user's query/message
/// * `client` - OpenAI API client
/// * `api_model` - The LLM model to use (e.g., "gpt-4.1")
///
/// # Returns
/// The response from the agent as a String
pub async fn ask_agent_responses_with_stdin(
    stdin_content: &str,
    user_message: &str,
    client: &Client<OpenAIConfig>,
    api_model: &str,
) -> Result<String> {
    // Construct the user message with stdin content
    let full_message = format!(
        "<stdin_content>\n{}\n</stdin_content>\n\n{}",
        stdin_content, user_message
    );

    // Initialize input items with user message (no tools)
    let input_items: Vec<InputItem> = vec![InputItem::text_message(Role::User, &full_message)];

    // Create request without tools
    let request = CreateResponseArgs::default()
        .max_output_tokens(4096u32)
        .model(api_model)
        .input(InputParam::Items(input_items))
        .instructions(STDIN_SYSTEM_PROMPT)
        .store(false)
        .build()?;

    // Get response from LLM
    let response = client.responses().create(request).await?;

    // Return the text output
    Ok(response
        .output_text()
        .unwrap_or("<No response>".to_string()))
}
2 changes: 2 additions & 0 deletions src/ask/system_prompt.rs
@@ -33,3 +33,5 @@ Graph Convolutional Networks are powerful for node classification [1]. The archi
[4] papers/graphsage.txt:67,234,891

Remember: Every factual claim needs a citation with a specific file path and line number.";

pub const STDIN_SYSTEM_PROMPT: &str = "You are a helpful assistant. The user has provided you with content via stdin, which will be included in their message. Please analyze and respond to their query based on this content.";
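
For context, `STDIN_SYSTEM_PROMPT` is used only on the new tool-free stdin path wired up in `src/bin/ask.rs` below; when file arguments are given, the existing tool-using `SYSTEM_PROMPT` agent still handles the query. A rough sketch of the two invocation styles (file names are illustrative):

```
# stdin path: no tools, content is wrapped in <stdin_content> tags
cat notes.txt | ask "Summarize the action items"

# file path: existing agent with search/grep/read tools over the given files
ask "Summarize the action items" notes.txt
```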
62 changes: 52 additions & 10 deletions src/bin/ask.rs
@@ -3,10 +3,11 @@ use async_openai::Client;
use async_openai::config::OpenAIConfig;
use clap::Parser;
use model2vec_rs::model::StaticModel;
use std::io::{self, BufRead, IsTerminal};

use semtools::SemtoolsConfig;
use semtools::ask::chat_agent::ask_agent;
use semtools::ask::responses_agent::ask_agent_responses;
use semtools::ask::chat_agent::{ask_agent, ask_agent_with_stdin};
use semtools::ask::responses_agent::{ask_agent_responses, ask_agent_responses_with_stdin};
use semtools::config::ApiMode;
use semtools::search::MODEL_NAME;

@@ -41,6 +42,12 @@ struct Args {
api_mode: Option<String>,
}

fn read_from_stdin() -> Result<Vec<String>> {
    let stdin = io::stdin();
    let lines: Result<Vec<String>, _> = stdin.lock().lines().collect();
    Ok(lines?)
}

#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
@@ -91,21 +98,56 @@
ask_config.api_mode
};

    // Load embedding model
    let model = StaticModel::from_pretrained(
        MODEL_NAME, // "minishlab/potion-multilingual-128M",
        None,       // Optional: Hugging Face API token for private models
        None,       // Optional: bool to override model's default normalization. `None` uses model's config.
        None,       // Optional: subfolder if model files are not at the root of the repo/path
    )?;

    // Create OpenAI client
    let mut openai_config = OpenAIConfig::default().with_api_key(api_key);
    if let Some(url) = base_url {
        openai_config = openai_config.with_api_base(url);
    }
    let client = Client::with_config(openai_config);

    // Check if we have stdin input (no files and stdin is not a terminal)
    if args.files.is_empty() && !io::stdin().is_terminal() {
        let stdin_lines = read_from_stdin()?;
        if !stdin_lines.is_empty() {
            let stdin_content = stdin_lines.join("\n");

            // Run the appropriate agent with stdin content (no tools)
            let response = match api_mode {
                ApiMode::Chat => {
                    ask_agent_with_stdin(&stdin_content, &args.query, &client, &model_name).await?
                }
                ApiMode::Responses => {
                    ask_agent_responses_with_stdin(
                        &stdin_content,
                        &args.query,
                        &client,
                        &model_name,
                    )
                    .await?
                }
            };

            println!("\n{}", response);
            return Ok(());
        }
    }

    // If no stdin, we need files to search through
    if args.files.is_empty() {
        eprintln!(
            "Error: No input provided. Either specify files as arguments or pipe input to stdin."
        );
        std::process::exit(1);
    }

    // Load embedding model (only needed for file-based search)
    let model = StaticModel::from_pretrained(
        MODEL_NAME, // "minishlab/potion-multilingual-128M",
        None,       // Optional: Hugging Face API token for private models
        None,       // Optional: bool to override model's default normalization. `None` uses model's config.
        None,       // Optional: subfolder if model files are not at the root of the repo/path
    )?;

    // Run the appropriate agent based on API mode
    let response = match api_mode {
        ApiMode::Chat => {