Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,15 @@ max_subagents = 10 # optional (1-20)
# # baidu: 百度 AI Search via qianfan.baidubce.com,需 api_key
# # volcengine: 火山引擎 Ark web_search (免费 2 万次/月), 需 api_key
# # 也回退到 VOLCENGINE_API_KEY / VOLCENGINE_ARK_API_KEY / ARK_API_KEY 环境变量
# base_url = "https://search.example/html/" # optional DuckDuckGo-compatible HTML endpoint
# api_key = "YOUR_SEARCH_KEY" # required for tavily, bocha, and baidu; optional for metaso
# # WARNING: treat config.toml like a secret file when
# # storing API keys. Prefer env vars for local smoke tests.
#
# Env-var overrides:
# DEEPSEEK_SEARCH_PROVIDER → search.provider
# DEEPSEEK_SEARCH_API_KEY → search.api_key
# DEEPSEEK_SEARCH_BASE_URL → search.base_url
# METASO_API_KEY → metaso key fallback
# BAIDU_SEARCH_API_KEY → baidu key fallback

Expand Down
53 changes: 53 additions & 0 deletions crates/tui/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,11 @@ pub struct SearchConfig {
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu` | `volcengine`. Default: `duckduckgo`.
#[serde(default)]
pub provider: Option<SearchProvider>,
/// Optional DuckDuckGo-compatible HTML endpoint. When set with the
/// DuckDuckGo provider, `web_search` appends the `q` query parameter to
/// this URL instead of using `https://html.duckduckgo.com/html/`.
#[serde(default)]
pub base_url: Option<String>,
/// API key for Tavily, Bocha, Metaso, Baidu, or Volcengine. Not required for Bing or DuckDuckGo.
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default.
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var.
Expand Down Expand Up @@ -3340,6 +3345,14 @@ fn apply_env_overrides(config: &mut Config) {
.get_or_insert_with(SearchConfig::default)
.api_key = Some(value);
}
if let Ok(value) = std::env::var("DEEPSEEK_SEARCH_BASE_URL")
&& !value.trim().is_empty()
{
config
.search
.get_or_insert_with(SearchConfig::default)
.base_url = Some(value);
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Let-chains (`if let ... && ...`, RFC 2497) were stabilized only in Rust 1.88 and only for the 2024 edition; on older toolchains or earlier editions this will fail to compile. If the project's MSRV or edition does not meet that requirement, please rewrite this using nested if statements or combinators.

    if let Ok(value) = std::env::var("DEEPSEEK_SEARCH_BASE_URL") {
        if !value.trim().is_empty() {
            config
                .search
                .get_or_insert_with(SearchConfig::default)
                .base_url = Some(value);
        }
    }

if let Ok(value) = std::env::var("DEEPSEEK_REQUIREMENTS_PATH") {
config.requirements_path = Some(value);
}
Expand Down Expand Up @@ -4868,6 +4881,25 @@ mod tests {
);
}

// A `[search]` table that sets `base_url` next to the DuckDuckGo provider must
// deserialize with both the provider and the URL intact.
#[test]
fn search_config_preserves_custom_base_url() {
    const EXPECTED_BASE_URL: &str = "https://search.internal.example/html/";
    let raw_toml = r#"
[search]
provider = "duckduckgo"
base_url = "https://search.internal.example/html/"
"#;

    let parsed: Config = toml::from_str(raw_toml).expect("search config");
    let search_table = parsed.search.expect("search table");

    assert_eq!(search_table.provider, Some(SearchProvider::DuckDuckGo));
    assert_eq!(search_table.base_url.as_deref(), Some(EXPECTED_BASE_URL));
}

#[test]
fn explicit_baidu_search_provider_is_preserved() {
let config: Config = toml::from_str(
Expand Down Expand Up @@ -5011,6 +5043,27 @@ mod tests {
);
}

// Verifies that `apply_env_overrides`, starting from `Config::default()`,
// copies `DEEPSEEK_SEARCH_BASE_URL` into `config.search.base_url`.
#[test]
fn apply_env_overrides_sets_search_base_url() {
// Held until the end of the test; presumably serializes tests that mutate
// the process environment — confirm against `lock_test_env`.
let _guard = lock_test_env();
// Remember whatever value was set before so it can be put back afterwards.
let prev = env::var_os("DEEPSEEK_SEARCH_BASE_URL");
// SAFETY (assumed): `_guard` is expected to prevent other tests from
// touching the environment concurrently — TODO confirm.
unsafe {
env::set_var(
"DEEPSEEK_SEARCH_BASE_URL",
"https://search.internal.example/html/",
)
};
let mut config = Config::default();

apply_env_overrides(&mut config);

// Restored before the assertion, so an assertion failure cannot leave the
// override set for later tests.
// SAFETY (assumed): same reasoning as the `set_var` call above.
unsafe { EnvGuard::restore_var("DEEPSEEK_SEARCH_BASE_URL", prev) };
assert_eq!(
config.search.and_then(|search| search.base_url),
Some("https://search.internal.example/html/".to_string())
);
}

#[test]
fn search_provider_resolution_ignores_invalid_env_override() {
let _guard = lock_test_env();
Expand Down
4 changes: 4 additions & 0 deletions crates/tui/src/core/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ pub struct EngineConfig {
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key.
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY`.
pub search_api_key: Option<String>,
/// Optional DuckDuckGo-compatible HTML endpoint override.
pub search_base_url: Option<String>,
/// Per-step DeepSeek API timeout for sub-agent `create_message` requests.
/// Resolved from `[subagents] api_timeout_secs` (clamped to 1..=1800)
/// once at engine construction, then threaded onto every
Expand Down Expand Up @@ -241,6 +243,7 @@ impl Default for EngineConfig {
workshop: None,
search_provider: crate::config::SearchProvider::default(),
search_api_key: None,
search_base_url: None,
subagent_api_timeout: Duration::from_secs(
crate::config::DEFAULT_SUBAGENT_API_TIMEOUT_SECS,
),
Expand Down Expand Up @@ -1711,6 +1714,7 @@ impl Engine {
// Wire search provider config.
ctx.search_provider = self.config.search_provider;
ctx.search_api_key = self.config.search_api_key.clone();
ctx.search_base_url = self.config.search_base_url.clone();

let policy = sandbox_policy_for_mode(mode, &self.session.workspace);
let mut ctx = ctx.with_elevated_sandbox_policy(policy);
Expand Down
3 changes: 3 additions & 0 deletions crates/tui/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5385,6 +5385,7 @@ async fn run_exec_agent(
workshop: config.workshop.clone(),
search_provider: config.search_provider(),
search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()),
search_base_url: config.search.as_ref().and_then(|s| s.base_url.clone()),
tools_always_load: config.tools_always_load(),
tools: config.tools.clone(),
};
Expand Down Expand Up @@ -5956,6 +5957,7 @@ mod doctor_endpoint_tests {
let config = Config {
search: Some(crate::config::SearchConfig {
provider: Some(crate::config::SearchProvider::DuckDuckGo),
base_url: None,
api_key: None,
}),
..Default::default()
Expand Down Expand Up @@ -5995,6 +5997,7 @@ mod doctor_endpoint_tests {
let config = Config {
search: Some(crate::config::SearchConfig {
provider: Some(crate::config::SearchProvider::Bing),
base_url: None,
api_key: None,
}),
..Default::default()
Expand Down
1 change: 1 addition & 0 deletions crates/tui/src/runtime_threads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2026,6 +2026,7 @@ impl RuntimeThreadManager {
workshop: self.config.workshop.clone(),
search_provider: self.config.search_provider(),
search_api_key: self.config.search.as_ref().and_then(|s| s.api_key.clone()),
search_base_url: self.config.search.as_ref().and_then(|s| s.base_url.clone()),
tools_always_load: self.config.tools_always_load(),
tools: self.config.tools.clone(),
};
Expand Down
5 changes: 5 additions & 0 deletions crates/tui/src/tools/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ pub struct ToolContext {
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key.
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY`.
pub search_api_key: Option<String>,
/// Optional DuckDuckGo-compatible HTML endpoint override for `web_search`.
pub search_base_url: Option<String>,

/// Per-session workshop variable store (#548). Holds the raw content of
/// the most recent large-tool routing event so the parent can call
Expand Down Expand Up @@ -210,6 +212,7 @@ impl ToolContext {
large_output_router: None,
search_provider: crate::config::SearchProvider::default(),
search_api_key: None,
search_base_url: None,
workshop_vars: None,
}
}
Expand Down Expand Up @@ -247,6 +250,7 @@ impl ToolContext {
large_output_router: None,
search_provider: crate::config::SearchProvider::default(),
search_api_key: None,
search_base_url: None,
workshop_vars: None,
}
}
Expand Down Expand Up @@ -284,6 +288,7 @@ impl ToolContext {
large_output_router: None,
search_provider: crate::config::SearchProvider::default(),
search_api_key: None,
search_base_url: None,
workshop_vars: None,
}
}
Expand Down
76 changes: 64 additions & 12 deletions crates/tui/src/tools/web_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//!
//! Set `[search]` in config.toml to switch providers:
//! provider = "duckduckgo" # or tavily/bocha/metaso/baidu/volcengine
//! base_url = "https://search.example/html/" # optional DDG-compatible URL
//! api_key = "tvly-..."

use super::spec::{
Expand All @@ -22,7 +23,7 @@ use serde_json::{Value, json};
use std::sync::OnceLock;
use std::time::Duration;

const DUCKDUCKGO_HOST: &str = "html.duckduckgo.com";
const DUCKDUCKGO_ENDPOINT: &str = "https://html.duckduckgo.com/html/";
const BING_HOST: &str = "www.bing.com";
const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search";
Expand Down Expand Up @@ -139,7 +140,7 @@ impl ToolSpec for WebSearchTool {
}

fn description(&self) -> &'static str {
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo with Bing fallback; set `[search] provider = \"bing\" | \"tavily\" | \"bocha\" | \"metaso\" | \"baidu\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo with Bing fallback; set `[search] provider = \"bing\" | \"tavily\" | \"bocha\" | \"metaso\" | \"baidu\"` in config.toml to switch backends, or `[search] base_url` for a DuckDuckGo-compatible endpoint. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
}

fn input_schema(&self) -> Value {
Expand Down Expand Up @@ -261,13 +262,16 @@ impl ToolSpec for WebSearchTool {
}

// Per-domain network policy gate (#135). The "host" for web search is
// the upstream search engine domain — DuckDuckGo first, Bing on
// fallback. We gate DuckDuckGo here; Bing is gated separately inside
// the fallback path so a deny on one engine doesn't block the other.
check_policy(decider, DUCKDUCKGO_HOST)?;
// the upstream search engine domain — DuckDuckGo-compatible first,
// Bing on fallback. We gate the configured endpoint here; Bing is
// gated separately inside the fallback path so a deny on one engine
// doesn't silently allow the other.
let (url, duckduckgo_host) =
duckduckgo_search_url(context.search_base_url.as_deref(), &query)?;
let allow_bing_fallback =
duckduckgo_allows_bing_fallback(context.search_base_url.as_deref());
check_policy(decider, &duckduckgo_host)?;

let encoded = url_encode(&query);
let url = format!("https://html.duckduckgo.com/html/?q={encoded}");
let resp = client
.get(&url)
.header(
Expand Down Expand Up @@ -302,7 +306,7 @@ impl ToolSpec for WebSearchTool {
message_suffix = Some("Bing returned no results; used DuckDuckGo fallback");
}

if results.is_empty() {
if results.is_empty() && allow_bing_fallback {
let duckduckgo_blocked = is_duckduckgo_challenge(&body);
// Bing is a separate host — gate it independently so a deny on
// DuckDuckGo doesn't silently let Bing through (and vice versa).
Expand Down Expand Up @@ -1332,6 +1336,30 @@ fn normalize_bing_url(href: &str) -> String {
href.to_string()
}

/// Builds the request URL for a DuckDuckGo-compatible HTML search.
///
/// `base_url` is the optional configured endpoint override. `None`, an empty
/// string, or a whitespace-only string selects `DUCKDUCKGO_ENDPOINT`. The `q`
/// parameter is appended to whatever query string the endpoint already
/// carries, so existing parameters are preserved.
///
/// Returns `(request_url, host)`; the caller passes `host` to the network
/// policy check before issuing the request.
///
/// # Errors
///
/// Returns an invalid-input `ToolError` when the endpoint cannot be parsed as
/// a URL, does not use the `http` or `https` scheme, or has no host.
fn duckduckgo_search_url(
    base_url: Option<&str>,
    query: &str,
) -> Result<(String, String), ToolError> {
    let endpoint = base_url
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .unwrap_or(DUCKDUCKGO_ENDPOINT);
    let mut url = reqwest::Url::parse(endpoint).map_err(|err| {
        ToolError::invalid_input(format!(
            "Invalid DuckDuckGo-compatible search base_url: {err}"
        ))
    })?;

    // Reject non-HTTP schemes here so a misconfigured endpoint (for example
    // `ftp://...`) is reported as a configuration error instead of surfacing
    // later when the request is attempted.
    if !matches!(url.scheme(), "http" | "https") {
        return Err(ToolError::invalid_input(format!(
            "DuckDuckGo-compatible search base_url must use http or https, got `{}`",
            url.scheme()
        )));
    }

    // Capture the host as an owned string before mutating the URL below.
    let host = url
        .host_str()
        .ok_or_else(|| {
            ToolError::invalid_input("DuckDuckGo-compatible search base_url must include a host")
        })?
        .to_string();

    url.query_pairs_mut().append_pair("q", query);
    Ok((url.to_string(), host))
}
Comment thread
greptile-apps[bot] marked this conversation as resolved.

/// Reports whether the public Bing fallback may be used.
///
/// The fallback is allowed only when no custom endpoint is configured, i.e.
/// `base_url` is `None` or contains nothing but whitespace.
fn duckduckgo_allows_bing_fallback(base_url: Option<&str>) -> bool {
    match base_url {
        Some(configured) => configured.trim().is_empty(),
        None => true,
    }
}
Comment on lines +1375 to +1377

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The Option::is_none_or method was stabilized in Rust 1.82.0. If the project targets or supports older Rust versions (MSRV < 1.82.0), this will cause compilation errors. Using map_or is fully compatible with older Rust versions and is equally idiomatic.

fn duckduckgo_allows_bing_fallback(base_url: Option<&str>) -> bool {
    base_url.map_or(true, |value| value.trim().is_empty())
}


fn normalize_text(text: &str) -> String {
let stripped = strip_html_tags(text);
let decoded = decode_html_entities(&stripped);
Expand Down Expand Up @@ -1435,9 +1463,9 @@ fn extract_query_param(url: &str, key: &str) -> Option<String> {
mod tests {
use super::{
ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, baidu_search_payload,
decode_html_entities, extract_search_query, is_likely_spam_results, normalize_bing_url,
optional_search_max_results, parse_baidu_results, root_domain, sanitize_error_body,
truncate_error_body, volcengine_extract_text,
decode_html_entities, duckduckgo_search_url, extract_search_query, is_likely_spam_results,
normalize_bing_url, optional_search_max_results, parse_baidu_results, root_domain,
sanitize_error_body, truncate_error_body, volcengine_extract_text,
};
use serde_json::json;

Expand Down Expand Up @@ -1969,4 +1997,28 @@ mod tests {
"should not complain about missing API key (built-in default); got `{msg}`"
);
}

// A custom endpoint that already carries a query string must keep it, with
// the search terms appended as an additional `q` parameter.
#[test]
fn duckduckgo_compatible_url_uses_custom_base_url_and_preserves_query() {
    let custom_endpoint = "https://search.internal.example/html/?region=us";

    let built = duckduckgo_search_url(Some(custom_endpoint), "rust async");
    let (request_url, policy_host) = built.expect("custom duckduckgo-compatible url");

    assert_eq!(policy_host, "search.internal.example");
    assert_eq!(
        request_url,
        "https://search.internal.example/html/?region=us&q=rust+async"
    );
}

// Bing fallback stays enabled for an absent or blank `base_url` and is turned
// off as soon as a real custom endpoint is configured.
#[test]
fn custom_duckduckgo_endpoint_disables_public_bing_fallback() {
    let cases = [
        (None, true),
        (Some(" "), true),
        (Some("https://search.internal.example/html/"), false),
    ];

    for (base_url, fallback_expected) in cases {
        assert_eq!(
            super::duckduckgo_allows_bing_fallback(base_url),
            fallback_expected,
            "unexpected fallback decision for base_url {base_url:?}"
        );
    }
}
}
1 change: 1 addition & 0 deletions crates/tui/src/tui/ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig {
workshop: config.workshop.clone(),
search_provider: config.search_provider(),
search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()),
search_base_url: config.search.as_ref().and_then(|s| s.base_url.clone()),
tools_always_load: config.tools_always_load(),
tools: config.tools.clone(),
}
Expand Down
6 changes: 6 additions & 0 deletions docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,11 @@ parseable results. Bing remains selectable for users who explicitly want it,
and Tavily, Bocha, Metaso, or Baidu can be selected when an API-backed provider
is preferred.

For a private/internal search service that serves DuckDuckGo-compatible HTML,
keep `provider = "duckduckgo"` and set `base_url`; CodeWhale appends the `q`
query parameter to that endpoint and applies network policy to its host.
Custom endpoints do not fall back to public Bing.

**Metaso** ([metaso.cn](https://metaso.cn)) has a 100 searches/day free quota;
set `METASO_API_KEY` or `[search] api_key` for a higher quota.

Expand All @@ -830,6 +835,7 @@ only; it does not add a Baidu model provider.
```toml
[search]
provider = "baidu" # duckduckgo | bing | tavily | bocha | metaso | baidu | volcengine
# base_url = "https://search.example/html/" # optional with provider = "duckduckgo"
# api_key = "YOUR_KEY" # required for tavily, bocha, and baidu; optional for metaso
```

Expand Down
Loading