From b4771f55832c4b59db3348c394957b9b2eeae592 Mon Sep 17 00:00:00 2001 From: clankpan Date: Mon, 19 Jan 2026 11:39:48 +0900 Subject: [PATCH 1/2] feat: export-all command --- dfx.json | 6 ++-- rust/cli_defs.rs | 19 +++++++++++++ rust/clients/memory.rs | 18 ++++++++++++ rust/commands/export_all.rs | 57 +++++++++++++++++++++++++++++++++++++ rust/commands/mod.rs | 2 ++ 5 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 rust/commands/export_all.rs diff --git a/dfx.json b/dfx.json index 84879cf..2deaa50 100644 --- a/dfx.json +++ b/dfx.json @@ -2,8 +2,8 @@ "canisters": { "launcher": { "type": "custom", - "candid": "https://github.com/ICME-Lab/kinic-cli/releases/download/v0.2.4-beta/service.did", - "wasm" : "https://github.com/ICME-Lab/kinic-cli/releases/download/v0.2.4-beta/launcher.wasm.gz" + "candid": "https://github.com/ICME-Lab/kinic-cli/releases/download/v0.2.5-beta/service.did", + "wasm" : "https://github.com/ICME-Lab/kinic-cli/releases/download/v0.2.5-beta/launcher.wasm.gz" }, "icrc1_ledger_canister": { "type": "custom", @@ -25,4 +25,4 @@ }, "output_env_file": ".env", "version": 1 -} +} \ No newline at end of file diff --git a/rust/cli_defs.rs b/rust/cli_defs.rs index 1774482..4793299 100644 --- a/rust/cli_defs.rs +++ b/rust/cli_defs.rs @@ -69,6 +69,8 @@ pub enum Command { SearchRaw(SearchRawArgs), #[command(about = "Fetch embeddings for a tag from a memory canister")] TaggedEmbeddings(TaggedEmbeddingsArgs), + #[command(about = "Export all embeddings and texts from a memory canister")] + ExportAll(ExportAllArgs), #[command(about = "Manage Kinic CLI configuration")] Config(ConfigArgs), #[command(about = "Update a memory canister instance")] @@ -217,6 +219,23 @@ pub struct TaggedEmbeddingsArgs { pub tag: String, } +#[derive(Args, Debug)] +pub struct ExportAllArgs { + #[arg( + long, + required = true, + help = "Principal of the memory canister to export" + )] + pub memory_id: String, + + #[arg( + long, + value_name = "PATH", + help = "Write JSON output to a file instead of stdout" + )] + pub out: Option, +} + #[derive(Args, Debug)] pub struct ConfigArgs { #[arg( diff --git a/rust/clients/memory.rs b/rust/clients/memory.rs index c945fd8..207532f 100644 --- a/rust/clients/memory.rs +++ b/rust/clients/memory.rs @@ -56,6 +56,21 @@ impl MemoryClient { Ok(results) } + pub async fn export_all(&self) -> Result, String)>> { + let payload = encode_export_all_args()?; + let response = self + .agent + .query(&self.canister_id, "export_all") + .with_arg(payload) + .call() + .await + .context("Failed to call export_all on memory canister")?; + + let results = Decode!(&response, Vec<(u32, Vec, String)>) + .context("Failed to decode export_all response")?; + Ok(results) + } + pub async fn add_new_user(&self, principal: Principal, role: u8) -> Result<()> { let payload = encode_add_user_args(principal, role)?; self.agent @@ -99,6 +114,9 @@ fn encode_add_user_args(principal: Principal, role: u8) -> Result> { fn encode_tagged_embeddings_args(tag: String) -> Result> { Ok(candid::encode_one(tag)?) } +fn encode_export_all_args() -> Result> { + Ok(candid::encode_args(())?) +} fn encode_reset_args(dim: usize) -> Result> { Ok(candid::encode_one(dim)?) } diff --git a/rust/commands/export_all.rs b/rust/commands/export_all.rs new file mode 100644 index 0000000..11a4ffc --- /dev/null +++ b/rust/commands/export_all.rs @@ -0,0 +1,57 @@ +use anyhow::{Context, Result}; +use ic_agent::export::Principal; +use serde_json::{Value, to_string_pretty, value::RawValue}; +use tracing::info; +use std::fs; + +use crate::{cli::ExportAllArgs, clients::memory::MemoryClient}; + +use super::CommandContext; + +pub async fn handle(args: ExportAllArgs, ctx: &CommandContext) -> Result<()> { + let client = build_memory_client(&args.memory_id, ctx).await?; + let entries = client.export_all().await?; + let entry_count = entries.len(); + let exported: Vec = entries + .into_iter() + .map(|(id, embedding, data)| { + let embedding = serde_json::value::to_raw_value(&embedding)?; + Ok(ExportEntry { + id, + embedding, + data: serde_json::from_str(&data).unwrap_or(Value::Null), + }) + }) + .collect::>>()?; + + info!( + canister_id = %client.canister_id(), + entry_count, + "export-all fetched" + ); + + let payload = to_string_pretty(&exported)?; + if let Some(path) = &args.out { + fs::write(path, payload).with_context(|| { + format!("Failed to write export-all output to {}", path.display()) + })?; + println!("Wrote export-all output to {}", path.display()); + } else { + println!("{payload}"); + } + Ok(()) +} + +#[derive(serde::Serialize)] +struct ExportEntry { + id: u32, + embedding: Box, + data: Value, +} + +async fn build_memory_client(id: &str, ctx: &CommandContext) -> Result { + let agent = ctx.agent_factory.build().await?; + let memory = + Principal::from_text(id).context("Failed to parse canister id for export-all command")?; + Ok(MemoryClient::new(agent, memory)) +} diff --git a/rust/commands/mod.rs b/rust/commands/mod.rs index fec93a5..45bb90d 100644 --- a/rust/commands/mod.rs +++ b/rust/commands/mod.rs @@ -7,6 +7,7 @@ pub mod balance; pub mod config; pub mod convert_pdf; pub mod create; +pub mod export_all; pub mod ii_login; pub mod insert; pub mod insert_raw; @@ -34,6 +35,7 @@ pub async fn run_command(command: Command, ctx: CommandContext) -> Result<()> { Command::Search(args) => search::handle(args, &ctx).await, Command::SearchRaw(args) => search_raw::handle(args, &ctx).await, Command::TaggedEmbeddings(args) => tagged_embeddings::handle(args, &ctx).await, + Command::ExportAll(args) => export_all::handle(args, &ctx).await, Command::ConvertPdf(args) => convert_pdf::handle(args).await, Command::Config(args) => config::handle(args, &ctx).await, Command::Update(args) => update::handle(args, &ctx).await, From 932bae06bb8e512000d084e1bae3bc12327f0480 Mon Sep 17 00:00:00 2001 From: clankpan Date: Mon, 19 Jan 2026 11:52:40 +0900 Subject: [PATCH 2/2] feat: python method --- README.md | 22 ++++++++++++++++++++++ docs/cli.md | 13 +++++++++++++ python/kinic_py/__init__.py | 2 ++ python/kinic_py/memories.py | 14 ++++++++++++++ rust/lib.rs | 13 +++++++++++++ rust/python.rs | 9 +++++++++ 6 files changed, 73 insertions(+) diff --git a/README.md b/README.md index b6bbc0a..91b2d9e 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,15 @@ cargo run -- --identity update \ --memory-id ``` +## Export all entries (CLI) + +Export all `(id, embedding, data)` tuples as pretty-printed JSON: +```bash +cargo run -- --identity export-all \ + --memory-id \ + --out ./export.json +``` + ## Check token balance (CLI) Query the ledger for the current identity’s balance (base units): @@ -253,6 +262,18 @@ Search memories with semantic similarity. **Returns:** List of `(score, payload)` tuples sorted by relevance +#### `export_all(memory_id: str) -> List[Tuple[int, List[float], str]]` +Export all stored entries from the memory canister. + +**Returns:** List of `(id, embedding, data_json_string)` tuples + +**Example:** +```python +rows = km.export_all(memory_id) +for entry_id, embedding, data_json in rows: + print(entry_id, len(embedding)) +``` + #### `ask_ai(memory_id: str, query: str, top_k: int | None = None, language: str | None = None) -> Tuple[str, str]` Run the Ask AI flow: search, build an LLM prompt, and return `(prompt, answer)` where `answer` is the `` section from the chat endpoint. @@ -274,6 +295,7 @@ Stateless alternatives available: - `insert_pdf_file(identity, memory_id, tag, path, ic=False)` - `insert_pdf(identity, memory_id, tag, path, ic=False)` - `search_memories(identity, memory_id, query, ic=False)` +- `export_all(identity, memory_id, ic=False)` - `ask_ai(identity, memory_id, query, top_k=None, language=None, ic=False)` - `get_balance(identity, ic=False)` - `update_instance(identity, memory_id, ic=False)` diff --git a/docs/cli.md b/docs/cli.md index a31ec1e..d47b424 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -125,6 +125,16 @@ cargo run -- --identity alice search \ The CLI fetches an embedding for the query and prints the scored matches returned by the memory canister. +### Export all entries + +Export all stored entries as pretty-printed JSON. Use `--out` to write to a file (omit it to print to stdout): + +```bash +cargo run -- --identity alice export-all \ + --memory-id yta6k-5x777-77774-aaaaa-cai \ + --out ./export.json +``` + ### Manage config (add user) Grant a role for a user on a memory canister: @@ -192,6 +202,9 @@ memory_id = km.create("Demo", "Created from Python") km.insert_markdown(memory_id, "notes", "# Hello Kinic!") results = km.search(memory_id, "Hello") +# Export all entries +rows = km.export_all(memory_id) + # Ask AI (returns prompt and the text only) prompt, answer = km.ask_ai(memory_id, "What did we say?", top_k=3, language="en") diff --git a/python/kinic_py/__init__.py b/python/kinic_py/__init__.py index 2557063..eca327c 100644 --- a/python/kinic_py/__init__.py +++ b/python/kinic_py/__init__.py @@ -17,6 +17,7 @@ search_memories, search_raw, tagged_embeddings, + export_all, ask_ai, get_balance, update_instance, @@ -38,6 +39,7 @@ "search_memories", "search_raw", "tagged_embeddings", + "export_all", "ask_ai", "get_balance", "update_instance", diff --git a/python/kinic_py/memories.py b/python/kinic_py/memories.py index 25488d9..508fc1d 100644 --- a/python/kinic_py/memories.py +++ b/python/kinic_py/memories.py @@ -8,6 +8,7 @@ from . import _lib as native ScoreResult = Sequence[Tuple[float, str]] +ExportResult = Sequence[Tuple[int, List[float], str]] class KinicMemories: @@ -68,6 +69,10 @@ def tagged_embeddings(self, memory_id: str, tag: str) -> List[List[float]]: """Fetch all embeddings associated with a tag.""" return tagged_embeddings(self.identity, memory_id, tag, ic=self.ic) + def export_all(self, memory_id: str) -> ExportResult: + """Export all embeddings and data from a memory canister.""" + return export_all(self.identity, memory_id, ic=self.ic) + def ask_ai( self, memory_id: str, @@ -228,6 +233,15 @@ def tagged_embeddings( return native.tagged_embeddings(identity, memory_id, tag, ic=ic) +def export_all( + identity: str, + memory_id: str, + *, + ic: bool | None = None, +) -> ExportResult: + return native.export_all(identity, memory_id, ic=ic) + + def ask_ai( identity: str, memory_id: str, diff --git a/rust/lib.rs b/rust/lib.rs index f0c977f..a3ca964 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -103,6 +103,7 @@ fn _lib(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(search_memories, m)?)?; m.add_function(wrap_pyfunction!(search_memories_raw, m)?)?; m.add_function(wrap_pyfunction!(tagged_embeddings, m)?)?; + m.add_function(wrap_pyfunction!(export_all, m)?)?; m.add_function(wrap_pyfunction!(ask_ai, m)?)?; m.add_function(wrap_pyfunction!(get_balance, m)?)?; m.add_function(wrap_pyfunction!(update_instance, m)?)?; @@ -268,6 +269,18 @@ fn tagged_embeddings( )) } +#[cfg(feature = "python-bindings")] +#[pyfunction] +#[pyo3(signature = (identity, memory_id, ic=None))] +fn export_all(identity: &str, memory_id: &str, ic: Option) -> PyResult, String)>> { + let ic = ic.unwrap_or(false); + block_on_py(python::export_all( + ic, + identity.to_string(), + memory_id.to_string(), + )) +} + #[cfg(feature = "python-bindings")] #[pyfunction] #[pyo3(signature = (identity, memory_id, query, top_k=None, language=None, ic=None))] diff --git a/rust/python.rs b/rust/python.rs index bff9314..426b1de 100644 --- a/rust/python.rs +++ b/rust/python.rs @@ -135,6 +135,15 @@ pub(crate) async fn tagged_embeddings( client.tagged_embeddings(tag).await } +pub(crate) async fn export_all( + use_mainnet: bool, + identity: String, + memory_id: String, +) -> Result, String)>> { + let client = build_memory_client(use_mainnet, identity, memory_id).await?; + client.export_all().await +} + pub(crate) async fn ask_ai( use_mainnet: bool, identity: String,