Skip to content

Commit

Permalink
feat(tools): additional tools in SQL toolkit
Browse files Browse the repository at this point in the history
Signed-off-by: Enrico Toniato <[email protected]>
  • Loading branch information
tonxxd committed Dec 3, 2024
1 parent 226aaf5 commit 064ecbc
Show file tree
Hide file tree
Showing 9 changed files with 11,117 additions and 14,233 deletions.
106 changes: 106 additions & 0 deletions examples/agents/data_analyst.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { BeeSystemPrompt } from "bee-agent-framework/agents/bee/prompts";
import { FrameworkError } from "bee-agent-framework/errors";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";

// Chat LLM handed to the agent and to its memory below: the
// "meta-llama/llama-3-1-8b-instruct" preset of the watsonx adapter.
// The API key and project id are read from environment variables.
const llm = WatsonXChatLLM.fromPreset("meta-llama/llama-3-1-8b-instruct", {
apiKey: process.env.WATSONX_API_KEY,
projectId: process.env.WATSONX_PROJECT_ID,
parameters: {
// Greedy decoding, with generation capped at 1500 new tokens per call.
decoding_method: "greedy",
max_new_tokens: 1500,
},
});

// SQL tool backed by an Excel workbook.
// The workbook location can be overridden with the DATA_ANALYST_XLSX_PATH
// environment variable so that the example is not tied to a single machine;
// the path used originally remains the fallback.
const sqlTool = new SQLTool({
  provider: "excel",
  connection: {
    storage:
      process.env.DATA_ANALYST_XLSX_PATH ?? "/Users/eto/Downloads/Trial Balance - 2020-2023.xlsx",
  },
});

// const sqlTool = new SQLTool({
// provider: "sqlite",
// examples: [
// {
// question: "Get wild albums",
// query: "SELECT * FROM Album where Title = 'Restless and Wild' LIMIT 1",
// },
// ],
// connection: {
// dialect: "sqlite",
// logging: false,
// storage: await fetch(
// "https://github.com/lerocha/chinook-database/releases/download/v1.4.5/chinook_sqlite.sqlite",
// ).then(async (response) => {
// if (!response.ok) {
// throw new Error("Failed to download Chinook database!");
// }

// const dbPath = path.join(os.tmpdir(), "bee_chinook.sqlite");
// const data = Buffer.from(await response.arrayBuffer());
// await fs.promises.writeFile(dbPath, data);
// return dbPath;
// }),
// },
// });

// System prompt for the data agent: a fork of the stock Bee system prompt whose
// `defaults` are replaced by a single `instructions` entry.
// NOTE(review): the instructions name a "FlowPilot" tool, while the only tool
// registered on the agent below is `sqlTool` — confirm the tool name matches.
const dataAgentSystemPrompt = BeeSystemPrompt.fork((base) => {
  const instructions =
    "You are the Bee 🐝 Data Agent! If the user asks about data questions, use the FlowPilot tool, passing the user natural language question as 'question' to the tool. Do not render ascii tables, the user will already see it externally.";

  return { ...base, defaults: { instructions } };
});

// The agent itself: the LLM, the customised system prompt, a TokenMemory built
// on the same LLM, and the SQL tool as its only tool.
const agent = new BeeAgent({
  llm,
  templates: { system: dataAgentSystemPrompt },
  memory: new TokenMemory({ llm }),
  tools: [sqlTool],
});

/**
 * Pulls the `results` payload out of a tool output string.
 *
 * @param rawOutput - Raw text emitted by the agent for a `tool_output` update.
 * @returns The parsed `results` value, or `undefined` when the text does not
 *   mention a `"results":` key, is not valid JSON, or has no top-level `results`.
 */
function extractResults(rawOutput: string): unknown {
  // Cheap pre-check so that ordinary text output is never sent through JSON.parse.
  if (!rawOutput.includes('"results":')) {
    return undefined;
  }

  try {
    const parsed: unknown = JSON.parse(rawOutput);
    if (typeof parsed === "object" && parsed !== null && "results" in parsed) {
      return (parsed as { results: unknown }).results;
    }
  } catch {
    // The output merely looked like JSON; the caller falls back to plain logging.
  }
  return undefined;
}

const reader = createConsoleReader();

try {
  // One agent run per prompt read from the console.
  for await (const { prompt } of reader) {
    const response = await agent
      .run(
        { prompt },
        {
          execution: {
            maxRetriesPerStep: 5,
            totalMaxRetries: 10,
            maxIterations: 15,
          },
        },
      )
      .observe((emitter) => {
        emitter.on("error", ({ error }) => {
          console.log(`Agent 🤖 : `, FrameworkError.ensure(error).dump());
        });
        emitter.on("retry", () => {
          console.log(`Agent 🤖 : `, "retrying the action...");
        });
        emitter.on("update", ({ update }) => {
          // Tool output carrying a `results` payload is rendered as a table;
          // everything else (including unparsable output) is logged verbatim.
          const results =
            update.key === "tool_output" ? extractResults(update.value) : undefined;

          if (results !== undefined) {
            console.table(results);
          } else {
            console.log(`Agent (${update.key}) 🤖 : `, update.value);
          }
        });
      });

    console.log(`Agent 🤖 : `, response.result.text);
  }
} catch (error) {
  console.error(FrameworkError.ensure(error).dump());
} finally {
  // Always release the SQL tool, whether the loop ended normally or with an error.
  await sqlTool.destroy();
}
8 changes: 4 additions & 4 deletions examples/agents/sql.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { FrameworkError } from "bee-agent-framework/errors";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import "dotenv/config.js";
import fs from "node:fs";
import * as path from "node:path";
import os from "node:os";
import * as path from "node:path";

const llm = new GroqChatLLM({
modelId: "llama-3.1-70b-versatile",
Expand Down
4 changes: 4 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,15 @@
"@connectrpc/connect-node": "^1.6.1",
"@opentelemetry/api": "^1.9.0",
"@streamparser/json": "^0.0.21",
"@xenova/transformers": "^2.17.2",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",
"bee-proto": "0.0.2",
"csv-parse": "^5.6.0",
"duck-duck-scrape": "^2.2.6",
"fast-xml-parser": "^4.5.0",
"header-generator": "^2.1.56",
"hnswlib-node": "^3.0.0",
"joplin-turndown-plugin-gfm": "^1.0.12",
"js-yaml": "^4.1.0",
"json-schema-to-typescript": "^15.0.2",
Expand All @@ -189,6 +192,7 @@
"string-strip-html": "^13.4.8",
"turndown": "^7.2.0",
"wikipedia": "^2.1.2",
"xlsx": "^0.18.5",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.3"
},
Expand Down
115 changes: 115 additions & 0 deletions src/tools/database/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

import { ToolError } from "@/tools/base.js";
import { Sequelize } from "sequelize";
import { ColumnType } from "./sql.js";

export type Provider = "mysql" | "mariadb" | "postgres" | "mssql" | "db2" | "sqlite" | "oracle";
export type PublicProvider = Provider | "excel" | "csv";

export interface Metadata {
tableName: string;
Expand Down Expand Up @@ -143,3 +145,116 @@ function getDefaultSchema(provider: Provider): string {
return "";
}
}

/**
 * Looks up sample values for every given column, one lookup per column, all
 * lookups running concurrently.
 *
 * @param sequelize - Connection used to run the lookup queries.
 * @param columns - Columns (table + column name) to inspect.
 * @param searchValues - Optional terms; values matching them are preferred.
 * @param limit - Maximum number of values returned per column.
 * @returns One entry per input column, in input order, carrying the `table`,
 *   the column `name` and the `values` that were found.
 * @throws ToolError (non-retryable) when any lookup fails.
 */
export async function searchColumnValues(
  sequelize: Sequelize,
  columns: ColumnType[],
  searchValues?: string[],
  limit = 30,
): Promise<ColumnType[]> {
  try {
    // The resolved entries ARE the result; previously they were awaited and then
    // dropped, so the function always returned an empty array.
    return await Promise.all(
      columns.map(async ({ table, name }) => ({
        table,
        name,
        values: await getSearchResults(sequelize, table, name, searchValues, limit),
      })),
    );
  } catch (error) {
    throw new ToolError(`Error searching column values: ${error}`, [], {
      isRetryable: false,
    });
  }
}

/**
 * Fetches up to `limit` distinct values of one column, preferring values that
 * match the given search terms and topping up with arbitrary distinct values.
 *
 * NOTE(review): the table and column names are interpolated into the SQL text
 * (only the pattern and the limit are bound parameters), so callers must pass
 * trusted identifiers. Double-quoted identifiers and `LIMIT` are presumably not
 * accepted by every supported provider — confirm against the target dialects.
 *
 * @param sequelize - Connection used to run the queries.
 * @param tableName - Table to read from.
 * @param columnName - Column whose values are collected.
 * @param searchValues - Optional terms expanded into LIKE patterns.
 * @param limit - Maximum number of values to return.
 * @returns Distinct values, search matches first, at most `limit` entries.
 */
async function getSearchResults(
  sequelize: Sequelize,
  tableName: string,
  columnName: string,
  searchValues?: string[],
  limit = 10,
): Promise<string[]> {
  // Extracts the selected column from raw result rows.
  const readColumn = (rows: unknown[]): string[] =>
    rows.map((row) => (row as Record<string, unknown>)[columnName] as string);

  let matches: string[] = [];

  if (searchValues && searchValues.length > 0) {
    // Different terms (or case variants of one term) can expand to the same
    // pattern; query each distinct pattern only once.
    const patterns = [...new Set(searchValues.flatMap((value) => createSearchPatterns(value)))];

    // Promise.all keeps the pattern order, so the outcome does not depend on
    // which query happens to finish first.
    const rowsPerPattern = await Promise.all(
      patterns.map(async (pattern) => {
        const [rows] = await sequelize.query(
          `
      SELECT DISTINCT "${columnName}"
      FROM "${tableName}"
      WHERE "${columnName}" LIKE :pattern
      LIMIT :limit
      `,
          {
            replacements: { pattern, limit },
            raw: true,
          },
        );

        return readColumn(rows);
      }),
    );

    matches = [...new Set(rowsPerPattern.flat())];
  }

  const fillers: string[] = [];
  if (matches.length < limit) {
    // Ask for a full `limit` of rows: some of them may repeat values that were
    // already matched, and the final list is trimmed after de-duplication.
    const [rows] = await sequelize.query(
      `
      SELECT DISTINCT "${columnName}"
      FROM "${tableName}"
      LIMIT :limit
      `,
      {
        replacements: { limit },
        raw: true,
      },
    );

    fillers.push(...readColumn(rows));
  }

  return deduplicateAndSortResults(matches, fillers, limit);
}

/**
 * Expands a search term into the SQL LIKE patterns that are tried for it.
 *
 * Several variants can coincide (for example a lowercase single word equals
 * its own "wildcard-separated" and "lowercase" forms), so duplicates are
 * removed to avoid running the same query more than once. The order of first
 * occurrence is preserved.
 *
 * @param searchValue - Term to search for.
 * @returns Distinct LIKE patterns, most specific first.
 */
function createSearchPatterns(searchValue: string): string[] {
  const candidates = [
    searchValue, // Exact match
    `%${searchValue}%`, // Contains
    `${searchValue}%`, // Starts with
    `%${searchValue}`, // Ends with
    searchValue.replace(/\s+/g, "%"), // Words separated by wildcards
    searchValue.toLowerCase(), // Lowercase
    searchValue.toUpperCase(), // Uppercase
  ];

  return [...new Set(candidates)];
}

/**
 * Merges search matches with filler values into one duplicate-free list.
 *
 * Search matches keep their relative order and come before any value that was
 * only found among the filler values; the list is then cut to `limit` entries.
 *
 * @param searchResults - Values that matched a search pattern.
 * @param distinctResults - Additional distinct values used as filler.
 * @param limit - Maximum number of entries to keep.
 * @returns The merged, de-duplicated and truncated list.
 */
function deduplicateAndSortResults(
  searchResults: string[],
  distinctResults: string[],
  limit: number,
): string[] {
  // A Set remembers first-insertion order, so seeding it with the matches and
  // then adding the fillers yields "matches first" without a separate sort.
  const ordered = new Set<string>(searchResults);
  for (const value of distinctResults) {
    ordered.add(value);
  }

  return Array.from(ordered).slice(0, limit);
}
4 changes: 2 additions & 2 deletions src/tools/database/sql.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
* limitations under the License.
*/

import { beforeEach, expect, vi } from "vitest";
import { SQLTool } from "@/tools/database/sql.js";
import { getMetadata } from "@/tools/database/metadata.js";
import { SQLTool } from "@/tools/database/sql.js";
import { Sequelize } from "sequelize";
import { beforeEach, expect, vi } from "vitest";

vi.mock("@/tools/database/metadata.js", () => ({
getMetadata: vi.fn(),
Expand Down
Loading

0 comments on commit 064ecbc

Please sign in to comment.