Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat (tools): additional tools in sql toolkit #200

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions examples/agents/data_analyst.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { WatsonXChatLLM } from "bee-agent-framework/adapters/watsonx/chat";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { BeeSystemPrompt } from "bee-agent-framework/agents/bee/prompts";
import { FrameworkError } from "bee-agent-framework/errors";
import { TokenMemory } from "bee-agent-framework/memory/tokenMemory";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";

// Chat LLM backing the agent: the "meta-llama/llama-3-1-8b-instruct" preset,
// with the API key and project id taken from environment variables.
const llm = WatsonXChatLLM.fromPreset("meta-llama/llama-3-1-8b-instruct", {
apiKey: process.env.WATSONX_API_KEY,
projectId: process.env.WATSONX_PROJECT_ID,
parameters: {
// NOTE(review): greedy decoding is presumably chosen for repeatable output — confirm.
decoding_method: "greedy",
// Upper bound on the number of tokens generated per completion.
max_new_tokens: 1500,
},
});

// SQL tool pointed at an Excel workbook.
// The workbook location can be overridden with the DATA_ANALYST_EXCEL_PATH
// environment variable so the example is runnable on machines other than the
// original author's; the previous hard-coded path is kept as the fallback.
const sqlTool = new SQLTool({
  provider: "excel",
  connection: {
    storage:
      process.env.DATA_ANALYST_EXCEL_PATH ??
      "/Users/eto/Downloads/Trial Balance - 2020-2023.xlsx",
  },
});

// const sqlTool = new SQLTool({
// provider: "sqlite",
// examples: [
// {
// question: "Get wild albums",
// query: "SELECT * FROM Album where Title = 'Restless and Wild' LIMIT 1",
// },
// ],
// connection: {
// dialect: "sqlite",
// logging: false,
// storage: await fetch(
// "https://github.com/lerocha/chinook-database/releases/download/v1.4.5/chinook_sqlite.sqlite",
// ).then(async (response) => {
// if (!response.ok) {
// throw new Error("Failed to download Chinook database!");
// }

// const dbPath = path.join(os.tmpdir(), "bee_chinook.sqlite");
// const data = Buffer.from(await response.arrayBuffer());
// await fs.promises.writeFile(dbPath, data);
// return dbPath;
// }),
// },
// });

// Agent wiring: the LLM defined above, a token-based memory built on the same
// LLM, and the SQL tool as the only registered tool.
const agent = new BeeAgent({
llm,
templates: {
// Fork the stock system prompt, keeping its settings but supplying custom
// `defaults.instructions`.
// NOTE(review): `defaults` is replaced wholesale here (old.defaults is not
// spread) — confirm no other default keys are required by the template.
system: BeeSystemPrompt.fork((old) => ({
...old,
defaults: {
// NOTE(review): the text refers to a "FlowPilot tool", but the only tool
// registered below is `sqlTool` — confirm the tool name the model sees.
instructions:
"You are the Bee 🐝 Data Agent! If the user asks about data questions, use the FlowPilot tool, passing the user natural language question as 'question' to the tool. Do not render ascii tables, the user will already see it externally.",
},
})),
},

memory: new TokenMemory({ llm }),
tools: [sqlTool],
});

const reader = createConsoleReader();

try {
  // Interactive loop: every prompt read from the console is run through the
  // agent, intermediate events are echoed, and the final answer is printed.
  for await (const { prompt } of reader) {
    const response = await agent
      .run(
        { prompt },
        {
          execution: {
            maxRetriesPerStep: 5,
            totalMaxRetries: 10,
            maxIterations: 15,
          },
        },
      )
      .observe((emitter) => {
        emitter.on("error", ({ error }) => {
          console.log(`Agent 🤖 : `, FrameworkError.ensure(error).dump());
        });
        emitter.on("retry", () => {
          console.log(`Agent 🤖 : `, "retrying the action...");
        });
        emitter.on("update", ({ update }) => {
          // Tool output that carries a `results` payload is rendered as a table.
          // The substring check is only a heuristic, so parsing is guarded: if
          // the value is not valid JSON we fall back to plain logging instead
          // of throwing from inside the event handler.
          if (update.key === "tool_output" && update.value.includes('"results":')) {
            try {
              console.table(JSON.parse(update.value).results);
              return;
            } catch {
              // Not JSON after all — handled by the generic log below.
            }
          }
          console.log(`Agent (${update.key}) 🤖 : `, update.value);
        });
      });

    console.log(`Agent 🤖 : `, response.result.text);
  }
} catch (error) {
  console.error(FrameworkError.ensure(error).dump());
} finally {
  // Always release the tool's resources, even when the loop fails.
  await sqlTool.destroy();
}
8 changes: 4 additions & 4 deletions examples/agents/sql.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import "dotenv/config.js";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { GroqChatLLM } from "bee-agent-framework/adapters/groq/chat";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import { BeeAgent } from "bee-agent-framework/agents/bee/agent";
import { FrameworkError } from "bee-agent-framework/errors";
import { UnconstrainedMemory } from "bee-agent-framework/memory/unconstrainedMemory";
import { SQLTool } from "bee-agent-framework/tools/database/sql";
import "dotenv/config.js";
import fs from "node:fs";
import * as path from "node:path";
import os from "node:os";
import * as path from "node:path";

const llm = new GroqChatLLM({
modelId: "llama-3.1-70b-versatile",
Expand Down
4 changes: 4 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,15 @@
"@connectrpc/connect-node": "^1.6.1",
"@opentelemetry/api": "^1.9.0",
"@streamparser/json": "^0.0.21",
"@xenova/transformers": "^2.17.2",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",
"bee-proto": "0.0.2",
"csv-parse": "^5.6.0",
"duck-duck-scrape": "^2.2.6",
"fast-xml-parser": "^4.5.0",
"header-generator": "^2.1.56",
"hnswlib-node": "^3.0.0",
"joplin-turndown-plugin-gfm": "^1.0.12",
"js-yaml": "^4.1.0",
"json-schema-to-typescript": "^15.0.2",
Expand All @@ -189,6 +192,7 @@
"string-strip-html": "^13.4.8",
"turndown": "^7.2.0",
"wikipedia": "^2.1.2",
"xlsx": "^0.18.5",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.3"
},
Expand Down
115 changes: 115 additions & 0 deletions src/tools/database/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

import { ToolError } from "@/tools/base.js";
import { Sequelize } from "sequelize";
import { ColumnType } from "./sql.js";

/**
 * Identifiers of the SQL database engines this module distinguishes between.
 * NOTE(review): these look like Sequelize dialect names — confirm.
 */
export type Provider = "mysql" | "mariadb" | "postgres" | "mssql" | "db2" | "sqlite" | "oracle";
/**
 * Every {@link Provider} plus the two file-based sources, "excel" and "csv".
 * NOTE(review): presumably the set of values accepted from tool users — confirm
 * against the SQL tool's options.
 */
export type PublicProvider = Provider | "excel" | "csv";

export interface Metadata {
tableName: string;
Expand Down Expand Up @@ -143,3 +145,116 @@ function getDefaultSchema(provider: Provider): string {
return "";
}
}

/**
 * Looks up sample values for each of the given columns.
 *
 * Every column is queried concurrently via `getSearchResults`; values matching
 * one of `searchValues` are preferred, and remaining slots are filled with
 * other distinct values of the column.
 *
 * @param sequelize - Connection used to run the lookup queries.
 * @param columns - Columns to inspect; each entry's `table` and `name` are used.
 * @param searchValues - Optional terms to look for in each column.
 * @param limit - Maximum number of values returned per column.
 * @returns One entry per input column, in input order, with `values` populated.
 * @throws ToolError (non-retryable) when any lookup fails.
 */
export async function searchColumnValues(
  sequelize: Sequelize,
  columns: ColumnType[],
  searchValues?: string[],
  limit = 30,
): Promise<ColumnType[]> {
  try {
    // Return what the lookups resolve to. Previously the resolved entries were
    // discarded and an always-empty array was returned.
    return await Promise.all(
      columns.map(async (column) => {
        const values = await getSearchResults(
          sequelize,
          column.table,
          column.name,
          searchValues,
          limit,
        );
        // Spread keeps any additional fields the caller put on the column.
        return { ...column, values };
      }),
    );
  } catch (error) {
    throw new ToolError(`Error searching column values: ${error}`, [], {
      isRetryable: false,
    });
  }
}

/**
 * Collects up to `limit` distinct values of one column.
 *
 * Values matching any LIKE pattern derived from `searchValues` come first;
 * if those do not fill the limit, other distinct values of the column are
 * appended.
 *
 * @param sequelize - Connection used to run the queries.
 * @param tableName - Table containing the column.
 * @param columnName - Column whose values are sampled.
 * @param searchValues - Optional terms; each is expanded by `createSearchPatterns`.
 * @param limit - Maximum number of values returned (also used per query).
 * @returns De-duplicated values, search matches first, at most `limit` long.
 */
async function getSearchResults(
  sequelize: Sequelize,
  tableName: string,
  columnName: string,
  searchValues?: string[],
  limit = 10,
): Promise<string[]> {
  // Let the active dialect quote the identifiers instead of hard-coding
  // double quotes: this uses the right quote character per database and
  // neutralizes quote characters embedded in the names.
  const queryInterface = sequelize.getQueryInterface();
  const quotedColumn = queryInterface.quoteIdentifier(columnName, true);
  const quotedTable = queryInterface.quoteIdentifier(tableName, true);

  // Runs one `SELECT DISTINCT` over the column and unwraps the raw rows.
  const selectDistinct = async (
    whereClause: string,
    replacements: Record<string, unknown>,
  ): Promise<string[]> => {
    const [rows] = await sequelize.query(
      `
        SELECT DISTINCT ${quotedColumn}
        FROM ${quotedTable}
        ${whereClause}
        LIMIT :limit
      `,
      { replacements, raw: true },
    );
    return (rows as Record<string, unknown>[]).map((row) => row[columnName] as string);
  };

  let searchResults: string[] = [];
  if (searchValues && searchValues.length > 0) {
    const patterns = searchValues.flatMap((searchValue) => createSearchPatterns(searchValue));
    // Promise.all keeps pattern order, so the result order is deterministic
    // regardless of which query finishes first.
    const matchesPerPattern = await Promise.all(
      patterns.map((pattern) =>
        selectDistinct(`WHERE ${quotedColumn} LIKE :pattern`, { pattern, limit }),
      ),
    );
    // Overlapping patterns return the same value repeatedly; de-duplicate
    // before deciding whether a fallback query is needed.
    searchResults = [...new Set(matchesPerPattern.flat())];
  }

  let distinctResults: string[] = [];
  if (searchResults.length < limit) {
    // Ask for a full `limit` of values: some may duplicate search matches and
    // be dropped below, so requesting only the shortfall could under-fill.
    distinctResults = await selectDistinct("", { limit });
  }

  return deduplicateAndSortResults(searchResults, distinctResults, limit);
}

/**
 * Expands a search term into the SQL `LIKE` patterns tried against a column.
 *
 * Identical patterns are collapsed (for example, the lowercase variant of an
 * already-lowercase term) so that no query is issued twice. The order of the
 * remaining patterns is unchanged.
 *
 * @param searchValue - Raw term to look for.
 * @returns Unique patterns in priority order.
 */
function createSearchPatterns(searchValue: string): string[] {
  const candidates = [
    searchValue, // Exact match
    `%${searchValue}%`, // Contains
    `${searchValue}%`, // Starts with
    `%${searchValue}`, // Ends with
    searchValue.replace(/\s+/g, "%"), // Words separated by wildcards
    searchValue.toLowerCase(), // Lowercase
    searchValue.toUpperCase(), // Uppercase
  ];
  // A Set preserves first-insertion order, so priority order is kept.
  return [...new Set(candidates)];
}

/**
 * Merges search matches with fallback values into one de-duplicated list.
 *
 * Search matches keep their relative order and always precede fallback
 * values; fallback values already present among the matches are dropped.
 *
 * @param searchResults - Values that matched a search pattern.
 * @param distinctResults - Other distinct values used to fill remaining slots.
 * @param limit - Maximum length of the returned list.
 * @returns At most `limit` unique values, search matches first.
 */
function deduplicateAndSortResults(
  searchResults: string[],
  distinctResults: string[],
  limit: number,
): string[] {
  // A Set iterates in first-insertion order, so inserting the search matches
  // before the fallback values already yields "matches first" — no separate
  // sort pass is needed.
  const merged = new Set([...searchResults, ...distinctResults]);
  return [...merged].slice(0, limit);
}
4 changes: 2 additions & 2 deletions src/tools/database/sql.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
* limitations under the License.
*/

import { beforeEach, expect, vi } from "vitest";
import { SQLTool } from "@/tools/database/sql.js";
import { getMetadata } from "@/tools/database/metadata.js";
import { SQLTool } from "@/tools/database/sql.js";
import { Sequelize } from "sequelize";
import { beforeEach, expect, vi } from "vitest";

vi.mock("@/tools/database/metadata.js", () => ({
getMetadata: vi.fn(),
Expand Down
Loading