Skip to content

Commit

Permalink
SWEBench agent example (#23)
Browse files Browse the repository at this point in the history
* Add `createRoutingAgent`, refactor state, messages, and routing

* Don't set routing model, and always inject into run if unset

* Allow tool choice of "auto", "any", or forcing of particular tools

* WIP: SWEBench

* Fix `step` type errors

* Make stateful `.withModel()` return a new `Agent`

* nit in readfile tool

* Updates to swebench to add code editing agents

* Add example swebench using tree-sitter to edit code.

---------

Co-authored-by: Jack Williams <[email protected]>
  • Loading branch information
tonyhb and jpwilliams authored Dec 11, 2024
1 parent 4ce0c98 commit 7e65697
Show file tree
Hide file tree
Showing 18 changed files with 1,307 additions and 343 deletions.
8 changes: 4 additions & 4 deletions demo/inngest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ export const inngest = new Inngest({
});

export const fn = inngest.createFunction(
{ id: "agent" },
{ id: "agent", retries: 0, },
{ event: "agent/run" },
async ({ event, step }) => {
const model = openai({ model: "gpt-4", step });

// 1. Single agent

// Run a single agent as a prompt without a network.
await codeWritingAgent.run(event.data.input, {
model,
});
// await codeWritingAgent.run(event.data.input, {
// model,
// });

// 2. A network of agents that works together
const network = createNetwork({
Expand Down
2 changes: 1 addition & 1 deletion eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import tseslint from "typescript-eslint";

export default tseslint.config(
{
ignores: ["dist", "eslint.config.mjs", "demo"],
ignores: ["dist", "eslint.config.mjs", "demo", "examples"],
},
eslint.configs.recommended,
tseslint.configs.recommendedTypeChecked,
Expand Down
2 changes: 2 additions & 0 deletions examples/swebench/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
node_modules
opt/
4 changes: 4 additions & 0 deletions examples/swebench/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# init: create the local ./opt/ working directory and download the
# SWE-bench Lite "dev" split into it as ./opt/dev.parquet.
.PHONY: init
init:
	# -p makes the target re-runnable: plain mkdir fails if ./opt/ already exists.
	mkdir -p ./opt/
	# The URL is quoted so the shell never treats "?" as a glob character.
	wget -O ./opt/dev.parquet "https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite/resolve/main/data/dev-00000-of-00001.parquet?download=true"
6 changes: 6 additions & 0 deletions examples/swebench/agents/setup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { createAgent } from "../../../src";

/**
 * Placeholder "setup" agent.
 *
 * The agent is bound to an exported constant so that other modules can import
 * and run it; a bare `createAgent(...)` expression statement would construct
 * the agent and immediately discard it.
 */
export const setupAgent = createAgent({
  name: "setup",
  system: "This is a system prompt",
});
23 changes: 23 additions & 0 deletions examples/swebench/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import express from "express";
import { serve } from "inngest/express";
import { fn, inngest } from "./inngest";

const port = 3001;
const app = express();

// Incoming POST bodies must be parsed as JSON before they reach the Inngest
// handler; the body size limit is raised to 50mb.
app.use(express.json({ limit: "50mb" }));

// Inngest request handler exposing `fn` through the shared client.
const inngestHandler = serve({
  client: inngest,
  functions: [fn],
});

// Mount the handler on the recommended `/api/inngest` path.
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
app.use("/api/inngest", inngestHandler);

app.listen(port, () => {
  console.log(`App listening on port ${port}`);
});
190 changes: 190 additions & 0 deletions examples/swebench/inngest.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
import { execFileSync, execSync } from "child_process";
import fs from "fs";

import { EventSchemas, Inngest } from "inngest";
import { z } from "zod";

import {
  createAgent,
  createNetwork,
  createTool,
  anthropic,
  State,
} from "../../src/index";
import { extractClassAndFns, listFilesTool, readFileTool, replaceClassMethodTool } from "./tools/tools";

/**
 * Payload schema for the `swebench/run` event.
 *
 * Within this file, `repo` is used to build the local checkout path and the
 * git remote, `base_commit` is the revision that gets fetched, and
 * `problem_statement` is the input handed to the agent network.
 * `environment_setup_commit` is required by the schema but not read here.
 */
const swebenchRunData = z.object({
  repo: z.string(),
  base_commit: z.string(),
  environment_setup_commit: z.string(),
  problem_statement: z.string(),
});

/** Inngest client for this example, with typed events derived from Zod schemas. */
export const inngest = new Inngest({
  id: "agents",
  schemas: new EventSchemas().fromZod({
    "swebench/run": { data: swebenchRunData },
  }),
});

/**
 * Inngest function that works on a single SWE-bench task.
 *
 * Triggered by the `swebench/run` event. It:
 *  1. prepares a git checkout of `event.data.repo` under `./opt/<repo>`,
 *  2. fetches `event.data.base_commit` and hard-resets the checkout to it,
 *  3. runs a two-agent network (planner, then editor) on
 *     `event.data.problem_statement`.
 *
 * Routing is driven by network state: the planner runs until a "plan" key is
 * set, the editor runs while a plan exists, and the network stops once the
 * "done" key is truthy.
 *
 * @throws Error if `repo` or `base_commit` contain characters that are unsafe
 *   to use in a filesystem path or as a git argument.
 */
export const fn = inngest.createFunction(
  { id: "agent", retries: 2 },
  { event: "swebench/run" },
  async ({ event, step }) => {
    const {
      repo,
      base_commit: baseCommit,
      problem_statement: problemStatement,
    } = event.data;

    // Both values come straight from the event payload and end up in a
    // filesystem path and on a git command line, so reject anything that is
    // not a plain "owner/name" slug (no "." / ".." segments, no extra slashes).
    if (!/^[A-Za-z0-9][A-Za-z0-9-]*\/(?!\.{1,2}$)[\w.-]+$/.test(repo)) {
      throw new Error(`Invalid repo "${repo}": expected "owner/name"`);
    }
    // A leading "-" would be parsed by git as an option rather than a revision.
    if (!/^[A-Za-z0-9_][\w./-]*$/.test(baseCommit)) {
      throw new Error(`Invalid base_commit "${baseCommit}"`);
    }

    // This is some basic stuff to initialize and set up the repos
    // for the swebench test.
    //
    // First, we set up the repo, then we ensure we're on the correct base commit.
    const dir = `./opt/${repo}`;

    // Runs git inside the checkout. Arguments are passed as an argv array
    // (no shell), so payload values can never be interpreted as shell syntax.
    const git = (...args: string[]): void => {
      execFileSync("git", args, { cwd: dir });
    };

    await step.run("clone repo", async () => {
      // Look for the .git directory rather than the checkout directory itself:
      // if an earlier attempt created the directory but failed before
      // `git init` finished, a retry must still initialize the repository.
      if (fs.existsSync(`${dir}/.git`)) {
        return;
      }
      console.log("creating repo");
      fs.mkdirSync(dir, { recursive: true });
      git("init");
      // NOTE(review): the host is assumed to be GitHub over SSH — confirm.
      git("remote", "add", "origin", `git@github.com:${repo}.git`);
    });

    await step.run("check out commit", async () => {
      console.log("checking out commit");
      // Shallow-fetch only the requested revision, then move the work tree to it.
      git("fetch", "--depth=1", "origin", baseCommit);
      git("reset", "--hard", "FETCH_HEAD");
    });

    const model = anthropic({
      model: "claude-3-5-haiku-latest",
      max_tokens: 1000,
      step: step as any,
    });

    const state = new State();
    state.kv.set("repo", repo);

    // Bind the model once and reuse the same two instances everywhere:
    // `.withModel()` returns a new agent on every call, so calling it again
    // inside the router would hand back agents that are not the ones
    // registered with the network (and that fail identity comparisons).
    const planner = planningAgent.withModel(model);
    const editor = editingAgent.withModel(model);

    const network = createNetwork({
      agents: [planner, editor],
      defaultModel: model,
      state,
    });

    await network.run(problemStatement, (opts) => {
      if (opts.network.state.kv.get("done")) {
        // We're done editing.
        return;
      }

      // A stored plan means planning is complete; hand over to the editor.
      if (opts.network.state.kv.get("plan") !== undefined) {
        return editor;
      }
      return planner;
    });
  },
);

// Now that the setup has been completed, we can run the agent properly within that repo.
/**
 * Tool the planner calls to submit its plan. The plan is recorded as the
 * result of a "plan created" step (visible in traces) and stored under the
 * "plan" key of the network state.
 */
const createPlanTool = createTool({
  name: "create_plan",
  description: "Describe a formal plan for how to fix the issue, including which files to edit and reasoning.",
  parameters: z.object({
    thoughts: z.string(),
    plan_details: z.string(),
    edits: z.array(
      z.object({
        filename: z.string(),
        idea: z.string(),
        reasoning: z.string(),
      }),
    ),
  }),
  handler: async (plan, opts) => {
    // Store this in the function state for introspection in tracing.
    await opts.step.run("plan created", () => plan);
    opts.network?.state.kv.set("plan", plan);
  },
});

/**
 * The planningAgent investigates the reported issue with read-only tools
 * (listing files, reading files, extracting classes and functions) and must
 * finish by calling `create_plan`.
 */
const planningAgent = createAgent({
  name: "Planner",
  description: "Plans the code to write and which files should be edited",
  tools: [listFilesTool, readFileTool, extractClassAndFns, createPlanTool],

  // The system prompt names the repository stored in network state.
  system: (network) => {
    const repo = network?.state.kv.get("repo");
    return `
You are an expert Python programmer working on a specific project: ${repo}.
You are given an issue reported within the project. You are planning how to fix the issue by investigating the report,
the current code, then devising a "plan" - a spec - to modify code to fix the issue.
Your plan will be worked on and implemented after you create it. You MUST create a plan to
fix the issue. Be thorough. Think step-by-step using available tools.
Techniques you may use to create a plan:
- Read entire files
- Find specific classes and functions within a file
`;
  },
});

/**
 * Tool the editor calls when it has finished. It removes the "plan" key from
 * the network state and sets "done" to true.
 */
const doneTool = createTool({
  name: "done",
  description: "Saves the current project and finishes editing",
  handler: (_input, opts) => {
    opts.network?.state.kv.delete("plan");
    opts.network?.state.kv.set("done", true);
    return "Done editing";
  },
});

/**
 * The editingAgent only becomes available once a plan exists in network
 * state. When it starts, the conversation history is narrowed to this agent's
 * own earlier results, and the plan itself is embedded in the system prompt
 * so the agent can edit the files needed to resolve the input.
 */
const editingAgent = createAgent({
  name: "Editor",
  description: "Edits code by replacing contents in files, or creating new files with new code.",
  tools: [extractClassAndFns, replaceClassMethodTool, readFileTool, doneTool],

  lifecycle: {
    // Enabled only while a plan is present in network state.
    enabled: (opts) => opts.network?.state.kv.get("plan") !== undefined,

    // Called when inference starts. The history is rebuilt from network
    // results so that output produced by other agents (i.e. the planner) is
    // left out; the plan reaches the model through the system prompt instead.
    onStart: ({ agent, prompt, network }) => {
      const results = network?.state.results || [];
      const history = results
        // Keep results produced by this agent only.
        .filter((result) => result.agent === agent)
        // Each result contributes its output followed by its tool calls.
        .flatMap((result) => result.output.concat(result.toolCalls));

      return { prompt, history, stop: false };
    },
  },

  system: (network) => {
    const repo = network?.state.kv.get("repo");
    const plan = JSON.stringify(network?.state.kv.get("plan"));
    return `
You are an expert Python programmer working on a specific project: ${repo}. You have been
given a plan to fix the given issue supplied by the user.
The current plan is:
<plan>
${plan}
</plan>
You MUST:
- Understand the user's request
- Understand the given plan
- Write code using the tools available to fix the issue
Once the files have been edited and you are confident in the updated code, you MUST finish your editing via calling the "done" tool.
`;
  },
});
17 changes: 17 additions & 0 deletions examples/swebench/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "swebench",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"inngest": "^3.27.4",
"tree-sitter": "^0.22.1",
"tree-sitter-python": "^0.23.5"
}
}
Loading

0 comments on commit 7e65697

Please sign in to comment.