SWEBench agent example (#23)

* Add `createRoutingAgent`, refactor state, messages, and routing
* Don't set routing model, and always inject into run if unset
* Allow tool choice of "auto", "any", or forcing of particular tools
* WIP: SWEBench
* Fix `step` type errors
* Make stateful `.withModel()` return a new `Agent`
* nit in readfile tool
* Updates to swebench to add code editing agents
* Add example swebench using tree-sitter to edit code.

---------

Co-authored-by: Jack Williams <[email protected]>
inngest · Dec 11, 2024 · 7e65697 · 7e65697
1 parent 4ce0c98
commit 7e65697
Show file tree

Hide file tree

Showing 18 changed files with 1,307 additions and 343 deletions.
diff --git a/demo/inngest.ts b/demo/inngest.ts
@@ -22,17 +22,17 @@ export const inngest = new Inngest({
 });
 
 export const fn = inngest.createFunction(
-  { id: "agent" },
+  { id: "agent", retries: 0, },
   { event: "agent/run" },
   async ({ event, step }) => {
     const model = openai({ model: "gpt-4", step });
 
     //  1. Single agent
 
     // Run a single agent as a prompt without a network.
-    await codeWritingAgent.run(event.data.input, {
-      model,
-    });
+    // await codeWritingAgent.run(event.data.input, {
+    //   model,
+    // });
 
     //  2. A network of agents that works together
     const network = createNetwork({

diff --git a/eslint.config.mjs b/eslint.config.mjs
@@ -6,7 +6,7 @@ import tseslint from "typescript-eslint";
 
 export default tseslint.config(
   {
-    ignores: ["dist", "eslint.config.mjs", "demo"],
+    ignores: ["dist", "eslint.config.mjs", "demo", "examples"],
   },
   eslint.configs.recommended,
   tseslint.configs.recommendedTypeChecked,

diff --git a/examples/swebench/.gitignore b/examples/swebench/.gitignore
@@ -0,0 +1,2 @@
+node_modules
+opt/
diff --git a/examples/swebench/Makefile b/examples/swebench/Makefile
@@ -0,0 +1,4 @@
+# init: create ./opt/ (if it is missing) and download the dev parquet file of the
+# SWE-bench_Lite dataset into it.
+# `mkdir -p` keeps the target re-runnable when ./opt/ already exists, and the URL is
+# quoted so the shell never treats the `?` in the query string as a glob character.
+.PHONY: init
+init:
+	mkdir -p ./opt/
+	wget -O ./opt/dev.parquet "https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite/resolve/main/data/dev-00000-of-00001.parquet?download=true"
diff --git a/examples/swebench/agents/setup.ts b/examples/swebench/agents/setup.ts
@@ -0,0 +1,6 @@
+import { createAgent } from "../../../src";
+
+/**
+ * The "setup" agent.
+ *
+ * Exported under a name so other modules can import it: an agent that is created
+ * without being assigned or exported cannot be reached by any caller.
+ *
+ * NOTE(review): the system prompt looks like a stub — presumably to be replaced
+ * before this agent is used for real work.
+ */
+export const setupAgent = createAgent({
+  name: "setup",
+  system: "This is a system prompt",
+});
diff --git a/examples/swebench/index.ts b/examples/swebench/index.ts
@@ -0,0 +1,23 @@
+import express from "express";
+import { serve } from "inngest/express";
+import { fn, inngest } from "./inngest";
+
+// Port the example server listens on.
+const port = 3001;
+const app = express();
+
+// JSON middleware has to be registered so that incoming JSON POST payloads are parsed.
+// The body size limit is raised to 50mb.
+const jsonBodyParser = express.json({ limit: '50mb' });
+app.use(jsonBodyParser);
+
+// Inngest handler exposing the example function.
+const inngestHandler = serve({ client: inngest, functions: [fn] });
+
+// Mount the handler on the recommended path, `/api/inngest`.
+// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+app.use("/api/inngest", inngestHandler);
+
+// Log once the server is accepting connections.
+const announceReady = () => {
+  console.log(`App listening on port ${port}`);
+};
+app.listen(port, announceReady);
diff --git a/examples/swebench/inngest.ts b/examples/swebench/inngest.ts
@@ -0,0 +1,190 @@
+/* eslint-disable @typescript-eslint/no-unused-vars */
+import { execFileSync, execSync } from "child_process";
+import fs from "fs";
+import { EventSchemas, Inngest } from "inngest";
+import { z } from "zod";
+import {
+  createAgent,
+  createNetwork,
+  createTool,
+  anthropic,
+  State,
+} from "../../src/index";
+import { extractClassAndFns, listFilesTool, readFileTool, replaceClassMethodTool } from "./tools/tools";
+
+/**
+ * Shape of a `swebench/run` event: four string fields describing one task.
+ * NOTE(review): the field names presumably mirror the SWE-bench dataset columns — confirm.
+ */
+const swebenchRunEvent = {
+  data: z.object({
+    repo: z.string(),
+    base_commit: z.string(),
+    environment_setup_commit: z.string(),
+    problem_statement: z.string(),
+  }),
+};
+
+/** Inngest client for this example, with the `swebench/run` event schema registered. */
+export const inngest = new Inngest({
+  id: "agents",
+  schemas: new EventSchemas().fromZod({
+    "swebench/run": swebenchRunEvent,
+  }),
+});
+
+/**
+ * Inngest function triggered by `swebench/run` events.
+ *
+ * It prepares a git checkout of `event.data.repo` at `event.data.base_commit` under `./opt/`,
+ * then runs a two-agent network (Planner first, Editor once a plan exists) against
+ * `event.data.problem_statement` until the Editor marks the work as done.
+ */
+export const fn = inngest.createFunction(
+  { id: "agent", retries: 2 },
+  { event: "swebench/run" },
+  async ({ event, step }) => {
+    // This is some basic stuff to initialize and set up the repos
+    // for the swebench test.
+    //
+    // First, we clone the repo, then we ensure we're on the correct base commit.
+    const dir = `./opt/${event.data.repo}`;
+
+    // Run git directly (no shell) with the checkout as working directory.  Values taken from the
+    // event are passed as plain arguments, so they can never be interpreted as shell syntax.
+    const git = (...args: string[]) => execFileSync("git", args, { cwd: dir });
+
+    await step.run("clone repo", async () => {
+      // Look for the repository metadata rather than just the directory: a directory left behind
+      // without `.git` (e.g. `git init` failed after the mkdir) must still be initialised,
+      // otherwise later git commands would act on whatever enclosing repository contains ./opt/.
+      if (fs.existsSync(`${dir}/.git`)) {
+        return;
+      }
+      console.log("creating repo");
+      fs.mkdirSync(dir, { recursive: true });
+      git("init");
+      git("remote", "add", "origin", `git@github.com:${event.data.repo}.git`);
+    });
+
+    await step.run("check out commit", async () => {
+      console.log("checking out commit");
+      // Fetch only the requested commit (shallow), then move the working tree onto it.
+      git("fetch", "--depth=1", "origin", event.data.base_commit);
+      git("reset", "--hard", "FETCH_HEAD");
+    });
+
+    const model = anthropic({
+      model: "claude-3-5-haiku-latest",
+      max_tokens: 1000,
+      // NOTE(review): the cast presumably papers over a type mismatch between Inngest's step
+      // tooling and what `anthropic` expects — confirm and remove once the types line up.
+      step: step as any,
+    });
+
+    const state = new State();
+    state.kv.set("repo", event.data.repo);
+
+    // Bind the model once and reuse the resulting agents everywhere.  `.withModel()` returns a new
+    // agent, so calling it again inside the router would hand back instances different from the
+    // ones registered with the network — and the Editor's `onStart` hook selects its history by
+    // agent identity.
+    const planner = planningAgent.withModel(model);
+    const editor = editingAgent.withModel(model);
+
+    const network = createNetwork({
+      agents: [planner, editor],
+      defaultModel: model,
+      state,
+    });
+
+    await network.run(event.data.problem_statement, (opts) => {
+      if (opts.network.state.kv.get("done")) {
+        // We're done editing: returning no agent stops the run.
+        return undefined;
+      }
+
+      // Once a plan has been stored, hand over to the Editor; until then keep planning.
+      if (opts.network.state.kv.get("plan") !== undefined) {
+        return editor;
+      }
+      return planner;
+    });
+  },
+);
+
+// Arguments of the `create_plan` tool: free-form thoughts, the plan details, and one entry per
+// file that should be edited.
+const planSchema = z.object({
+  thoughts: z.string(),
+  plan_details: z.string(),
+  edits: z.array(
+    z.object({
+      filename: z.string(),
+      idea: z.string(),
+      reasoning: z.string(),
+    }),
+  ),
+});
+
+/** Tool the Planner calls to submit its plan; the plan is stored in network state under "plan". */
+const createPlanTool = createTool({
+  name: "create_plan",
+  description: "Describe a formal plan for how to fix the issue, including which files to edit and reasoning.",
+  parameters: planSchema,
+
+  handler: async (plan, opts) => {
+    // Record the plan as a step result so it can be inspected when tracing the function run.
+    await opts.step.run("plan created", () => plan);
+    // Publish the plan in network state under the "plan" key.
+    opts.network?.state.kv.set("plan", plan);
+  },
+});
+
+/**
+ * Planner agent: investigates the reported issue with the file-listing, file-reading and
+ * class/function extraction tools, then submits a plan through `create_plan`.
+ */
+const planningAgent = createAgent({
+  name: "Planner",
+  description: "Plans the code to write and which files should be edited",
+  tools: [listFilesTool, readFileTool, extractClassAndFns, createPlanTool],
+
+  // The system prompt names the repository stored in network state under "repo".
+  system: (network) => {
+    const repo = network?.state.kv.get("repo");
+    return `
+    You are an expert Python programmer working on a specific project: ${repo}.
+
+    You are given an issue reported within the project.  You are planning how to fix the issue by investigating the report,
+    the current code, then devising a "plan" - a spec - to modify code to fix the issue.
+
+    Your plan will be worked on and implemented after you create it.   You MUST create a plan to
+    fix the issue.  Be thorough. Think step-by-step using available tools.
+
+    Techniques you may use to create a plan:
+    - Read entire files
+    - Find specific classes and functions within a file
+  `;
+  },
+});
+
+/** Tool the Editor calls when it has finished: clears the stored plan and sets the "done" flag. */
+const doneTool = createTool({
+  name: "done",
+  description: "Saves the current project and finishes editing",
+  handler: (_input, opts) => {
+    const { network } = opts;
+    network?.state.kv.delete("plan");
+    network?.state.kv.set("done", true);
+    return "Done editing";
+  },
+});
+
+/**
+ * Editor agent.  It is enabled only while a plan is present in network state, embeds that plan
+ * in its system prompt, and restricts its conversation history to its own earlier outputs and
+ * tool calls.
+ */
+const editingAgent = createAgent({
+  name: "Editor",
+  description: "Edits code by replacing contents in files, or creating new files with new code.",
+  tools: [extractClassAndFns, replaceClassMethodTool, readFileTool, doneTool],
+
+  lifecycle: {
+    // Only enabled once a plan has been stored in network state.
+    enabled: (opts) => opts.network?.state.kv.get("plan") !== undefined,
+
+    // Called when inference starts.  The history is rebuilt from the results produced by this
+    // agent alone, which drops everything the planning agent contributed.
+    onStart: ({ agent, prompt, network }) => {
+      const results = network?.state.results || [];
+      const history = results
+        .filter((result) => result.agent === agent)
+        // Keep each result's output followed by its tool calls, flattened into one list.
+        .flatMap((result) => result.output.concat(result.toolCalls));
+
+      return { prompt, history, stop: false };
+    },
+  },
+
+  // The system prompt embeds the repository name and the JSON-serialised plan from network state.
+  system: (network) => {
+    const repo = network?.state.kv.get("repo");
+    const plan = JSON.stringify(network?.state.kv.get("plan"));
+    return `
+    You are an expert Python programmer working on a specific project: ${repo}.  You have been
+    given a plan to fix the given issue supplied by the user.
+
+    The current plan is:
+    <plan>
+      ${plan}
+    </plan>
+
+    You MUST:
+      - Understand the user's request
+      - Understand the given plan
+      - Write code using the tools available to fix the issue
+
+    Once the files have been edited and you are confident in the updated code, you MUST finish your editing via calling the "done" tool.
+  `;
+  },
+});
diff --git a/examples/swebench/package.json b/examples/swebench/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "swebench",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "inngest": "^3.27.4",
+    "tree-sitter": "^0.22.1",
+    "tree-sitter-python": "^0.23.5"
+  }
+}