Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 42 additions & 26 deletions src/app/api/generate/providers/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@ export const MODEL_MAP: Record<ModelType, string> = {
"nano-banana-2": "gemini-3.1-flash-image-preview",
};

/**
 * Convert a base64 image (data URL or raw base64 string) to the Gemini
 * `inlineData` part format.
 *
 * Parsing is kept consistent with `imageDataToInlinePart` in
 * src/app/api/llm/route.ts: a single anchored regex extracts the MIME type
 * and payload from a `data:<mime>;base64,<data>` URL. Anything that does not
 * match is treated as raw base64 and assumed to be PNG.
 *
 * @param requestId - request identifier used for log correlation
 * @param image - data URL (`data:image/png;base64,...`) or raw base64 string
 * @param label - human-readable label for the image in log output
 * @returns a Gemini request part with `inlineData.mimeType` and `inlineData.data`
 */
function imageToInlineData(
  requestId: string,
  image: string,
  label: string
): { inlineData: { mimeType: string; data: string } } {
  // Anchored match avoids mis-parsing strings that merely contain "base64,"
  // somewhere without a proper data-URL header.
  const matches = image.match(/^data:([^;]+);base64,(.+)$/);
  if (matches) {
    const mimeType = matches[1];
    const data = matches[2];
    console.log(`[API:${requestId}] Image ${label}: ${mimeType}, ${(data.length / 1024).toFixed(1)}KB`);
    return { inlineData: { mimeType, data } };
  }
  // Fallback: assume raw base64 PNG when there is no data-URL prefix.
  console.log(`[API:${requestId}] Image ${label}: raw, ${(image.length / 1024).toFixed(1)}KB`);
  return { inlineData: { mimeType: "image/png", data: image } };
}

/**
* Generate image using Gemini API (legacy/default path)
*/
Expand All @@ -30,37 +49,34 @@ export async function generateWithGemini(
aspectRatio?: string,
resolution?: string,
useGoogleSearch?: boolean,
useImageSearch?: boolean
useImageSearch?: boolean,
multimodalParts?: Array<{ type: string; value: string; name?: string }>
): Promise<NextResponse<GenerateResponse>> {
console.log(`[API:${requestId}] Gemini generation - Model: ${model}, Images: ${images?.length || 0}, Prompt: ${prompt?.length || 0} chars`);

// Extract base64 data and MIME types from data URLs
const imageData = (images || []).map((image, idx) => {
if (image.includes("base64,")) {
const [header, data] = image.split("base64,");
// Extract MIME type from header (e.g., "data:image/png;" -> "image/png")
const mimeMatch = header.match(/data:([^;]+)/);
const mimeType = mimeMatch ? mimeMatch[1] : "image/png";
console.log(`[API:${requestId}] Image ${idx + 1}: ${mimeType}, ${(data.length / 1024).toFixed(1)}KB`);
return { data, mimeType };
}
console.log(`[API:${requestId}] Image ${idx + 1}: raw, ${(image.length / 1024).toFixed(1)}KB`);
return { data: image, mimeType: "image/png" };
});
console.log(`[API:${requestId}] Gemini generation - Model: ${model}, Images: ${images?.length || 0}, Prompt: ${prompt?.length || 0} chars, Parts: ${multimodalParts?.length || 0}`);

// Initialize Gemini client
const ai = new GoogleGenAI({ apiKey });

// Build request parts array with prompt and all images
const requestParts: Array<{ text: string } | { inlineData: { mimeType: string; data: string } }> = [
{ text: prompt },
...imageData.map(({ data, mimeType }) => ({
inlineData: {
mimeType,
data,
},
})),
];
// Build request parts array — use multimodal parts if provided, otherwise legacy prompt+images
type GeminiPart = { text: string } | { inlineData: { mimeType: string; data: string } };
let requestParts: GeminiPart[];

if (multimodalParts && multimodalParts.length > 0) {
// Build interleaved multimodal request from image variable parts
requestParts = multimodalParts.map((part) => {
if (part.type === "image" && part.value) {
return imageToInlineData(requestId, part.value, part.name || "var");
}
return { text: part.value };
});
} else {
// Legacy: prompt text + all images appended
const imageData = (images || []).map((image, idx) => imageToInlineData(requestId, image, `${idx + 1}`));
requestParts = [
{ text: prompt },
...imageData,
];
}

// Build config object based on model capabilities
const config: Record<string, unknown> = {
Expand Down
6 changes: 4 additions & 2 deletions src/app/api/generate/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ export async function POST(request: NextRequest) {
parameters,
dynamicInputs,
mediaType,
} = body;
parts,
} = body as MultiProviderGenerateRequest & { parts?: Array<{ type: string; value: string; name?: string }> };

// Prompt is required unless:
// - Provided via dynamicInputs
Expand Down Expand Up @@ -524,7 +525,8 @@ export async function POST(request: NextRequest) {
aspectRatio,
resolution,
useGoogleSearch,
useImageSearch
useImageSearch,
parts as Array<{ type: string; value: string; name?: string }> | undefined
);
} catch (error) {
// Extract error information
Expand Down
56 changes: 30 additions & 26 deletions src/app/api/llm/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ import { logger } from "@/utils/logger";

export const maxDuration = 60; // 1 minute timeout

/**
 * Convert a base64 image string into the Gemini `inlineData` part shape.
 * Accepts either a full data URL (`data:<mime>;base64,<payload>`) or a raw
 * base64 string; raw input is assumed to be PNG.
 */
function imageDataToInlinePart(img: string): { inlineData: { mimeType: string; data: string } } {
  const dataUrlPattern = /^data:(.+?);base64,(.+)$/;
  const parsed = dataUrlPattern.exec(img);
  if (parsed === null) {
    // No data-URL header present — fall back to raw base64, assume PNG.
    return { inlineData: { mimeType: "image/png", data: img } };
  }
  const [, mimeType, data] = parsed;
  return { inlineData: { mimeType, data } };
}

// Generate a unique request ID for tracking
function generateRequestId(): string {
return `llm-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
Expand Down Expand Up @@ -36,7 +45,8 @@ async function generateWithGoogle(
maxTokens: number,
images?: string[],
requestId?: string,
userApiKey?: string | null
userApiKey?: string | null,
parts?: Array<{ type: string; value: string; name?: string }>
): Promise<string> {
// User-provided key takes precedence over env variable
const apiKey = userApiKey || process.env.GEMINI_API_KEY;
Expand All @@ -55,31 +65,24 @@ async function generateWithGoogle(
maxTokens,
imageCount: images?.length || 0,
promptLength: prompt.length,
partsCount: parts?.length || 0,
});

// Build multimodal content if images are provided
let contents: string | Array<{ inlineData: { mimeType: string; data: string } } | { text: string }>;
if (images && images.length > 0) {
// Build multimodal content
type GeminiPart = { inlineData: { mimeType: string; data: string } } | { text: string };
let contents: string | GeminiPart[];

if (parts && parts.length > 0) {
// Interleaved multimodal parts from image variable resolution
contents = parts.map((part): GeminiPart => {
if (part.type === "image" && part.value) {
return imageDataToInlinePart(part.value);
}
return { text: part.value };
});
} else if (images && images.length > 0) {
contents = [
...images.map((img) => {
// Extract base64 data and mime type from data URL
const matches = img.match(/^data:(.+?);base64,(.+)$/);
if (matches) {
return {
inlineData: {
mimeType: matches[1],
data: matches[2],
},
};
}
// Fallback: assume PNG if no data URL prefix
return {
inlineData: {
mimeType: "image/png",
data: img,
},
};
}),
...images.map((img) => imageDataToInlinePart(img)),
{ text: prompt },
];
} else {
Expand Down Expand Up @@ -298,14 +301,15 @@ export async function POST(request: NextRequest) {
const openaiApiKey = request.headers.get("X-OpenAI-API-Key");
const anthropicApiKey = request.headers.get("X-Anthropic-API-Key");

const body: LLMGenerateRequest = await request.json();
const body = await request.json() as LLMGenerateRequest & { parts?: Array<{ type: string; value: string; name?: string }> };
const {
prompt,
images,
provider,
model,
temperature = 0.7,
maxTokens = 1024
maxTokens = 1024,
parts,
} = body;

logger.info('api.llm', 'LLM generation request received', {
Expand All @@ -330,7 +334,7 @@ export async function POST(request: NextRequest) {
let text: string;

if (provider === "google") {
text = await generateWithGoogle(prompt, model, temperature, maxTokens, images, requestId, geminiApiKey);
text = await generateWithGoogle(prompt, model, temperature, maxTokens, images, requestId, geminiApiKey, parts);
} else if (provider === "openai") {
text = await generateWithOpenAI(prompt, model, temperature, maxTokens, images, requestId, openaiApiKey);
} else if (provider === "anthropic") {
Expand Down
9 changes: 9 additions & 0 deletions src/components/ConnectionDropMenu.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ const IMAGE_TARGET_OPTIONS: MenuOption[] = [
</svg>
),
},
{
type: "promptConstructor",
label: "Prompt Constructor",
icon: (
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M14.25 6.087c0-.355.186-.676.401-.959.221-.29.349-.634.349-1.003 0-1.036-1.007-1.875-2.25-1.875s-2.25.84-2.25 1.875c0 .369.128.713.349 1.003.215.283.401.604.401.959v0a.64.64 0 01-.657.643 48.39 48.39 0 01-4.163-.3c.186 1.613.293 3.25.315 4.907a.656.656 0 01-.658.663v0c-.355 0-.676-.186-.959-.401a1.647 1.647 0 00-1.003-.349c-1.036 0-1.875 1.007-1.875 2.25s.84 2.25 1.875 2.25c.369 0 .713-.128 1.003-.349.283-.215.604-.401.959-.401v0c.31 0 .555.26.532.57a48.039 48.039 0 01-.642 5.056c1.518.19 3.058.309 4.616.354a.64.64 0 00.657-.643v0c0-.355-.186-.676-.401-.959a1.647 1.647 0 01-.349-1.003c0-1.035 1.008-1.875 2.25-1.875 1.243 0 2.25.84 2.25 1.875 0 .369-.128.713-.349 1.003-.215.283-.4.604-.4.959v0c0 .333.277.599.61.58a48.1 48.1 0 005.427-.63 48.05 48.05 0 00.582-4.717.532.532 0 00-.533-.57v0c-.355 0-.676.186-.959.401-.29.221-.634.349-1.003.349-1.035 0-1.875-1.007-1.875-2.25s.84-2.25 1.875-2.25c.37 0 .713.128 1.003.349.283.215.604.401.96.401v0a.656.656 0 00.658-.663 48.422 48.422 0 00-.37-5.36c-1.886.342-3.81.574-5.766.689a.578.578 0 01-.61-.58v0z" />
</svg>
),
},
Comment on lines +121 to +129
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Image→Prompt Constructor menu path currently creates an unconnected node

Line 112 exposes promptConstructor in the image-target menu, but handleMenuSelect (in src/components/WorkflowCanvas.tsx) does not map handleType === "image" + nodeType === "promptConstructor" to a target handle. The node is created, but the dropped connection is not created.

💡 Suggested fix (in src/components/WorkflowCanvas.tsx)
-      } else if (handleType === "image") {
+      } else if (handleType === "image") {
         if (nodeType === "annotation" || nodeType === "output" || nodeType === "splitGrid" || nodeType === "outputGallery" || nodeType === "imageCompare") {
           targetHandleId = "image";
           // annotation also has an image output
           if (nodeType === "annotation") {
             sourceHandleIdForNewNode = "image";
           }
-        } else if (nodeType === "nanoBanana" || nodeType === "generateVideo") {
+        } else if (nodeType === "nanoBanana" || nodeType === "generateVideo" || nodeType === "promptConstructor") {
           targetHandleId = "image";
         } else if (nodeType === "imageInput") {
           sourceHandleIdForNewNode = "image";
         }
       }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
{
type: "promptConstructor",
label: "Prompt Constructor",
icon: (
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M14.25 6.087c0-.355.186-.676.401-.959.221-.29.349-.634.349-1.003 0-1.036-1.007-1.875-2.25-1.875s-2.25.84-2.25 1.875c0 .369.128.713.349 1.003.215.283.401.604.401.959v0a.64.64 0 01-.657.643 48.39 48.39 0 01-4.163-.3c.186 1.613.293 3.25.315 4.907a.656.656 0 01-.658.663v0c-.355 0-.676-.186-.959-.401a1.647 1.647 0 00-1.003-.349c-1.036 0-1.875 1.007-1.875 2.25s.84 2.25 1.875 2.25c.369 0 .713-.128 1.003-.349.283-.215.604-.401.959-.401v0c.31 0 .555.26.532.57a48.039 48.039 0 01-.642 5.056c1.518.19 3.058.309 4.616.354a.64.64 0 00.657-.643v0c0-.355-.186-.676-.401-.959a1.647 1.647 0 01-.349-1.003c0-1.035 1.008-1.875 2.25-1.875 1.243 0 2.25.84 2.25 1.875 0 .369-.128.713-.349 1.003-.215.283-.4.604-.4.959v0c0 .333.277.599.61.58a48.1 48.1 0 005.427-.63 48.05 48.05 0 00.582-4.717.532.532 0 00-.533-.57v0c-.355 0-.676.186-.959.401-.29.221-.634.349-1.003.349-1.035 0-1.875-1.007-1.875-2.25s.84-2.25 1.875-2.25c.37 0 .713.128 1.003.349.283.215.604.401.96.401v0a.656.656 0 00.658-.663 48.422 48.422 0 00-.37-5.36c-1.886.342-3.81.574-5.766.689a.578.578 0 01-.61-.58v0z" />
</svg>
),
},
} else if (handleType === "image") {
if (nodeType === "annotation" || nodeType === "output" || nodeType === "splitGrid" || nodeType === "outputGallery" || nodeType === "imageCompare") {
targetHandleId = "image";
// annotation also has an image output
if (nodeType === "annotation") {
sourceHandleIdForNewNode = "image";
}
} else if (nodeType === "nanoBanana" || nodeType === "generateVideo" || nodeType === "promptConstructor") {
targetHandleId = "image";
} else if (nodeType === "imageInput") {
sourceHandleIdForNewNode = "image";
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/components/ConnectionDropMenu.tsx` around lines 112 - 120, The menu item
with type "promptConstructor" is being added from the image-target menu but
handleMenuSelect in WorkflowCanvas.tsx doesn't map the combination handleType
=== "image" and nodeType === "promptConstructor" to a target handle, so the
dropped node is created without a connection; update handleMenuSelect (or the
mapping it uses) to detect nodeType "promptConstructor" when handleType is
"image" and assign the correct target handle id (the node's input/target handle
name used by the Prompt Constructor node), then create the edge/connection using
the same code path used for other image→node mappings so the connection is
established on drop.

];

const TEXT_TARGET_OPTIONS: MenuOption[] = [
Expand Down
84 changes: 84 additions & 0 deletions src/components/Header.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export function Header() {
isSaving,
setWorkflowMetadata,
saveToFile,
saveWorkflow,
loadWorkflow,
previousWorkflowSnapshot,
revertToSnapshot,
Expand All @@ -83,6 +84,7 @@ export function Header() {
isSaving: state.isSaving,
setWorkflowMetadata: state.setWorkflowMetadata,
saveToFile: state.saveToFile,
saveWorkflow: state.saveWorkflow,
loadWorkflow: state.loadWorkflow,
previousWorkflowSnapshot: state.previousWorkflowSnapshot,
revertToSnapshot: state.revertToSnapshot,
Expand All @@ -94,6 +96,7 @@ export function Header() {
const [showProjectModal, setShowProjectModal] = useState(false);
const [projectModalMode, setProjectModalMode] = useState<"new" | "settings">("new");
const fileInputRef = useRef<HTMLInputElement>(null);
const uploadInputRef = useRef<HTMLInputElement>(null);

const isProjectConfigured = !!workflowName;
const canSave = !!(workflowId && workflowName && saveDirectoryPath);
Expand All @@ -119,6 +122,35 @@ export function Header() {
fileInputRef.current?.click();
};

const handleDownloadWorkflow = () => {
saveWorkflow(workflowName || undefined);
};

const handleUploadWorkflow = () => {
uploadInputRef.current?.click();
};

const handleUploadFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0];
if (!file) return;

const reader = new FileReader();
reader.onload = async (event) => {
try {
const workflow = JSON.parse(event.target?.result as string) as WorkflowFile;
if (workflow.version && workflow.nodes && workflow.edges) {
await loadWorkflow(workflow);
} else {
Comment on lines +133 to +143
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Strengthen JSON schema checks before calling loadWorkflow.

At Line 141, truthy checks allow malformed payloads through (e.g., non-array nodes, edges missing source/target/id), which can crash inside loadWorkflow.

🔧 Proposed fix
+  const isValidWorkflowImport = (value: unknown): value is WorkflowFile => {
+    if (!value || typeof value !== "object") return false;
+    const wf = value as Partial<WorkflowFile> & { nodes?: unknown; edges?: unknown };
+    if (wf.version !== 1) return false;
+    if (typeof wf.name !== "string" || !Array.isArray(wf.nodes) || !Array.isArray(wf.edges)) return false;
+
+    const nodesValid = wf.nodes.every((node) => {
+      if (!node || typeof node !== "object") return false;
+      const n = node as { id?: unknown; type?: unknown };
+      return typeof n.id === "string" && typeof n.type === "string";
+    });
+
+    const edgesValid = wf.edges.every((edge) => {
+      if (!edge || typeof edge !== "object") return false;
+      const e = edge as { id?: unknown; source?: unknown; target?: unknown };
+      return (
+        typeof e.id === "string" &&
+        typeof e.source === "string" &&
+        typeof e.target === "string"
+      );
+    });
+
+    return nodesValid && edgesValid;
+  };
+
   const handleUploadFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
     const file = e.target.files?.[0];
     if (!file) return;
@@
-        const workflow = JSON.parse(event.target?.result as string) as WorkflowFile;
-        if (workflow.version && workflow.nodes && workflow.edges) {
-          await loadWorkflow(workflow);
+        const parsed = JSON.parse(event.target?.result as string);
+        if (isValidWorkflowImport(parsed)) {
+          await loadWorkflow(parsed);
         } else {
           alert("Invalid workflow file format");
         }

Based on learnings: Validate workflows using validateWorkflow() in workflowStore.ts before execution.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/components/Header.tsx` around lines 133 - 143, The uploaded JSON is only
loosely checked before calling loadWorkflow, so replace the current
post-JSON-parse branch in handleUploadFileChange with a validation step: after
parsing into WorkflowFile, call the exported validateWorkflow(...) from
workflowStore.ts and only await loadWorkflow(workflow) if validateWorkflow
returns success; also catch JSON.parse errors and log/report them before
returning, and ensure validateWorkflow verifies nodes is an array and each edge
has source, target, and id so malformed payloads never reach loadWorkflow.

alert("Invalid workflow file format");
}
} catch {
alert("Failed to parse workflow file");
}
};
reader.readAsText(file);
e.target.value = "";
};

const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0];
if (!file) return;
Expand Down Expand Up @@ -189,6 +221,49 @@ export function Header() {
}
}, [revertToSnapshot]);

const clientWorkflowButtons = (
<div className="flex items-center gap-0.5 ml-1 pl-1 border-l border-neutral-700/50">
<button
onClick={handleDownloadWorkflow}
className="p-1.5 text-neutral-400 hover:text-neutral-200 hover:bg-neutral-800 rounded transition-colors"
title="Download workflow"
>
<svg
className="w-4 h-4"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
strokeWidth={1.5}
>
<path
strokeLinecap="round"
strokeLinejoin="round"
d="M3 16.5v2.25A2.25 2.25 0 0 0 5.25 21h13.5A2.25 2.25 0 0 0 21 18.75V16.5M16.5 12 12 16.5m0 0L7.5 12m4.5 4.5V3"
/>
</svg>
</button>
<button
onClick={handleUploadWorkflow}
className="p-1.5 text-neutral-400 hover:text-neutral-200 hover:bg-neutral-800 rounded transition-colors"
title="Upload workflow"
>
<svg
className="w-4 h-4"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
strokeWidth={1.5}
>
<path
strokeLinecap="round"
strokeLinejoin="round"
d="M3 16.5v2.25A2.25 2.25 0 0 0 5.25 21h13.5A2.25 2.25 0 0 0 21 18.75V16.5m-13.5-9L12 3m0 0 4.5 4.5M12 3v13.5"
/>
</svg>
</button>
</div>
);

const settingsButtons = (
<div className="flex items-center gap-0.5 ml-1 pl-1 border-l border-neutral-700/50">
<button
Expand Down Expand Up @@ -233,6 +308,13 @@ export function Header() {
onChange={handleFileChange}
className="hidden"
/>
<input
ref={uploadInputRef}
type="file"
accept=".json"
onChange={handleUploadFileChange}
className="hidden"
/>
<header className="h-11 bg-neutral-900 border-b border-neutral-800 flex items-center justify-between px-4 shrink-0">
<div className="flex items-center gap-2">
<button
Expand Down Expand Up @@ -321,6 +403,7 @@ export function Header() {
</div>

{settingsButtons}
{clientWorkflowButtons}
</>
) : (
<>
Expand Down Expand Up @@ -370,6 +453,7 @@ export function Header() {
</div>

{settingsButtons}
{clientWorkflowButtons}
</>
)}
</div>
Expand Down
2 changes: 1 addition & 1 deletion src/components/WorkflowCanvas.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ const getNodeHandles = (nodeType: string): { inputs: string[]; outputs: string[]
case "array":
return { inputs: ["text"], outputs: ["text"] };
case "promptConstructor":
return { inputs: ["text"], outputs: ["text"] };
return { inputs: ["text", "image"], outputs: ["text"] };
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Auto-wiring is missing for Prompt Constructor image drops.

After this adds an image input, handleMenuSelect() still omits promptConstructor from the handleType === "image" branch, so choosing Prompt Constructor from the connection drop menu creates the node but leaves it disconnected.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/components/WorkflowCanvas.tsx` at line 147, The new node type returns an
"image" input but handleMenuSelect still omits auto-connecting the Prompt
Constructor for image drops; update WorkflowCanvas.handleMenuSelect so that in
the branch where handleType === "image" you include the promptConstructor case
and programmatically create the edge from the source node to the new node's
"image" input (use the same connect/edge-creation helper you use for text
drops), ensuring the target handle id or name matches the Prompt Constructor's
"image" input name; this will auto-wire the Prompt Constructor when selected
from the drop menu.

case "nanoBanana":
return { inputs: ["image", "text"], outputs: ["image"] };
case "generateVideo":
Expand Down
2 changes: 1 addition & 1 deletion src/components/__tests__/ConnectionDropMenu.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ describe("ConnectionDropMenu", () => {
fireEvent.keyDown(document, { key: "ArrowUp" });

// Last item should now be highlighted
const lastButton = screen.getByText("Switch").closest("button");
const lastButton = screen.getByText("Prompt Constructor").closest("button");
expect(lastButton).toHaveClass("bg-neutral-700");
});

Expand Down
Loading