Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 42 additions & 26 deletions src/app/api/generate/providers/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@ export const MODEL_MAP: Record<ModelType, string> = {
"nano-banana-2": "gemini-3.1-flash-image-preview",
};

/**
 * Convert an image string into the Gemini `inlineData` part shape.
 *
 * Accepts either a full data URL ("data:<mime>;base64,<payload>") or a bare
 * base64 string; bare strings are assumed to be PNG. Logs the detected MIME
 * type and base64 payload size (in KB of base64 characters) for tracing.
 */
function imageToInlineData(
  requestId: string,
  image: string,
  label: string
): { inlineData: { mimeType: string; data: string } } {
  const marker = "base64,";

  if (!image.includes(marker)) {
    // No data-URL wrapper — pass the string through unchanged and assume PNG.
    console.log(`[API:${requestId}] Image ${label}: raw, ${(image.length / 1024).toFixed(1)}KB`);
    return { inlineData: { mimeType: "image/png", data: image } };
  }

  const [header, data] = image.split(marker);
  // Pull the MIME type out of the "data:<mime>;" header, defaulting to PNG.
  const mimeMatch = /data:([^;]+)/.exec(header);
  const mimeType = mimeMatch?.[1] ?? "image/png";
  console.log(`[API:${requestId}] Image ${label}: ${mimeType}, ${(data.length / 1024).toFixed(1)}KB`);
  return { inlineData: { mimeType, data } };
}

/**
* Generate image using Gemini API (legacy/default path)
*/
Expand All @@ -30,37 +49,34 @@ export async function generateWithGemini(
aspectRatio?: string,
resolution?: string,
useGoogleSearch?: boolean,
useImageSearch?: boolean
useImageSearch?: boolean,
multimodalParts?: Array<{ type: string; value: string; name?: string }>
): Promise<NextResponse<GenerateResponse>> {
console.log(`[API:${requestId}] Gemini generation - Model: ${model}, Images: ${images?.length || 0}, Prompt: ${prompt?.length || 0} chars`);

// Extract base64 data and MIME types from data URLs
const imageData = (images || []).map((image, idx) => {
if (image.includes("base64,")) {
const [header, data] = image.split("base64,");
// Extract MIME type from header (e.g., "data:image/png;" -> "image/png")
const mimeMatch = header.match(/data:([^;]+)/);
const mimeType = mimeMatch ? mimeMatch[1] : "image/png";
console.log(`[API:${requestId}] Image ${idx + 1}: ${mimeType}, ${(data.length / 1024).toFixed(1)}KB`);
return { data, mimeType };
}
console.log(`[API:${requestId}] Image ${idx + 1}: raw, ${(image.length / 1024).toFixed(1)}KB`);
return { data: image, mimeType: "image/png" };
});
console.log(`[API:${requestId}] Gemini generation - Model: ${model}, Images: ${images?.length || 0}, Prompt: ${prompt?.length || 0} chars, Parts: ${multimodalParts?.length || 0}`);

// Initialize Gemini client
const ai = new GoogleGenAI({ apiKey });

// Build request parts array with prompt and all images
const requestParts: Array<{ text: string } | { inlineData: { mimeType: string; data: string } }> = [
{ text: prompt },
...imageData.map(({ data, mimeType }) => ({
inlineData: {
mimeType,
data,
},
})),
];
// Build request parts array — use multimodal parts if provided, otherwise legacy prompt+images
type GeminiPart = { text: string } | { inlineData: { mimeType: string; data: string } };
let requestParts: GeminiPart[];

if (multimodalParts && multimodalParts.length > 0) {
// Build interleaved multimodal request from image variable parts
requestParts = multimodalParts.map((part) => {
if (part.type === "image" && part.value) {
return imageToInlineData(requestId, part.value, part.name || "var");
}
return { text: part.value };
});
} else {
// Legacy: prompt text + all images appended
const imageData = (images || []).map((image, idx) => imageToInlineData(requestId, image, `${idx + 1}`));
requestParts = [
{ text: prompt },
...imageData,
];
}

// Build config object based on model capabilities
const config: Record<string, unknown> = {
Expand Down
6 changes: 4 additions & 2 deletions src/app/api/generate/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ export async function POST(request: NextRequest) {
parameters,
dynamicInputs,
mediaType,
} = body;
parts,
} = body as MultiProviderGenerateRequest & { parts?: Array<{ type: string; value: string; name?: string }> };

// Prompt is required unless:
// - Provided via dynamicInputs
Expand Down Expand Up @@ -524,7 +525,8 @@ export async function POST(request: NextRequest) {
aspectRatio,
resolution,
useGoogleSearch,
useImageSearch
useImageSearch,
parts as Array<{ type: string; value: string; name?: string }> | undefined
);
} catch (error) {
// Extract error information
Expand Down
56 changes: 30 additions & 26 deletions src/app/api/llm/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ import { logger } from "@/utils/logger";

export const maxDuration = 60; // 1 minute timeout

/**
 * Convert an image string into the Gemini `inlineData` part shape.
 * Data URLs ("data:<mime>;base64,<payload>") have their MIME type and payload
 * extracted; anything else is treated as raw base64 PNG bytes.
 */
function imageDataToInlinePart(img: string): { inlineData: { mimeType: string; data: string } } {
  const parsed = /^data:(.+?);base64,(.+)$/.exec(img);
  if (parsed === null) {
    // Fallback: no data-URL prefix, assume bare PNG base64.
    return { inlineData: { mimeType: "image/png", data: img } };
  }
  const [, mimeType, data] = parsed;
  return { inlineData: { mimeType, data } };
}

// Generate a unique request ID for tracking
function generateRequestId(): string {
return `llm-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
Expand Down Expand Up @@ -36,7 +45,8 @@ async function generateWithGoogle(
maxTokens: number,
images?: string[],
requestId?: string,
userApiKey?: string | null
userApiKey?: string | null,
parts?: Array<{ type: string; value: string; name?: string }>
): Promise<string> {
// User-provided key takes precedence over env variable
const apiKey = userApiKey || process.env.GEMINI_API_KEY;
Expand All @@ -55,31 +65,24 @@ async function generateWithGoogle(
maxTokens,
imageCount: images?.length || 0,
promptLength: prompt.length,
partsCount: parts?.length || 0,
});

// Build multimodal content if images are provided
let contents: string | Array<{ inlineData: { mimeType: string; data: string } } | { text: string }>;
if (images && images.length > 0) {
// Build multimodal content
type GeminiPart = { inlineData: { mimeType: string; data: string } } | { text: string };
let contents: string | GeminiPart[];

if (parts && parts.length > 0) {
// Interleaved multimodal parts from image variable resolution
contents = parts.map((part): GeminiPart => {
if (part.type === "image" && part.value) {
return imageDataToInlinePart(part.value);
}
return { text: part.value };
});
} else if (images && images.length > 0) {
contents = [
...images.map((img) => {
// Extract base64 data and mime type from data URL
const matches = img.match(/^data:(.+?);base64,(.+)$/);
if (matches) {
return {
inlineData: {
mimeType: matches[1],
data: matches[2],
},
};
}
// Fallback: assume PNG if no data URL prefix
return {
inlineData: {
mimeType: "image/png",
data: img,
},
};
}),
...images.map((img) => imageDataToInlinePart(img)),
{ text: prompt },
];
} else {
Expand Down Expand Up @@ -298,14 +301,15 @@ export async function POST(request: NextRequest) {
const openaiApiKey = request.headers.get("X-OpenAI-API-Key");
const anthropicApiKey = request.headers.get("X-Anthropic-API-Key");

const body: LLMGenerateRequest = await request.json();
const body = await request.json() as LLMGenerateRequest & { parts?: Array<{ type: string; value: string; name?: string }> };
const {
prompt,
images,
provider,
model,
temperature = 0.7,
maxTokens = 1024
maxTokens = 1024,
parts,
} = body;

logger.info('api.llm', 'LLM generation request received', {
Expand All @@ -330,7 +334,7 @@ export async function POST(request: NextRequest) {
let text: string;

if (provider === "google") {
text = await generateWithGoogle(prompt, model, temperature, maxTokens, images, requestId, geminiApiKey);
text = await generateWithGoogle(prompt, model, temperature, maxTokens, images, requestId, geminiApiKey, parts);
} else if (provider === "openai") {
text = await generateWithOpenAI(prompt, model, temperature, maxTokens, images, requestId, openaiApiKey);
} else if (provider === "anthropic") {
Expand Down
27 changes: 27 additions & 0 deletions src/components/ConnectionDropMenu.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ const IMAGE_TARGET_OPTIONS: MenuOption[] = [
</svg>
),
},
{
type: "inpaint",
label: "Inpaint",
icon: (
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M9.53 16.122a3 3 0 00-5.78 1.128 2.25 2.25 0 01-2.4 2.245 4.5 4.5 0 008.4-2.245c0-.399-.078-.78-.22-1.128zm0 0a15.998 15.998 0 003.388-1.62m-5.043-.025a15.994 15.994 0 011.622-3.395m3.42 3.42a15.995 15.995 0 004.764-4.648l3.876-5.814a1.151 1.151 0 00-1.597-1.597L14.146 6.32a15.996 15.996 0 00-4.649 4.763m3.42 3.42a6.776 6.776 0 00-3.42-3.42" />
</svg>
),
},
{
type: "splitGrid",
label: "Split Grid Node",
Expand Down Expand Up @@ -109,6 +118,15 @@ const IMAGE_TARGET_OPTIONS: MenuOption[] = [
</svg>
),
},
{
type: "promptConstructor",
label: "Prompt Constructor",
icon: (
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M14.25 6.087c0-.355.186-.676.401-.959.221-.29.349-.634.349-1.003 0-1.036-1.007-1.875-2.25-1.875s-2.25.84-2.25 1.875c0 .369.128.713.349 1.003.215.283.401.604.401.959v0a.64.64 0 01-.657.643 48.39 48.39 0 01-4.163-.3c.186 1.613.293 3.25.315 4.907a.656.656 0 01-.658.663v0c-.355 0-.676-.186-.959-.401a1.647 1.647 0 00-1.003-.349c-1.036 0-1.875 1.007-1.875 2.25s.84 2.25 1.875 2.25c.369 0 .713-.128 1.003-.349.283-.215.604-.401.959-.401v0c.31 0 .555.26.532.57a48.039 48.039 0 01-.642 5.056c1.518.19 3.058.309 4.616.354a.64.64 0 00.657-.643v0c0-.355-.186-.676-.401-.959a1.647 1.647 0 01-.349-1.003c0-1.035 1.008-1.875 2.25-1.875 1.243 0 2.25.84 2.25 1.875 0 .369-.128.713-.349 1.003-.215.283-.4.604-.4.959v0c0 .333.277.599.61.58a48.1 48.1 0 005.427-.63 48.05 48.05 0 00.582-4.717.532.532 0 00-.533-.57v0c-.355 0-.676.186-.959.401-.29.221-.634.349-1.003.349-1.035 0-1.875-1.007-1.875-2.25s.84-2.25 1.875-2.25c.37 0 .713.128 1.003.349.283.215.604.401.96.401v0a.656.656 0 00.658-.663 48.422 48.422 0 00-.37-5.36c-1.886.342-3.81.574-5.766.689a.578.578 0 01-.61-.58v0z" />
</svg>
),
},
Comment on lines +121 to +129
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Image→Prompt Constructor menu path currently creates an unconnected node

Line 112 exposes promptConstructor in the image-target menu, but handleMenuSelect (in src/components/WorkflowCanvas.tsx) does not map handleType === "image" + nodeType === "promptConstructor" to a target handle. The node is created, but the dropped connection is not created.

💡 Suggested fix (in src/components/WorkflowCanvas.tsx)
-      } else if (handleType === "image") {
+      } else if (handleType === "image") {
         if (nodeType === "annotation" || nodeType === "output" || nodeType === "splitGrid" || nodeType === "outputGallery" || nodeType === "imageCompare") {
           targetHandleId = "image";
           // annotation also has an image output
           if (nodeType === "annotation") {
             sourceHandleIdForNewNode = "image";
           }
-        } else if (nodeType === "nanoBanana" || nodeType === "generateVideo") {
+        } else if (nodeType === "nanoBanana" || nodeType === "generateVideo" || nodeType === "promptConstructor") {
           targetHandleId = "image";
         } else if (nodeType === "imageInput") {
           sourceHandleIdForNewNode = "image";
         }
       }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
{
type: "promptConstructor",
label: "Prompt Constructor",
icon: (
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M14.25 6.087c0-.355.186-.676.401-.959.221-.29.349-.634.349-1.003 0-1.036-1.007-1.875-2.25-1.875s-2.25.84-2.25 1.875c0 .369.128.713.349 1.003.215.283.401.604.401.959v0a.64.64 0 01-.657.643 48.39 48.39 0 01-4.163-.3c.186 1.613.293 3.25.315 4.907a.656.656 0 01-.658.663v0c-.355 0-.676-.186-.959-.401a1.647 1.647 0 00-1.003-.349c-1.036 0-1.875 1.007-1.875 2.25s.84 2.25 1.875 2.25c.369 0 .713-.128 1.003-.349.283-.215.604-.401.959-.401v0c.31 0 .555.26.532.57a48.039 48.039 0 01-.642 5.056c1.518.19 3.058.309 4.616.354a.64.64 0 00.657-.643v0c0-.355-.186-.676-.401-.959a1.647 1.647 0 01-.349-1.003c0-1.035 1.008-1.875 2.25-1.875 1.243 0 2.25.84 2.25 1.875 0 .369-.128.713-.349 1.003-.215.283-.4.604-.4.959v0c0 .333.277.599.61.58a48.1 48.1 0 005.427-.63 48.05 48.05 0 00.582-4.717.532.532 0 00-.533-.57v0c-.355 0-.676.186-.959.401-.29.221-.634.349-1.003.349-1.035 0-1.875-1.007-1.875-2.25s.84-2.25 1.875-2.25c.37 0 .713.128 1.003.349.283.215.604.401.96.401v0a.656.656 0 00.658-.663 48.422 48.422 0 00-.37-5.36c-1.886.342-3.81.574-5.766.689a.578.578 0 01-.61-.58v0z" />
</svg>
),
},
} else if (handleType === "image") {
if (nodeType === "annotation" || nodeType === "output" || nodeType === "splitGrid" || nodeType === "outputGallery" || nodeType === "imageCompare") {
targetHandleId = "image";
// annotation also has an image output
if (nodeType === "annotation") {
sourceHandleIdForNewNode = "image";
}
} else if (nodeType === "nanoBanana" || nodeType === "generateVideo" || nodeType === "promptConstructor") {
targetHandleId = "image";
} else if (nodeType === "imageInput") {
sourceHandleIdForNewNode = "image";
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/components/ConnectionDropMenu.tsx` around lines 112 - 120, The menu item
with type "promptConstructor" is being added from the image-target menu but
handleMenuSelect in WorkflowCanvas.tsx doesn't map the combination handleType
=== "image" and nodeType === "promptConstructor" to a target handle, so the
dropped node is created without a connection; update handleMenuSelect (or the
mapping it uses) to detect nodeType "promptConstructor" when handleType is
"image" and assign the correct target handle id (the node's input/target handle
name used by the Prompt Constructor node), then create the edge/connection using
the same code path used for other image→node mappings so the connection is
established on drop.

];

const TEXT_TARGET_OPTIONS: MenuOption[] = [
Expand Down Expand Up @@ -254,6 +272,15 @@ const IMAGE_SOURCE_OPTIONS: MenuOption[] = [
</svg>
),
},
{
type: "inpaint",
label: "Inpaint",
icon: (
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M9.53 16.122a3 3 0 00-5.78 1.128 2.25 2.25 0 01-2.4 2.245 4.5 4.5 0 008.4-2.245c0-.399-.078-.78-.22-1.128zm0 0a15.998 15.998 0 003.388-1.62m-5.043-.025a15.994 15.994 0 011.622-3.395m3.42 3.42a15.995 15.995 0 004.764-4.648l3.876-5.814a1.151 1.151 0 00-1.597-1.597L14.146 6.32a15.996 15.996 0 00-4.649 4.763m3.42 3.42a6.776 6.776 0 00-3.42-3.42" />
</svg>
),
},
{
type: "router",
label: "Router",
Expand Down
Loading