+ {PromptInputButtons && }
diff --git a/src/plugin/Panel/index.tsx b/src/plugin/Panel/index.tsx
index 6182baa..63bbf14 100644
--- a/src/plugin/Panel/index.tsx
+++ b/src/plugin/Panel/index.tsx
@@ -21,7 +21,7 @@ export function PluginPanelComponent(props: CloverPlugin & PluginProps) {
useEffect(() => {
if (provider) {
- provider.update_dispatch(dispatch);
+ provider.update_plugin_dispatch(dispatch);
provider.update_plugin_state(state);
provider.set_system_prompt();
dispatch({ type: "updateProvider", provider });
diff --git a/src/plugin/base_provider.tsx b/src/plugin/base_provider.tsx
index 8658953..63ba194 100644
--- a/src/plugin/base_provider.tsx
+++ b/src/plugin/base_provider.tsx
@@ -1,5 +1,6 @@
+import type { Button } from "@components";
import type { PluginContextActions, PluginContextStore } from "@context";
-import { ManifestNormalized } from "@iiif/presentation-3-normalized";
+import type { ManifestNormalized } from "@iiif/presentation-3-normalized";
import type { ConversationState, Message } from "@types";
import { getLabelByUserLanguage } from "@utils";
import dedent from "dedent";
@@ -9,7 +10,6 @@ type ProviderStatus = "initializing" | "ready" | "error";
/**
* A Provider class that handles interfacing with the Plugin.
- *
*/
export abstract class BaseProvider {
#plugin_dispatch: Dispatch | undefined;
@@ -20,6 +20,9 @@ export abstract class BaseProvider {
this.#status = "ready";
}
+ /**
+ * Get the dispatch function to allow the Provider to update Plugin state
+ */
private get plugin_dispatch(): Dispatch {
if (!this.#plugin_dispatch) {
throw new Error("Provider dispatch not initialized.");
@@ -28,12 +31,15 @@ export abstract class BaseProvider {
}
/**
- * Sets the dispatch function to allow the provider to update Plugin state
+ * Sets the dispatch function to allow the Provider to update Plugin state
*/
private set plugin_dispatch(dispatch: Dispatch) {
this.#plugin_dispatch = dispatch;
}
+ /**
+ * Get the current Plugin state
+ */
protected get plugin_state(): PluginContextStore {
if (!this.#plugin_state) {
throw new Error("Provider plugin_state not initialized.");
@@ -41,6 +47,9 @@ export abstract class BaseProvider {
return this.#plugin_state;
}
+ /**
+ * Sets the current Plugin state
+ */
protected set plugin_state(state: PluginContextStore) {
this.#plugin_state = state;
}
@@ -96,7 +105,7 @@ export abstract class BaseProvider {
}
/**
- * Update the Plugin state with the current provider.
+ * Update the Plugin state with the current Provider.
*/
protected update_plugin_provider() {
this.plugin_dispatch({
@@ -110,14 +119,27 @@ export abstract class BaseProvider {
*/
abstract generate_response(messages: Message[], conversationHistory: Message[]): Promise;
+ /**
+ * Get the current status of the Provider
+ */
get status(): ProviderStatus {
return this.#status;
}
+ /**
+ * Set the current status of the Provider
+ */
set status(value: ProviderStatus) {
this.#status = value;
}
+ /**
+ * A component that providers can implement to add buttons to the Prompt Input area, extending functionality.
+ */
+ PromptInputButtons(): JSX.Element & { props: { children: (typeof Button)[] } } {
+ return <>>;
+ }
+
/**
* Set the system prompt in the Plugin state based on the current manifest.
*/
@@ -131,16 +153,24 @@ export abstract class BaseProvider {
}
/**
- * A component that providers can implement to set up their UI.
+ * A component that a Provider can implement to perform required setup before users can chat.
+ *
+ * @remarks This is useful for setting up authentication or other prerequisites.
*/
SetupComponent(): JSX.Element {
return <>>;
}
- update_dispatch(dispatch: Dispatch) {
+ /**
+ * Update the reference to the Plugin's dispatch function
+ */
+ update_plugin_dispatch(dispatch: Dispatch) {
this.plugin_dispatch = dispatch;
}
+ /**
+ * Update the reference to the Plugin's state
+ */
update_plugin_state(context: PluginContextStore) {
this.plugin_state = context;
}
diff --git a/src/plugin/context/plugin-context.tsx b/src/plugin/context/plugin-context.tsx
index cb3881f..2985605 100644
--- a/src/plugin/context/plugin-context.tsx
+++ b/src/plugin/context/plugin-context.tsx
@@ -66,7 +66,7 @@ interface SetVaultAction {
type: "setVault";
}
-interface SystemPromptAction {
+interface SetSystemPromptAction {
systemPrompt: string;
type: "setSystemPrompt";
}
@@ -84,16 +84,16 @@ interface UpdateLastMessageAction {
export type PluginContextActions =
| AddMessageAction
| ClearConversation
- | UpdateProviderAction
- | UpdateLastMessageAction
+ | SetActiveCanvasAction
| SetConversationState
| SetManifestAction
- | SetActiveCanvasAction
| SetMediaDialogStateAction
- | SetSelectedMediaAction
| SetOSDViewerAction
+ | SetSelectedMediaAction
+ | SetSystemPromptAction
| SetVaultAction
- | SystemPromptAction;
+ | UpdateProviderAction
+ | UpdateLastMessageAction;
/** Default values not inherited from the Clover Viewer */
type InitPluginContextStore = Omit;
diff --git a/src/providers/mediaPipeProvider/index.tsx b/src/providers/mediaPipeProvider/index.tsx
index 89cf91f..d34f4cb 100644
--- a/src/providers/mediaPipeProvider/index.tsx
+++ b/src/providers/mediaPipeProvider/index.tsx
@@ -354,15 +354,8 @@ export class MediaPipeProvider extends BaseProvider {
.
-
-
@@ -378,9 +371,7 @@ export class MediaPipeProvider extends BaseProvider {
Check the browser console for detailed error information.
-
- Back
-
+ Back
);
}
diff --git a/src/providers/userTokenProvider/components/ModelSelection/index.tsx b/src/providers/userTokenProvider/components/ModelSelection/index.tsx
index 45a15a3..518aa27 100644
--- a/src/providers/userTokenProvider/components/ModelSelection/index.tsx
+++ b/src/providers/userTokenProvider/components/ModelSelection/index.tsx
@@ -19,7 +19,7 @@ export function ModelSelection({ handleBack, handleClick, models }: Props) {
))}
-
+
Back
diff --git a/src/providers/userTokenProvider/index.tsx b/src/providers/userTokenProvider/index.tsx
index 78219e5..16d5463 100644
--- a/src/providers/userTokenProvider/index.tsx
+++ b/src/providers/userTokenProvider/index.tsx
@@ -1,15 +1,17 @@
import { createAnthropic, type AnthropicProvider } from "@ai-sdk/anthropic";
import { createGoogleGenerativeAI, type google } from "@ai-sdk/google";
import { createOpenAI, type OpenAIProvider } from "@ai-sdk/openai";
-import { Button, Heading, Input } from "@components";
+import { Button, Dialog, Heading, Input, MessagesContainer, PromptInput } from "@components";
+import { usePlugin } from "@context";
+import { BulletList } from "@icons";
import { serializeConfigPresentation3, Traverse } from "@iiif/parser";
-import type { Canvas } from "@iiif/presentation-3";
+import type { Canvas, ContentResource } from "@iiif/presentation-3";
import type { Tool } from "@langchain/core/tools";
-import type { AssistantMessage, Message } from "@types";
+import { ConversationState, type AssistantMessage, type Message } from "@types";
import { getLabelByUserLanguage } from "@utils";
-import { ModelMessage, stepCountIs, streamText, tool } from "ai";
+import { generateText, ModelMessage, stepCountIs, streamText, tool } from "ai";
import dedent from "dedent";
-import { useState } from "react";
+import { useEffect, useRef, useState } from "react";
import { BaseProvider } from "../../plugin/base_provider";
import { ModelSelection } from "./components/ModelSelection";
import { ProviderSelection } from "./components/ProviderSelection";
@@ -22,9 +24,43 @@ type OpenAIModels = Parameters[0];
type AnthropicModels = Parameters[0];
type UserTokenProviderProps = {
+ /**
+ * Maximum number of tool use steps before stopping the response generation
+ */
max_steps?: number;
+ /**
+ * Tools to enable for the AI model
+ */
tools?: Tool[];
+ /**
+ * User token (API key) for the selected AI provider
+ */
user_token?: string | null;
+ /**
+ * Define a callback function at the viewer application level to receive IIIF content updates
+ *
+ * @param iiif_resource a IIIF URL or base64 encoded Content State Annotation
+ *
+ * @example
+ * ```tsx
+ * function App() {
+ * const [iiifContent, setIiifContent] = useState();
+ *
+ * const tokenProvider = new UserTokenProvider({
+ * viewer_iiif_content_callback: (iiif_resource) => {
+ * setIiifContent(iiif_resource);
+ * },
+ * });
+ *
+ * return (
+ *
+ * }
+ *```
+ */
+ viewer_iiif_content_callback?: (iiif_resource: string) => void;
};
export class UserTokenProvider extends BaseProvider {
@@ -34,13 +70,19 @@ export class UserTokenProvider extends BaseProvider {
allowed_providers: Provider[] = ["google", "openai", "anthropic"];
max_steps: number;
tools: Tool[];
-
- constructor({ user_token, tools = [], max_steps = 3 }: UserTokenProviderProps = {}) {
+ viewer_iiif_content_callback?: (iiif_resource: string) => void;
+ constructor({
+ user_token,
+ tools = [],
+ max_steps = 3,
+ viewer_iiif_content_callback,
+ }: UserTokenProviderProps = {}) {
super();
+ this.#user_token = user_token || this.#user_token;
+ this.viewer_iiif_content_callback = viewer_iiif_content_callback;
this.tools = tools;
this.max_steps = max_steps;
super.status = user_token ? "ready" : "initializing";
- this.#user_token = user_token || this.#user_token;
}
/**
@@ -141,6 +183,23 @@ export class UserTokenProvider extends BaseProvider {
this.#reset_model();
}
+ /**
+ * Generate a response from the model for a given set of messages; used internally by task components
+ *
+ * @param messages
+ * @returns the model's response
+ */
+ async #task_generate_response(messages: ModelMessage[]) {
+ const model = this.setup_model(this.selected_provider, this.user_token, this.selected_model);
+
+ const { text } = await generateText({
+ model,
+ messages,
+ });
+
+ return text;
+ }
+
/**
* Transform the tools to the format expected by the AI SDK
*
@@ -168,7 +227,7 @@ export class UserTokenProvider extends BaseProvider {
get models_by_provider(): Record {
return {
- google: ["gemini-2.5-pro", "gemini-2.5-flash", "gemma-3-27b-it"] as GoogleModels[],
+ google: ["gemini-3-pro-preview", "gemini-2.5-pro", "gemini-2.5-flash"] as GoogleModels[],
openai: ["gpt-4.1", "o3", "o4-mini"] as OpenAIModels[],
anthropic: [
"claude-4-opus-20250514",
@@ -288,6 +347,103 @@ export class UserTokenProvider extends BaseProvider {
}
}
+ PromptInputButtons() {
+ /* eslint-disable react-hooks/rules-of-hooks */
+ const dialogRef = useRef(null);
+ const [isOpen, setIsOpen] = useState(false);
+ const [SelectedTaskComponent, setSelectedTaskComponent] = useState(
+ null,
+ );
+
+ useEffect(() => {
+ if (isOpen && dialogRef.current) {
+ dialogRef.current.showModal();
+ }
+ }, [isOpen]);
+
+ useEffect(() => {
+ const dialog = dialogRef.current;
+ if (!dialog) return;
+
+ const handleClose = () => {
+ closeDialog();
+ };
+
+ dialog.addEventListener("close", handleClose);
+ return () => {
+ dialog.removeEventListener("close", handleClose);
+ };
+ }, []);
+ /* eslint-enable react-hooks/rules-of-hooks */
+
+ function openDialog() {
+ setIsOpen(true);
+ }
+
+ function closeDialog() {
+ setIsOpen(false);
+ setSelectedTaskComponent(null);
+ }
+
+ const tasks: { component: React.ComponentType; name: string }[] = [
+ {
+ name: "Transcribe text",
+ component: this.TaskTranscribeCanvas.bind(this),
+ },
+ {
+ name: "Create annotation",
+ component: this.TaskCreateAnnotation.bind(this),
+ },
+ ];
+
+ return (
+ <>
+
+
+
+
+ >
+ );
+ }
+
setup_model(provider: Provider, token: string, modelName: string) {
switch (provider) {
case "google": {
@@ -375,12 +531,442 @@ export class UserTokenProvider extends BaseProvider {
onChange={(e) => setInputValue(e.target.value)}
/>
- Submit
- setSelectedModel(null)}>
+ Submit
+ setSelectedModel(null)}>
Back
);
}
+
+ TaskCreateAnnotation() {
+ /* eslint-disable react-hooks/rules-of-hooks */
+ const { state: pluginState } = usePlugin();
+ const [state, setState] = useState<"info" | "processing" | "error">("info");
+ const [conversationState, setConversationState] = useState("idle");
+ const [isButtonDisabled, setIsButtonDisabled] = useState(true);
+ const [updateViewerButton, setUpdateViewerButton] = useState<"hidden" | "visible">("hidden");
+ const [encodedContentState, setEncodedContentState] = useState("");
+ const [errorText, setErrorText] = useState("");
+ const [inputValue, setInputValue] = useState("");
+ const [messages, setMessages] = useState([]);
+ /* eslint-enable react-hooks/rules-of-hooks */
+
+ function encodeContentState(plainContentState: string): string {
+ const uriEncoded = encodeURIComponent(plainContentState); // using built-in function
+ const base64 = btoa(uriEncoded); // using built-in function
+ const base64url = base64.replace(/\+/g, "-").replace(/\//g, "_");
+ const base64urlNoPadding = base64url.replace(/=/g, "");
+ return base64urlNoPadding;
+ }
+
+ const startTask = async () => {
+ try {
+ setState("processing");
+ setConversationState("assistant_responding");
+
+ const first_tool_message: Message = {
+ role: "assistant",
+ type: "tool-call",
+ content: {
+ type: "text",
+ tool_name: "CreateAnnotation",
+ content: "Getting current canvas content for transcription.",
+ },
+ };
+ setMessages([first_tool_message]);
+
+ // step 1: get the current canvas from the plugin state
+ const canvas: Canvas = pluginState.vault.serialize(
+ {
+ type: "Canvas",
+ id: pluginState.activeCanvas.id,
+ },
+ serializeConfigPresentation3,
+ );
+
+ // step 2: get the first painting from the canvas
+ const paintings: ContentResource[] = [];
+ const traverse = new Traverse({
+ contentResource: [
+ (resource) => {
+ if (resource.type === "Image") {
+ paintings.push(resource);
+ }
+ },
+ ],
+ });
+ traverse.traverseCanvasItems(canvas);
+
+ const painting = paintings[0].id;
+
+ if (!painting) {
+ throw new Error("No painting found on canvas");
+ }
+
+ const width = canvas.width || 0;
+ const height = canvas.height || 0;
+
+ const second_tool_message: Message = {
+ role: "assistant",
+ type: "tool-call",
+ content: {
+ type: "text",
+ tool_name: "CreateAnnotation",
+ content: "Sending canvas to model.",
+ },
+ };
+ setMessages((prevMessages) => [...prevMessages, second_tool_message]);
+
+ // step 3: set up the messages to send to the model
+ const systemMessage: ModelMessage = {
+ role: "system",
+ content: dedent`
+ You are an AI assistant that helps with creating IIIF annotations.
+ `,
+ };
+
+ const userMessageText = dedent`
+ ## Context
+ You will be generating a IIIF annotation for an image based on user input.
+
+ ## Details
+ Here are some important details to consider when generating the annotation:
+
+ - The "text" field should contain HTML formatted text that will be displayed in the annotation.
+ - The "language" field should specify the language of the text using a standard language code (e.g., "en" for English).
+ - The "region" defines the area on the canvas for the annotation so be VERY precise with the coordinates and size.
+ - The image has a width of ${width} pixels and a height of ${height} pixels.
+ - Ensure that the x and y coordinates, as well as the width and height of the region, fit within the dimensions of the image.
+ - The region should be relevant to the user input provided.
+
+ ## Task
+ Here is the user input for the annotation:
+ ${inputValue}
+ Generate text and the region to be used in an annotation for the provided image.
+
+ ## Thinking
+ Think about the user instructions and the image details carefully before you respond.
+
+ ## Output Format
+ Provide the response in JSON format as follows:
+
+ {
+ "text": "
The text for the annotation.
",
+ "language": string (the language code you are providing the text in, e.g., "en"),
+ "region": {
+ "x": number (0 to ${width}),
+ "y": number (0 to ${height}),
+ "width": number (0 to ${width}),
+ "height": number (0 to ${height})
+ }
+ }
+
+ - Do NOT include any extra text outside the JSON object
+ - Only respond with the JSON object
+ `;
+
+ const userMessage: ModelMessage = {
+ role: "user",
+ content: [
+ {
+ type: "image",
+ image: painting,
+ },
+ {
+ type: "text",
+ text: userMessageText,
+ },
+ ],
+ };
+
+ // step 4: call a custom function to generate the response
+ const result = (await this.#task_generate_response([systemMessage, userMessage]))
+ .replace("```json", "")
+ .replace("```", "")
+ .trim();
+
+ // step 5: update the messages to show user model call is done
+ const modelResponseMessage: Message = {
+ role: "assistant",
+ type: "tool-call",
+ content: {
+ type: "text",
+ tool_name: "CreateAnnotation",
+ content: "Parsing model response",
+ },
+ };
+ setMessages((prevMessages) => [...prevMessages, modelResponseMessage]);
+
+ // step 6: parse the response
+ const parsed = JSON.parse(result);
+
+ if (
+ !parsed.text ||
+ !parsed.region ||
+ typeof parsed.region.x !== "number" ||
+ typeof parsed.region.y !== "number" ||
+ typeof parsed.region.width !== "number" ||
+ typeof parsed.region.height !== "number"
+ ) {
+ throw new Error("Invalid response format from model");
+ }
+
+ // step 7: create the annotation for the canvas and encode it
+ const annotation = {
+ "@context": "http://iiif.io/api/presentation/3/context.json",
+ id: "https://example.org/import/1",
+ type: "Annotation",
+ motivation: ["contentState"],
+ target: {
+ id: `${canvas.id}#xywh=${parsed.region.x},${parsed.region.y},${parsed.region.width},${parsed.region.height}`,
+ type: "Canvas",
+ partOf: [
+ {
+ id: pluginState.manifest.id,
+ type: "Manifest",
+ },
+ ],
+ },
+ body: {
+ type: "TextualBody",
+ value: parsed.text,
+ format: "text/html",
+ language: [parsed.language || "en"],
+ },
+ };
+
+ const contentState = encodeContentState(JSON.stringify(annotation));
+
+ setEncodedContentState(contentState);
+
+ const contentStateResponse: Message = {
+ role: "assistant",
+ type: "response",
+ content: {
+ type: "text",
+ content: this.viewer_iiif_content_callback
+ ? "Annotation created successfully. Click the button below to update the viewer."
+ : `Annotation created successfully. Here is the encoded Content State annotation:\n${contentState}`,
+ },
+ };
+ setMessages((prevMessages) => [...prevMessages, contentStateResponse]);
+ setConversationState("idle");
+ setUpdateViewerButton("visible");
+ } catch (error) {
+ console.error(error); // eslint-disable-line no-console
+ setErrorText(error instanceof Error ? error.message : "An unknown error occurred.");
+ setState("error");
+ }
+ };
+
+ if (state === "info") {
+ return (
+ <>
+
+ Create Annotation
+
+ This task will create a IIIF annotation for the current canvas based on the provided
+ user input.
+
Please acknowledge that you won't use this tool for evil.
-
+
I won't
@@ -131,7 +131,7 @@ export class MyCustomProvider extends BaseProvider {
);
}
- async send_messages(messages: Message[], conversationHistory: Message[]): Promise {
+ async generate_response(messages: Message[], conversationHistory: Message[]): Promise {
this.set_conversation_state("assistant_responding");
const mockWebSocket = new WebSocket("ws://mock-websocket-server");
@@ -163,7 +163,7 @@ export class MyCustomProvider extends BaseProvider {
}
```
-In this step, we implement the `send_messages` method, which is responsible for sending messages to the mock LLM provider.
+In this step, we implement the `generate_response` method, which is responsible for sending messages to the mock LLM provider.
First, we set the conversation state to `"assistant_responding"` to indicate that the assistant is processing the request.