Commit 507af0e
Update Gemma3 notebook to use OpenVINO GenAI
- Replace transformers library with openvino_genai for better performance
- Update model loading to use OpenVINO GenAI pipeline
- Modify gradio helper to work with OpenVINO GenAI streaming
- Simplify inference code and remove unnecessary dependencies
- Improve image processing for OpenVINO GenAI format
1 parent f2c57ca
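In essence, the commit swaps the transformers streaming loop for the `openvino_genai` pipeline API. A minimal sketch of that pattern, mirroring the converted gradio helper below; the model directory `gemma-3-4b-it-ov`, device `"CPU"`, and file name `cat.png` are illustrative placeholders, not taken from this commit:

```python
# Hedged sketch of the OpenVINO GenAI pattern this commit adopts.
# Model directory, device, and image path are placeholders.
import numpy as np
import openvino as ov
import openvino_genai as ov_genai
from PIL import Image

pipe = ov_genai.VLMPipeline("gemma-3-4b-it-ov", "CPU")

# Images are passed as ov.Tensor in 1xHxWx3 layout, as in gradio_helper.py
pic = Image.open("cat.png").convert("RGB")
image = ov.Tensor(np.array(pic.getdata()).reshape(1, pic.size[1], pic.size[0], 3).astype(np.byte))

def streamer(subword: str):
    # Print tokens as they arrive; returning RUNNING continues generation
    print(subword, end="", flush=True)
    return ov_genai.StreamingStatus.RUNNING

pipe.generate("Describe this image.", images=[image], max_new_tokens=128, streamer=streamer)
```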

File tree: 3 files changed (+128, -252 lines)

notebooks/gemma3/README.md

Lines changed: 2 additions & 2 deletions

```diff
@@ -1,4 +1,4 @@
-# Visual-language assistant with Gemma3 and OpenVINO
+# Visual-language assistant with Gemma3 and OpenVINO GenAI
 ![](https://github.com/user-attachments/assets/2540a58e-c242-4439-b151-0fd1e6938af1)
 
 Gemma 3 is Google's new iteration of open weight LLMs. It comes in four sizes, 1 billion, 4 billion, 12 billion, and 27 billion parameters with base (pre-trained) and instruction-tuned versions. The 4, 12, and 27 billion parameter models can process both images and text, while the 1B variant is text only.
@@ -19,7 +19,7 @@ The tutorial consists from following steps:
 
 - Install requirements
 - Convert and Optimize model
-- Run OpenVINO model inference
+- Run OpenVINO GenAI model inference
 - Launch Interactive demo
 
 In this demonstration, you'll create interactive chatbot that can answer questions about provided image's content.
```

notebooks/gemma3/gemma3.ipynb

Lines changed: 78 additions & 226 deletions
Large diffs are not rendered by default.
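Since the notebook diff is not rendered, here is a hedged sketch of what the updated loading step likely looks like per the commit message ("Update model loading to use OpenVINO GenAI pipeline"); the `optimum-cli` command, model id, and output directory are assumptions based on similar OpenVINO notebooks, not taken from this commit:

```python
# Assumed conversion step (run once, outside Python):
#   optimum-cli export openvino -m google/gemma-3-4b-it --weight-format int4 gemma-3-4b-it-int4-ov
# The exported directory can then be loaded directly by the GenAI pipeline:
import openvino_genai as ov_genai

pipe = ov_genai.VLMPipeline("gemma-3-4b-it-int4-ov", "CPU")  # device is a placeholder
```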

notebooks/gemma3/gradio_helper.py

Lines changed: 48 additions & 24 deletions

```diff
@@ -9,7 +9,11 @@
 import gradio as gr
 import requests
 from PIL import Image
-from transformers import TextIteratorStreamer
+import numpy as np
+import openvino as ov
+import openvino_genai as ov_genai
+from threading import Event, Thread
+import queue
 
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 
@@ -161,7 +165,7 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
 
 
-def make_demo(model, processor):
+def make_demo(pipe):
     download_example_images()
 
     def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
@@ -174,28 +178,48 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
         messages.extend(process_history(history))
         messages.append({"role": "user", "content": process_new_user_message(message)})
-
-        inputs = processor.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=True,
-            return_dict=True,
-            return_tensors="pt",
-        ).to(device=model.device)
-
-        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-        generate_kwargs = dict(
-            inputs,
-            streamer=streamer,
-            max_new_tokens=max_new_tokens,
-        )
-        t = Thread(target=model.generate, kwargs=generate_kwargs)
-        t.start()
-
-        output = ""
-        for delta in streamer:
-            output += delta
-            yield output
+
+        # Extract and convert images from files for OpenVINO GenAI
+        images = []
+        if message["files"]:
+            for file_path in message["files"]:
+                if not file_path.endswith(".mp4"):  # Skip videos
+                    # Convert image file to OpenVINO Tensor format
+                    pic = Image.open(file_path).convert("RGB")
+                    image_data = np.array(pic.getdata()).reshape(1, pic.size[1], pic.size[0], 3).astype(np.byte)
+                    images.append(ov.Tensor(image_data))
+
+        # Create a queue to collect streaming output
+        output_queue = queue.Queue()
+        stream_complete = Event()
+
+        def streamer(subword):
+            output_queue.put(subword)
+            return ov_genai.StreamingStatus.RUNNING
+
+        def generate_in_thread():
+            if images:
+                pipe.generate(message["text"], images=images, max_new_tokens=max_new_tokens, streamer=streamer)
+            else:
+                pipe.generate(message["text"], max_new_tokens=max_new_tokens, streamer=streamer)
+            stream_complete.set()
+
+        # Start generation in background thread
+        Thread(target=generate_in_thread).start()
+
+        # Stream results as they come in
+        buffer = ""
+        while not stream_complete.is_set() or not output_queue.empty():
+            try:
+                # Get next token with timeout
+                subword = output_queue.get(timeout=0.1)
+                buffer += subword
+                yield buffer
+            except queue.Empty:
+                continue
+
+        # Yield final result
+        yield buffer
 
     examples = [
         [
```
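The queue-plus-Event bridge above converts GenAI's push-style streamer callback into the pull-style generator that Gradio's chat interface consumes. With the new signature, the demo is built from a single pipeline object instead of a model/processor pair. A possible wiring, assuming `make_demo` returns a Gradio demo object as in similar OpenVINO notebooks (the model directory is a placeholder):

```python
import openvino_genai as ov_genai
from gradio_helper import make_demo

# Build the pipeline once and hand it to the Gradio helper
pipe = ov_genai.VLMPipeline("gemma-3-4b-it-int4-ov", "CPU")
demo = make_demo(pipe)
demo.launch()
```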
