-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
143 lines (117 loc) · 3.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import google.generativeai as genai
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI
import streamlit as st
from st_multimodal_chatinput import multimodal_chatinput
import streamlit as st
st.title("Multimodal Chat w/ Gemini")
st.image("gemini.jpeg")

# Read the Google API key from the environment. A missing (or empty) key is
# reported in the page and the script run is halted, instead of surfacing a
# raw KeyError traceback to the user.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    st.error(
        "The GOOGLE_API_KEY environment variable is not set. "
        "Set it and reload the app."
    )
    st.stop()
genai.configure(api_key=GOOGLE_API_KEY)

# Two model handles: the google.generativeai client is used for text-only
# prompts, the LangChain chat wrapper for prompts that include an image.
model = genai.GenerativeModel('gemini-pro')
vision_model = ChatGoogleGenerativeAI(model="gemini-pro-vision")
# Known issue: the chat input box always renders in the middle of the chat; see https://github.com/het-25/st-multimodal-chatinput
def reconfig_chatinput():
    """Inject a ``<style>`` snippet into the page through ``st.markdown``.

    The snippet asks for full width and a pink (#FFC0CB) background.

    NOTE(review): the CSS rule has no selector in front of its opening
    brace, so it presumably does not match any element -- confirm which
    element the chat-input styling was meant to target.
    """
    css_snippet = (
        "\n"
        "<style>\n"
        "{\n"
        "width: 100%; /* Span the full width of the viewport */;\n"
        "background-color: #FFC0CB;\n"
        "}\n"
        "</style>\n"
    )
    # unsafe_allow_html is required for the <style> tag to be emitted as HTML.
    st.markdown(css_snippet, unsafe_allow_html=True)
# Apply the chat-input CSS before any chat content is drawn.
reconfig_chatinput()

# First run of a session: seed the history with a greeting from the assistant.
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": "Ask Gemini anything || send pic w/ a prompt!",
    }
    st.session_state.messages = [greeting]

# On every rerun, replay the stored conversation so earlier turns stay visible.
for past_msg in st.session_state.messages:
    bubble = st.chat_message(past_msg["role"])
    bubble.markdown(past_msg["content"])
# process, store query + resp
def call_llm_just_text(q):
    """Send a text-only prompt to Gemini, render the reply, and record the turn.

    Args:
        q: The user's prompt; passed unchanged to ``model.generate_content``.

    Side effects:
        Renders the reply in an assistant chat bubble, then appends the user
        prompt followed by the assistant reply to
        ``st.session_state.messages``.
    """
    # Show the same spinner call_llm_with_img uses while waiting on the model.
    with st.spinner('Processing📈...'):
        response = model.generate_content(q)

    # `response.text` raises ValueError when the response carries no valid
    # part (for example when it was blocked). Fall back to a readable notice
    # so the turn is still displayed and stored instead of crashing the app.
    try:
        answer = response.text
    except ValueError:
        answer = (
            "Gemini returned no text for this prompt "
            "(the response may have been blocked)."
        )

    # display assistant msg
    with st.chat_message("assistant", avatar="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRGVfLdUg7kVxuSqqBGgAL3UJeQgRCLPhxIZlXbVxmUAdYaJm-fcUal7x-FHhwxzpeg6_M&usqp=CAU"):
        st.markdown(answer)

    # store user msg, then assistant msg, so the history replays in order
    st.session_state.messages.append({"role": "user", "content": q})
    st.session_state.messages.append({"role": "assistant", "content": answer})
# process, store query + resp
def call_llm_with_img(q):
    """Send a text + image prompt to the vision model, render and record it.

    Args:
        q: Mapping read via ``q["images"]`` (list of base64 encodings of the
            uploaded images) and ``q["text"]`` (the submitted text). The
            caller is expected to pass at least one image.

    Side effects:
        Renders the reply in an assistant chat bubble, then appends the user
        text followed by the assistant reply to
        ``st.session_state.messages``.
    """
    uploaded_images = q["images"]  # list of base64 encodings of uploaded imgs
    txt_inp = q["text"]  # submitted text

    # One text part followed by one image part per upload. Every uploaded
    # image is forwarded, rather than silently dropping all but the first.
    content = [{"type": "text", "text": txt_inp}]
    content.extend(
        {"type": "image_url", "image_url": image}
        for image in uploaded_images
    )

    with st.spinner('Processing📈...'):
        resp = vision_model.invoke([HumanMessage(content=content)])

    # display assistant msg
    with st.chat_message("assistant", avatar="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRGVfLdUg7kVxuSqqBGgAL3UJeQgRCLPhxIZlXbVxmUAdYaJm-fcUal7x-FHhwxzpeg6_M&usqp=CAU"):
        st.markdown(resp.content)

    # store user msg, then assistant msg, so the history replays in order
    st.session_state.messages.append({"role": "user", "content": txt_inp})
    st.session_state.messages.append(
        {"role": "assistant", "content": resp.content}
    )
# Multimodal Streamlit chat input; the branch below runs only when it returns
# a truthy value, i.e. when input is given.
user_inp = multimodal_chatinput()

if user_inp:
    # Echo the user's text in a chat bubble.
    user_avatar = "https://d112y698adiu2z.cloudfront.net/photos/production/user_photos/000/517/300/datas/profile.png"
    with st.chat_message("user", avatar=user_avatar):
        st.markdown(user_inp["text"])

    # Route to the vision model when at least one image was attached,
    # otherwise to the text-only model.
    if len(user_inp["images"]) > 0:
        call_llm_with_img(user_inp)
    else:
        call_llm_just_text(user_inp["text"])
# Page footer with a link to the repl.
footer_html = """
<div>
<p style="text-align: center;font-family:Arial; color:Pink; font-size: 12px;display: table-cell; vertical-align: bottom">Made with ❤️ in SF. ✅ out <a href="https://replit.com/@LizzieSiegle/gemini-multimodal-chat">the repl!</a></p>
</div>
"""
# unsafe_allow_html=True makes Streamlit render the HTML tags; without it they
# would be escaped and shown as plain text.
st.markdown(footer_html, unsafe_allow_html=True)