Skip to content

Commit 75559e4

Browse files
committed
[Release] Docs Agent version 0.2.2
What's changed: - Bug fixes related to the new AQA model integration. - Prevent Docs Agent from crashing when OAuth is not set and the AQA model is not used. - Prevent Docs Agent from crashing when the AQA model fails to respond. - Enable the Docs Agent UI to display the URL metadata from an entry used by the AQA model. - Log the `Answerable probability` score when the AQA model is used. - Update `whats-new.md` for January, 2024.
1 parent f114bf5 commit 75559e4

File tree

5 files changed

+133
-17
lines changed

5 files changed

+133
-17
lines changed

demos/palm/python/docs-agent/README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,38 @@ The following issues have been identified and need to be worked on:
374374

375375
This section provides instructions on how to set up the Docs Agent project on a Linux host machine.
376376

377+
### 0. (Optional) Authorize credentials for Docs Agent
378+
379+
**This step is needed only if you plan to use Gemini's AQA model.** For more information on this
380+
feature, see the
381+
[Using the Semantic Retrieval API and AQA model](#using-the-semantic-retrieval-api-and-aqa-model)
382+
section above.
383+
384+
1. Download the `client_secret.json` file from your Google Cloud Project (GCP) account.
385+
386+
See [Authorize credentials for a desktop application][authorize-credentials]
387+
on the _AI for Developers_ doc site.
388+
389+
2. Copy the `client_secret.json` file to your host machine.
390+
391+
3. To authenticate credentials, run the following command in the directory of
392+
the host machine where the `client_secret.json` file is located:
393+
394+
```
395+
gcloud auth application-default login --client-id-file=client_secret.json --scopes='https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/generative-language.retriever'
396+
```
397+
398+
This command opens a browser and asks you to log in using your Google account.
399+
400+
**Note**: If the `gcloud` command doesn’t exist, install the Google Cloud SDK
401+
on your host machine: `sudo apt install google-cloud-sdk`
402+
403+
4. Follow the instructions on the browser and click **Allow** to authenticate.
404+
405+
This saves the authenticated credentials for Docs Agent
406+
(`application_default_credentials.json`) in the `$HOME/.config/gcloud/`
407+
directory of your host machine.
408+
377409
### 1. Prerequisites
378410

379411
1. Update the Linux package repositories on the host machine:
@@ -700,6 +732,7 @@ Meggin Kearney (`@Meggin`), and Kyo Lee (`@kyolee415`).
700732
[contribute-to-docs-agent]: #contribute-to-docs-agent
701733
[set-up-docs-agent]: #set-up-docs-agent
702734
[markdown-to-plain-text]: ./scripts/markdown_to_plain_text.py
735+
[files-to-plain-text]: ./scripts/files_to_plain_text.py
703736
[populate-vector-database]: ./scripts/populate_vector_database.py
704737
[context-source-01]: http://eventhorizontelescope.org
705738
[fact-check-section]: #using-a-language-model-to-fact-check-its-own-response
@@ -722,3 +755,4 @@ Meggin Kearney (`@Meggin`), and Kyo Lee (`@kyolee415`).
722755
[aqa-model]: https://ai.google.dev/models/gemini#model_variations
723756
[oauth-quickstart]: https://ai.google.dev/docs/oauth_quickstart
724757
[inline-passages]: https://ai.google.dev/docs/semantic_retriever#more_options_aqa_using_inline_passages
758+
[authorize-credentials]: https://ai.google.dev/docs/oauth_quickstart#authorize-credentials

demos/palm/python/docs-agent/aqa.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,15 @@ def create_a_doc(self, corpus_name, page_title, page_url):
8686
document_resource_name = get_document_response.name
8787
return document_resource_name
8888

89-
def create_a_chunk(self, doc_name, text):
89+
def create_a_chunk(self, doc_name, text, url):
9090
response = ""
9191
try:
92+
# Create a chunk.
9293
chunk = glm.Chunk(data={"string_value": text})
94+
# Add metadata.
95+
chunk.custom_metadata.append(
96+
glm.CustomMetadata(key="url", string_value=url)
97+
)
9398
create_chunk_requests = []
9499
create_chunk_requests.append(
95100
glm.CreateChunkRequest(parent=doc_name, chunk=chunk)
@@ -118,10 +123,10 @@ def create_a_chunk(self, doc_name, text):
118123
else:
119124
text_02 += line + "\n"
120125
i += 1
121-
self.create_a_chunk(doc_name, text_01)
122-
self.create_a_chunk(doc_name, text_02)
126+
self.create_a_chunk(doc_name, text_01, url)
127+
self.create_a_chunk(doc_name, text_02, url)
123128
return response
124129

125130
def create_a_doc_chunk(self, corpus_name, page_title, page_url, text):
126131
doc_name = self.create_a_doc(corpus_name, page_title, page_url)
127-
return self.create_a_chunk(doc_name, text)
132+
return self.create_a_chunk(doc_name, text, page_url)

demos/palm/python/docs-agent/chatbot/chatui.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,16 @@ def ask_model(question):
192192
clickable_urls = markdown.markdown(
193193
query_result.fetch_formatted(Format.CLICKABLE_URL)
194194
)
195-
fact_check_url = markdown.markdown(
196-
query_result.fetch_nearest_formatted(Format.CLICKABLE_URL)
197-
)
195+
fact_check_url = ""
196+
if docs_agent.check_if_aqa_is_used() and docs_agent.get_db_type() == "ONLINE_STORAGE":
197+
aqa_response_url = docs_agent.get_aqa_response_url()
198+
fact_check_url = (
199+
'<a href="' + aqa_response_url + '">' + aqa_response_url + "</a>"
200+
)
201+
else:
202+
fact_check_url = markdown.markdown(
203+
query_result.fetch_nearest_formatted(Format.CLICKABLE_URL)
204+
)
198205

199206
### PREPARE OTHER ELEMENTS NEEDED BY UI.
200207
# - Create a uuid for this request.
@@ -242,6 +249,21 @@ def ask_model(question):
242249
def parse_related_questions_response_to_html_list(response):
243250
soup = BeautifulSoup(response, "html.parser")
244251
for item in soup.find_all("li"):
252+
# In case there are code tags, remove the tag and just replace with
253+
# plain text
254+
if item.find("code"):
255+
text = item.find("code").text
256+
item.code.replace_with(text)
257+
# In case there are <p> tags within the <li> strip <p>
258+
if item.find("p"):
259+
text = item.find("p").text
260+
link = soup.new_tag(
261+
"a",
262+
href=url_for("chatui.question", ask=urllib.parse.quote_plus(text)),
263+
)
264+
link.string = text
265+
item.string = ""
266+
item.append(link)
245267
if item.string is not None:
246268
link = soup.new_tag(
247269
"a",
@@ -264,10 +286,19 @@ def log_question(uid, user_question, response):
264286
print("Question: " + user_question.strip() + "\n")
265287
print("Response:")
266288
print(response.strip() + "\n")
289+
if docs_agent.check_if_aqa_is_used():
290+
aqa_response = docs_agent.get_saved_aqa_response_json()
291+
try:
292+
probability = aqa_response.answerable_probability
293+
except:
294+
probability = 0.0
295+
print("Answerable probability: " + str(probability) + "\n")
267296
with open("chatui_logs.txt", "a", encoding="utf-8") as log_file:
268297
log_file.write("[" + date.strftime(date_format) + "][UID " + str(uid) + "]\n")
269298
log_file.write("# " + user_question.strip() + "\n\n")
270299
log_file.write(response.strip() + "\n\n")
300+
if docs_agent.check_if_aqa_is_used():
301+
log_file.write("Answerable probability: " + str(probability) + "\n\n")
271302
log_file.close()
272303

273304

demos/palm/python/docs-agent/docs/whats-new.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# What's new in Docs Agent
22

3+
## January 2024
4+
5+
* **Milestone: Docs Agent uses AQA model and Semantic Retrieval API**
6+
37
## December 2023
48

59
* **Milestone: Docs Agent uses Gemini model.**

demos/palm/python/docs-agent/docs_agent.py

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,12 @@
9393
else:
9494
palm = PaLM(api_key=API_KEY, api_endpoint=PALM_API_ENDPOINT)
9595

96-
embedding_function_gemini_retrieval = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
97-
api_key=API_KEY, model_name="models/embedding-001",
98-
task_type="RETRIEVAL_QUERY")
96+
embedding_function_gemini_retrieval = (
97+
embedding_functions.GoogleGenerativeAiEmbeddingFunction(
98+
api_key=API_KEY, model_name="models/embedding-001", task_type="RETRIEVAL_QUERY"
99+
)
100+
)
101+
99102

100103
class DocsAgent:
101104
"""DocsAgent class"""
@@ -107,8 +110,9 @@ def __init__(self):
107110
)
108111
self.chroma = Chroma(LOCAL_VECTOR_DB_DIR)
109112
self.collection = self.chroma.get_collection(
110-
COLLECTION_NAME, embedding_model=EMBEDDING_MODEL,
111-
embedding_function=embedding_function_gemini_retrieval
113+
COLLECTION_NAME,
114+
embedding_model=EMBEDDING_MODEL,
115+
embedding_function=embedding_function_gemini_retrieval,
112116
)
113117
# Update PaLM's custom prompt strings
114118
self.prompt_condition = CONDITION_TEXT
@@ -121,12 +125,21 @@ def __init__(self):
121125
self.is_aqa_used = IS_AQA_USED
122126
self.db_type = DB_TYPE
123127
# AQA model setup
124-
self.generative_service_client = glm.GenerativeServiceClient()
125-
self.retriever_service_client = glm.RetrieverServiceClient()
126-
self.permission_service_client = glm.PermissionServiceClient()
128+
self.generative_service_client = {}
129+
self.retriever_service_client = {}
130+
self.permission_service_client = {}
127131
self.corpus_display = PRODUCT_NAME + " documentation"
128132
self.corpus_name = "corpora/" + PRODUCT_NAME.lower().replace(" ", "-")
129133
self.aqa_response_buffer = ""
134+
self.set_up_aqa_model_environment()
135+
136+
# Set up the AQA model environment
137+
def set_up_aqa_model_environment(self):
138+
if IS_AQA_USED == "YES":
139+
self.generative_service_client = glm.GenerativeServiceClient()
140+
self.retriever_service_client = glm.RetrieverServiceClient()
141+
self.permission_service_client = glm.PermissionServiceClient()
142+
return
130143

131144
# Use this method for talking to a PaLM text model
132145
def ask_text_model_with_context(self, context, question):
@@ -203,7 +216,11 @@ def ask_aqa_model_using_local_vector_store(self, question):
203216
elif LOG_LEVEL == "DEBUG":
204217
self.print_the_prompt(verbose_prompt)
205218
print(aqa_response)
206-
return aqa_response.answer.content.parts[0].text
219+
try:
220+
return aqa_response.answer.content.parts[0].text
221+
except:
222+
self.aqa_response_buffer = ""
223+
return self.model_error_message
207224

208225
# Use this method for talking to Gemini's AQA model using a corpus
209226
def ask_aqa_model_using_corpora(self, question):
@@ -243,7 +260,11 @@ def ask_aqa_model_using_corpora(self, question):
243260
self.print_the_prompt(verbose_prompt)
244261
elif LOG_LEVEL == "DEBUG":
245262
print(aqa_response)
246-
return aqa_response.answer.content.parts[0].text
263+
try:
264+
return aqa_response.answer.content.parts[0].text
265+
except:
266+
self.aqa_response_buffer = ""
267+
return self.model_error_message
247268

248269
def ask_aqa_model(self, question):
249270
response = ""
@@ -323,6 +344,27 @@ def check_if_aqa_is_used(self):
323344
def get_saved_aqa_response_json(self):
324345
return self.aqa_response_buffer
325346

347+
# Retrieve the URL metadata from the AQA model's response
348+
def get_aqa_response_url(self):
349+
url = ""
350+
try:
351+
# Get the metadata from the first attributed passage for the source
352+
chunk_resource_name = (
353+
self.aqa_response_buffer.answer.grounding_attributions[
354+
0
355+
].source_id.semantic_retriever_chunk.chunk
356+
)
357+
get_chunk_response = self.retriever_service_client.get_chunk(
358+
name=chunk_resource_name
359+
)
360+
metadata = get_chunk_response.custom_metadata
361+
for m in metadata:
362+
if m.key == "url":
363+
url = m.string_value
364+
except:
365+
url = "URL unknown"
366+
return url
367+
326368
# Print the prompt on the terminal for debugging
327369
def print_the_prompt(self, prompt):
328370
print("#########################################")

0 commit comments

Comments
 (0)