Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ env:
POSTGRES_USER: postgres-test-user
POSTGRES_DB: postgres-test-db
REDIS_HOST: redis://redis:6379
TOGGLE_VOICE: custom

jobs:
container-job:
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -55,6 +57,7 @@ jobs:
env:
PROMETHEUS_MULTIPROC_DIR: /tmp
REDIS_HOST: ${{ env.REDIS_HOST }}
TOGGLE_VOICE: ${{ env.TOGGLE_VOICE }}
run: |
cd core_backend
export POSTGRES_HOST=postgres POSTGRES_USER=$POSTGRES_USER \
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!make
SHELL := /bin/bash

PROJECT_NAME = aaq
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
Expand Down
2 changes: 1 addition & 1 deletion core_backend/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!make
SHELL := /bin/bash

.PHONY : tests

Expand Down Expand Up @@ -49,4 +50,3 @@ teardown-redis-test:
teardown-test-db:
@docker stop testdb
@docker rm testdb

1 change: 1 addition & 0 deletions core_backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
BACKEND_ROOT_PATH = os.environ.get("BACKEND_ROOT_PATH", "")

# Speech API
TOGGLE_VOICE = os.environ.get("TOGGLE_VOICE", None)
CUSTOM_SPEECH_ENDPOINT = os.environ.get("CUSTOM_SPEECH_ENDPOINT", None)
# Logging
LANGFUSE = os.environ.get("LANGFUSE", "False")
Expand Down
230 changes: 119 additions & 111 deletions core_backend/app/question_answer/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
from sqlalchemy.ext.asyncio import AsyncSession

from ..auth.dependencies import authenticate_key, rate_limiter
from ..config import CUSTOM_SPEECH_ENDPOINT, GCS_SPEECH_BUCKET, USE_CROSS_ENCODER
from ..config import (
CUSTOM_SPEECH_ENDPOINT,
GCS_SPEECH_BUCKET,
TOGGLE_VOICE,
USE_CROSS_ENCODER,
)
from ..contents.models import (
get_similar_content_async,
increment_query_count,
Expand Down Expand Up @@ -157,134 +162,137 @@ async def search(
)


@router.post(
"/voice-search",
response_model=QueryAudioResponse,
responses={
status.HTTP_400_BAD_REQUEST: {
"model": QueryResponseError,
"description": "Bad Request",
},
status.HTTP_500_INTERNAL_SERVER_ERROR: {
"model": QueryResponseError,
"description": "Internal Server Error",
if TOGGLE_VOICE is not None:

@router.post(
"/voice-search",
response_model=QueryAudioResponse,
responses={
status.HTTP_400_BAD_REQUEST: {
"model": QueryResponseError,
"description": "Bad Request",
},
status.HTTP_500_INTERNAL_SERVER_ERROR: {
"model": QueryResponseError,
"description": "Internal Server Error",
},
},
},
)
async def voice_search(
file_url: str,
request: Request,
asession: AsyncSession = Depends(get_async_session),
user_db: UserDB = Depends(authenticate_key),
) -> QueryAudioResponse | JSONResponse:
"""
Endpoint to transcribe audio from a provided URL and generate an
LLM response. generate_tts is set to true by default, so the endpoint
returns a public random URL of an audio file containing the spoken
version of the generated response.
"""
try:
file_stream, content_type, file_extension = await download_file_from_url(
file_url
)
)
async def voice_search(
file_url: str,
request: Request,
asession: AsyncSession = Depends(get_async_session),
user_db: UserDB = Depends(authenticate_key),
) -> QueryAudioResponse | JSONResponse:
"""
Endpoint to transcribe audio from a provided URL and generate an
LLM response. generate_tts is set to true by default, so the endpoint
returns a public random URL of an audio file containing the spoken
version of the generated response.
"""
try:
file_stream, content_type, file_extension = await download_file_from_url(
file_url
)

unique_filename = generate_random_filename(file_extension)
destination_blob_name = f"stt-voice-notes/{unique_filename}"
unique_filename = generate_random_filename(file_extension)
destination_blob_name = f"stt-voice-notes/{unique_filename}"

await upload_file_to_gcs(
GCS_SPEECH_BUCKET, file_stream, destination_blob_name, content_type
)
await upload_file_to_gcs(
GCS_SPEECH_BUCKET, file_stream, destination_blob_name, content_type
)

file_path = f"temp/{unique_filename}"
with open(file_path, "wb") as f:
file_path = f"temp/{unique_filename}"
with open(file_path, "wb") as f:
file_stream.seek(0)
f.write(file_stream.read())
file_stream.seek(0)
f.write(file_stream.read())
file_stream.seek(0)

if CUSTOM_SPEECH_ENDPOINT is not None:
transcription = await post_to_speech(file_path, CUSTOM_SPEECH_ENDPOINT)
transcription_result = transcription["text"]
else:
transcription_result = await transcribe_audio(file_path)

user_query = QueryBase(
generate_llm_response=True,
query_text=transcription_result,
query_metadata={},
)

(
user_query_db,
user_query_refined_template,
response_template,
) = await get_user_query_and_response(
user_id=user_db.user_id,
user_query=user_query,
asession=asession,
generate_tts=True,
)
if CUSTOM_SPEECH_ENDPOINT is not None:
transcription = await post_to_speech(file_path, CUSTOM_SPEECH_ENDPOINT)
transcription_result = transcription["text"]
else:
transcription_result = await transcribe_audio(file_path)

response = await get_search_response(
query_refined=user_query_refined_template,
response=response_template,
user_id=user_db.user_id,
n_similar=int(N_TOP_CONTENT),
n_to_crossencoder=int(N_TOP_CONTENT_TO_CROSSENCODER),
asession=asession,
exclude_archived=True,
request=request,
)
user_query = QueryBase(
generate_llm_response=True,
query_text=transcription_result,
query_metadata={},
)

(
user_query_db,
user_query_refined_template,
response_template,
) = await get_user_query_and_response(
user_id=user_db.user_id,
user_query=user_query,
asession=asession,
generate_tts=True,
)

if user_query.generate_llm_response:
response = await get_generation_response(
response = await get_search_response(
query_refined=user_query_refined_template,
response=response,
response=response_template,
user_id=user_db.user_id,
n_similar=int(N_TOP_CONTENT),
n_to_crossencoder=int(N_TOP_CONTENT_TO_CROSSENCODER),
asession=asession,
exclude_archived=True,
request=request,
)

await save_query_response_to_db(user_query_db, response, asession)
await increment_query_count(
user_id=user_db.user_id,
contents=response.search_results,
asession=asession,
)
await save_content_for_query_to_db(
user_id=user_db.user_id,
query_id=response.query_id,
session_id=user_query.session_id,
contents=response.search_results,
asession=asession,
)
if user_query.generate_llm_response:
response = await get_generation_response(
query_refined=user_query_refined_template,
response=response,
)

await save_query_response_to_db(user_query_db, response, asession)
await increment_query_count(
user_id=user_db.user_id,
contents=response.search_results,
asession=asession,
)
await save_content_for_query_to_db(
user_id=user_db.user_id,
query_id=response.query_id,
session_id=user_query.session_id,
contents=response.search_results,
asession=asession,
)

if os.path.exists(file_path):
os.remove(file_path)
file_stream.close()

if os.path.exists(file_path):
os.remove(file_path)
file_stream.close()
if type(response) is QueryAudioResponse:
return response

if type(response) is QueryAudioResponse:
return response
if type(response) is QueryResponseError:
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content=response.model_dump(),
)

if type(response) is QueryResponseError:
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST, content=response.model_dump()
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"error": "Internal server error"},
)

return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"error": "Internal server error"},
)

except ValueError as ve:
logger.error(f"ValueError: {str(ve)}")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"error": f"Value error: {str(ve)}"},
)
except ValueError as ve:
logger.error(f"ValueError: {str(ve)}")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"error": f"Value error: {str(ve)}"},
)

except Exception as e:
logger.error(f"Unexpected error: {str(e)}")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"error": "Internal server error"},
)
except Exception as e:
logger.error(f"Unexpected error: {str(e)}")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"error": "Internal server error"},
)


async def download_file_from_url(file_url: str) -> tuple[BytesIO, str, str]:
Expand Down
1 change: 1 addition & 0 deletions core_backend/tests/api/test.env
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ ALIGN_SCORE_API="http://localhost:5002/alignscore_base"
# Speech Api endpoint
# If you want to run the tests against the external TTS and STT APIs, comment this out
CUSTOM_SPEECH_ENDPOINT="http://localhost:8001/transcribe"
TOGGLE_VOICE=custom
5 changes: 5 additions & 0 deletions deployment/docker-compose/template.core_backend.env
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ LITELLM_ENDPOINT="http://localhost:4000"
#PGVECTOR_VECTOR_SIZE=1024

#### Speech APIs ###############################################################
# Set this variable to 'external' or 'custom' to enable the /voice-search endpoint.
# It is unset by default (None), in which case the endpoint is not registered.
# TOGGLE_VOICE=external
Copy link
Collaborator

@amiraliemami amiraliemami Sep 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to boolean ENABLE_VOICE_SEARCH for clarity? We only use it as a boolean check anyway. I'd suggest changing the description to:

# This variable controls whether the voice search endpoint is active (set to true) or inactive (set to false). Default is false.

# If enabled, we default to using external services unless `CUSTOM_SPEECH_ENDPOINT` is set, in which case the custom hosted APIs will be used.


# If TOGGLE_VOICE is set to 'custom', make sure to also set the environment variable below
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is TOGGLE_VOICE? I don't see it anywhere else. It's probably what turned into ENABLE_VOICE_SEARCH.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should probably change that in the docs. I saw it in docs/components/voice-service/index.md

# CUSTOM_SPEECH_ENDPOINT=http://speech_service:8001/transcribe

#### Temporary folder for prometheus gunicorn multiprocess ####################
Expand Down