Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 15 additions & 18 deletions chains/employee_match/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,25 @@
from database.database import database_langchain_get_schema, execute_langchain_query

analyse_requirement_chain = (
RunnablePassthrough.assign()
| REQUEST_ANALYSER_PROMPT
| llm
| StrOutputParser()
RunnablePassthrough.assign() | REQUEST_ANALYSER_PROMPT | llm | StrOutputParser()
)

sql_builder_chain = (
RunnablePassthrough
.assign(employee_data=analyse_requirement_chain)
.assign(schema=database_langchain_get_schema)
| QUERY_BUILDER_PROMPT
| llm
| StrOutputParser()
RunnablePassthrough.assign(employee_data=analyse_requirement_chain).assign(
schema=database_langchain_get_schema
)
| QUERY_BUILDER_PROMPT
| llm
| StrOutputParser()
)

html_builder_chain = (
RunnablePassthrough
.assign(sql_query=sql_builder_chain)
.assign(
sql_query_result=lambda chain_variables: execute_langchain_query(chain_variables['sql_query'], chain_variables),
)
| HTML_RESPONSE_BUILDER_PROMPT
| llm
| StrOutputParser()
RunnablePassthrough.assign(sql_query=sql_builder_chain).assign(
sql_query_result=lambda chain_variables: execute_langchain_query(
chain_variables["sql_query"], chain_variables
),
)
| HTML_RESPONSE_BUILDER_PROMPT
| llm
| StrOutputParser()
)
2 changes: 1 addition & 1 deletion chains/employee_match/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
2. **Commercial Level**: For example, Junior, Middle, Senior, Lead.
3. **Sales campaign**: Programming languages or software platforms required to perform the employee's duties (e.g., Python, JavaScript, SQL, Hybris, Liferay, Alfresco, etc.).
4. **Other Skills**: Skills that may be beneficial but are not essential (e.g., Docker, Kubernetes, Agile, React, Vue, etc.).
5. **English Level**: Determine the level of English proficiency (e.g., Beginner, Intermediate, Upper IntermediateAdvanced, Fluent).
5. **English Level**: Determine the level of English proficiency (e.g., Beginner, Intermediate, Upper-Intermediate, Advanced, Fluent).

If any of the fields are not specified in the text, do not include them in the output.

Expand Down
30 changes: 24 additions & 6 deletions chains/employee_match/prompts_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,39 @@

REQUEST_ANALYSER_PROMPT = ChatPromptTemplate.from_messages(
[
(LangChainConstants.SYSTEM_PROMPT, skills_extractor_prompt.REQUEST_ANALYSER_SYSTEM_PROMPT),
(LangChainConstants.HUMAN_PROMPT, skills_extractor_prompt.REQUEST_ANALYSER_HUMAN_PROMPT),
(
LangChainConstants.SYSTEM_PROMPT,
skills_extractor_prompt.REQUEST_ANALYSER_SYSTEM_PROMPT,
),
(
LangChainConstants.HUMAN_PROMPT,
skills_extractor_prompt.REQUEST_ANALYSER_HUMAN_PROMPT,
),
]
)

QUERY_BUILDER_PROMPT = ChatPromptTemplate.from_messages(
[
(LangChainConstants.SYSTEM_PROMPT, skills_extractor_prompt.QUERY_BUILDER_SYSTEM_PROMPT),
(LangChainConstants.HUMAN_PROMPT, skills_extractor_prompt.QUERY_BUILDER_HUMAN_PROMPT),
(
LangChainConstants.SYSTEM_PROMPT,
skills_extractor_prompt.QUERY_BUILDER_SYSTEM_PROMPT,
),
(
LangChainConstants.HUMAN_PROMPT,
skills_extractor_prompt.QUERY_BUILDER_HUMAN_PROMPT,
),
]
)

HTML_RESPONSE_BUILDER_PROMPT = ChatPromptTemplate.from_messages(
[
(LangChainConstants.SYSTEM_PROMPT, skills_extractor_prompt.HTML_RESPONSE_BUILDER_SYSTEM_PROMPT),
(LangChainConstants.HUMAN_PROMPT, skills_extractor_prompt.HTML_RESPONSE_BUILDER_HUMAN_PROMPT),
(
LangChainConstants.SYSTEM_PROMPT,
skills_extractor_prompt.HTML_RESPONSE_BUILDER_SYSTEM_PROMPT,
),
(
LangChainConstants.HUMAN_PROMPT,
skills_extractor_prompt.HTML_RESPONSE_BUILDER_HUMAN_PROMPT,
),
]
)
Empty file.
27 changes: 27 additions & 0 deletions chains/employee_match/v2/chains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from database.database import (
database_langchain_get_schema,
execute_langchain_query,
)
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from configuration.llm.llama import llm
from chains.employee_match.v2.prompts_configuration import prompt, prompt_response

# SQL-generation chain: inject the live database schema into the chain
# variables, render the question prompt, invoke the model, and reduce the
# reply to plain text. The stop sequence keeps the model from continuing
# past the query into a fabricated "SQL Result:" section.
_with_schema = RunnablePassthrough.assign(schema=database_langchain_get_schema)

sql_chain = (
    _with_schema
    | prompt
    | llm.bind(stop=["SQL Result:"])
    | StrOutputParser()
)

def _execute_generated_query(chain_variables):
    # Run the SQL produced by sql_chain (stored under "query") against the
    # database; the full variables dict is forwarded for context.
    return execute_langchain_query(chain_variables["query"], chain_variables)


# End-to-end chain: generate the SQL, then in a second assign step re-fetch
# the schema and execute the query (both are needed by the response prompt),
# and finally have the model phrase the result for the user.
full_chain = (
    RunnablePassthrough.assign(query=sql_chain).assign(
        schema=database_langchain_get_schema,
        response=_execute_generated_query,
    )
    | prompt_response
    | llm
    | StrOutputParser()
)
75 changes: 75 additions & 0 deletions chains/employee_match/v2/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from common.enums import EmployeeEnglishLevel, EmployeeLevel


# System prompt for the SQL-generation step. The {english_level} and {levels}
# placeholders are filled once at import time by str.format below, inlining
# the enum values as Python list literals.
# NOTE(review): after formatting, this text must contain no literal curly
# braces — it is later passed to ChatPromptTemplate, which would treat any
# remaining {...} as a template variable.
SYSTEM_QUESTION = """
Given an input question, convert it to a SQL query compatible with SQLite.

Decompose the question into SQL query requirements and combine them into a single SQL query by specific columns.

### Query Requirements:
1. **Spelling and Splitting**:
- Correct spelling mistakes in the question.
- Split all words by spaces, commas, underscores, and other common symbols.

2. **Matching and Case Sensitivity**:
- Use the `LIKE` operator to find the best match with the table schema (e.g., "upper intermediate" -> "%upper%intermediate%").
- Ignore case and special characters using the `LOWER()` function (e.g., "Upper Intermediate" -> "upper intermediate").
- Convert all `VARCHAR` columns to lower case using the `LOWER()` function (e.g., `c.level` -> `LOWER(c.level)`).

3. **English Level Formatting**:
- Add underscores between words for English levels, but not at the start or end of the search string (e.g., "upper intermediate" -> "upper_intermediate").

4. **Grouping Criteria**:
- Group criteria with the same column name in brackets using the `OR` operator inside brackets.
- Use the `AND` operator between groups, starting from the `WHERE` clause.
- Example: `WHERE (LOWER(e.english_level) LIKE '%intermediate%' OR LOWER(e.english_level) LIKE '%upper_intermediate%') AND (LOWER(e.level) LIKE '%senior%' OR LOWER(e.level) LIKE '%middle%') AND LOWER(e.sales_campaign) LIKE '%hybris%' AND LOWER(e.sales_campaign) LIKE '%java%'`.

5. **Column Filters**:
- Group every column filter with brackets.

6. **Specific Column Values**:
- English level (`english_level`) can only be one of the values: {english_level}.
- Employee position (`position`) can be converted as follows: "full stack" -> "FS", "back end" -> "BE", "front end" -> "FE".
- Seniority level (`level`) can only be one of the values: {levels}.

7. **Exclusions**:
- Do not use the `name` column in the query.
- Do not use tables other than `employees` in the query.

8. **Optional Criteria**:
- Exclude criteria from the query if they are not presented or cannot be identified:
- Level
- English level
- Position
- Sales campaign
- Other skills
- Employee position

9. **Position Relationships**:
- If the employee position is "BE", also add "FS" and vice versa (e.g., if the employee is "full stack" (FS), also add "back end" (BE) and vice versa).
Return only the SQL query without any explanations. No pre-amble.
""".format(
    english_level=[level.value for level in EmployeeEnglishLevel],
    levels=[level.value for level in EmployeeLevel],
)


# System prompt for the response-formatting step: instructs the model to turn
# the executed SQL rows into an HTML fragment (one <div class="employee"> per
# record, columns rendered as a table). Contains no {placeholders}, so it is
# passed through ChatPromptTemplate unchanged.
# Fix: corrected several grammar errors in the instructions ("User header",
# "Wrap each employees", "Wrap each columns ... in table", run-on spacing
# rule) — malformed instructions degrade the LLM's formatting output.
SYSTEM_RESPONSE = """
Given an input question and SQL response, convert the list of employees to a human-readable format.

Requirements:
- Important: Do not use jinja2 or any other template engine. Use only string formatting.
- If no results are found, return "No results found for requested criteria".
- Use a header for each employee with the name of the employee and the name of the team.
- Show employee information in human-readable format where each component is on a new line with the format <column>: <value>.
- Each column should be on a new line.
- Wrap the whole response in HTML tags.
- Wrap each employee in a div tag with class "employee".
- Wrap the columns for each employee in a table inside the employee div.
- Do not add these columns: last_interview, attendance_link, team_id, user_id, id.
- Do not add spaces in the response unless they appear in the source data; leave an empty line between employees.
- Sort records by the best match with the question (best match on the top).
- Important: Add an empty row between employees!

No preamble.
"""
18 changes: 18 additions & 0 deletions chains/employee_match/v2/prompts_configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from langchain_core.prompts import ChatPromptTemplate
from chains.employee_match.v2.prompts import SYSTEM_QUESTION, SYSTEM_RESPONSE
from chains.employee_match.v2.templates import question_template, response_template

# Prompt for the SQL-generation step: system instructions followed by the
# schema/question human message.
_question_messages = [
    ("system", SYSTEM_QUESTION),
    ("human", question_template),
]
prompt = ChatPromptTemplate.from_messages(_question_messages)


# Prompt for the response-formatting step: system instructions followed by
# the question/query/response human message.
_response_messages = [
    ("system", SYSTEM_RESPONSE),
    ("human", response_template),
]
prompt_response = ChatPromptTemplate.from_messages(_response_messages)
15 changes: 15 additions & 0 deletions chains/employee_match/v2/templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Human-message template for the SQL-generation prompt. ChatPromptTemplate
# fills {schema} (the database table info injected upstream) and {question}
# (the user's input) at invocation time.
question_template = """
Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Question: {question}
SQL Query:"""


# Human-message template for the response-formatting prompt. ChatPromptTemplate
# fills {schema}, {question}, {query} and {response} (the executed SQL result)
# at invocation time.
# Fix: "Convert whole list of records" was a broken sentence fragment in the
# LLM instruction — added the missing article and terminal period.
response_template = """
Based on the table schema below, question, sql query, and sql response, write a natural language response: {schema}.
Convert the whole list of records.

Question: {question}
SQL Query: {query}
SQL Response: {response}"""
3 changes: 2 additions & 1 deletion common/constants/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class LLMConstants:
MAX_TOKENS (ClassVar[int]): The maximum number of tokens allowed for a single request. A value of -1 indicates no limit.
TEMPERATURE (ClassVar[int]): The temperature setting for the models, controlling randomness in output generation.
"""

MODEL_NAME_LLAMA: ClassVar[str] = "llama3"
MODEL_NAME_GPT: ClassVar[str] = "gpt-3.5-turbo-instruct"
MAX_TOKENS: ClassVar[int] = -1
MAX_TOKENS: ClassVar[int] = 150
TEMPERATURE: ClassVar[int] = 0.1
File renamed without changes.
7 changes: 5 additions & 2 deletions configuration/llm/llama.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
from langchain_ollama import ChatOllama

from common.constants.llm import LLMConstants

OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")

llm = ChatOllama(
model=LLMConstants.MODEL_NAME_LLAMA,
temperature=LLMConstants.TEMPERATURE,
max_tokens=LLMConstants.MAX_TOKENS,
)
base_url=OLLAMA_HOST,
)
5 changes: 2 additions & 3 deletions database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@
DATABASE_URL,
sample_rows_in_table_info=DatabaseConstants.SAMPLE_ROWS_IN_TABLE_INFO,
)
database_langchain_table_info = database_langchain.get_table_info()


def database_langchain_get_schema(_):
return database_langchain_table_info
return database_langchain.get_table_info()


def execute_langchain_query(query: str, chain_variables: dict):
Expand All @@ -30,7 +29,7 @@ def execute_langchain_query(query: str, chain_variables: dict):
query_result = database_langchain.run(query, include_columns=True)

print(f"Query result: {query_result}")
if len(query_result) == 0:
if not query_result:
return []

return query_result
Expand Down
Loading