Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 15 additions & 18 deletions chains/employee_match/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,25 @@
from database.database import database_langchain_get_schema, execute_langchain_query

analyse_requirement_chain = (
RunnablePassthrough.assign()
| REQUEST_ANALYSER_PROMPT
| llm
| StrOutputParser()
RunnablePassthrough.assign() | REQUEST_ANALYSER_PROMPT | llm | StrOutputParser()
)

sql_builder_chain = (
RunnablePassthrough
.assign(employee_data=analyse_requirement_chain)
.assign(schema=database_langchain_get_schema)
| QUERY_BUILDER_PROMPT
| llm
| StrOutputParser()
RunnablePassthrough.assign(employee_data=analyse_requirement_chain).assign(
schema=database_langchain_get_schema
)
| QUERY_BUILDER_PROMPT
| llm
| StrOutputParser()
)

html_builder_chain = (
RunnablePassthrough
.assign(sql_query=sql_builder_chain)
.assign(
sql_query_result=lambda chain_variables: execute_langchain_query(chain_variables['sql_query'], chain_variables),
)
| HTML_RESPONSE_BUILDER_PROMPT
| llm
| StrOutputParser()
RunnablePassthrough.assign(sql_query=sql_builder_chain).assign(
sql_query_result=lambda chain_variables: execute_langchain_query(
chain_variables["sql_query"], chain_variables
),
)
| HTML_RESPONSE_BUILDER_PROMPT
| llm
| StrOutputParser()
)
2 changes: 1 addition & 1 deletion chains/employee_match/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
2. **Commercial Level**: For example, Junior, Middle, Senior, Lead.
3. **Sales campaign**: Programming languages or software platforms required to perform the employee's duties (e.g., Python, JavaScript, SQL, Hybris, Liferay, Alfresco, etc.).
4. **Other Skills**: Skills that may be beneficial but are not essential (e.g., Docker, Kubernetes, Agile, React, Vue, etc.).
5. **English Level**: Determine the level of English proficiency (e.g., Beginner, Intermediate, Upper IntermediateAdvanced, Fluent).
5. **English Level**: Determine the level of English proficiency (e.g., Beginner, Intermediate, Upper-Intermediate, Advanced, Fluent).

If any of the fields are not specified in the text, do not include them in the output.

Expand Down
30 changes: 24 additions & 6 deletions chains/employee_match/prompts_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,39 @@

REQUEST_ANALYSER_PROMPT = ChatPromptTemplate.from_messages(
[
(LangChainConstants.SYSTEM_PROMPT, skills_extractor_prompt.REQUEST_ANALYSER_SYSTEM_PROMPT),
(LangChainConstants.HUMAN_PROMPT, skills_extractor_prompt.REQUEST_ANALYSER_HUMAN_PROMPT),
(
LangChainConstants.SYSTEM_PROMPT,
skills_extractor_prompt.REQUEST_ANALYSER_SYSTEM_PROMPT,
),
(
LangChainConstants.HUMAN_PROMPT,
skills_extractor_prompt.REQUEST_ANALYSER_HUMAN_PROMPT,
),
]
)

QUERY_BUILDER_PROMPT = ChatPromptTemplate.from_messages(
[
(LangChainConstants.SYSTEM_PROMPT, skills_extractor_prompt.QUERY_BUILDER_SYSTEM_PROMPT),
(LangChainConstants.HUMAN_PROMPT, skills_extractor_prompt.QUERY_BUILDER_HUMAN_PROMPT),
(
LangChainConstants.SYSTEM_PROMPT,
skills_extractor_prompt.QUERY_BUILDER_SYSTEM_PROMPT,
),
(
LangChainConstants.HUMAN_PROMPT,
skills_extractor_prompt.QUERY_BUILDER_HUMAN_PROMPT,
),
]
)

HTML_RESPONSE_BUILDER_PROMPT = ChatPromptTemplate.from_messages(
[
(LangChainConstants.SYSTEM_PROMPT, skills_extractor_prompt.HTML_RESPONSE_BUILDER_SYSTEM_PROMPT),
(LangChainConstants.HUMAN_PROMPT, skills_extractor_prompt.HTML_RESPONSE_BUILDER_HUMAN_PROMPT),
(
LangChainConstants.SYSTEM_PROMPT,
skills_extractor_prompt.HTML_RESPONSE_BUILDER_SYSTEM_PROMPT,
),
(
LangChainConstants.HUMAN_PROMPT,
skills_extractor_prompt.HTML_RESPONSE_BUILDER_HUMAN_PROMPT,
),
]
)
Empty file.
27 changes: 27 additions & 0 deletions chains/employee_match/v2/chains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from database.database import (
database_langchain_get_schema,
execute_langchain_query,
)
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from configuration.llm.llama import llm
from chains.employee_match.v2.prompts_configuration import prompt, prompt_response

# SQL-generation chain: inject the live database schema into the chain
# variables, render the question prompt, invoke the model, and reduce the
# reply to plain text. The stop sequence keeps the model from continuing
# past the query into a fabricated "SQL Result:" section.
_with_schema = RunnablePassthrough.assign(schema=database_langchain_get_schema)

sql_chain = (
    _with_schema
    | prompt
    | llm.bind(stop=["SQL Result:"])
    | StrOutputParser()
)

def _execute_generated_query(chain_variables):
    # Run the SQL produced by sql_chain (stored under "query") against the
    # database; the full variables dict is forwarded for context.
    return execute_langchain_query(chain_variables["query"], chain_variables)


# End-to-end chain: generate the SQL, then in a second assign step re-fetch
# the schema and execute the query (both are needed by the response prompt),
# and finally have the model phrase the result for the user.
full_chain = (
    RunnablePassthrough.assign(query=sql_chain).assign(
        schema=database_langchain_get_schema,
        response=_execute_generated_query,
    )
    | prompt_response
    | llm
    | StrOutputParser()
)
75 changes: 75 additions & 0 deletions chains/employee_match/v2/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from common.enums import EmployeeEnglishLevel, EmployeeLevel


# System prompt for the SQL-generation step. The {english_level} and {levels}
# placeholders are filled once at import time by str.format below, inlining
# the enum values as Python list literals.
# NOTE(review): after formatting, this text must contain no literal curly
# braces — it is later passed to ChatPromptTemplate, which would treat any
# remaining {...} as a template variable.
SYSTEM_QUESTION = """
Given an input question, convert it to a SQL query compatible with SQLite.

Decompose the question into SQL query requirements and combine them into a single SQL query by specific columns.

### Query Requirements:
1. **Spelling and Splitting**:
- Correct spelling mistakes in the question.
- Split all words by spaces, commas, underscores, and other common symbols.

2. **Matching and Case Sensitivity**:
- Use the `LIKE` operator to find the best match with the table schema (e.g., "upper intermediate" -> "%upper%intermediate%").
- Ignore case and special characters using the `LOWER()` function (e.g., "Upper Intermediate" -> "upper intermediate").
- Convert all `VARCHAR` columns to lower case using the `LOWER()` function (e.g., `c.level` -> `LOWER(c.level)`).

3. **English Level Formatting**:
- Add underscores between words for English levels, but not at the start or end of the search string (e.g., "upper intermediate" -> "upper_intermediate").

4. **Grouping Criteria**:
- Group criteria with the same column name in brackets using the `OR` operator inside brackets.
- Use the `AND` operator between groups, starting from the `WHERE` clause.
- Example: `WHERE (LOWER(e.english_level) LIKE '%intermediate%' OR LOWER(e.english_level) LIKE '%upper_intermediate%') AND (LOWER(e.level) LIKE '%senior%' OR LOWER(e.level) LIKE '%middle%') AND LOWER(e.sales_campaign) LIKE '%hybris%' AND LOWER(e.sales_campaign) LIKE '%java%'`.

5. **Column Filters**:
- Group every column filter with brackets.

6. **Specific Column Values**:
- English level (`english_level`) can only be one of the values: {english_level}.
- Employee position (`position`) can be converted as follows: "full stack" -> "FS", "back end" -> "BE", "front end" -> "FE".
- Seniority level (`level`) can only be one of the values: {levels}.

7. **Exclusions**:
- Do not use the `name` column in the query.
- Do not use tables other than `employees` in the query.

8. **Optional Criteria**:
- Exclude criteria from the query if they are not presented or cannot be identified:
- Level
- English level
- Position
- Sales campaign
- Other skills
- Employee position

9. **Position Relationships**:
- If the employee position is "BE", also add "FS" and vice versa (e.g., if the employee is "full stack" (FS), also add "back end" (BE) and vice versa).
Return only the SQL query without any explanations. No pre-amble.
""".format(
    english_level=[level.value for level in EmployeeEnglishLevel],
    levels=[level.value for level in EmployeeLevel],
)


# System prompt for the response-formatting step: instructs the model to turn
# the executed SQL rows into an HTML fragment (one <div class="employee"> per
# record, columns rendered as a table). Contains no {placeholders}, so it is
# passed through ChatPromptTemplate unchanged.
# Fix: corrected several grammar errors in the instructions ("User header",
# "Wrap each employees", "Wrap each columns ... in table", run-on spacing
# rule) — malformed instructions degrade the LLM's formatting output.
SYSTEM_RESPONSE = """
Given an input question and SQL response, convert the list of employees to a human-readable format.

Requirements:
- Important: Do not use jinja2 or any other template engine. Use only string formatting.
- If no results are found, return "No results found for requested criteria".
- Use a header for each employee with the name of the employee and the name of the team.
- Show employee information in human-readable format where each component is on a new line with the format <column>: <value>.
- Each column should be on a new line.
- Wrap the whole response in HTML tags.
- Wrap each employee in a div tag with class "employee".
- Wrap the columns for each employee in a table inside the employee div.
- Do not add these columns: last_interview, attendance_link, team_id, user_id, id.
- Do not add spaces in the response unless they appear in the source data; leave an empty line between employees.
- Sort records by the best match with the question (best match on the top).
- Important: Add an empty row between employees!

No preamble.
"""
18 changes: 18 additions & 0 deletions chains/employee_match/v2/prompts_configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from langchain_core.prompts import ChatPromptTemplate
from chains.employee_match.v2.prompts import SYSTEM_QUESTION, SYSTEM_RESPONSE
from chains.employee_match.v2.templates import question_template, response_template

# Prompt for the SQL-generation step: system instructions followed by the
# schema/question human message.
_question_messages = [
    ("system", SYSTEM_QUESTION),
    ("human", question_template),
]
prompt = ChatPromptTemplate.from_messages(_question_messages)


# Prompt for the response-formatting step: system instructions followed by
# the question/query/response human message.
_response_messages = [
    ("system", SYSTEM_RESPONSE),
    ("human", response_template),
]
prompt_response = ChatPromptTemplate.from_messages(_response_messages)
15 changes: 15 additions & 0 deletions chains/employee_match/v2/templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Human-message template for the SQL-generation prompt. ChatPromptTemplate
# fills {schema} (the database table info injected upstream) and {question}
# (the user's input) at invocation time.
question_template = """
Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Question: {question}
SQL Query:"""


# Human-message template for the response-formatting prompt. ChatPromptTemplate
# fills {schema}, {question}, {query} and {response} (the executed SQL result)
# at invocation time.
# Fix: "Convert whole list of records" was a broken sentence fragment in the
# LLM instruction — added the missing article and terminal period.
response_template = """
Based on the table schema below, question, sql query, and sql response, write a natural language response: {schema}.
Convert the whole list of records.

Question: {question}
SQL Query: {query}
SQL Response: {response}"""
3 changes: 2 additions & 1 deletion common/constants/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class LLMConstants:
MAX_TOKENS (ClassVar[int]): The maximum number of tokens allowed for a single request. A value of -1 indicates no limit.
TEMPERATURE (ClassVar[int]): The temperature setting for the models, controlling randomness in output generation.
"""

MODEL_NAME_LLAMA: ClassVar[str] = "llama3"
MODEL_NAME_GPT: ClassVar[str] = "gpt-3.5-turbo-instruct"
MAX_TOKENS: ClassVar[int] = -1
MAX_TOKENS: ClassVar[int] = 150
TEMPERATURE: ClassVar[int] = 0.1
File renamed without changes.
7 changes: 5 additions & 2 deletions configuration/llm/llama.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
from langchain_ollama import ChatOllama

from common.constants.llm import LLMConstants

OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")

llm = ChatOllama(
model=LLMConstants.MODEL_NAME_LLAMA,
temperature=LLMConstants.TEMPERATURE,
max_tokens=LLMConstants.MAX_TOKENS,
)
base_url=OLLAMA_HOST,
)
5 changes: 2 additions & 3 deletions database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@
DATABASE_URL,
sample_rows_in_table_info=DatabaseConstants.SAMPLE_ROWS_IN_TABLE_INFO,
)
database_langchain_table_info = database_langchain.get_table_info()


def database_langchain_get_schema(_):
return database_langchain_table_info
return database_langchain.get_table_info()


def execute_langchain_query(query: str, chain_variables: dict):
Expand All @@ -30,7 +29,7 @@ def execute_langchain_query(query: str, chain_variables: dict):
query_result = database_langchain.run(query, include_columns=True)

print(f"Query result: {query_result}")
if len(query_result) == 0:
if not query_result:
return []

return query_result
Expand Down
Loading