Langchain_fastapi.py
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from Bio import Entrez
from datetime import datetime
from fastapi import FastAPI
from langserve import add_routes
import os
import re
import streamlit as st
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
Template2 = """
{text}
------------------
using the text above, answer the following question in short and no more than 500 words.
Question: {question}
-------------------------
if the question can not be answered, dont make up answers and just write that the answer to your question is not withing the document.
"""
prompt = ChatPromptTemplate.from_template(template=Template2)
def get_entrez(keywords: list):
    # Set your email for PubMed requests (required by Entrez)
    Entrez.email = "your_email"
    # Keywords list to search in PubMed
    keywords_list = keywords
    # Combine keywords with the AND operator for the PubMed query (OR also works)
    keywords_query = ' AND '.join(keywords_list)
    # Today's date (YYYY-MM-DD), e.g. for naming an output text file (currently unused)
    today_date = datetime.today().strftime('%Y-%m-%d')
    # The search term is simply the combined keywords query
    search_query = f'({keywords_query})'
    # Step 1: ESearch for up to 10 PMIDs published in the last 30 days
    search_results = Entrez.read(
        Entrez.esearch(db="pubmed", term=search_query, retmax=10, datetype="pdat", reldate=30, usehistory="y"))
    webenv = search_results['WebEnv']
    query_key = search_results['QueryKey']
    id_list = search_results['IdList']
    all_summaries = []
    # Step 2: EFetch to retrieve the abstract text for each UID
    for i in id_list:
        fetch_handle = Entrez.efetch(db="pubmed", id=i, rettype="abstract", retmode="text", webenv=webenv,
                                     query_key=query_key)
        fetch_content = fetch_handle.read()
        all_summaries.append(fetch_content)  # Store the fetched abstract text
    output = ''.join(all_summaries)
    return output
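# Example (illustrative keywords only): get_entrez(["aspirin", "stroke"]) returns the concatenated
# plain-text abstracts of up to 10 PubMed records published in the last 30 days that match
# "aspirin AND stroke".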
scrape_and_summarize = RunnablePassthrough.assign(
    text=lambda x: get_entrez(x['keywords'])[:30000]
) | prompt | ChatOpenAI(model='gpt-4-1106-preview', temperature=1) | StrOutputParser()
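# Usage sketch for scrape_and_summarize (the input values here are illustrative only):
# scrape_and_summarize.invoke({"question": "Does aspirin reduce stroke risk?",
#                              "keywords": ["aspirin", "stroke"]})
# fetches abstracts for the given keywords, truncates them to 30,000 characters, and answers
# the question with the Template2 prompt.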
SEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        (
            "user",
            "Write 3 PubMed search queries to search online that form an "
            "objective medical opinion from the following: {question}\n"
            "You must respond with a list of strings in the following format: "
            '["query 1", "query 2", "query 3"].',
        ),
    ]
)
KEY_PROMPT = ChatPromptTemplate.from_messages(
    [
        (
            "user",
            "Write 3 PubMed search queries to search online that form an "
            "objective medical opinion from the following: {question}\n"
            "Then extract an important keyword combination from each query. "
            "You must respond with a list of strings in the following format: "
            '["keyword_combination 1", "keyword_combination 2", "keyword_combination 3"].',
        ),
    ]
)
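# The model's reply to KEY_PROMPT is expected to look like a quoted list, e.g.
# '["aspirin stroke prevention", "antiplatelet therapy outcomes", ...]', which is the string
# that parse_keywords below extracts keyword combinations from.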
def parse_keywords(output):
    # Initialize an empty set to store unique keywords
    unique_keywords = set()
    # Split the output into lines
    lines = output['question'].strip().split('\n')
    # Iterate through each line
    for line in lines:
        # Extract quoted keyword combinations using a regular expression
        keywords = re.findall(r'"([^"]*)"', line)
        # Add keywords to the set
        unique_keywords.update(keywords)
    # Convert the set to a list and return it
    return list(unique_keywords)
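# Example: parse_keywords({"question": '["aspirin stroke", "aspirin prevention"]'}) returns
# ["aspirin stroke", "aspirin prevention"] (order not guaranteed, since a set is used).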
key_search = KEY_PROMPT | ChatOpenAI(model='gpt-4-1106-preview', temperature=1) | {
    'question': StrOutputParser()} | RunnablePassthrough.assign(
    text=lambda x: get_entrez(parse_keywords(x))[:40000]) | prompt | ChatOpenAI(model='gpt-4-1106-preview', temperature=1) | StrOutputParser()
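# Data flow in key_search: KEY_PROMPT asks the model for PubMed keyword combinations, the dict
# step stores that raw reply under 'question', parse_keywords extracts the quoted combinations,
# get_entrez fetches matching abstracts (truncated to 40,000 characters), and the Template2
# prompt then summarizes those abstracts. Note that at the Template2 step, {question} is the
# generated query list, not the user's original question.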
WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well-written, critically acclaimed, objective and structured reports on the given text."  # noqa: E501
RESEARCH_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
The report should focus on the answer to the question, should be well-structured, informative, \
in-depth, with facts and numbers if available, and a minimum of 1,200 words.
You should strive to write the report as long as you can using all relevant and necessary information provided.
You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
Write all used source URLs at the end of the report, and make sure not to add duplicated sources; include only one reference for each.
You must write the report in APA format.
Please do your best, this is very important to my career."""  # noqa: E501
# Rebind `prompt` to the report-writing prompt (key_search above already captured the
# Template2 prompt at the time it was defined, so this rebinding does not affect it).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", WRITER_SYSTEM_PROMPT),
        ("user", RESEARCH_REPORT_TEMPLATE),
    ]
)
def collapse_list_of_lists(list_of_lists):
    content = []
    for l in list_of_lists:
        content.append("\n\n".join(l))
    return "\n\n".join(content)
# key_search already returns a single summary string, so it is assigned directly here;
# collapse_list_of_lists would only be needed if several summaries were produced per question.
chain = RunnablePassthrough.assign(
    research_summary=key_search
) | prompt | ChatOpenAI(model='gpt-4-1106-preview', temperature=1) | StrOutputParser()
system_message = st.text_area("Enter your question")
analyze_button = st.button("Answer")
if analyze_button:
    answer = chain.invoke(
        {
            "question": system_message
        }
    )
    st.write(answer)
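# The FastAPI / langserve imports above are currently unused: the file runs as a Streamlit app.
# A minimal sketch of how the same chain could instead be exposed as a LangServe endpoint is
# shown below (commented out; the route path "/research" is an assumption, and the app would be
# started with something like `uvicorn Langchain_fastapi:app`):
#
# app = FastAPI(title="PubMed research chain")
# add_routes(app, chain, path="/research")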