diff --git a/.gitignore b/.gitignore index 894a44cc0..7db61e6d0 100644 --- a/.gitignore +++ b/.gitignore @@ -101,4 +101,4 @@ venv.bak/ /site # mypy -.mypy_cache/ +.mypy_cache/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..9ee86e71a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.autopep8" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/LICENSE b/LICENSE index 91f8bc944..cf627291e 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 42553ee66..7c36ad01d 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,5 @@ # README -This is the [Flask](http://flask.pocoo.org/) [quick start](http://flask.pocoo.org/docs/1.0/quickstart/#a-minimal-application) example for [Render](https://render.com). +This is the [Flask](http://flask.pocoo.org/) API for IAGORA. -The app in this repo is deployed at [https://flask.onrender.com](https://flask.onrender.com). - -## Deployment - -Follow the guide at https://render.com/docs/deploy-flask. +The app in this repo is deployed at [https://iagora.onrender.com/](https://iagora.onrender.com/). \ No newline at end of file diff --git a/app.py b/app.py index d82c51f0d..d559a28f9 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,80 @@ -from flask import Flask +from flask import Flask, request, jsonify app = Flask(__name__) +from flask_cors import CORS + +import json +import gensim +from flask import Response +from models import llm +from models import txtmodel +from models import jsonloader +from models import expfind +from models import findstage + + +CORS(app) @app.route('/') def hello_world(): return 'Hello, World!' + +# Charger le modèle pré-entraîné de Word2Vec en français +model = gensim.models.KeyedVectors.load_word2vec_format("models/frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin", binary=True) + +# Fonction pour obtenir des synonymes d'un mot avec le modèle Word2Vec +def get_synonyms(word): + synonyms = [] + # Vérifier si le mot est dans le vocabulaire du modèle + if word in model.key_to_index: + # Obtenir les 10 mots les plus similaires au mot donné + similar_words = model.most_similar(word, topn=1000) + # Extraire les mots des tuples (mot, similarité) + for word, similarity in similar_words: + synonyms.append(word) + return synonyms + +@app.route("/ask", methods=["POST"]) +def ask(): + question = request.get_json() + response = { + "text": txtmodel.txtmodel.get_response(question['query']), + "wantToLearn": False + } + + # Vérifier si un synonyme de "apprendre" est présent dans la requête + synonyms_learn = get_synonyms("apprendre") # Utiliser le mot "apprendre" en français pour utiliser le modèle Word2Vec + query_lower = question['query'].lower() + + synonym_found = any(synonym in query_lower for synonym in synonyms_learn) + + # Imprimer "Tuteur Virtuel" et "Tuteur Réel" si un synonyme de "apprendre" est trouvé + if synonym_found: + response = { + "text": "Veuillez Choisir", + "wantToLearn": True + } + + return jsonify(response) + +@app.route("/llm", methods=["POST"]) +def llm_route(): + requete = request.get_json() + print(requete['q1']) + print(requete['url']) + #text = request.json["text"] + result = llm.Llm.callLlm(requete['q1'],requete['url']) + resp = Response(result) + resp.charset = "utf-8" + return resp + +@app.route("/expert", methods=["POST"]) +def loadjson(): + requete = request.get_json() + results = expfind.ExpFind.findExp(requete['message']) + return results + +@app.route("/stage", methods=["POST"]) +def stage(): + requete = request.get_json() + results = findstage.Findstage.finds(requete['message']) + return results \ No newline at end of file diff --git a/data/donnerExperts.json b/data/donnerExperts.json new file mode 100644 index 000000000..a1c101ba9 --- /dev/null +++ b/data/donnerExperts.json @@ -0,0 +1,92 @@ +[{ + "expert_id": 1, + "first_name": "Devlen", + "last_name": "Proughten", + "email": "dproughten0@house.gov", + "country": "China", + "years_of_experience": 4, + "specialization": "angular", + "certifications": "CPA" + }, { + "expert_id": 2, + "first_name": "Fawn", + "last_name": "Adriani", + "email": "fadriani1@thetimes.co.uk", + "country": "Philippines", + "years_of_experience": 4, + "specialization": "angular", + "certifications": "CFA" + }, { + "expert_id": 3, + "first_name": "Donielle", + "last_name": "Olner", + "email": "dolner2@msn.com", + "country": "Serbia", + "years_of_experience": 18, + "specialization": "angular", + "certifications": "CPA" + }, { + "expert_id": 4, + "first_name": "Theodor", + "last_name": "Custy", + "email": "tcusty3@examiner.com", + "country": "Russia", + "years_of_experience": 17, + "specialization": "Java", + "certifications": "PMP" + }, { + "expert_id": 5, + "first_name": "Barth", + "last_name": "Amort", + "email": "bamort4@sogou.com", + "country": "Thailand", + "years_of_experience": 19, + "specialization": "react", + "certifications": "PMP" + }, { + "expert_id": 6, + "first_name": "Lorinda", + "last_name": "Tertre", + "email": "ltertre5@newsvine.com", + "country": "Tanzania", + "years_of_experience": 10, + "specialization": "react", + "certifications": "PHR" + }, { + "expert_id": 7, + "first_name": "Benedetta", + "last_name": "Prestner", + "email": "bprestner6@usnews.com", + "country": "Portugal", + "years_of_experience": 8, + "specialization": "angular", + "certifications": "PHR" + }, { + "expert_id": 8, + "first_name": "Claudine", + "last_name": "Slinn", + "email": "cslinn7@canalblog.com", + "country": "China", + "years_of_experience": 7, + "specialization": "angular", + "certifications": "PMP" + }, { + "expert_id": 9, + "first_name": "Reuben", + "last_name": "Sedgefield", + "email": "rsedgefield8@de.vu", + "country": "France", + "years_of_experience": 13, + "specialization": "angular", + "certifications": "CFA" + }, { + "expert_id": 10, + "first_name": "Zebadiah", + "last_name": "Hodinton", + "email": "zhodinton9@csmonitor.com", + "country": "Indonesia", + "years_of_experience": 4, + "specialization": "angular", + "certifications": "PHR" + } +] \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/daollm.py b/models/daollm.py new file mode 100644 index 000000000..a04579e2d --- /dev/null +++ b/models/daollm.py @@ -0,0 +1,35 @@ +from langchain import OpenAI, Cohere, sql_database +from langchain_experimental.sql import SQLDatabaseChain +import cx_Oracle +import os +from langchain.chains import load_chain +import os + +COHERE_API_KEY="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" +os.environ["COHERE_API_KEY"] = COHERE_API_KEY + +lib_dir = os.path.join(os.environ.get("HOME"), "Development", "instantclient_19_8") +cx_Oracle.init_oracle_client(lib_dir=lib_dir) + +hostname='localhost' +port='1521' +service_name='ORCLCDB' +username='c##iagora' +password='iagora' + +cx_Oracle.init_oracle_client(lib_dir=lib_dir) +oracle_connection_string_fmt = ( + 'oracle+cx_oracle://{username}:{password}@' + + cx_Oracle.makedsn('{hostname}', '{port}', service_name='{service_name}') +) +url = oracle_connection_string_fmt.format( + username=username, password=password, + hostname=hostname, port=port, + service_name=service_name, +) +from sqlalchemy import create_engine +engine=create_engine(url, echo=True) +db = SQLDatabase(engine) +llm = Cohere(temperature=1, verbose=True) +db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True) +db_chain.run("Is Casey Brown in the database?") \ No newline at end of file diff --git a/models/embedding.py b/models/embedding.py new file mode 100644 index 000000000..653647523 --- /dev/null +++ b/models/embedding.py @@ -0,0 +1,9 @@ +import openai + +def get_embedding(text, model="text-embedding-ada-002"): + + # Get the embedding from the text embedding model. + embedding = openai.Embedding.create(input=[text], model=model)['data'][0]['embedding'] + + # Return the embedding. + return embedding \ No newline at end of file diff --git a/models/expfind.py b/models/expfind.py new file mode 100644 index 000000000..70e4fc180 --- /dev/null +++ b/models/expfind.py @@ -0,0 +1,24 @@ +import openai +import os +import json + +data_dir = os.path.join(os.path.dirname(__file__), "..", "data") +class ExpFind: + def findExp(message): + # Chargez le fichier JSON local + with open(os.path.join(data_dir, "donnerExperts.json"), "r") as file: + developers = json.load(file) + json_data = json.dumps(developers) + # Appeler l'API GPT + response = openai.Completion.create( + engine='text-davinci-003', + prompt = f"""{json_data}\n\n{message} :""", + max_tokens= 1000 + ) + + # Traiter la réponse + answer = response.choices[0].text.strip() + + # Afficher la réponse + print(answer) + return answer \ No newline at end of file diff --git a/models/findstage.py b/models/findstage.py new file mode 100644 index 000000000..ffe28f719 --- /dev/null +++ b/models/findstage.py @@ -0,0 +1,24 @@ +import openai +import os +import json + +data_dir = os.path.join(os.path.dirname(__file__), "..", "data") +class Findstage: + def finds(message): + # Chargez le fichier JSON local + with open(os.path.join(data_dir, "MOC.json"), "r") as file: + developers = json.load(file) + json_data = json.dumps(developers) + # Appeler l'API GPT + response = openai.Completion.create( + engine='text-davinci-003', + prompt = f"""{json_data}\n\n{message} :""", + max_tokens= 1000 + ) + + # Traiter la réponse + answer = response.choices[0].text.strip() + + # Afficher la réponse + print(answer) + return answer \ No newline at end of file diff --git a/models/jsonloader.py b/models/jsonloader.py new file mode 100644 index 000000000..7f19c6723 --- /dev/null +++ b/models/jsonloader.py @@ -0,0 +1,29 @@ +import json +from pathlib import Path + +class JsonLoader: + def loadJson(path): + data = json.loads(Path(path).read_text()) + return data + + def search_in_json(data, search_string): + results = [] + for item in data: + if search_string in item: + results.append(item) + return results + + + import json + + def chercher_dans_json(cle, chaine_json): + try: + # Charger le JSON depuis la chaîne + data = json.loads(chaine_json) + + # Rechercher la clé dans le JSON + valeur = data[cle] + + return valeur + except (json.JSONDecodeError, KeyError): + return None diff --git a/models/keymodel.py b/models/keymodel.py new file mode 100644 index 000000000..570c52636 --- /dev/null +++ b/models/keymodel.py @@ -0,0 +1,10 @@ +import os + +class KeyModel: + def load_openai_api_key(): + # Charge le fichier .env + with open(".env", "r") as f: + env = f.read() + + # Récupère la clé API OpenAI + return os.environ.get("OPENAI_API_KEY", "") diff --git a/models/llm.py b/models/llm.py new file mode 100644 index 000000000..642b2ab7f --- /dev/null +++ b/models/llm.py @@ -0,0 +1,94 @@ +import requests +from bs4 import BeautifulSoup +import langchain +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.text_splitter import CharacterTextSplitter +from langchain.vectorstores import Chroma +from langchain.document_loaders import TextLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.llms import OpenAI +from langchain import PromptTemplate + +from models import scrapPdf as sp +from models import scrapUrl as su + +from flask import Flask, request +from flask import jsonify + +class Llm: + def callLlm(user_question, url): + + if url.endswith(".pdf"): + scrap = sp.ScrapPdf.load_pdf_content(url) + print("URL "+scrap) + else: + scrap = su.ScrapUrl.scrapUrl(url) + print("URL "+scrap) + + # scapping text from PDF + # scrapP = sp.load_pdf_content("C:/Users/JerryHeritiana(RAPP)/OneDrive - OneWorkplace/Documents/IAGORA/FUNCHATGPTSerge.pdf") + # scrapP = sp.load_pdf_content("https://www.furet.com/media/pdf/feuilletage/9/7/8/2/8/0/4/1/9782804171018.pdf") + # scrapU = su.scrapUrl("https://en.wikipedia.org/wiki/GPT-4") + + text = scrap.replace('\n', '') + + # Open a new file called 'output.txt' in write mode and store the file object in a variable + with open('output.txt', 'w', encoding='utf-8') as file: + # Write the string to the file + file.write(text) + + # load the document + with open('./output.txt', encoding='utf-8') as f: + text = f.read() + + # define the text splitter + text_splitter = RecursiveCharacterTextSplitter( + chunk_size = 500, + chunk_overlap = 100, + length_function = len, + ) + + texts = text_splitter.create_documents([text]) + + # define the embeddings model + embeddings = OpenAIEmbeddings() + + # use the text chunks and the embeddings model to fill our vector store + db = Chroma.from_documents(texts, embeddings) + + # user_question = "C'est quoi chatGPT" + + # use our vector store to find similar text chunks + results = db.similarity_search( + query = user_question, + n_results=5 + ) + + # define the prompt template + template = """ + Tu es un chat bot qui aime aider les gens ! Compte tenu des sections contextuelles suivantes, répondez à la + question en utilisant uniquement le contexte donné. Si tu n'es pas sûr et que la réponse n'est pas + explicitement écrite dans la documentation, dites "Désolé, je ne sais pas comment vous aider." + + Context sections: + {context} + + Question: + {users_question} + + Answer: + """ + + prompt = PromptTemplate(template=template, input_variables=["context", "users_question"]) + + # fill the prompt template + prompt_text = prompt.format(context = results, users_question = user_question) + # print(prompt_text) + + # define the LLM you want to use + llm = OpenAI(temperature=1) + + # ask the defined LLM + result = llm(prompt_text) + print(result.encode("utf-8")) + return result.encode("utf-8") \ No newline at end of file diff --git a/models/oraclecnx.py b/models/oraclecnx.py new file mode 100644 index 000000000..424f57d36 --- /dev/null +++ b/models/oraclecnx.py @@ -0,0 +1,15 @@ +import cx_Oracle + +class Oraclecnx: + def dbcon(connection): +# Informations de connexion + host = "localhost" + port = 1521 + service_name = "ORCLCDB" + +# Créer une connexion + connection = cx_Oracle.connect(user="c##iagora", password="iagora", dsn=f"{host}:{port}/{service_name}") + +# Afficher le nom de la base de données + print(connection.database) + return connection diff --git a/models/scrapPdf.py b/models/scrapPdf.py new file mode 100644 index 000000000..f6020aa66 --- /dev/null +++ b/models/scrapPdf.py @@ -0,0 +1,30 @@ +# import PyPDF2 + +# def load_pdf_content(filepath): + +# with open(filepath, 'rb') as f: +# pdf = PyPDF2.PdfReader(f) +# text = '' +# for page in pdf.pages: +# text += page.extract_text() + +# return text + +import PyPDF2 +import requests +from io import BytesIO + +class ScrapPdf: + + def load_pdf_content(url): + response = requests.get(url) + response.raise_for_status() # Raise an error for bad responses + + with BytesIO(response.content) as f: + pdf = PyPDF2.PdfReader(f) + text = '' + for page in pdf.pages: + text += page.extract_text() + + return text + diff --git a/models/scrapUrl.py b/models/scrapUrl.py new file mode 100644 index 000000000..8a036b79f --- /dev/null +++ b/models/scrapUrl.py @@ -0,0 +1,22 @@ +import requests +from bs4 import BeautifulSoup + +class ScrapUrl: + + def scrapUrl(url): + + response = requests.get(url) + soup = BeautifulSoup(response.content, 'html.parser') + # find all the text on the page + text = soup.get_text() + # find the content div + content_div = soup.find(['main', 'content', 'body', 'div']) + # remove unwanted elements from main + unwanted_tags = ['sup', 'span', 'table', 'ul', 'ol', 'p', 'i', 'a'] + for tag in unwanted_tags: + for match in content_div.findAll(tag): + match.extract() + + #print(content_div.get_text()) + + return content_div.get_text() \ No newline at end of file diff --git a/models/txtmodel.py b/models/txtmodel.py new file mode 100644 index 000000000..e1e77775a --- /dev/null +++ b/models/txtmodel.py @@ -0,0 +1,22 @@ +from langchain.llms import OpenAI +from langchain.chat_models import ChatOpenAI + + +from langchain.chains import ConversationChain + +class txtmodel(): + + def get_response(question): + chat = ChatOpenAI() + conversation = ConversationChain(llm=chat) + + # Si l'étudiant a cliqué sur "Tuteur Virtuel" + if question.lower() == "virtual_tutor": + return "Vous avez choisi le Tuteur Virtuel. Comment puis-je vous aider davantage ?" + + # Si l'étudiant a cliqué sur "Tuteur Réel" + elif question.lower() == "real_tutor": + return "Vous avez choisi le Tuteur Réel. Un tuteur réel sera disponible pour vous aider." + + # Sinon, traiter la question normalement + return conversation.run(question) diff --git a/output.txt b/output.txt new file mode 100644 index 000000000..a2ba148c6 --- /dev/null +++ b/output.txt @@ -0,0 +1 @@ +Loading…New search experience powered by AICollectives™ on Stack Overflow1 month ago 540 times 0 from langchain.chains import ConversationalRetrievalChainfrom langchain.memory import ConversationBufferMemoryfrom langchain.vectorstores import DocArrayInMemorySearchfrom langchain.embeddings.openai import OpenAIEmbeddingsfrom langchain.document_loaders import ( UnstructuredWordDocumentLoader, TextLoader, UnstructuredPowerPointLoader,)from langchain.tools import Toolfrom langchain.utilities import GoogleSearchAPIWrapperfrom langchain.chat_models import ChatOpenAIimport osimport openaiimport sysfrom dotenv import load_dotenv, find_dotenvsys.path.append('../..')_ = load_dotenv(find_dotenv()) # read local .env filegoogle_api_key = os.environ.get("GOOGLE_API_KEY")google_cse_id = os.environ.get("GOOGLE_CSE_ID")# Initialize OpenAI API keyopenai.api_key = os.environ['OPENAI_API_KEY']# Initialize Langchain environmentos.environ["LANGCHAIN_TRACING_V2"] = "true"os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"os.environ["LANGCHAIN_API_KEY"] = os.environ['LANGCHAIN_API_KEY']os.environ["GOOGLE_API_KEY"] = google_api_keyos.environ["GOOGLE_CSE_ID"] = google_cse_id# Replace with the actual folder pathsfolder_path_docx = "DB\\DB VARIADO\\DOCS"folder_path_txt = "DB\\BLOG-POSTS"folder_path_pptx_1 = "DB\\PPT JUNIO"folder_path_pptx_2 = "DB\\DB VARIADO\\PPTX"# Create a list to store the loaded contentloaded_content = []# Load and process DOCX filesfor file in os.listdir(folder_path_docx): if file.endswith(".docx"): file_path = os.path.join(folder_path_docx, file) loader = UnstructuredWordDocumentLoader(file_path) docx = loader.load() loaded_content.extend(docx)# Load and process TXT filesfor file in os.listdir(folder_path_txt): if file.endswith(".txt"): file_path = os.path.join(folder_path_txt, file) loader = TextLoader(file_path, encoding='utf-8') text = loader.load() loaded_content.extend(text)# Load and process PPTX files from folder 1for file in os.listdir(folder_path_pptx_1): if file.endswith(".pptx"): file_path = os.path.join(folder_path_pptx_1, file) loader = UnstructuredPowerPointLoader(file_path) slides_1 = loader.load() loaded_content.extend(slides_1)# Load and process PPTX files from folder 2for file in os.listdir(folder_path_pptx_2): if file.endswith(".pptx"): file_path = os.path.join(folder_path_pptx_2, file) loader = UnstructuredPowerPointLoader(file_path) slides_2 = loader.load() loaded_content.extend(slides_2)# Initialize OpenAI Embeddingsembedding = OpenAIEmbeddings()# Create embeddings for loaded contentembeddings_content = []for one_loaded_content in loaded_content: embedding_content = embedding.embed_query(one_loaded_content.page_content) embeddings_content.append(embedding_content)db = DocArrayInMemorySearch.from_documents(loaded_content, embedding)retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})search = GoogleSearchAPIWrapper()def custom_search(query): internet_results = search.run(query) print(internet_results) return internet_resultschain = ConversationalRetrievalChain.from_llm( llm=ChatOpenAI(model_name="gpt-4", temperature=0), chain_type="map_reduce", retriever=retriever, return_source_documents=True, return_generated_question=True,)history = []while True: query = input("Hola, soy Chatbot. ¿Qué te gustaría saber? ") # Use the custom_search function to get internet search results internet_results = custom_search(query) # Combine the custom data and internet search results combined_results = loaded_content + [internet_results] # Pass the combined results to the chain response = chain( {"question": query, "chat_history": history, "documents": combined_results}) print(response["answer"]) history.append(("system", query)) # user's query history.append(("assistant", response["answer"])) # chatbot's response Follow asked 1 Answer Sorted by: Highest score (default) Trending (recent votes count more) Date modified (newest first) Date created (oldest first) 0 query="what is stackoverflow?"internet_results_prior=search.run(query)def custom_search(query): #internet_results = search.run(query) # comment this line and add the following internet_results = search.results(query,3) # replace with it, so gets metadata results, 3 means return three results print(internet_results) return internet_resultsquery="what is stackoverflow?"internet_results = custom_search(query)[{'title': 'Stack Overflow - Where Developers Learn, Share, & Build Careers', 'link': 'https://stackoverflow.com/', 'snippet': 'Stack Overflow is the largest, most trusted online community for developers to learn, share\u200b \u200btheir programming \u200bknowledge, and build their careers.'}, {'title': 'Stack Overflow - Wikipedia', 'link': 'https://en.wikipedia.org/wiki/Stack_Overflow', 'snippet': 'Stack Overflow is a question-and-answer website for programmers. It is the flagship site of the Stack Exchange Network. It was created in 2008 by Jeff\xa0...'}, {'title': 'What is a stack overflow error?', 'link': 'https://www.techtarget.com/whatis/definition/stack-overflow', 'snippet': 'A stack overflow is a type of buffer overflow error that occurs when a computer program tries to use more memory space in the call stack than has been\xa0...'}]for result in internet_results: print(result['snippet']) print("---"*10)Stack Overflow is the largest, most trusted online community for developers to learn, share​ ​their programming ​knowledge, and build their careers.------------------------------Stack Overflow is a question-and-answer website for programmers. It is the flagship site of the Stack Exchange Network. It was created in 2008 by Jeff ...------------------------------A stack overflow is a type of buffer overflow error that occurs when a computer program tries to use more memory space in the call stack than has been ...import os#https://console.cloud.google.com/apis/credentialsos.environ["GOOGLE_CSE_ID"] = "xxx"#https://programmablesearchengine.google.com/controlpanel/createos.environ["GOOGLE_API_KEY"] = "xxx"from langchain.utilities import GoogleSearchAPIWrappersearch = GoogleSearchAPIWrapper()def custom_search(query): #internet_results = search.run(query) # comment this line and add the following internet_results = search.results(query,3) # replace with it, so gets metadata results print(internet_results) return internet_resultsquery="what is stackoverflow?"internet_results = custom_search(query)for result in internet_results: print(result['snippet']) print("---"*10)loaded_content=[]loaded_content=loaded_content + [internet_results_prior]#convert list to list of docsfrom langchain.docstore.document import Documentloaded_content_docs=[]for each_content in loaded_content: loaded_content_docs.append(Document(page_content=each_content, metadata={"source": "internet"}))from langchain.chains import ConversationalRetrievalChainfrom langchain.vectorstores import DocArrayInMemorySearchfrom langchain.chat_models import ChatOpenAIfrom langchain.embeddings.openai import OpenAIEmbeddings# Initialize OpenAI Embeddingsembedding = OpenAIEmbeddings()vector_db = DocArrayInMemorySearch.from_documents(loaded_content_docs, embedding)retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})history = []chain = ConversationalRetrievalChain.from_llm( llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0), # did not have access to chat-gpt chain_type="map_reduce", retriever=retriever, return_source_documents=True, return_generated_question=True,)question="do you know about stackoverflow?"response = chain( {"question": question, "chat_history": history, "documents": loaded_content_docs})print(response["answer"]) Follow answered Your Answer Draft savedDraft discardedSign up or Sign up using Google Sign up using Facebook Sign up using Email and Password SubmitPost as a guestNameEmailPost as a guestNameEmail Post Your Answer Discard Not the answer you're looking for? Browse other questions tagged or . Related Subscribe to RSS Question feed lang-py Accept all cookies Necessary cookies only Customize settings   \ No newline at end of file diff --git a/req.txt b/req.txt new file mode 100644 index 000000000..b169b52ca --- /dev/null +++ b/req.txt @@ -0,0 +1,100 @@ +aiohttp==3.8.3 +aiosignal==1.3.1 +alembic==1.12.0 +aniso8601==9.0.1 +anyio==3.7.1 +async-timeout==4.0.2 +attrs==22.2.0 +bardapi==0.1.23a0 +beautifulsoup4==4.12.2 +blinker==1.6.2 +cachetools==4.2.4 +certifi==2022.12.7 +charset-normalizer==2.1.1 +click==8.1.6 +colorama==0.4.6 +dataclasses-json==0.5.14 +deep-translator==1.11.4 +dnspython==2.2.0 +email-validator==1.1.3 +exceptiongroup==1.1.2 +Flask==2.0.1 +Flask-Cors==4.0.0 +Flask-Login==0.5.0 +Flask-Migrate==3.1.0 +flask-restx==0.5.1 +Flask-SQLAlchemy==2.5.1 +Flask-WTF==0.15.1 +frozenlist==1.3.3 +google-api-core==1.34.0 +google-auth==1.35.0 +google-cloud-core==1.7.3 +google-cloud-translate==2.0.1 +googleapis-common-protos==1.59.1 +GoogleBard==1.4.0 +greenlet==2.0.2 +grpcio==1.56.0 +grpcio-status==1.48.2 +gunicorn==20.1.0 +h11==0.14.0 +h2==4.1.0 +hpack==4.0.0 +httpcore==0.17.3 +httpx==0.24.1 +hyperframe==6.0.1 +idna==3.4 +itsdangerous==2.1.2 +Jinja2==3.0.1 +joblib==1.3.1 +jsonschema==4.17.3 +jsonschema-specifications==0.1.3 +langchain==0.0.27 +langsmith==0.0.38 +Mako==1.2.4 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 +mdurl==0.1.2 +multidict==6.0.4 +mypy-extensions==1.0.0 +numexpr==2.8.5 +numpy==1.25.1 +openai==0.28.0 +openapi-schema-pydantic==1.2.4 +packaging==23.1 +pandas==2.0.3 +prompt-toolkit==3.0.39 +protobuf==3.20.3 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pydantic==1.10.12 +Pygments==2.15.1 +PyPDF2==3.0.1 +python-dateutil==2.8.2 +python-decouple==3.4 +python-dotenv==1.0.0 +pytz==2023.3 +PyYAML==6.0.1 +referencing==0.30.2 +requests==2.28.2 +rich==13.4.2 +rpds-py==0.10.0 +rsa==4.9 +scikit-learn==1.3.0 +scipy==1.11.1 +six==1.16.0 +sniffio==1.3.0 +socksio==1.0.0 +soupsieve==2.4.1 +SQLAlchemy==1.4.23 +tenacity==8.2.2 +threadpoolctl==3.2.0 +tqdm==4.64.1 +typing-inspect==0.9.0 +typing_extensions==4.7.1 +tzdata==2023.3 +urllib3==1.26.14 +wcwidth==0.2.6 +Werkzeug==2.3.6 +WTForms==2.3.3 +yarl==1.8.2 diff --git a/requirements.txt b/requirements.txt index 147ddd086..f92d8ade3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,131 @@ -Flask -Gunicorn +aiohttp==3.8.3 +aiosignal==1.3.1 +alembic==1.12.0 +aniso8601==9.0.1 +anyio==3.7.1 +async-timeout==4.0.2 +attrs==22.2.0 +backoff==2.2.1 +bardapi==0.1.23a0 +bcrypt==4.0.1 +beautifulsoup4==4.12.2 +blinker==1.6.2 +cachetools==4.2.4 +certifi==2022.12.7 +charset-normalizer==2.1.1 +Chroma==0.2.0 +chroma-hnswlib==0.7.3 +chromadb==0.4.12 +click==8.1.6 +colorama==0.4.6 +coloredlogs==15.0.1 +dataclasses-json==0.5.14 +deep-translator==1.11.4 +dnspython==2.4.2 +email-validator==1.1.3 +exceptiongroup==1.1.2 +fastapi==0.99.1 +filelock==3.12.4 +Flask==2.0.1 +Flask-Cors==4.0.0 +Flask-Login==0.5.0 +Flask-Migrate==3.1.0 +flask-restx==0.5.1 +Flask-SQLAlchemy==2.5.1 +Flask-WTF==0.15.1 +flatbuffers==23.5.26 +frozenlist==1.3.3 +fsspec==2023.9.1 +google-api-core==1.34.0 +google-auth==1.35.0 +google-cloud-core==1.7.3 +google-cloud-translate==2.0.1 +googleapis-common-protos==1.59.1 +GoogleBard==1.4.0 +greenlet==2.0.2 +grpcio==1.56.0 +grpcio-status==1.48.2 +gunicorn==20.1.0 +h11==0.14.0 +h2==4.1.0 +hpack==4.0.0 +httpcore==0.17.3 +httptools==0.6.0 +httpx==0.24.1 +huggingface-hub==0.16.4 +humanfriendly==10.0 +hyperframe==6.0.1 +idna==3.4 +importlib-resources==6.0.1 +itsdangerous==2.1.2 +Jinja2==3.0.1 +joblib==1.3.1 +jsonschema==4.19.0 +jsonschema-specifications==2023.7.1 +langchain==0.0.294 +langsmith==0.0.38 +Mako==1.2.4 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 +mdurl==0.1.2 +monotonic==1.6 +mpmath==1.3.0 +multidict==6.0.4 +mypy-extensions==1.0.0 +numexpr==2.8.5 +numpy==1.25.1 +onnxruntime==1.16.0 +openai==0.28.0 +openapi-schema-pydantic==1.2.4 +overrides==7.4.0 +packaging==23.1 +pandas==2.0.3 +posthog==3.0.2 +prompt-toolkit==3.0.39 +protobuf==3.20.3 +pulsar-client==3.3.0 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pydantic==1.10.12 +Pygments==2.15.1 +PyPDF2==3.0.1 +PyPika==0.48.9 +pyreadline3==3.4.1 +python-dateutil==2.8.2 +python-decouple==3.4 +python-dotenv==1.0.0 +pytz==2023.3 +PyYAML==6.0.1 +referencing==0.30.2 +regex==2023.8.8 +requests==2.28.2 +rich==13.4.2 +rpds-py==0.10.0 +rsa==4.9 +scikit-learn==1.3.0 +scipy==1.11.1 +six==1.16.0 +sniffio==1.3.0 +socksio==1.0.0 +soupsieve==2.4.1 +SQLAlchemy==1.4.23 +starlette==0.27.0 +sympy==1.12 +tenacity==8.2.2 +threadpoolctl==3.2.0 +tiktoken==0.5.1 +tokenizers==0.14.0 +tqdm==4.66.1 +typer==0.9.0 +typing-inspect==0.9.0 +typing_extensions==4.7.1 +tzdata==2023.3 +urllib3==1.26.14 +uvicorn==0.23.2 +watchfiles==0.20.0 +wcwidth==0.2.6 +websockets==11.0.3 +Werkzeug==2.3.6 +WTForms==2.3.3 +yarl==1.8.2