Skip to content

Commit

Permalink
insight gpt
Browse files Browse the repository at this point in the history
  • Loading branch information
samarth30 committed Dec 9, 2024
0 parents commit 90d1b38
Show file tree
Hide file tree
Showing 23 changed files with 259,600 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
env
gpt_env
.env
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Insight GPT
Empty file added __init__.py
Empty file.
Binary file added __pycache__/boilerplate.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/boilerplate.cpython-311.pyc
Binary file not shown.
Binary file added __pycache__/main.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/main.cpython-311.pyc
Binary file not shown.
Binary file added __pycache__/prefix.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/prefix.cpython-311.pyc
Binary file not shown.
Binary file added __pycache__/tools.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/tools.cpython-311.pyc
Binary file not shown.
128 changes: 128 additions & 0 deletions boilerplate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# JavaScript snippet showing how one Google Maps marker is built from a
# `markerData` object (lat, lng, name, address).
# The literal previously opened with four quotes, which put a stray `"` in
# front of the JavaScript; it now starts directly with the code.
marker_boilerplate = """var marker = new google.maps.Marker({
position: {lat: markerData.lat, lng: markerData.lng},
map: map,
title: markerData.name + ' - ' + markerData.address,
label: markerData.name
});
"""

# SQL template: average gap between consecutive sales of the same condo unit.
# For every non-blacklisted sale (cs1) it pairs the most recent earlier
# non-blacklisted sale (cs2) of the same unit, takes the difference of the two
# closing dates (aliased `delta_days`) and averages it over all non-blacklisted
# units in buildings belonging to the market named 'Brickell'.
holding_period_boilerplate = """
WITH sale_deltas AS (
SELECT
cs1.condo_unit_id,
cs1.closing_date AS current_closing_date,
cs2.closing_date AS previous_closing_date,
(cs1.closing_date - cs2.closing_date) AS delta_days
FROM
core_condosale cs1
JOIN
core_condosale cs2
ON
cs1.condo_unit_id = cs2.condo_unit_id
WHERE
cs1.closing_date > cs2.closing_date
AND cs1.blacklist = FALSE
AND cs2.blacklist = FALSE
AND cs2.closing_date = (
SELECT MAX(cs3.closing_date)
FROM core_condosale cs3
WHERE cs3.condo_unit_id = cs1.condo_unit_id
AND cs3.closing_date < cs1.closing_date
AND cs3.blacklist = FALSE
)
AND cs1.condo_unit_id IN (
SELECT id
FROM core_condounit
WHERE blacklist = FALSE
AND building_id IN (
SELECT id
FROM core_condobuilding
WHERE market_id = (
SELECT id FROM core_condomarket WHERE name = 'Brickell'
)
)
)
)
SELECT
AVG(delta_days) AS average_delta
FROM
sale_deltas;
"""

# SQL template: average gap between consecutive sales of the same condo unit,
# restricted to units with `beds = 2`.
# Pairs each non-blacklisted sale (cs1) with the most recent earlier
# non-blacklisted sale (cs2) of the same unit and averages the closing-date
# difference (aliased `delta_days`) over non-blacklisted two-bed units in
# buildings belonging to the market named 'Brickell'.
two_bed_holding_period_boilerplate = """
WITH sale_deltas AS (
SELECT
cs1.condo_unit_id,
cs1.closing_date AS current_closing_date,
cs2.closing_date AS previous_closing_date,
(cs1.closing_date - cs2.closing_date) AS delta_days
FROM
core_condosale cs1
JOIN
core_condosale cs2
ON
cs1.condo_unit_id = cs2.condo_unit_id
WHERE
cs1.closing_date > cs2.closing_date
AND cs1.blacklist = FALSE
AND cs2.blacklist = FALSE
AND cs2.closing_date = (
SELECT MAX(cs3.closing_date)
FROM core_condosale cs3
WHERE cs3.condo_unit_id = cs1.condo_unit_id
AND cs3.closing_date < cs1.closing_date
AND cs3.blacklist = FALSE
)
AND cs1.condo_unit_id IN (
SELECT id
FROM core_condounit
WHERE blacklist = FALSE
AND beds = 2
AND building_id IN (
SELECT id
FROM core_condobuilding
WHERE market_id = (
SELECT id FROM core_condomarket WHERE name = 'Brickell'
)
)
)
)
SELECT
AVG(delta_days) AS average_delta
FROM
sale_deltas;
"""

# JavaScript skeleton for an `initMap` function: it declares an (initially
# empty) `locations` array, creates a Google Map at zoom 13 and adds one
# labelled marker per location.
# `[average_lat]` / `[average_lng]` are bracketed placeholders for the map
# centre — presumably substituted by whoever consumes the template (TODO confirm).
javascript_map_boilerplate = """
function initMap() {
var locations = [
// Building and school markers will be listed here
];
var map = new google.maps.Map(document.getElementById('map'), {
zoom: 13,
center: {lat: [average_lat], lng: [average_lng]}
});
locations.forEach(function(location) {
var marker = new google.maps.Marker({
position: {lat: location.lat, lng: location.lng},
map: map,
label: location.label
});
});
}
"""

# Shape of one `locations` entry for a building: lat/lng plus a label made of
# the building's alternate name and address (bracketed parts are placeholders).
building_marker_format_boilerplate = "{lat: [building.lat], lng: [building.lon], label: '[building.alt_name] - [building.address]'}"

# Shape of one `locations` entry for a school: coordinates are read from
# `geometry.location` and the label is the school name (bracketed parts are
# placeholders).
school_marker_format_boilerplate = "{lat: [school.geometry.location.lat], lng: [school.geometry.location.lng], label: '[school.name]'}"
2 changes: 2 additions & 0 deletions examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Placeholder list, presumably for few-shot SQL prompting examples.
# Currently unused; see the guide at
# https://python.langchain.com/v0.2/docs/how_to/sql_prompting/
examples = []
225 changes: 225 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
import ast
import os
import re

import markdown
from googlemaps import Client as GoogleMaps
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from markupsafe import Markup

# for generating the pdf report, we receive reportlab code and execute it arbitrarily
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

from boilerplate import (
building_marker_format_boilerplate,
holding_period_boilerplate,
javascript_map_boilerplate,
marker_boilerplate,
school_marker_format_boilerplate,
two_bed_holding_period_boilerplate,
)
from prefix import SQL_PREFIX
from tools import setup_tools

# Database credentials come from the environment (PG_USER, PG_PASSWORD,
# PG_PORT, PG_DB); the host is fixed to localhost.
POSTGRES_USER = os.getenv("PG_USER")
POSTGRES_PASSWORD = os.getenv("PG_PASSWORD")
POSTGRES_PORT = os.getenv("PG_PORT")
POSTGRES_DB = os.getenv("PG_DB")

connection_string = (
    f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}"
    f"@localhost:{POSTGRES_PORT}/{POSTGRES_DB}"
)

# Debug output. The password is redacted so it never reaches stdout or logs.
print(
    "Using connection string: "
    f"postgresql://{POSTGRES_USER}:***@localhost:{POSTGRES_PORT}/{POSTGRES_DB}"
)

# Fail fast at import time, but first print the non-secret settings so a
# misconfigured environment is easy to diagnose.
try:
    db = SQLDatabase.from_uri(connection_string)
except Exception as e:
    print(f"Connection error: {e}")
    print("Environment variables:")
    print(f"PG_USER: {POSTGRES_USER}")
    print(f"PG_DB: {POSTGRES_DB}")
    print(f"PG_PORT: {POSTGRES_PORT}")
    raise

# Chat model; passed to both setup_tools() and create_react_agent() below.
llm = ChatOpenAI(model="gpt-4o-mini")

# Google Maps client, keyed by the GPLACES_API_KEY environment variable.
gmaps = GoogleMaps(os.getenv("GPLACES_API_KEY"))


# Values substituted into the SQL_PREFIX template: the live table names plus
# every boilerplate snippet imported from boilerplate.py.
prefix_fields = {
    "table_names": db.get_usable_table_names(),
    "marker_boilerplate": marker_boilerplate,
    "holding_period_boilerplate": holding_period_boilerplate,
    "two_bed_holding_period_boilerplate": two_bed_holding_period_boilerplate,
    "javascript_map_boilerplate": javascript_map_boilerplate,
    "building_marker_format_boilerplate": building_marker_format_boilerplate,
    "school_marker_format_boilerplate": school_marker_format_boilerplate,
}
prefix = SQL_PREFIX.format(**prefix_fields)

# The rendered prefix becomes the agent's system message.
system_message = SystemMessage(content=prefix)


def query_as_list(db, query):
    """Run *query* and return its distinct, truthy values as cleaned strings.

    Args:
        db: Object exposing ``run(query)`` that returns the result rows as the
            string form of a Python literal (a list of tuples).
        query: SQL text to execute.

    Returns:
        A list of unique strings in first-seen order, each with standalone
        runs of digits removed and surrounding whitespace stripped. An empty
        or missing result yields ``[]``.
    """
    raw = db.run(query)
    if not raw:
        # An empty result comes back as "" (or None), which literal_eval
        # cannot parse; treat it as "no rows" instead of crashing.
        return []
    values = [el for row in ast.literal_eval(raw) for el in row if el]
    cleaned = [re.sub(r"\b\d+\b", "", value).strip() for value in values]
    # dict.fromkeys de-duplicates like set() but keeps a deterministic order.
    return list(dict.fromkeys(cleaned))


# Distinct building addresses and alternate names, with standalone numbers
# stripped by query_as_list.
# NOTE(review): neither list is referenced again in the visible part of this
# file — confirm whether they are still needed.
addresses = query_as_list(db, "SELECT address FROM core_condobuilding")
alt_names = query_as_list(db, "SELECT alt_name FROM core_condobuilding")


# Agent tools are built from the database handle and the chat model.
tools = setup_tools(db, llm)

# ReAct agent that receives the rendered SQL prefix as its system message.
# NOTE(review): `messages_modifier` appears to have been renamed in newer
# langgraph releases — confirm against the pinned version before upgrading.
agent_executor = create_react_agent(
    llm, tools, messages_modifier=system_message)


def print_sql_1(sql):
    """Print *sql* to stdout under a "The SQL query is:" banner.

    Returns ``None``; the function exists only for its printed output.
    """
    banner = "\nThe SQL query is:\n{}\n"
    print(banner.format(sql))


def extract_and_remove_html(text):
    """Split an agent reply into an HTML block, display text and Python code.

    Python code is looked for first, either as a rendered ``codehilite``
    ``<pre>`` block or as a Markdown ``python`` fence. Only if none is found
    is a Markdown ``html`` fence looked for.

    Args:
        text: Raw message content produced by the agent.

    Returns:
        A 3-tuple ``(html, text, code)``:

        * Python code found: ``(None, "PDF Generated!", code)`` where ``code``
          has its HTML entities decoded.
        * ``html`` fence found: ``(Markup(cleaned_html), remaining_text, False)``
          with the fence removed from the text.
        * Neither found: ``(None, text, False)``.
    """
    # Local import so the fix does not depend on the module's import block.
    from html import unescape

    html_pattern = r"```html\s*([\s\S]*?)\s*```"
    python_pattern = (
        r'<pre\s+class="codehilite"><code\s+class="language-python">(.*?)</code></pre>'
    )
    md_pattern = r"```python(.*?)```"

    # The rendered <pre> form takes precedence over the raw Markdown fence.
    code_match = re.search(
        python_pattern, text, re.DOTALL | re.IGNORECASE
    ) or re.search(md_pattern, text, re.DOTALL)
    if code_match:
        print(text)
        # Decode entities in ONE pass. Chained str.replace calls decoded
        # twice, e.g. "&amp;lt;" became "<" instead of "&lt;".
        return None, "PDF Generated!", unescape(code_match.group(1))

    match = re.search(html_pattern, text, re.IGNORECASE)
    if match:
        # Drop the API-key <script> placeholder before marking the HTML safe.
        cleaned_html = process_html(match.group(1).strip())
        text_without_html = re.sub(
            html_pattern, "", text, flags=re.IGNORECASE
        ).strip()
        return Markup(cleaned_html), text_without_html, False

    # No code and no HTML: hand the text back untouched.
    return None, text, False


def process_markdown(text):
    """Render Markdown *text* to HTML and wrap it in ``Markup``.

    The ``extra`` and ``codehilite`` extensions are enabled; the ``Markup``
    wrapper keeps the result from being auto-escaped when it is rendered.
    """
    return Markup(markdown.markdown(text, extensions=["extra", "codehilite"]))


def process_html(text):
    """Return *text* without empty ``<script>`` tags that carry ``{gmaps_api_key}``.

    Matching is case-insensitive and only covers script tags with no body
    whose attributes contain the literal ``{gmaps_api_key}`` placeholder.
    """
    key_script_tag = re.compile(
        r"<script[^>]*\{gmaps_api_key\}[^>]*></script>", re.IGNORECASE
    )
    return key_script_tag.sub("", text)

# Denylist used by detect_malicious_code. Compiled once at import time.
_MALICIOUS_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Importing dangerous modules, including comma lists such as
        # "import os, subprocess"
        r'import\s+[ \t\w.,]*\b(sys|subprocess|shlex|socket|ctypes|signal|multiprocessing)',
        # "from <dangerous module> import ..." would otherwise bring e.g.
        # system() into scope without ever spelling "os.system"
        r'from\s+(os|sys|subprocess|shlex|socket|ctypes|signal|multiprocessing'
        r'|shutil|importlib|builtins)\b[\w.]*\s+import',
        r'__import__\s*\(',  # Dynamic import via the builtin
        # Dangerous os methods
        r'os\.(system|popen|remove|rmdir|rename|chmod|chown|kill|fork)',
        r'subprocess\.(Popen|run|call|check_output)',  # Subprocess methods
        r'eval\(',  # Use of eval()
        r'exec\(',  # Use of exec()
        r'compile\(',  # Use of compile()
        r'shutil\.(copy|move|rmtree)',  # shutil file operations
        r'socket\.',  # Use of sockets for network access
        r'requests\.',  # Use of requests library
        r'urllib\.',  # Use of urllib library
        r'getattr\(', r'setattr\(',  # Reflection
        r'globals\(', r'locals\(',  # Accessing global or local variable scopes
        r'importlib\.',  # Dynamic importing
        r'input\(',  # Use of input() for potentially malicious prompts
        r'os\.exec',  # exec family in os module
        r'ast\.(literal_eval)',  # Dynamic evaluation through ast
    )
)


def detect_malicious_code(code):
    """Return True if *code* matches any pattern on the denylist.

    This is a best-effort textual filter, NOT a sandbox: aliasing
    (``import os as o``), string tricks and many other techniques still get
    past it. It must not be the only safeguard in front of ``exec``.

    Args:
        code: Python source text to inspect.

    Returns:
        ``True`` when a dangerous pattern is found (the pattern is printed),
        ``False`` otherwise.
    """
    for pattern in _MALICIOUS_PATTERNS:
        if pattern.search(code):
            print(f"Potentially dangerous pattern detected: {pattern.pattern}")
            return True
    return False


def process_question(prompted_question, conversation_history):
    """Send a question to the agent and collect the renderable answer parts.

    Args:
        prompted_question: The user's new question.
        conversation_history: Earlier exchanges as dicts with ``question`` and
            ``answer`` keys. When empty or ``None`` the question is sent as-is;
            otherwise the history is prepended as context.

    Returns:
        A list of renderable fragments in stream order: for every agent
        message the Markdown-rendered text, followed by the extracted HTML
        block when there is one.
    """
    if conversation_history:
        context = "\n".join(
            f"Q: {entry['question']}\nA: {entry['answer']}"
            for entry in conversation_history
        )
        prompt = (
            "\nPrevious conversation:\n"
            f"{context}\n"
            f"New question: {prompted_question}\n"
            "Please answer the new question, taking into account the context "
            "from the previous conversation if relevant.\n"
        )
    else:
        prompt = prompted_question

    content = []
    for step in agent_executor.stream({"messages": [HumanMessage(content=prompt)]}):
        for msg in step.get("agent", {}).get("messages", []):
            for call in msg.tool_calls:
                if sql := call.get("args", {}).get("query"):
                    # print_sql_1 prints by itself and returns None; wrapping
                    # it in print() emitted an extra "None" line.
                    print_sql_1(sql)

            print(msg.content)
            html, stripped_text, code = extract_and_remove_html(msg.content)
            if code:
                if detect_malicious_code(code):
                    # Do not claim success when the code was never run.
                    stripped_text = (
                        "PDF generation was blocked because the generated "
                        "code failed the safety check."
                    )
                else:
                    # SECURITY: this executes model-generated code inside the
                    # server process with this module's globals. The regex
                    # denylist above is not a sandbox; run this in an isolated
                    # process or container before exposing it to untrusted users.
                    try:
                        exec(code)
                    except Exception as exc:
                        # A faulty generated script must not abort the answer.
                        print(f"Generated code failed: {exc}")
                        stripped_text = (
                            "PDF generation failed while running the "
                            "generated code."
                        )

            content.append(process_markdown(stripped_text))
            if html:
                content.append(html)
        print("----")

    return content
Loading

0 comments on commit 90d1b38

Please sign in to comment.