Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions chatbot/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,22 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
from os.path import dirname, abspath, join
import nltk
from nltk.stem.porter import PorterStemmer

# Define a simple tokenizer and stemmer
# Initialize the stemmer globally
stemmer = PorterStemmer()

# Define a tokenizer and stemmer
def tokenize(sentence):
    """Split ``sentence`` into a list of tokens using NLTK.

    Args:
        sentence: The raw input string.

    Returns:
        The token list produced by ``nltk.word_tokenize``.
    """
    # NOTE(review): word_tokenize needs the NLTK Punkt data to be installed;
    # this file does not download it itself - confirm the deployment does.
    return nltk.word_tokenize(sentence)

def stem(word):
    """Return the Porter stem of ``word``.

    The word is lower-cased before stemming so that lookups are
    case-insensitive.

    Args:
        word: A single token.

    Returns:
        The stemmed, lower-cased form of ``word``.
    """
    return stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Iterable of raw tokens; each one is passed
            through ``stem`` before matching.
        words: Vocabulary entries. They are compared as-is against the
            stemmed tokens (presumably already stemmed when the model was
            trained - verify against the training script).

    Returns:
        A 1-D ``torch.float32`` tensor with one entry per item of ``words``:
        1.0 where that entry occurs among the stemmed tokens, otherwise 0.0.
    """
    # Stem the sentence once into a set so every vocabulary lookup is O(1)
    # rather than a scan of a list.
    sentence_words = {stem(token) for token in tokenized_sentence}
    bag = [1.0 if word in sentence_words else 0.0 for word in words]
    return torch.tensor(bag, dtype=torch.float32)

class NeuralNet(nn.Module):
Expand All @@ -39,7 +45,7 @@ def forward(self, x):
intents = json.load(json_data)

FILE = "data.pth"
# Keep weights_only=True: without it torch.load performs unrestricted pickle
# deserialisation (the default before PyTorch 2.6), which can execute
# arbitrary code if the checkpoint file has been tampered with.
data = torch.load(FILE, weights_only=True)

input_size = data["input_size"]
hidden_size = data["hidden_size"]
Expand Down Expand Up @@ -87,4 +93,4 @@ def chat():
return jsonify({"error": str(e)})

if __name__ == '__main__':
    import os

    # The Werkzeug interactive debugger lets a client run arbitrary code, so
    # it must not be hard-coded on while the server listens on all
    # interfaces. Enable it explicitly with FLASK_DEBUG=1 (or "true").
    debug_mode = os.environ.get('FLASK_DEBUG', 'false').lower() in ('1', 'true')
    app.run(host="0.0.0.0", port=5000, debug=debug_mode)
Comment on lines 95 to +96
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Development server configuration flagged by static analysis.

debug=True and binding to 0.0.0.0 are appropriate for development but pose security risks in production:

  • debug=True enables the interactive debugger, which can execute arbitrary code
  • 0.0.0.0 exposes the service to all network interfaces

Consider using environment variables or a configuration flag:

+import os
+
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=5000, debug=True)
+    debug_mode = os.environ.get('FLASK_DEBUG', 'false').lower() == 'true'
+    app.run(host="0.0.0.0", port=5000, debug=debug_mode)

For production, use a WSGI server (e.g., Gunicorn) instead of the Flask development server.

🧰 Tools
🪛 ast-grep (0.40.0)

[warning] 95-95: Running flask app with host 0.0.0.0 could expose the server publicly.
Context: app.run(host="0.0.0.0", port=5000, debug=True)
Note: [CWE-668]: Exposure of Resource to Wrong Sphere [OWASP A01:2021]: Broken Access Control [REFERENCES]
https://owasp.org/Top10/A01_2021-Broken_Access_Control

(avoid_app_run_with_bad_host-python)


[warning] 95-95: Detected Flask app with debug=True. Do not deploy to production with this flag enabled as it will leak sensitive information. Instead, consider using Flask configuration variables or setting 'debug' using system environment variables.
Context: app.run(host="0.0.0.0", port=5000, debug=True)
Note: [CWE-489] Active Debug Code. [REFERENCES]
- https://labs.detectify.com/2015/10/02/how-patreon-got-hacked-publicly-exposed-werkzeug-debugger/

(debug-enabled-python)

🪛 Ruff (0.14.8)

96-96: Possible binding to all interfaces

(S104)


96-96: Use of debug=True in Flask app detected

(S201)

🤖 Prompt for AI Agents
In chatbot/app.py around lines 95-96, the app is started with debug=True and
host="0.0.0.0", which is unsafe for production; change the startup to read
configuration from environment variables (e.g., FLASK_ENV or APP_DEBUG and
APP_HOST/APP_PORT), default debug to False, and only enable debug and binding to
0.0.0.0 when an explicit development flag is set; for production remove the
built-in server invocation and run under a WSGI server like Gunicorn (document
example command) so the code no longer enables the interactive debugger or
indiscriminately binds all interfaces by default.

54 changes: 35 additions & 19 deletions chatbot/train.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,34 @@
from operator import index
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Remove unused import.

The index import from operator is never used in this file.

-from operator import index
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from operator import index
🤖 Prompt for AI Agents
In chatbot/train.py at line 1, the file imports `index` from the standard
library `operator` module but never uses it; remove the unused import line `from
operator import index` to clean up imports and avoid lint warnings, ensuring
there are no other references to `index` elsewhere in the file before committing
the change.

import numpy as np
import random
import json

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import nltk
from nltk.stem.porter import PorterStemmer

# word_tokenize needs the Punkt tokenizer data. Recent NLTK releases look it
# up under the 'punkt_tab' resource while older ones use 'punkt', so request
# both; quiet=True keeps an unknown resource id from spamming the console.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource, quiet=True)

# Configuration
INTENTS_FILE = 'intents.json'
MODEL_SAVE_FILE = "data.pth"

# Initialize the stemmer globally
stemmer = PorterStemmer()

# Define a simple tokenizer and stemmer
# Define a tokenizer and stemmer
def tokenize(sentence):
    """Tokenize ``sentence`` with NLTK.

    Args:
        sentence: The raw pattern or query string.

    Returns:
        The list of tokens returned by ``nltk.word_tokenize``.
    """
    return nltk.word_tokenize(sentence)

def stem(word):
    """Lower-case ``word`` and reduce it to its Porter stem.

    Args:
        word: A single token.

    Returns:
        The stem produced by the module-level ``stemmer``.
    """
    return stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, words):
    """Build the binary bag-of-words vector for one tokenized sentence.

    Args:
        tokenized_sentence: Iterable of raw tokens; each is stemmed with
            ``stem`` before comparison.
        words: Vocabulary entries, matched unchanged against the stemmed
            tokens.

    Returns:
        A 1-D ``torch.float32`` tensor aligned with ``words``: 1.0 where the
        vocabulary entry appears among the stemmed tokens, 0.0 elsewhere.
    """
    # A set gives constant-time membership tests; the vocabulary is scanned
    # once per sentence, so a list lookup here would be quadratic.
    sentence_words = {stem(token) for token in tokenized_sentence}

    bag = [1.0 if word in sentence_words else 0.0 for word in words]

    return torch.tensor(bag, dtype=torch.float32)

class NeuralNet(nn.Module):
Expand All @@ -36,12 +47,13 @@ def forward(self, x):



# Read the intents as UTF-8 explicitly: JSON is UTF-8 by specification, and
# relying on the platform default encoding can break on non-ASCII patterns.
with open(INTENTS_FILE, 'r', encoding='utf-8') as f:
    intents = json.load(f)

all_words = []
tags = []
xy = []

# loop through each sentence in our intents patterns
for intent in intents['intents']:
tag = intent['tag']
Expand All @@ -55,15 +67,13 @@ def forward(self, x):
# add to xy pair
xy.append((w, tag))

# Stem and lower-case each word, skipping bare punctuation tokens so that
# '?', '.' and '!' do not become features of the bag-of-words vocabulary.
ignore_words = ['?', '.', '!']
all_words = [stem(w) for w in all_words if w not in ignore_words]
# remove duplicates and sort (sorted() accepts a set directly)
all_words = sorted(set(all_words))
tags = sorted(set(tags))

print(len(xy), "patterns")
print(len(tags), "unique tags:", tags)
print(len(all_words), "unique stemmed words:", all_words)

# create training data
Expand Down Expand Up @@ -98,7 +108,7 @@ def __init__(self):

# support indexing such that dataset[i] can be used to get i-th sample
def __getitem__(self, index):
    """Return the ``index``-th sample as a ``(features, label)`` tensor pair.

    Args:
        index: Position of the sample in ``self.x_data`` / ``self.y_data``.

    Returns:
        A tuple ``(x, y)`` where ``x`` is ``self.x_data[index]`` wrapped by
        ``torch.from_numpy`` and ``y`` is ``self.y_data[index]`` converted
        with ``torch.tensor``.
    """
    # NOTE(review): from_numpy requires x_data[index] to be a NumPy array -
    # confirm against how x_data is built in __init__.
    return torch.from_numpy(self.x_data[index]), torch.tensor(self.y_data[index])

# we can call len(dataset) to return the size
def __len__(self):
Expand All @@ -120,6 +130,9 @@ def __len__(self):

# Train the model
for epoch in range(num_epochs):

total_loss = 0 # for tracking loss

for (words, labels) in train_loader:
words = words.to(device)
labels = labels.to(dtype=torch.long).to(device)
Expand All @@ -134,12 +147,16 @@ def __len__(self):
optimizer.zero_grad()
loss.backward()
optimizer.step()


total_loss += loss.item() * words.size(0) # Accumulate weighted loss

epoch_loss = total_loss / len(dataset)

if (epoch+1) % 100 == 0:
print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
print (f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_loss:.4f}')


print(f'final loss: {loss.item():.4f}')
print(f'final average loss: {epoch_loss:.4f}')

data = {
"model_state": model.state_dict(),
Expand All @@ -150,7 +167,6 @@ def __len__(self):
"tags": tags
}

# Write the checkpoint once, to the path configured in MODEL_SAVE_FILE.
torch.save(data, MODEL_SAVE_FILE)

print(f'training complete. file saved to {MODEL_SAVE_FILE}')