52 changes: 42 additions & 10 deletions src/modules/app.py
@@ -66,20 +66,21 @@ def landingpage():


@app.route("/search", methods=["POST", "GET"])
def product_search(new_product="", sort=None, currency=None, num=None, filter_by_rating=None, csv=None):
def product_search(new_product="", sort=None, currency=None, num=None, filter_by_rating=None, csv=None, websites=None):
product = request.args.get("product_name")
if product is None:
product = new_product

data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating)
data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating, websites)

return render_template("./webapp/static/result.html", data=data, prod=product, currency=currency, sort=sort, num=num, user_login=current_user.is_authenticated)
return render_template("./webapp/static/result.html", data=data, prod=product, currency=currency, sort=sort, num=num, user_login=current_user.is_authenticated, websites=websites)


@app.route("/filter", methods=["POST", "GET"])
def product_search_filtered():
print("inside filtered search")
product = request.args.get("product_name")

print("inside filtered search")
if "add-to-wishlist" in request.form:
wishlist_product=Wishlist(user_id=current_user.id,
product_title=request.form["title"],
@@ -89,13 +90,17 @@ def product_search_filtered():
product_rating=request.form["rating"])
db.session.add(wishlist_product)
db.session.commit()
return product_search(product, None, None, None, None, None)

return product_search(product, None, None, None, None, None, None)
print("inside filtered search")
sort = request.form["sort"]
currency = request.form["currency"]
num = request.form["num"]
filter_by_rating = request.form["filter-by-rating"]

websites = []

if sort == "default":
sort = None
if currency == "usd":
@@ -104,13 +109,40 @@ def product_search_filtered():
num = None
if filter_by_rating == "default":
filter_by_rating = None

if "filter-search" in request.form:
return product_search(product, sort, currency, num, filter_by_rating, None)
print("Filter Search Detected and Websites found")
amazon=-1
walmart=-1
etsy=-1
bj=-1
google=-1
print(request.form)
amazon=request.form.get("amazon")
print(amazon)
etsy=request.form.get("etsy")
print(etsy)
walmart=request.form.get("walmart")
print(walmart)
bj=request.form.get("bj")
print(bj)
google=request.form.get("google")
print(google)
if amazon!=-1:
websites.append(amazon)
if walmart!=-1:
websites.append(walmart)
if google!=-1:
websites.append(google)
if bj!=-1:
websites.append(bj)
if etsy!=-1:
websites.append(etsy)
return product_search(product, sort, currency, num, filter_by_rating, None,websites)

elif "convert-to-csv" in request.form:

data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating)
data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating, websites)

file_name = write_csv(data, product, "./src/modules/csvs")

@@ -119,7 +151,7 @@ def product_search_filtered():

elif "convert-to-pdf" in request.form:
now = datetime.now()
data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating)
data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating, websites)
html_table = render_template("./webapp/static/pdf_maker.html", data=data, prod=product)
file_name = product + now.strftime("%m%d%y_%H%M") + '.pdf'

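Note on the filter-search branch in app.py above: checkboxes left unchecked are simply absent from the submitted form, so request.form.get() returns None for them and no -1 sentinels are needed. Below is a minimal sketch of an equivalent helper; the selected_websites name and the SUPPORTED_SITES tuple are illustrative, not part of this PR, and the sketch returns the site names rather than the raw checkbox values (which of the two driver() ultimately expects is not shown in this diff).

SUPPORTED_SITES = ("amazon", "walmart", "etsy", "bj", "google")

def selected_websites(form):
    """Return the names of the site checkboxes that were actually submitted."""
    # Unchecked checkboxes never appear in the POST body, so a None check on
    # form.get() is enough to detect the selected sites.
    return [site for site in SUPPORTED_SITES if form.get(site) is not None]

# Works with request.form or any mapping; a plain dict stands in here:
print(selected_websites({"amazon": "on", "etsy": "on"}))  # ['amazon', 'etsy']
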
58 changes: 42 additions & 16 deletions src/modules/scraper.py
@@ -26,13 +26,14 @@ def httpsGet(URL):
"""

headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
"Accept-Encoding": "gzip, deflate",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"DNT": "1",
"Connection": "close",
"Upgrade-Insecure-Requests": "1",
}
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
"Accept-Encoding": "gzip, deflate",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"DNT": "1",
"Connection": "close",
"Upgrade-Insecure-Requests": "1",
}

page = requests.get(URL, headers=headers)
soup1 = BeautifulSoup(page.content, "html.parser")
return BeautifulSoup(soup1.prettify(), "html.parser")
@@ -55,8 +56,17 @@ def searchAmazon(query, df_flag, currency):
res.select("span.a-price span"),
res.select("h2 a.a-link-normal"),
)
# print("....Res.....")
# print(res)
image = res.find("img", {"src": True})

if image:
image_url = image.get("src").strip()
else:
image_url = ""
ratings = res.select("span.a-icon-alt")
num_ratings = res.select("span.a-size-base")

trending = res.select("span.a-badge-text")
if len(trending) > 0:
trending = trending[0]
@@ -72,10 +82,12 @@ def searchWalmart(query, df_flag, currency):
trending,
df_flag,
currency,
image_url
)
products.append(product)
return products
print(f"Amazon is {len(products)}")

return products

def searchWalmart(query, df_flag, currency):
"""
@@ -84,10 +96,9 @@ def searchWalmart(query, df_flag, currency):
Returns a list of items available on walmart.com that match the product entered by the user
"""
query = formatSearchQuery(query)
URL = f"https://www.walmart.com/search?q={query}"
URL = f"https://www.walmart.com/s?query={query}"
page = httpsGet(URL)
results = page.findAll("div", {"data-item-id": True})
# print(results)
products = []
pattern = re.compile(r"out of 5 Stars")
for res in results:
@@ -102,7 +113,7 @@ def searchWalmart(query, df_flag, currency):
if len(trending) > 0:
trending = trending[0]
else:
trending = None
trending = None
product = formatResult(
"walmart",
titles,
@@ -115,6 +126,7 @@ def searchWalmart(query, df_flag, currency):
currency,
)
products.append(product)
print(f"Walmart is {len(products)}")
return products


@@ -128,8 +140,14 @@ def searchEtsy(query, df_flag, currency):
url = f"https://www.etsy.com/search?q={query}"
products = []
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9"
}
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.6045.123 Safari/537.36",
#"User-Agent":"Adsbot-Google",
"Accept-Encoding": "gzip, deflate",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"DNT": "1",
"Connection": "close",
"Upgrade-Insecure-Requests": "1",
}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "lxml")
for item in soup.select(".wt-grid__item-xs-6"):
@@ -158,6 +176,8 @@ def searchEtsy(query, df_flag, currency):
currency,
)
products.append(product)
print(f"Etsy is {len(products)}")

return products


@@ -184,7 +204,7 @@ def searchGoogleShopping(query, df_flag, currency):
image_url = image.get("data-image-src").strip()
else :
image_url = ""
print(image_url)

ratings = res.findAll("span", {"class": "Rsc7Yb"})
try:
num_ratings = pattern.findall(str(res.findAll("span")[1]))[0].replace(
@@ -210,6 +230,8 @@ def searchGoogleShopping(query, df_flag, currency):
image_url
)
products.append(product)
print(f"Google is {len(products)}")

return products


@@ -223,7 +245,7 @@ def searchBJs(query, df_flag, currency):
URL = f"https://www.bjs.com/search/{query}"
page = httpsGet(URL)
results = page.findAll("div", {"class": "product"})
# print(results)
#print(page)
products = []
for res in results:
titles, prices, links = (
@@ -244,6 +266,7 @@ def searchBJs(query, df_flag, currency):
if len(ratings) != 0:
product["rating"] = len(ratings)
products.append(product)
print(f"BJs is {len(products)}")
return products


@@ -258,7 +281,7 @@ def condense_helper(result_condensed, list, num):


def driver(
product, currency, num=None, df_flag=0, csv=None, cd=None, ui=False, sort=None, filter_by_rating=None
product, currency, num=None, df_flag=0, csv=None, cd=None, ui=False, sort=None, filter_by_rating=None, websites=None
):
"""Returns csv is the user enters the --csv arg,
else will display the result table in the terminal based on the args entered by the user"""
@@ -339,4 +362,7 @@ def driver(
file_name, index=False, header=results.columns
)
print(result_condensed)
if websites is not None:
print("Websites filter received by driver:")
print(websites)
return result_condensed
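The driver() hunk above only prints the new websites argument; how the list is used to limit scraping is not shown in this diff. The sketch below is one hypothetical way to dispatch only the selected scrapers from inside src/modules/scraper.py; the run_selected_scrapers name, the name-to-function mapping, and the assumption that websites holds site-name strings are illustrative, not part of the PR (the scraper function names and signatures are taken from the diff).

def run_selected_scrapers(query, currency, df_flag=0, websites=None):
    # Map site names to the scraper functions defined earlier in scraper.py.
    scrapers_by_name = {
        "amazon": searchAmazon,
        "walmart": searchWalmart,
        "etsy": searchEtsy,
        "google": searchGoogleShopping,
        "bj": searchBJs,
    }
    # A missing or empty filter falls back to scraping every supported site.
    selected = websites or list(scrapers_by_name)
    results = []
    for name in selected:
        scraper = scrapers_by_name.get(name)
        if scraper is not None:
            results.extend(scraper(query, df_flag, currency))
    return results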