diff --git a/src/modules/app.py b/src/modules/app.py index e578e66a..2c7e1884 100644 --- a/src/modules/app.py +++ b/src/modules/app.py @@ -66,20 +66,21 @@ def landingpage(): @app.route("/search", methods=["POST", "GET"]) -def product_search(new_product="", sort=None, currency=None, num=None, filter_by_rating=None, csv=None): +def product_search(new_product="", sort=None, currency=None, num=None, filter_by_rating=None, csv=None,websites=None): product = request.args.get("product_name") if product is None: product = new_product - data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating) + data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating,websites) - return render_template("./webapp/static/result.html", data=data, prod=product, currency=currency, sort=sort, num=num, user_login=current_user.is_authenticated) + return render_template("./webapp/static/result.html", data=data, prod=product, currency=currency, sort=sort, num=num, user_login=current_user.is_authenticated,websites=websites) @app.route("/filter", methods=["POST", "GET"]) def product_search_filtered(): + print("inside filtered search") product = request.args.get("product_name") - + print("inside filtered search") if "add-to-wishlist" in request.form: wishlist_product=Wishlist(user_id=current_user.id, product_title=request.form["title"], @@ -89,13 +90,17 @@ def product_search_filtered(): product_rating=request.form["rating"]) db.session.add(wishlist_product) db.session.commit() - return product_search(product, None, None, None, None, None) - + return product_search(product, None, None, None, None, None, None) + print("inside filtered search") sort = request.form["sort"] currency = request.form["currency"] num = request.form["num"] filter_by_rating = request.form["filter-by-rating"] + websites=[] + + + if sort == "default": sort = None if currency == "usd": @@ -104,13 +109,40 @@ def product_search_filtered(): num = None if filter_by_rating == "default": filter_by_rating = None - + if "filter-search" in request.form: - return product_search(product, sort, currency, num, filter_by_rating, None) + print("Filter Search Detected and Websites found") + amazon=-1 + walmart=-1 + etsy=-1 + bj=-1 + google=-1 + print(request.form) + amazon=request.form.get("amazon") + print(amazon) + etsy=request.form.get("etsy") + print(etsy) + walmart=request.form.get("walmart") + print(walmart) + bj=request.form.get("bj") + print(bj) + google=request.form.get("google") + print(google) + if amazon!=-1: + websites.append(amazon) + if walmart!=-1: + websites.append(walmart) + if google!=-1: + websites.append(google) + if bj!=-1: + websites.append(bj) + if etsy!=-1: + websites.append(etsy) + return product_search(product, sort, currency, num, filter_by_rating, None,websites) elif "convert-to-csv" in request.form: - data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating) + data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating, websites) file_name = write_csv(data, product, "./src/modules/csvs") @@ -119,7 +151,7 @@ def product_search_filtered(): elif "convert-to-pdf" in request.form: now = datetime.now() - data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating) + data = driver(product, currency, num, 0, None, None, True, sort, filter_by_rating,websites) html_table = render_template("./webapp/static/pdf_maker.html", data=data, prod=product) file_name = product + now.strftime("%m%d%y_%H%M") + '.pdf' diff --git a/src/modules/scraper.py b/src/modules/scraper.py index 83a9cd5e..a8565669 100644 --- a/src/modules/scraper.py +++ b/src/modules/scraper.py @@ -26,13 +26,14 @@ def httpsGet(URL): """ headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", - "Accept-Encoding": "gzip, deflate", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "DNT": "1", - "Connection": "close", - "Upgrade-Insecure-Requests": "1", - } + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36", + "Accept-Encoding": "gzip, deflate", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "DNT": "1", + "Connection": "close", + "Upgrade-Insecure-Requests": "1", +} + page = requests.get(URL, headers=headers) soup1 = BeautifulSoup(page.content, "html.parser") return BeautifulSoup(soup1.prettify(), "html.parser") @@ -55,8 +56,17 @@ def searchAmazon(query, df_flag, currency): res.select("span.a-price span"), res.select("h2 a.a-link-normal"), ) + # print("....Res.....") + # print(res) + image = res.find("img", {"src": True}) + + if image : + image_url = image.get("src").strip() + else : + image_url = "" ratings = res.select("span.a-icon-alt") num_ratings = res.select("span.a-size-base") + trending = res.select("span.a-badge-text") if len(trending) > 0: trending = trending[0] @@ -72,10 +82,12 @@ def searchAmazon(query, df_flag, currency): trending, df_flag, currency, + image_url ) products.append(product) - return products + print(f"Amazon is {len(products)}") + return products def searchWalmart(query, df_flag, currency): """ @@ -84,10 +96,9 @@ def searchWalmart(query, df_flag, currency): Returns a list of items available on walmart.com that match the product entered by the user """ query = formatSearchQuery(query) - URL = f"https://www.walmart.com/search?q={query}" + URL = f"https://www.walmart.com/s?query={query}" page = httpsGet(URL) results = page.findAll("div", {"data-item-id": True}) - # print(results) products = [] pattern = re.compile(r"out of 5 Stars") for res in results: @@ -102,7 +113,7 @@ def searchWalmart(query, df_flag, currency): if len(trending) > 0: trending = trending[0] else: - trending = None + trending = None product = formatResult( "walmart", titles, @@ -115,6 +126,7 @@ def searchWalmart(query, df_flag, currency): currency, ) products.append(product) + print(f"Walmart is {len(products)}") return products @@ -128,8 +140,14 @@ def searchEtsy(query, df_flag, currency): url = f"https://www.etsy.com/search?q={query}" products = [] headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9" - } + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.6045.123 Safari/537.36", + #"User-Agent":"Adsbot-Google", + "Accept-Encoding": "gzip, deflate", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "DNT": "1", + "Connection": "close", + "Upgrade-Insecure-Requests": "1", +} response = requests.get(url, headers=headers) soup = BeautifulSoup(response.content, "lxml") for item in soup.select(".wt-grid__item-xs-6"): @@ -158,6 +176,8 @@ def searchEtsy(query, df_flag, currency): currency, ) products.append(product) + print(f"Etsy is {len(products)}") + return products @@ -184,7 +204,7 @@ def searchGoogleShopping(query, df_flag, currency): image_url = image.get("data-image-src").strip() else : image_url = "" - print(image_url) + ratings = res.findAll("span", {"class": "Rsc7Yb"}) try: num_ratings = pattern.findall(str(res.findAll("span")[1]))[0].replace( @@ -210,6 +230,8 @@ def searchGoogleShopping(query, df_flag, currency): image_url ) products.append(product) + print(f"Google is {len(products)}") + return products @@ -223,7 +245,7 @@ def searchBJs(query, df_flag, currency): URL = f"https://www.bjs.com/search/{query}" page = httpsGet(URL) results = page.findAll("div", {"class": "product"}) - # print(results) + #print(page) products = [] for res in results: titles, prices, links = ( @@ -244,6 +266,7 @@ def searchBJs(query, df_flag, currency): if len(ratings) != 0: product["rating"] = len(ratings) products.append(product) + print(f"BJs is {len(products)}") return products @@ -258,7 +281,7 @@ def condense_helper(result_condensed, list, num): def driver( - product, currency, num=None, df_flag=0, csv=None, cd=None, ui=False, sort=None, filter_by_rating=None + product, currency, num=None, df_flag=0, csv=None, cd=None, ui=False, sort=None, filter_by_rating=None, websites=None ): """Returns csv is the user enters the --csv arg, else will display the result table in the terminal based on the args entered by the user""" @@ -339,4 +362,7 @@ def driver( file_name, index=False, header=results.columns ) print(result_condensed) + if websites != None: + print("HERE HERE HERE") + print(type(websites)) return result_condensed diff --git a/src/modules/webapp/static/result.html b/src/modules/webapp/static/result.html index 19704baa..693eef1c 100644 --- a/src/modules/webapp/static/result.html +++ b/src/modules/webapp/static/result.html @@ -1,235 +1,319 @@ -
- -