From 8a1ba3c32f12e3fc4921a65be043d692e14eb586 Mon Sep 17 00:00:00 2001
From: AKmahim
Date: Mon, 10 Feb 2025 01:56:33 +0600
Subject: [PATCH] added new program to download all images from a website URL

---
 .gitignore                        |  4 ++-
 download-website-image/.gitignore |  1 +
 download-website-image/app.py     | 52 +++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 download-website-image/.gitignore
 create mode 100644 download-website-image/app.py

diff --git a/.gitignore b/.gitignore
index 16580d9..95167dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 *.csv
 *.xlsx
 *.json
-*.env
\ No newline at end of file
+*.env
+env
+
diff --git a/download-website-image/.gitignore b/download-website-image/.gitignore
new file mode 100644
index 0000000..c291c09
--- /dev/null
+++ b/download-website-image/.gitignore
@@ -0,0 +1 @@
+images
diff --git a/download-website-image/app.py b/download-website-image/app.py
new file mode 100644
index 0000000..0f694c9
--- /dev/null
+++ b/download-website-image/app.py
@@ -0,0 +1,52 @@
+import os
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse
+
+def download_images(url, folder="images"):
+    # Create folder if it doesn't exist
+    os.makedirs(folder, exist_ok=True)
+
+    # Get page content
+    response = requests.get(url)
+    if response.status_code != 200:
+        print(f"Failed to retrieve URL: {url}")
+        return
+
+    # Parse HTML
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    # Find all image tags
+    img_tags = soup.find_all("img")
+    if not img_tags:
+        print("No images found on the page.")
+        return
+
+    for img in img_tags:
+        img_url = img.get("src")
+        if not img_url:
+            continue
+
+        # Convert relative URLs to absolute
+        img_url = urljoin(url, img_url)
+
+        # Get image filename
+        parsed_url = urlparse(img_url)
+        img_name = os.path.basename(parsed_url.path)
+
+        if not img_name:
+            continue
+
+        # Download image
+        img_data = requests.get(img_url).content
+        img_path = os.path.join(folder, img_name)
+
+        # Save image
+        with open(img_path, "wb") as f:
+            f.write(img_data)
+
+        print(f"Downloaded: {img_name}")
+
+# Example usage
+website_url = "https://themeholy.com/html/agenxe/demo/index.html"  # Replace with your target URL
+download_images(website_url)
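
Note on the download loop in app.py: the patch calls requests.get() with no timeout and no error handling around the per-image request, so a single unreachable image URL raises an exception and aborts the whole run. Below is a minimal sketch of a more defensive per-image fetch step, assuming the same requests stack the patch already uses; the helper name fetch_image, the 10-second timeout, and the User-Agent string are illustrative assumptions, not part of the patch.

import os
import requests

def fetch_image(img_url, folder, timeout=10):
    # Download a single image URL into `folder`; return the saved path or None.
    # Timeout value and User-Agent header are assumptions, not set by the patch.
    headers = {"User-Agent": "Mozilla/5.0 (image-downloader script)"}
    try:
        resp = requests.get(img_url, headers=headers, timeout=timeout)
        resp.raise_for_status()
    except requests.RequestException as exc:
        # Skip this image instead of aborting the whole crawl
        print(f"Skipped {img_url}: {exc}")
        return None

    # Strip any query string before taking the filename
    img_name = os.path.basename(img_url.split("?")[0])
    if not img_name:
        return None

    img_path = os.path.join(folder, img_name)
    with open(img_path, "wb") as f:
        f.write(resp.content)
    return img_path

With a helper like this, the body of the for-loop in download_images() could reduce to fetch_image(urljoin(url, img_url), folder), leaving the rest of the patch unchanged.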