face-attendance project added

AKmahim · Nov 27, 2023 · d65f25f · d65f25f
1 parent 35f4473
commit d65f25f
Show file tree

Hide file tree

Showing 18 changed files with 2,321 additions and 0 deletions.
diff --git a/data.py b/data.py
@@ -0,0 +1,214 @@
+# ========================= convert text data into json data =============
+
+# import json
+
+# # Initialize an empty list to store product dictionaries
+# products = []
+
+# # Read data from the file
+# with open('data.txt', 'r', encoding='utf-8') as file:
+#     data = file.read()
+
+# # Split the data into individual products using double newline as a separator
+# product_data = data.strip().split('\n\n')
+
+# # Process each product data to create a product dictionary
+# for product_info in product_data:
+#     parts = product_info.split('\n')
+#     product = {
+#         "product_name": parts[0],
+#         "product_image": parts[1],
+#         "brand": parts[2],
+#         "types": parts[3],
+#         "price": parts[4],
+#         "short_description": parts[5],
+#         "rating": parts[6]
+#     }
+#     products.append(product)
+
+# # Convert the list of product dictionaries to JSON
+# json_data = json.dumps(products, indent=4, ensure_ascii=False)
+
+# # Save the JSON data to a file
+# with open('symphony.json', 'w', encoding='utf-8') as json_file:
+#     json_file.write(json_data)
+
+# print("JSON data has been successfully created and saved to 'products.json'.")
+
+
+
+
+
+
+
+# ====================== web crawling ================
+# import requests
+# from bs4 import BeautifulSoup
+# import json
+
+# # Initialize an empty list to store the extracted data
+# all_data = []
+
+# # Read the list of URLs from the data.txt file
+# with open('data.txt', 'r') as file:
+#     urls = file.read().splitlines()
+
+# # Iterate through each URL
+# for url in urls:
+#     url = url.strip()  # Remove leading/trailing spaces
+#     if not url:
+#         continue  # Skip empty lines
+
+#     try:
+#         # Send an HTTP GET request to the URL
+#         response = requests.get(url)
+#         response.raise_for_status()  # Check for HTTP request errors
+
+#         # Parse the HTML content of the page using BeautifulSoup
+#         soup = BeautifulSoup(response.text, 'html.parser')
+
+#         # Find all tables on the page
+#         tables = soup.find_all('table')
+
+#         # Check if there are at least two tables on the page
+#         if len(tables) >= 2:
+#             # Get the second table (index 1) and extract its data
+#             second_table = tables[1]
+#             table_data = {}
+#             for row in second_table.find_all('tr'):
+#                 columns = row.find_all('td')
+#                 if len(columns) == 2:
+#                     key = columns[0].get_text().strip()
+#                     value = columns[1].get_text().strip()
+#                     table_data[key] = value
+
+#             # Add the extracted data to the list
+#             all_data.append(table_data)
+#         else:
+#             print(f"There are not enough tables on the page to extract data for {url}")
+#     except requests.exceptions.RequestException as e:
+#         print(f"Failed to retrieve the page for {url}: {str(e)}")
+
+# # Convert the list of data to JSON format
+# json_data = json.dumps(all_data, indent=4, ensure_ascii=False)
+
+# # Save the JSON data to a file
+# with open('details.json', 'w', encoding='utf-8') as json_file:
+#     json_file.write(json_data)
+
+# print("JSON data has been successfully created and saved to 'details.json'.")
+
+
+# ================= web crawling 2 =========================
+# import requests
+# from bs4 import BeautifulSoup
+# import json
+
+# # Function to scrape data from a URL
+# def scrape_data(url):
+#     # Send an HTTP GET request to the URL
+#     response = requests.get(url)
+
+#     # Create a dictionary to store the extracted data
+#     result = {}
+#     base_url = "https://www.mobiledokan.com"
+
+#     # Check if the request was successful
+#     if response.status_code == 200:
+#         # Parse the HTML content of the page
+#         soup = BeautifulSoup(response.text, 'html.parser')
+
+#         # Find all the figure tags with class 'size-full'
+#         figure_tags = soup.find_all('figure', class_=['size-full', 'size-large'])
+
+#         # Extract and store the image sources in the result dictionary
+#         image_sources = []
+#         for figure in figure_tags:
+#             img_tag = figure.find('img')
+#             if img_tag:
+#                 img_src = img_tag['data-src']
+#                 image_sources.append( base_url+img_src)
+
+#         # Find the "Highlights" header
+#         highlights_header = soup.find('h3', text='Highlights')
+
+#         if highlights_header:
+#             # Find the next three <p> tags after the "Highlights" header
+#             p_tags = highlights_header.find_all_next('p', limit=3)
+
+#             # Extract and store the content of the <p> tags in the result dictionary
+#             paragraph_texts = [p_tag.get_text() for p_tag in p_tags]
+
+#         # Store the image sources and details in the result dictionary
+#         result["image1"] = image_sources[0] if len(image_sources) > 0 else ""
+#         result["image2"] = image_sources[1] if len(image_sources) > 1 else ""
+#         result["image3"] = image_sources[2] if len(image_sources) > 2 else ""
+#         result["details"] = " ".join(paragraph_texts)
+
+#         return result
+
+#     else:
+#         print(f"Failed to retrieve the webpage at {url}. Status code: {response.status_code}")
+#         return None
+
+# # Read the list of URLs from the 'data.txt' file
+# with open('data.txt', 'r') as file:
+#     urls = file.read().splitlines()
+
+# # Create a list to store the scraped data
+# scraped_data = []
+
+# # Iterate through the URLs and scrape data
+# for url in urls:
+#     print(f"Scraping data from {url}")
+#     data = scrape_data(url)
+#     if data:
+#         scraped_data.append(data)
+
+# # Save the scraped data to a single JSON file
+# with open('phone2.json', 'w') as json_file:
+#     json.dump(scraped_data, json_file, indent=4)
+
+# print("Scraped data saved to 'phone2.json'")
+
+
+# {
+#     "image1":"https://www.mobiledokan.com/wp-content/uploads/2023/09/Xiaomi-13-Lite.jpg",
+#     "image2":"https://www.mobiledokan.com/wp-content/uploads/2023/09/Xiaomi-13-Lite-blue.webp",
+#     "image3": "https://www.mobiledokan.com/wp-content/uploads/2023/09/Xiaomi-13-Lite-colors.webp",
+#     "details": "Xiaomi 13 Lite is the Lite version of Xiaomi 13. Lite usually means with lower price and configuration. The main highlight of this gadget is its Dual 32+8 MP front camera which is rare these days. The main 32 MP lens captures 100˚ photos which makes it an ultrawide lens. Another great thing is that it charges fully in a maximum of 40 minutes although the battery is 4500 mAh and not a typical 5000 mAh one. The AMOLED display is pleasing for the eyes with 68B colors, Dolby Vision, and HDR10+ features.The 4 nm Snapdragon 7 Gen 1 chipset from Qualcomm is a good performance and gaming chipset at this price point. There is also an optical in-display fingerprint sensor. The 13 Lite from Xiaomi is a 5 G-supported smartphone."
+# }
+
+# ==================================== merge 2 file ===========================
+import json
+
+# Load data from "phone.json" and "phone2.json"
+with open("phone.json", "r") as file1, open("phone2.json", "r") as file2:
+    data1 = json.load(file1)
+    data2 = json.load(file2)
+
+# Merge the data into a single list
+merged_data = []
+
+for entry1, entry2 in zip(data1, data2):
+    # Combine data from both files into a single dictionary
+    merged_entry = {
+        "product_name": entry1["product_name"],
+        "product_image": entry1["product_image"],
+        "brand": entry1["brand"],
+        "types": entry1["types"],
+        "price": entry1["price"],
+        "short_description": entry1["short_description"],
+        "rating": entry1["rating"],
+        "image1": entry2["image1"],
+        "image2": entry2["image2"],
+        "image3": entry2["image3"],
+        "details": entry2["details"]
+    }
+    merged_data.append(merged_entry)
+
+# Save the merged data to "phone3.json"
+with open("phone3.json", "w") as output_file:
+    json.dump(merged_data, output_file, indent=4)
+
+print("Merged data saved to 'phone3.json'")
diff --git a/data.txt b/data.txt
@@ -0,0 +1,24 @@
+https://www.mobiledokan.com/xiaomi/xiaomi-redmi-a2-plus/
+https://www.mobiledokan.com/xiaomi/xiaomi-13-lite/
+https://www.mobiledokan.com/xiaomi/xiaomi-redmi-note-12-pro-5g/
+https://www.mobiledokan.com/xiaomi/xiaomi-redmi-note-12/
+https://www.mobiledokan.com/xiaomi/xiaomi-redmi-12c/
+https://www.mobiledokan.com/samsung/samsung-galaxy-a24/
+https://www.mobiledokan.com/samsung/samsung-galaxy-z-fold5/
+https://www.mobiledokan.com/samsung/samsung-galaxy-a04e/
+https://www.mobiledokan.com/samsung/samsung-galaxy-a14/
+https://www.mobiledokan.com/itel/itel-a05s/
+https://www.mobiledokan.com/itel/itel-a60s/
+https://www.mobiledokan.com/itel/itel-s23/
+https://www.mobiledokan.com/itel/itel-p40/
+https://www.mobiledokan.com/itel/itel-a60/
+https://www.mobiledokan.com/google-phone/google-pixel-fold/
+https://www.mobiledokan.com/google-phone/google-pixel-7-pro/
+https://www.mobiledokan.com/google-phone/google-pixel-7/
+https://www.mobiledokan.com/google-phone/google-pixel-7a/
+https://www.mobiledokan.com/google-phone/google-pixel-6-pro/
+https://www.mobiledokan.com/symphony/symphony-atom-4/
+https://www.mobiledokan.com/symphony/symphony-helio-80/
+https://www.mobiledokan.com/symphony/symphony-z60-plus/
+https://www.mobiledokan.com/symphony/symphony-innova-10/
+https://www.mobiledokan.com/symphony/symphony-z60/