|
| 1 | +import sys |
| 2 | +import os |
| 3 | +import requests |
| 4 | +import json |
| 5 | +import time |
| 6 | +from bs4 import BeautifulSoup |
| 7 | +import os |
| 8 | +from concurrent.futures import ThreadPoolExecutor |
| 9 | +from multiprocessing import Pool, cpu_count |
| 10 | +from dotenv import load_dotenv |
| 11 | +import logging |
| 12 | + |
| 13 | +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 14 | +from config.log import * |
| 15 | +from config.mysql import * |
| 16 | +from models.product import save_product_info |
| 17 | +from config.file import read_product_numbers |
| 18 | +from models.shop_type import ShopType |
| 19 | + |
# Load the .env file BEFORE reading any variable from it.  os.getenv() is
# evaluated right here at import time, so calling load_dotenv() afterwards
# would leave these constants as None unless the process environment had
# already been populated by something else.
load_dotenv()

# Zigzag crawler settings, all supplied through environment variables.
USER_AGENT = os.getenv("USER_AGENT")  # sent as the User-Agent request header
ZIGZAG_PRODUCT_URL = os.getenv("ZIGZAG_PRODUCT_URL")  # base URL; "/<product_num>" is appended
ZIGZAG_PRODUCTS_FILE_PATH = os.getenv("ZIGZAG_PRODUCTS_FILE_PATH")  # handed to read_product_numbers()
| 26 | + |
def get_review_summary(catalog_product_id, headers, limit_count=5, order="BEST_SCORE_DESC"):
    """Fetch the review count and average rating of a Zigzag product.

    Posts the ``GetPdpIntegratedData`` GraphQL operation to the Zigzag API and
    reads ``related_product_review_summary`` out of the response.

    Args:
        catalog_product_id: Product ID, sent as ``$catalog_product_id``.
        headers: HTTP headers sent with the request.
        limit_count: Maximum number of reviews requested (default: 5).
        order: Review sort order (default: ``"BEST_SCORE_DESC"``).

    Returns:
        ``{"review_count": ..., "star_score": ...}`` on success, with ``0`` /
        ``0.0`` substituted when the API omits a value or returns null for it.
        ``None`` when the request fails or the response cannot be processed;
        the error is logged in that case.
    """
    url = "https://api.zigzag.kr/api/2/graphql/GetPdpIntegratedData"

    # The query also requests the review list (which is what limit_count and
    # order apply to), although only the summary is read below.
    payload = {
        "operationName": "GetPdpIntegratedData",
        "variables": {
            "catalog_product_id": catalog_product_id,
            "limit_count": limit_count,
            "order": order,
        },
        "query": """
            query GetPdpIntegratedData(
                $catalog_product_id: ID!,
                $limit_count: Int,
                $order: ProductReviewListOrderType
            ) {
                related_product_review_summary(product_id: $catalog_product_id) {
                    all_count
                    ratings_average
                }
                product_review_list(
                    product_id: $catalog_product_id,
                    limit_count: $limit_count,
                    order: $order
                ) {
                    item_list {
                        id
                        contents
                        rating
                        reviewer {
                            profile {
                                masked_email
                            }
                        }
                    }
                }
            }
        """,
    }

    try:
        # Without a timeout a stalled connection would block the calling
        # worker thread forever; 5 seconds matches the product page request.
        response = requests.post(url, json=payload, headers=headers, timeout=5)
        response.raise_for_status()

        body = response.json()

        # A GraphQL response may carry "data": null (or a null summary) next
        # to an "errors" entry, so fall back to an empty dict at every level.
        review_summary = (body.get("data") or {}).get("related_product_review_summary") or {}

        return {
            "review_count": review_summary.get("all_count") or 0,
            "star_score": review_summary.get("ratings_average") or 0.0,
        }
    except requests.exceptions.RequestException as e:
        logging.error(f"HTTP 리뷰 수 요청 에러: {e}")
    except Exception as e:
        # Boundary handler: a malformed body must not take down the caller.
        logging.error(f"리뷰 수 요청 오류 발생: {e}")
    return None
| 92 | + |
| 93 | + |
| 94 | + |
def extract_product_info(json_data, product_num, product_url):
    """Flatten a nested product payload into a flat product-info record.

    Args:
        json_data: Mapping holding the ``goodsNm``, ``brandInfo``, ``category``,
            ``goodsPrice``, ``thumbnailImageUrl`` and ``goodsReview`` entries.
        product_num: Product number, copied into the record unchanged.
        product_url: Product page URL, copied into the record unchanged.

    Returns:
        A dict with the twelve product fields.  Any value missing from
        ``json_data`` becomes ``'N/A'``; ``like_count`` is always ``0``.
    """
    missing = 'N/A'

    # Look up each nested section once; an absent section acts as empty.
    brand_section = json_data.get('brandInfo', {})
    category_section = json_data.get('category', {})
    price_section = json_data.get('goodsPrice', {})
    review_section = json_data.get('goodsReview', {})

    record = {}
    record['name'] = json_data.get('goodsNm', missing)
    record['brand'] = brand_section.get('brandName', missing)
    record['parent_category'] = category_section.get('categoryDepth1Title', missing)
    record['category'] = category_section.get('categoryDepth2Title', missing)
    record['product_num'] = product_num
    record['current_price'] = price_section.get('salePrice', missing)
    record['image_url'] = json_data.get('thumbnailImageUrl', missing)
    record['star_score'] = review_section.get('satisfactionScore', missing)
    record['review_count'] = review_section.get('totalCount', missing)
    record['product_url'] = product_url
    record['brand_logo_url'] = brand_section.get('brandLogoImage', missing)
    record['like_count'] = 0  # like_count is placeholder data for now
    return record
| 110 | + |
def parsing_product_to_json_data(product_num, session, headers, product_url):
    """Download a product page and decode its embedded ``__NEXT_DATA__`` JSON.

    Args:
        product_num: Product number, used only in the warning message.
        session: Object whose ``get`` method performs the HTTP request.
        headers: HTTP headers sent with the request.
        product_url: URL of the product page to download.

    Returns:
        The decoded JSON content of the ``<script id="__NEXT_DATA__">`` tag,
        or ``None`` (after logging a warning) when the page has no such tag.

    Raises:
        Whatever ``session.get`` / ``raise_for_status`` raise for a failed
        request, and ``json.JSONDecodeError`` for invalid JSON.
    """
    page = session.get(product_url, headers=headers, timeout=5)
    page.raise_for_status()

    document = BeautifulSoup(page.content, 'lxml')
    next_data_tag = document.find('script', attrs={'id': '__NEXT_DATA__'})

    if next_data_tag:
        return json.loads(next_data_tag.string)

    logging.warning(f'상품 정보를 찾을 수 없습니다. 상품 번호: {product_num}')
    return None
| 124 | + |
def extract_zigzag_product_main_info(product_num, session, headers):
    """Collect the main information of a single Zigzag product.

    Downloads the product page, reads the ``product`` and ``shop`` entries of
    its page props, and combines them with the review summary.

    Args:
        product_num: Product number; appended to ``ZIGZAG_PRODUCT_URL``.
        session: HTTP session used to download the product page.
        headers: HTTP headers sent with every request.

    Returns:
        A dict with the product fields, or ``None`` when the page cannot be
        downloaded or parsed, or holds no product data.  Each failure is
        logged.  Missing individual values fall back to ``'N/A'`` (or
        ``0`` / ``0.0`` for the review figures) instead of raising.
    """
    product_url = f'{ZIGZAG_PRODUCT_URL}/{product_num}'

    try:
        json_data = parsing_product_to_json_data(product_num, session, headers, product_url)
    except json.JSONDecodeError as e:
        logging.error(f'JSON 파싱 오류: {product_num}, 오류: {e}')
        return None
    except requests.exceptions.RequestException as e:
        # Timeouts and HTTP errors must not propagate out of the worker.
        logging.error(f'상품 페이지 요청 오류: {product_num}, 오류: {e}')
        return None

    if not json_data:
        # The parser returns None (and logs) when the page has no data tag.
        return None

    page_props = (json_data.get('props') or {}).get('pageProps') or {}
    product_data = page_props.get('product') or {}
    brand_data = page_props.get('shop') or {}

    # Check for missing product data before using it and before spending a
    # network round-trip on the review API.
    if not product_data:
        logging.warning(f'상품 데이터를 찾을 수 없습다. 상품 번호: {product_num}')
        return None

    # Category names; positions 2 and 3 are the parent category and category.
    managed_categories = product_data.get('managed_category_list') or []
    category_datas = [cat.get('value', 'N/A') for cat in managed_categories]
    parent_category = category_datas[2] if len(category_datas) > 2 else 'N/A'
    category = category_datas[3] if len(category_datas) > 3 else 'N/A'

    price_data = product_data.get('product_price') or {}
    discount_data = price_data.get('final_discount_info') or {}

    # "or [{}]" also covers an empty image list, which would break "[0]".
    image_list = product_data.get('product_image_list') or [{}]

    # get_review_summary returns None on failure; treat that as "no reviews".
    review_summary = get_review_summary(product_num, headers) or {}

    return {
        'name': product_data.get('name', 'N/A'),
        'brand': brand_data.get('name', 'N/A'),
        'parent_category': parent_category,
        'category': category,
        'product_num': product_num,
        'product_url': product_url,
        'current_price': discount_data.get('discount_price', 'N/A'),
        'image_url': image_list[0].get('url', 'N/A'),
        'star_score': review_summary.get('star_score', 0.0),
        'review_count': review_summary.get('review_count', 0),
        'brand_logo_url': brand_data.get('typical_image_url', 'N/A'),
        'like_count': 0,
    }
| 161 | + |
def fetch_product_info_multithread(products_num, headers):
    """Fetch the main information of many products concurrently.

    Submits one ``extract_zigzag_product_main_info`` task per product number
    to a thread pool sized by ``cpu_count()`` and gathers the results in
    submission order.

    Args:
        products_num: Iterable of product numbers to fetch.
        headers: HTTP headers passed on to every task.

    Returns:
        List of the product-info dicts that were fetched successfully.
        Products whose task returned a falsy value or raised are left out;
        a raised exception is logged rather than aborting the whole batch.
    """
    products_info = []
    # NOTE(review): one requests.Session is shared by all worker threads;
    # confirm that this is acceptable for the installed requests version.
    with requests.Session() as session:
        with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
            # Keep the product number next to its future for error reporting.
            submitted = [
                (product_num, executor.submit(extract_zigzag_product_main_info, product_num, session, headers))
                for product_num in products_num
            ]
            for product_num, future in submitted:
                try:
                    product_info = future.result()
                except Exception as e:
                    # Boundary handler: one broken product must not discard
                    # the results that the other tasks already produced.
                    logging.error(f'상품 정보 수집 실패: {product_num}, 오류: {e}')
                    continue
                if product_info:
                    products_info.append(product_info)
    return products_info
| 172 | + |
def print_product_main_data(products_info):
    """Print every product record as labelled lines followed by a separator.

    Args:
        products_info: Iterable of product-info dicts.  Each one must contain
            all twelve keys listed below; a missing key raises ``KeyError``.
    """
    # (label, key) pairs, in the exact order in which they are printed.
    labelled_fields = (
        ('상품 번호', 'product_num'),
        ('상품 이름', 'name'),
        ('브랜드', 'brand'),
        ('상위 카테고리', 'parent_category'),
        ('카테고리', 'category'),
        ('상품 판매가', 'current_price'),
        ('상품 URL', 'product_url'),
        ('상품 이미지 URL', 'image_url'),
        ('좋아요 수', 'like_count'),
        ('별점', 'star_score'),
        ('리뷰 수', 'review_count'),
        ('로고 URL', 'brand_logo_url'),
    )
    separator = "---------------------------------------"

    for record in products_info:
        # Print line by line so partial output matches on a missing key.
        for label, key in labelled_fields:
            print(f'{label}: {record[key]}')
        print(separator)
| 188 | + |
def get_zigzag_product_info():
    """Run the whole Zigzag crawl: read, fetch, report and store.

    Reads the product numbers from ``ZIGZAG_PRODUCTS_FILE_PATH``, fetches the
    product information concurrently, logs the elapsed fetch time, prints the
    records and finally saves them with ``ShopType.ZIGZAG``.
    """
    product_numbers = read_product_numbers(ZIGZAG_PRODUCTS_FILE_PATH)
    request_headers = {'User-Agent': USER_AGENT, "Connection": "close"}

    # Only the concurrent fetch is timed.
    started_at = time.time()
    collected = fetch_product_info_multithread(product_numbers, request_headers)
    elapsed = time.time() - started_at
    logging.info(f'총 실행 시간: {elapsed:.2f}초')

    print_product_main_data(collected)

    # Persist the collected records to the database.
    save_product_info(collected, ShopType.ZIGZAG)
| 206 | + |
| 207 | + |
# Script entry point: run the crawl only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    get_zigzag_product_info()
| 210 | + |
0 commit comments