Skip to content
This repository has been archived by the owner on Jan 18, 2025. It is now read-only.

Commit

Permalink
fix: use css selector
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinNitroG committed Jan 14, 2025
1 parent 8a2adc4 commit 6c7a627
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 47 deletions.
2 changes: 1 addition & 1 deletion src/check_phat_nguoi/constants/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
GET_DATA_API_URL_CHECKPHATNGUOI: LiteralString = (
"https://api.checkphatnguoi.vn/phatnguoi"
)
API_URL_PHATNGUOI: LiteralString = "https://api.phatnguoi.vn/web/tra-cuu/"
API_URL_PHATNGUOI: LiteralString = "https://api.phatnguoi.vn/web/tra-cuu"
API_URL_CSGT_CAPTCHA: LiteralString = (
"https://www.csgt.vn/lib/captcha/captcha.class.php"
)
Expand Down
103 changes: 57 additions & 46 deletions src/check_phat_nguoi/get_data/engines/phat_nguoi.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
from __future__ import annotations

import re
from asyncio import TimeoutError
from logging import getLogger
from re import DOTALL
from typing import override

from aiohttp import ClientError
from bs4 import BeautifulSoup, ResultSet
from bs4 import BeautifulSoup, ResultSet, Tag

from check_phat_nguoi.config import PlateInfo
from check_phat_nguoi.constants import API_URL_PHATNGUOI
from check_phat_nguoi.context import PlateDetail
from check_phat_nguoi.context.plates import ViolationDetail
from check_phat_nguoi.types import ApiEnum, VehicleTypeEnum, get_vehicle_enum
from check_phat_nguoi.constants import API_URL_PHATNGUOI as API_URL
from check_phat_nguoi.context import PlateDetail, ViolationDetail
from check_phat_nguoi.types import ApiEnum, get_vehicle_enum
from check_phat_nguoi.utils import HttpaioSession

from .base import BaseGetDataEngine
Expand All @@ -28,56 +25,72 @@ def __init__(self) -> None:
HttpaioSession.__init__(self)

@staticmethod
def get_violations(html: str):
soup = BeautifulSoup(html, "html.parser")
violation_htmls: ResultSet[BeautifulSoup] = soup.find_all("tbody")
if violation_htmls is None:
def get_violations(html: str) -> tuple[ViolationDetail, ...] | None:
soup: BeautifulSoup = BeautifulSoup(html, "html.parser")
if not soup.css:
return
violation_htmls: ResultSet[Tag] | None = soup.css.select("tbody")
if not violation_htmls:
return
violation_detail_set: set[ViolationDetail] = set()

def _get_violation(violation_html: BeautifulSoup):
details: ResultSet[BeautifulSoup] = violation_html.find_all("tr")
color: ResultSet[BeautifulSoup] = details[1].find_all()
color_detail: str = color[1].text
location: ResultSet[BeautifulSoup] = details[4].find_all()
location_detail: str = location[1].text.strip()
action: ResultSet[BeautifulSoup] = details[5].find_all()
action_detail: str = action[1].text.strip()
status: ResultSet[BeautifulSoup] = details[6].find_all()
status_detail: bool = (
True if status[1].text.strip() == "ĐÃ XỬ PHẠT" else False
def _get_violation(violation_html: Tag) -> None:
# FIXME: Use CSS Selector
(
color_tag.text.strip()
if (
color_tag := violation_html.select_one(
"tr:nth-child(2) > td:nth-child(2)"
)
)
else None
)
enforcement_unit: ResultSet[BeautifulSoup] = details[7].find_all()
enforcement_unit_detail: str = enforcement_unit[1].text.strip()
resolution_offices: ResultSet[BeautifulSoup] = details[8].find_all()
resolution_office_details: str = resolution_offices[1].text.strip()
# TODO: Split resolution_office as other api
violation_detail_set.add(
ViolationDetail(
color=color_detail,
location=location_detail,
violation=action_detail,
status=status_detail,
enforcement_unit=enforcement_unit_detail,
resolution_offices_details=tuple(
re.findall(
r"\d\..*?(?=(?:\d\.|$))", resolution_office_details, DOTALL
)
),
(
location_tag.text.strip()
if (
location_tag := violation_html.select_one(
"tr:nth-child(3) > td:nth-child(2)"
)
)
else None
)
# location: ResultSet[BeautifulSoup] = details[4].find_all()
# location_detail: str = location[1].text.strip()
# action: ResultSet[BeautifulSoup] = details[5].find_all()
# action_detail: str = action[1].text.strip()
# status: ResultSet[BeautifulSoup] = details[6].find_all()
# status_detail: bool = (
# True if status[1].text.strip() == "ĐÃ XỬ PHẠT" else False
# )
# enforcement_unit: ResultSet[BeautifulSoup] = details[7].find_all()
# enforcement_unit_detail: str = enforcement_unit[1].text.strip()
# resolution_offices: ResultSet[BeautifulSoup] = details[8].find_all()
# resolution_office_details: str = resolution_offices[1].text.strip()
# # TODO: Split resolution_office as other api
# violation_detail_set.add(
# ViolationDetail(
# color=color,
# location=location_detail,
# violation=action_detail,
# status=status_detail,
# enforcement_unit=enforcement_unit_detail,
# resolution_offices_details=tuple(
# re.findall(
# r"\d\..*?(?=(?:\d\.|$))", resolution_office_details, DOTALL
# )
# ),
# )
# )

for violation_html in violation_htmls:
_get_violation(violation_html)
return tuple(violation_detail_set)

async def _request(self, plate_info: PlateInfo) -> str | None:
url: str = (
f"{API_URL_PHATNGUOI}{plate_info.plate}/{get_vehicle_enum(plate_info.type)}"
)
url: str = f"{API_URL}/{plate_info.plate}/{get_vehicle_enum(plate_info.type)}"
try:
async with self._session.get(url=url) as response:
html = await response.text()
html: str = await response.text()
return html
except TimeoutError as e:
logger.error(
Expand All @@ -97,12 +110,10 @@ async def get_data(self, plate_info: PlateInfo) -> PlateDetail | None:
html: str | None = await self._request(plate_info)
if not html:
return

type: VehicleTypeEnum = get_vehicle_enum(plate_info.type)
return PlateDetail(
plate=plate_info.plate,
owner=plate_info.owner,
type=type,
type=get_vehicle_enum(plate_info.type),
violations=self.get_violations(html),
)

Expand Down

0 comments on commit 6c7a627

Please sign in to comment.