Skip to content
This repository has been archived by the owner on Jan 18, 2025. It is now read-only.

Commit

Permalink
feat: get data from phatnguoi.vn
Browse files Browse the repository at this point in the history
  • Loading branch information
NTGNguyen committed Jan 14, 2025
1 parent b4fe3cb commit 10ff28b
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/check_phat_nguoi/constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .get_data import (
API_URL_CSGT_CAPTCHA,
API_URL_CSGT_QUERY,
API_URL_PHATNGUOI,
DATETIME_FORMAT_CHECKPHATNGUOI,
GET_DATA_API_URL_CHECKPHATNGUOI,
OFFICE_NAME_PATTERN,
Expand All @@ -24,4 +25,5 @@
"SEND_MESSAGE_API_URL_TELEGRAM",
"MESSAGE_MARKDOWN_PATTERN",
"RESOLUTION_LOCATION_MARKDOWN_PATTERN",
"API_URL_PHATNGUOI",
]
113 changes: 113 additions & 0 deletions src/check_phat_nguoi/get_data/engines/phat_nguoi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from __future__ import annotations

import re
from asyncio import TimeoutError
from logging import getLogger
from re import DOTALL
from typing import override

from aiohttp import ClientError
from bs4 import BeautifulSoup, ResultSet

from check_phat_nguoi.config import PlateInfo
from check_phat_nguoi.constants import API_URL_PHATNGUOI
from check_phat_nguoi.context import PlateDetail
from check_phat_nguoi.context.plates import ViolationDetail
from check_phat_nguoi.types import ApiEnum, VehicleTypeEnum, get_vehicle_enum
from check_phat_nguoi.utils import HttpaioSession

from .base import BaseGetDataEngine

logger = getLogger(__name__)


class PhatNguoiGetDataEngine(HttpaioSession, BaseGetDataEngine):
    """Get-data engine that reads violation records from phatnguoi.vn.

    The site is queried with one GET request per plate and answers with an
    HTML page; every ``<tbody>`` element of that page is parsed as one
    violation record.
    """

    api: ApiEnum = ApiEnum.phatnguoi_vn

    def __init__(self) -> None:
        """Initialise the HTTP session provided by ``HttpaioSession``."""
        HttpaioSession.__init__(self)

    @staticmethod
    def get_violations(html: str) -> tuple[ViolationDetail, ...] | None:
        """Parse every violation table found in ``html``.

        Args:
            html: Body of the page returned for one plate.

        Returns:
            ``None`` when the page contains no ``<tbody>`` element; otherwise
            a tuple of the distinct violations. Duplicates are collapsed
            through a set, so the order of the tuple is not guaranteed.

        Raises:
            IndexError: If a table has fewer rows, or a row has fewer
                elements, than the layout this parser indexes into.
        """
        soup = BeautifulSoup(html, "html.parser")
        violation_htmls: ResultSet[BeautifulSoup] = soup.find_all("tbody")
        if not violation_htmls:
            return None

        def _cell_text(row: BeautifulSoup) -> str:
            # ``find_all()`` without a filter lists every tag below the row;
            # the second one is taken as the value (presumably the first is
            # the label -- confirm against the page markup).
            return row.find_all()[1].text.strip()

        def _get_violation(violation_html: BeautifulSoup) -> ViolationDetail:
            details: ResultSet[BeautifulSoup] = violation_html.find_all("tr")
            resolution_office_details: str = _cell_text(details[8])
            # TODO: Split the resolution offices the same way the other APIs do.
            return ViolationDetail(
                # Stripped like every other field so stray whitespace around
                # the colour does not end up in the stored value.
                color=_cell_text(details[1]),
                location=_cell_text(details[4]),
                violation=_cell_text(details[5]),
                status=_cell_text(details[6]) == "ĐÃ XỬ PHẠT",
                enforcement_unit=_cell_text(details[7]),
                # Each match starts at "<digit>." and lazily extends up to
                # the next "<digit>." or the end of the text.
                resolution_offices_details=tuple(
                    re.findall(
                        r"\d\..*?(?=(?:\d\.|$))", resolution_office_details, DOTALL
                    )
                ),
            )

        violation_detail_set: set[ViolationDetail] = {
            _get_violation(violation_html) for violation_html in violation_htmls
        }
        return tuple(violation_detail_set)

    async def _request(self, plate_info: PlateInfo) -> str | None:
        """Fetch the page for ``plate_info``.

        Args:
            plate_info: Plate whose number and vehicle type build the URL.

        Returns:
            The response body as text, or ``None`` when the request failed;
            the failure is logged instead of being raised.
        """
        url = (
            API_URL_PHATNGUOI
            + plate_info.plate
            + f"/{get_vehicle_enum(plate_info.type)}/"
        )
        try:
            async with self._session.get(url=url) as response:
                # NOTE(review): the HTTP status is not checked, so the body of
                # an error response is returned like any other -- confirm
                # that this is intended.
                return await response.text()
        except TimeoutError as e:
            logger.error(
                f"Plate {plate_info.plate}: Time out ({self.timeout}s) getting data from API {self.api.value}. {e}"
            )
        except ClientError as e:
            logger.error(
                f"Plate {plate_info.plate}: Error occurs while getting data from API {self.api.value}. {e}"
            )
        except Exception as e:
            logger.error(
                f"Plate {plate_info.plate}: Error occurs while getting data (internally) {self.api.value}. {e}"
            )
        return None

    @override
    async def get_data(self, plate_info: PlateInfo) -> PlateDetail | None:
        """Fetch and parse the violations of one plate.

        Args:
            plate_info: Plate to look up.

        Returns:
            The plate details, or ``None`` when the request failed, returned
            an empty body, or the page could not be parsed.
        """
        html: str | None = await self._request(plate_info)
        if not html:
            return None

        vehicle_type: VehicleTypeEnum = get_vehicle_enum(plate_info.type)
        try:
            violations = self.get_violations(html)
        except IndexError as e:
            # A page with an unexpected layout is reported like any other
            # failed lookup instead of crashing the caller.
            logger.error(
                f"Plate {plate_info.plate}: Error occurs while parsing data from API {self.api.value}. {e}"
            )
            return None

        return PlateDetail(
            plate=plate_info.plate,
            owner=plate_info.owner,
            type=vehicle_type,
            violations=violations,
        )

    @override
    async def __aexit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Delegate context-manager exit to ``HttpaioSession``."""
        return await HttpaioSession.__aexit__(self, exc_type, exc_value, exc_traceback)
4 changes: 4 additions & 0 deletions src/check_phat_nguoi/get_data/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
PlateDetail,
plates_context,
)
from check_phat_nguoi.get_data.engines.phat_nguoi import PhatNguoiGetDataEngine
from check_phat_nguoi.types import ApiEnum

from .engines import BaseGetDataEngine, CheckPhatNguoiGetDataEngine, CsgtGetDataEngine
Expand All @@ -18,6 +19,7 @@ class GetData:
def __init__(self) -> None:
    """Declare the engine attributes and create the empty plate-detail set."""
    # Bare annotations: they declare the attribute types but assign nothing
    # at runtime (the engines are presumably set elsewhere before use --
    # not visible in this excerpt).
    self._checkphatnguoi_engine: CheckPhatNguoiGetDataEngine
    self._csgt_engine: CsgtGetDataEngine
    self._phatnguoi_engine: PhatNguoiGetDataEngine
    # Starts out empty.
    self._plates_details: set[PlateDetail] = set()

async def _get_data_for_plate(self, plate_info: PlateInfo) -> None:
Expand All @@ -38,6 +40,8 @@ async def _get_data_for_plate(self, plate_info: PlateInfo) -> None:
engine = self._checkphatnguoi_engine
case ApiEnum.csgt_vn:
engine = self._csgt_engine
case ApiEnum.phatnguoi_vn:
engine = self._phatnguoi_engine
logger.info(
f"Plate {plate_info.plate}: Getting data with API: {api.value}..."
)
Expand Down

0 comments on commit 10ff28b

Please sign in to comment.