
Commit 12e25af

feature/added-markdown: refactored, moved markdown to separate method
1 parent 566c467 commit 12e25af

File tree: 3 files changed (+61 -35 lines)

README.md (+8 -5)

@@ -58,7 +58,12 @@ Main class of this library.
 
 * * *
 
-#### ScrapingAntClient.general_request and ScrapingAntClient.general_request_async
+#### Common arguments
+For:
+- ScrapingAntClient.general_request
+- ScrapingAntClient.general_request_async
+- ScrapingAntClient.markdown_request
+- ScrapingAntClient.markdown_request_async
 
 https://docs.scrapingant.com/request-response-format#available-parameters
 
@@ -76,7 +81,6 @@ https://docs.scrapingant.com/request-response-format#available-parameters
 | return_page_source | <code>boolean</code> | False |
 | data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
 | json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
-| endpoint | None or 'markdown' | None |
 
 **IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.
 
@@ -275,11 +279,10 @@ from scrapingant_client import ScrapingAntClient
 client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
 
 # Sending POST request with json data
-result = client.general_request(
+result = client.markdown_request(
     url="https://example.com",
-    endpoint='markdown',
 )
-print(result.text)
+print(result.markdown)
 ```
 
 ## Useful links
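The README change documents only the synchronous markdown example. As an illustrative companion (not part of this commit's diff), the new `markdown_request_async` added below in `client.py` could be driven the same way; the `asyncio` wrapper and the example URL are assumptions, and the async path imports `httpx` at call time, so it must be installed:

```python
import asyncio

from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')


async def main():
    # The markdown endpoint returns a MarkdownResponse with `url` and `markdown`
    result = await client.markdown_request_async(url="https://example.com")
    print(result.url)
    print(result.markdown)


asyncio.run(main())
```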

scrapingant_client/client.py (+47 -30)

@@ -17,7 +17,7 @@
 )
 from scrapingant_client.headers import convert_headers
 from scrapingant_client.proxy_type import ProxyType
-from scrapingant_client.response import Response
+from scrapingant_client.response import Response, MarkdownResponse
 from scrapingant_client.utils import base64_encode_string
 
 
@@ -43,7 +43,8 @@ def _form_payload(
             browser: bool = True,
             return_page_source: Optional[bool] = None,
     ) -> Dict:
-        request_data = {'url': url}
+        request_data = {
+            'url': url}
         if cookies is not None:
             request_data['cookies'] = cookies_list_to_string(cookies)
         if js_snippet is not None:
@@ -60,7 +61,7 @@ def _form_payload(
             request_data['return_page_source'] = return_page_source
         return request_data
 
-    def _parse_response(self, response_status_code: int, response_data: Dict, url: str, endpoint: str) -> Response:
+    def _check_status_code(self, response_status_code: int, response_data: Dict, url: str) -> None:
         if response_status_code == 403:
             raise ScrapingantInvalidTokenException()
         elif response_status_code == 404:
@@ -71,25 +72,25 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: str, endpoint: str) -> Response:
             raise ScrapingantDetectedException()
         elif response_status_code == 500:
             raise ScrapingantInternalException()
-        if endpoint is None or endpoint == 'extended':
-            content = response_data['html']
-            cookies_string = response_data['cookies']
-            text = response_data['text']
-            status_code = response_data['status_code']
-            cookies_list = cookies_list_from_string(cookies_string)
-            return Response(
-                content=content,
-                cookies=cookies_list,
-                text=text,
-                status_code=status_code
-            )
-        elif endpoint == 'markdown':
-            return Response(
-                content='',
-                cookies=[],
-                text=response_data['markdown'],
-                status_code=0,
-            )
+
+    def _parse_extended_response(self, response_data: Dict) -> Response:
+        content = response_data['html']
+        cookies_string = response_data['cookies']
+        text = response_data['text']
+        status_code = response_data['status_code']
+        cookies_list = cookies_list_from_string(cookies_string)
+        return Response(
+            content=content,
+            cookies=cookies_list,
+            text=text,
+            status_code=status_code,
+        )
+
+    def _parse_markdown_response(self, response_data: Dict) -> MarkdownResponse:
+        return MarkdownResponse(
+            url=response_data['url'],
+            markdown=response_data['markdown'],
+        )
 
     def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
         if endpoint is None or endpoint == 'extended':
@@ -99,7 +100,7 @@ def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
         else:
             raise ValueError(f'Invalid endpoint: {endpoint}, must be either None or "markdown"')
 
-    def general_request(
+    def _request(
             self,
             url: str,
             method: str = 'GET',
@@ -114,7 +115,7 @@ def general_request(
             data=None,
             json=None,
             endpoint: Optional[str] = None,
-    ) -> Response:
+    ) -> Dict:
         request_data = self._form_payload(
             url=url,
             cookies=cookies,
@@ -138,10 +139,10 @@ def general_request(
             raise ScrapingantTimeoutException()
         response_status_code = response.status_code
         response_data = response.json()
-        parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
-        return parsed_response
+        self._check_status_code(response_status_code, response_data, url)
+        return response_data
 
-    async def general_request_async(
+    async def _request_async(
            self,
            url: str,
            method: str = 'GET',
@@ -156,7 +157,7 @@ async def general_request_async(
            data=None,
            json=None,
            endpoint: Optional[str] = None,
-    ) -> Response:
+    ) -> Dict:
         import httpx
 
         request_data = self._form_payload(
@@ -189,5 +190,21 @@ async def general_request_async(
             raise ScrapingantTimeoutException()
         response_status_code = response.status_code
         response_data = response.json()
-        parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
-        return parsed_response
+        self._check_status_code(response_status_code, response_data, url)
+        return response_data
+
+    def general_request(self, *args, **kwargs) -> Response:
+        response_data = self._request(*args, **kwargs, endpoint='extended')
+        return self._parse_extended_response(response_data)
+
+    async def general_request_async(self, *args, **kwargs) -> Response:
+        response_data = await self._request_async(*args, **kwargs, endpoint='extended')
+        return self._parse_extended_response(response_data)
+
+    def markdown_request(self, *args, **kwargs) -> MarkdownResponse:
+        response_data = self._request(*args, **kwargs, endpoint='markdown')
+        return self._parse_markdown_response(response_data)
+
+    async def markdown_request_async(self, *args, **kwargs) -> MarkdownResponse:
+        response_data = await self._request_async(*args, **kwargs, endpoint='markdown')
+        return self._parse_markdown_response(response_data)
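Net effect of the refactor: `_request` and `_request_async` return the raw response dict after `_check_status_code`, and the four public wrappers pin the endpoint and pick a parser. Below is a minimal usage sketch of the resulting public surface (not part of the diff; the URL is illustrative, and `general_request` keeps its previous keyword arguments while `endpoint` is now set internally by the wrappers):

```python
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Extended (default) endpoint: parsed into Response(content, cookies, text, status_code)
page = client.general_request(url="https://example.com")
print(page.status_code)
print(page.text)

# Markdown endpoint: parsed into MarkdownResponse(url, markdown)
md = client.markdown_request(url="https://example.com")
print(md.markdown)
```

Because the wrappers forward `*args, **kwargs` and then add `endpoint=...` themselves, passing `endpoint` explicitly to a public method would now raise a `TypeError` for a duplicate keyword argument.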

scrapingant_client/response.py (+6)

@@ -9,3 +9,9 @@ def __init__(self, content: str, cookies: List[Cookie], text: str, status_code:
         self.cookies = cookies
         self.text = text
         self.status_code = status_code
+
+
+class MarkdownResponse:
+    def __init__(self, url: str, markdown: str):
+        self.url = url
+        self.markdown = markdown
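`MarkdownResponse` is a plain value object. A small sketch of how it is filled and read; the payload dict below is hypothetical, mirroring the two keys `_parse_markdown_response` reads from the API response:

```python
from scrapingant_client.response import MarkdownResponse

# Hypothetical markdown-endpoint payload (illustration only)
response_data = {
    'url': 'https://example.com',
    'markdown': '# Example Domain\n\nIllustrative markdown content.',
}

parsed = MarkdownResponse(
    url=response_data['url'],
    markdown=response_data['markdown'],
)
print(parsed.url)
print(parsed.markdown)
```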
