Skip to content

Commit dfd69e7

Browse files
committed
feature/added-markdown: done
1 parent 263d9d4 commit dfd69e7

File tree

3 files changed

+51
-17
lines changed

3 files changed

+51
-17
lines changed

README.md

+16
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ https://docs.scrapingant.com/request-response-format#available-parameters
7575
| browser | <code>boolean</code> | True |
7676
| data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
7777
| json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
78+
| endpoint | None or 'markdown' | None |
7879

7980
**IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.
8081

@@ -265,6 +266,21 @@ result = client.general_request(
265266
print(result.content)
266267
```
267268

269+
### Receiving markdown
270+
271+
```python3
272+
from scrapingant_client import ScrapingAntClient
273+
274+
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
275+
276+
# Requesting the page content converted to markdown
277+
result = client.general_request(
278+
url="https://example.com",
279+
endpoint='markdown',
280+
)
281+
print(result.text)
282+
```
283+
268284
## Useful links
269285

270286
- [Scrapingant API documentation](https://docs.scrapingant.com)

scrapingant_client/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "2.0.0"
1+
__version__ = "2.1.0"
22

33
from scrapingant_client.client import ScrapingAntClient
44
from scrapingant_client.cookie import Cookie

scrapingant_client/client.py

+34-16
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def _form_payload(
5656
request_data['browser'] = browser
5757
return request_data
5858

59-
def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
59+
def _parse_response(self, response_status_code: int, response_data: Dict, url: str, endpoint: str) -> Response:
6060
if response_status_code == 403:
6161
raise ScrapingantInvalidTokenException()
6262
elif response_status_code == 404:
@@ -67,17 +67,33 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
6767
raise ScrapingantDetectedException()
6868
elif response_status_code == 500:
6969
raise ScrapingantInternalException()
70-
content = response_data['html']
71-
cookies_string = response_data['cookies']
72-
text = response_data['text']
73-
status_code = response_data['status_code']
74-
cookies_list = cookies_list_from_string(cookies_string)
75-
return Response(
76-
content=content,
77-
cookies=cookies_list,
78-
text=text,
79-
status_code=status_code
80-
)
70+
if endpoint is None or endpoint == 'extended':
71+
content = response_data['html']
72+
cookies_string = response_data['cookies']
73+
text = response_data['text']
74+
status_code = response_data['status_code']
75+
cookies_list = cookies_list_from_string(cookies_string)
76+
return Response(
77+
content=content,
78+
cookies=cookies_list,
79+
text=text,
80+
status_code=status_code
81+
)
82+
elif endpoint == 'markdown':
83+
return Response(
84+
content='',
85+
cookies=[],
86+
text=response_data['markdown'],
87+
status_code=0,
88+
)
89+
90+
def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
91+
if endpoint is None or endpoint == 'extended':
92+
return SCRAPINGANT_API_BASE_URL + '/extended'
93+
elif endpoint == 'markdown':
94+
return SCRAPINGANT_API_BASE_URL + '/markdown'
95+
else:
96+
raise ValueError(f'Invalid endpoint: {endpoint}, must be either None or "markdown"')
8197

8298
def general_request(
8399
self,
@@ -92,6 +108,7 @@ def general_request(
92108
browser: bool = True,
93109
data=None,
94110
json=None,
111+
endpoint: Optional[str] = None,
95112
) -> Response:
96113
request_data = self._form_payload(
97114
url=url,
@@ -105,7 +122,7 @@ def general_request(
105122
try:
106123
response = self.requests_session.request(
107124
method=method,
108-
url=SCRAPINGANT_API_BASE_URL + '/extended',
125+
url=self._get_scrapingant_api_url(endpoint),
109126
params=request_data,
110127
headers=convert_headers(headers),
111128
data=data,
@@ -115,7 +132,7 @@ def general_request(
115132
raise ScrapingantTimeoutException()
116133
response_status_code = response.status_code
117134
response_data = response.json()
118-
parsed_response: Response = self._parse_response(response_status_code, response_data, url)
135+
parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
119136
return parsed_response
120137

121138
async def general_request_async(
@@ -131,6 +148,7 @@ async def general_request_async(
131148
browser: bool = True,
132149
data=None,
133150
json=None,
151+
endpoint: Optional[str] = None,
134152
) -> Response:
135153
import httpx
136154

@@ -153,7 +171,7 @@ async def general_request_async(
153171
try:
154172
response = await client.request(
155173
method=method,
156-
url=SCRAPINGANT_API_BASE_URL + '/extended',
174+
url=self._get_scrapingant_api_url(endpoint),
157175
params=request_data,
158176
headers=convert_headers(headers),
159177
data=data,
@@ -163,5 +181,5 @@ async def general_request_async(
163181
raise ScrapingantTimeoutException()
164182
response_status_code = response.status_code
165183
response_data = response.json()
166-
parsed_response: Response = self._parse_response(response_status_code, response_data, url)
184+
parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
167185
return parsed_response

0 commit comments

Comments
 (0)