
Commit f28a7e7

feature/added-markdown: done

1 parent 0e755cb

3 files changed: +51 -17 lines changed


README.md
Lines changed: 16 additions & 0 deletions

@@ -76,6 +76,7 @@ https://docs.scrapingant.com/request-response-format#available-parameters
 | return_page_source | <code>boolean</code> | False |
 | data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
 | json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
+| endpoint | None or 'markdown' | None |

 **IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.

@@ -266,6 +267,21 @@ result = client.general_request(
 print(result.content)
 ```

+### Receiving markdown
+
+```python3
+from scrapingant_client import ScrapingAntClient
+
+client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
+
+# Requesting the page content rendered as markdown
+result = client.general_request(
+    url="https://example.com",
+    endpoint='markdown',
+)
+print(result.text)
+```
+
 ## Useful links

 - [Scrapingant API doumentation](https://docs.scrapingant.com)
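
This commit also threads the same `endpoint` parameter through the async client (see the `scrapingant_client/client.py` diff below), so the markdown endpoint should be reachable from `general_request_async` as well. A minimal sketch of that usage, assuming only the signatures shown in this commit and that `httpx` is installed (it is imported inside `general_request_async`):

```python3
import asyncio

from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')


async def main():
    # endpoint='markdown' routes the call to the /markdown API endpoint;
    # the markdown body comes back in result.text (content and cookies stay empty).
    result = await client.general_request_async(
        url="https://example.com",
        endpoint='markdown',
    )
    print(result.text)


asyncio.run(main())
```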

scrapingant_client/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__ = "2.0.1"
+__version__ = "2.1.0"

 from scrapingant_client.client import ScrapingAntClient
 from scrapingant_client.cookie import Cookie

scrapingant_client/client.py
Lines changed: 34 additions & 16 deletions

@@ -60,7 +60,7 @@ def _form_payload(
             request_data['return_page_source'] = return_page_source
         return request_data

-    def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
+    def _parse_response(self, response_status_code: int, response_data: Dict, url: str, endpoint: str) -> Response:
         if response_status_code == 403:
             raise ScrapingantInvalidTokenException()
         elif response_status_code == 404:
@@ -71,17 +71,33 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
             raise ScrapingantDetectedException()
         elif response_status_code == 500:
             raise ScrapingantInternalException()
-        content = response_data['html']
-        cookies_string = response_data['cookies']
-        text = response_data['text']
-        status_code = response_data['status_code']
-        cookies_list = cookies_list_from_string(cookies_string)
-        return Response(
-            content=content,
-            cookies=cookies_list,
-            text=text,
-            status_code=status_code
-        )
+        if endpoint is None or endpoint == 'extended':
+            content = response_data['html']
+            cookies_string = response_data['cookies']
+            text = response_data['text']
+            status_code = response_data['status_code']
+            cookies_list = cookies_list_from_string(cookies_string)
+            return Response(
+                content=content,
+                cookies=cookies_list,
+                text=text,
+                status_code=status_code
+            )
+        elif endpoint == 'markdown':
+            return Response(
+                content='',
+                cookies=[],
+                text=response_data['markdown'],
+                status_code=0,
+            )
+
+    def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
+        if endpoint is None or endpoint == 'extended':
+            return SCRAPINGANT_API_BASE_URL + '/extended'
+        elif endpoint == 'markdown':
+            return SCRAPINGANT_API_BASE_URL + '/markdown'
+        else:
+            raise ValueError(f'Invalid endpoint: {endpoint}, must be either None or "markdown"')

     def general_request(
             self,
@@ -97,6 +113,7 @@ def general_request(
             return_page_source: Optional[bool] = None,
             data=None,
             json=None,
+            endpoint: Optional[str] = None,
     ) -> Response:
         request_data = self._form_payload(
             url=url,
@@ -111,7 +128,7 @@ def general_request(
         try:
             response = self.requests_session.request(
                 method=method,
-                url=SCRAPINGANT_API_BASE_URL + '/extended',
+                url=self._get_scrapingant_api_url(endpoint),
                 params=request_data,
                 headers=convert_headers(headers),
                 data=data,
@@ -121,7 +138,7 @@ def general_request(
             raise ScrapingantTimeoutException()
         response_status_code = response.status_code
         response_data = response.json()
-        parsed_response: Response = self._parse_response(response_status_code, response_data, url)
+        parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
         return parsed_response

     async def general_request_async(
@@ -138,6 +155,7 @@ async def general_request_async(
             return_page_source: Optional[bool] = None,
             data=None,
             json=None,
+            endpoint: Optional[str] = None,
     ) -> Response:
         import httpx

@@ -161,7 +179,7 @@ async def general_request_async(
         try:
             response = await client.request(
                 method=method,
-                url=SCRAPINGANT_API_BASE_URL + '/extended',
+                url=self._get_scrapingant_api_url(endpoint),
                 params=request_data,
                 headers=convert_headers(headers),
                 data=data,
@@ -171,5 +189,5 @@ async def general_request_async(
             raise ScrapingantTimeoutException()
         response_status_code = response.status_code
         response_data = response.json()
-        parsed_response: Response = self._parse_response(response_status_code, response_data, url)
+        parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
         return parsed_response
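
One consequence of the new `_get_scrapingant_api_url` routing is that an unsupported `endpoint` value should fail before any HTTP request is sent. A small sketch of that expected behavior, based only on the code in this diff (the 'html' value here is just an illustrative invalid input):

```python3
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# None (or 'extended') keeps the existing /extended behavior,
# 'markdown' switches to /markdown, and anything else raises ValueError
# from _get_scrapingant_api_url before the request is performed.
try:
    client.general_request(url="https://example.com", endpoint='html')
except ValueError as exc:
    print(exc)  # Invalid endpoint: html, must be either None or "markdown"
```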
