@@ -56,7 +56,7 @@ def _form_payload(
56
56
request_data['browser'] = browser
57
57
return request_data
58
58
59
- def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
59
+ def _parse_response(self, response_status_code: int, response_data: Dict, url: str, endpoint: str ) -> Response:
60
60
if response_status_code == 403:
61
61
raise ScrapingantInvalidTokenException()
62
62
elif response_status_code == 404:
@@ -67,17 +67,33 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
67
67
raise ScrapingantDetectedException()
68
68
elif response_status_code == 500:
69
69
raise ScrapingantInternalException()
70
- content = response_data['html']
71
- cookies_string = response_data['cookies']
72
- text = response_data['text']
73
- status_code = response_data['status_code']
74
- cookies_list = cookies_list_from_string(cookies_string)
75
- return Response(
76
- content=content,
77
- cookies=cookies_list,
78
- text=text,
79
- status_code=status_code
80
- )
70
+ if endpoint is None or endpoint == 'extended':
71
+ content = response_data['html']
72
+ cookies_string = response_data['cookies']
73
+ text = response_data['text']
74
+ status_code = response_data['status_code']
75
+ cookies_list = cookies_list_from_string(cookies_string)
76
+ return Response(
77
+ content=content,
78
+ cookies=cookies_list,
79
+ text=text,
80
+ status_code=status_code
81
+ )
82
+ elif endpoint == 'markdown':
83
+ return Response(
84
+ content='',
85
+ cookies=[],
86
+ text=response_data['markdown'],
87
+ status_code=0,
88
+ )
89
+
90
def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
    """Resolve the absolute ScrapingAnt API URL for the requested endpoint.

    Args:
        endpoint: Which API endpoint to target. ``None`` and ``'extended'``
            both map to the ``/extended`` endpoint; ``'markdown'`` maps to
            the ``/markdown`` endpoint.

    Returns:
        The full API URL (``SCRAPINGANT_API_BASE_URL`` plus endpoint path).

    Raises:
        ValueError: If ``endpoint`` is not one of the accepted values.
    """
    if endpoint is None or endpoint == 'extended':
        return SCRAPINGANT_API_BASE_URL + '/extended'
    elif endpoint == 'markdown':
        return SCRAPINGANT_API_BASE_URL + '/markdown'
    else:
        # Bug fix: the original message omitted 'extended', which the
        # first branch above explicitly accepts.
        raise ValueError(
            f'Invalid endpoint: {endpoint}, '
            f'must be None, "extended" or "markdown"'
        )
81
97
82
98
def general_request(
83
99
self,
@@ -92,6 +108,7 @@ def general_request(
92
108
browser: bool = True,
93
109
data=None,
94
110
json=None,
111
+ endpoint: Optional[str] = None,
95
112
) -> Response:
96
113
request_data = self._form_payload(
97
114
url=url,
@@ -105,7 +122,7 @@ def general_request(
105
122
try:
106
123
response = self.requests_session.request(
107
124
method=method,
108
- url=SCRAPINGANT_API_BASE_URL + '/extended' ,
125
+ url=self._get_scrapingant_api_url(endpoint) ,
109
126
params=request_data,
110
127
headers=convert_headers(headers),
111
128
data=data,
@@ -115,7 +132,7 @@ def general_request(
115
132
raise ScrapingantTimeoutException()
116
133
response_status_code = response.status_code
117
134
response_data = response.json()
118
- parsed_response: Response = self._parse_response(response_status_code, response_data, url)
135
+ parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint )
119
136
return parsed_response
120
137
121
138
async def general_request_async(
@@ -131,6 +148,7 @@ async def general_request_async(
131
148
browser: bool = True,
132
149
data=None,
133
150
json=None,
151
+ endpoint: Optional[str] = None,
134
152
) -> Response:
135
153
import httpx
136
154
@@ -153,7 +171,7 @@ async def general_request_async(
153
171
try:
154
172
response = await client.request(
155
173
method=method,
156
- url=SCRAPINGANT_API_BASE_URL + '/extended' ,
174
+ url=self._get_scrapingant_api_url(endpoint) ,
157
175
params=request_data,
158
176
headers=convert_headers(headers),
159
177
data=data,
@@ -163,5 +181,5 @@ async def general_request_async(
163
181
raise ScrapingantTimeoutException()
164
182
response_status_code = response.status_code
165
183
response_data = response.json()
166
- parsed_response: Response = self._parse_response(response_status_code, response_data, url)
184
+ parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint )
167
185
return parsed_response
0 commit comments