@@ -60,7 +60,7 @@ def _form_payload(
6060 request_data ['return_page_source' ] = return_page_source
6161 return request_data
6262
63- def _parse_response (self , response_status_code : int , response_data : Dict , url : str ) -> Response :
63+ def _parse_response (self , response_status_code : int , response_data : Dict , url : str , endpoint : str ) -> Response :
6464 if response_status_code == 403 :
6565 raise ScrapingantInvalidTokenException ()
6666 elif response_status_code == 404 :
@@ -71,17 +71,33 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
7171 raise ScrapingantDetectedException ()
7272 elif response_status_code == 500 :
7373 raise ScrapingantInternalException ()
74- content = response_data ['html' ]
75- cookies_string = response_data ['cookies' ]
76- text = response_data ['text' ]
77- status_code = response_data ['status_code' ]
78- cookies_list = cookies_list_from_string (cookies_string )
79- return Response (
80- content = content ,
81- cookies = cookies_list ,
82- text = text ,
83- status_code = status_code
84- )
74+ if endpoint is None or endpoint == 'extended' :
75+ content = response_data ['html' ]
76+ cookies_string = response_data ['cookies' ]
77+ text = response_data ['text' ]
78+ status_code = response_data ['status_code' ]
79+ cookies_list = cookies_list_from_string (cookies_string )
80+ return Response (
81+ content = content ,
82+ cookies = cookies_list ,
83+ text = text ,
84+ status_code = status_code
85+ )
86+ elif endpoint == 'markdown' :
87+ return Response (
88+ content = '' ,
89+ cookies = [],
90+ text = response_data ['markdown' ],
91+ status_code = 0 ,
92+ )
93+
94+ def _get_scrapingant_api_url (self , endpoint : Optional [str ] = None ) -> str :
95+ if endpoint is None or endpoint == 'extended' :
96+ return SCRAPINGANT_API_BASE_URL + '/extended'
97+ elif endpoint == 'markdown' :
98+ return SCRAPINGANT_API_BASE_URL + '/markdown'
99+ else :
100+ raise ValueError (f'Invalid endpoint: { endpoint } , must be either None or "markdown"' )
85101
86102 def general_request (
87103 self ,
@@ -97,6 +113,7 @@ def general_request(
97113 return_page_source : Optional [bool ] = None ,
98114 data = None ,
99115 json = None ,
116+ endpoint : Optional [str ] = None ,
100117 ) -> Response :
101118 request_data = self ._form_payload (
102119 url = url ,
@@ -111,7 +128,7 @@ def general_request(
111128 try :
112129 response = self .requests_session .request (
113130 method = method ,
114- url = SCRAPINGANT_API_BASE_URL + '/extended' ,
131+ url = self . _get_scrapingant_api_url ( endpoint ) ,
115132 params = request_data ,
116133 headers = convert_headers (headers ),
117134 data = data ,
@@ -121,7 +138,7 @@ def general_request(
121138 raise ScrapingantTimeoutException ()
122139 response_status_code = response .status_code
123140 response_data = response .json ()
124- parsed_response : Response = self ._parse_response (response_status_code , response_data , url )
141+ parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
125142 return parsed_response
126143
127144 async def general_request_async (
@@ -138,6 +155,7 @@ async def general_request_async(
138155 return_page_source : Optional [bool ] = None ,
139156 data = None ,
140157 json = None ,
158+ endpoint : Optional [str ] = None ,
141159 ) -> Response :
142160 import httpx
143161
@@ -161,7 +179,7 @@ async def general_request_async(
161179 try :
162180 response = await client .request (
163181 method = method ,
164- url = SCRAPINGANT_API_BASE_URL + '/extended' ,
182+ url = self . _get_scrapingant_api_url ( endpoint ) ,
165183 params = request_data ,
166184 headers = convert_headers (headers ),
167185 data = data ,
@@ -171,5 +189,5 @@ async def general_request_async(
171189 raise ScrapingantTimeoutException ()
172190 response_status_code = response .status_code
173191 response_data = response .json ()
174- parsed_response : Response = self ._parse_response (response_status_code , response_data , url )
192+ parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
175193 return parsed_response
0 commit comments