17
17
)
18
18
from scrapingant_client .headers import convert_headers
19
19
from scrapingant_client .proxy_type import ProxyType
20
- from scrapingant_client .response import Response
20
+ from scrapingant_client .response import Response , MarkdownResponse
21
21
from scrapingant_client .utils import base64_encode_string
22
22
23
23
@@ -43,7 +43,8 @@ def _form_payload(
43
43
browser : bool = True ,
44
44
return_page_source : Optional [bool ] = None ,
45
45
) -> Dict :
46
- request_data = {'url' : url }
46
+ request_data = {
47
+ 'url' : url }
47
48
if cookies is not None :
48
49
request_data ['cookies' ] = cookies_list_to_string (cookies )
49
50
if js_snippet is not None :
@@ -60,7 +61,7 @@ def _form_payload(
60
61
request_data ['return_page_source' ] = return_page_source
61
62
return request_data
62
63
63
- def _parse_response (self , response_status_code : int , response_data : Dict , url : str , endpoint : str ) -> Response :
64
+ def _check_status_code (self , response_status_code : int , response_data : Dict , url : str ) -> None :
64
65
if response_status_code == 403 :
65
66
raise ScrapingantInvalidTokenException ()
66
67
elif response_status_code == 404 :
@@ -71,25 +72,25 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
71
72
raise ScrapingantDetectedException ()
72
73
elif response_status_code == 500 :
73
74
raise ScrapingantInternalException ()
74
- if endpoint is None or endpoint == 'extended' :
75
- content = response_data [ 'html' ]
76
- cookies_string = response_data ['cookies ' ]
77
- text = response_data ['text ' ]
78
- status_code = response_data ['status_code ' ]
79
- cookies_list = cookies_list_from_string ( cookies_string )
80
- return Response (
81
- content = content ,
82
- cookies = cookies_list ,
83
- text = text ,
84
- status_code = status_code
85
- )
86
- elif endpoint == 'markdown' :
87
- return Response (
88
- content = '' ,
89
- cookies = [],
90
- text = response_data ['markdown ' ],
91
- status_code = 0 ,
92
- )
75
+
76
def _parse_extended_response(self, response_data: Dict) -> Response:
    """Build a ``Response`` from the JSON payload of the 'extended' endpoint.

    Expects the payload to carry ``html``, ``cookies`` (serialized string),
    ``text`` and ``status_code`` keys, as returned by the scraping API.
    """
    return Response(
        content=response_data['html'],
        # The API returns cookies as a single serialized string; convert it
        # into the client's list-of-cookies representation.
        cookies=cookies_list_from_string(response_data['cookies']),
        text=response_data['text'],
        status_code=response_data['status_code'],
    )
88
+
89
def _parse_markdown_response(self, response_data: Dict) -> MarkdownResponse:
    """Build a ``MarkdownResponse`` from the 'markdown' endpoint payload."""
    page_url = response_data['url']
    page_markdown = response_data['markdown']
    return MarkdownResponse(url=page_url, markdown=page_markdown)
93
94
94
95
def _get_scrapingant_api_url (self , endpoint : Optional [str ] = None ) -> str :
95
96
if endpoint is None or endpoint == 'extended' :
@@ -99,7 +100,7 @@ def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
99
100
else :
100
101
raise ValueError (f'Invalid endpoint: { endpoint } , must be either None or "markdown"' )
101
102
102
- def general_request (
103
+ def _request (
103
104
self ,
104
105
url : str ,
105
106
method : str = 'GET' ,
@@ -114,7 +115,7 @@ def general_request(
114
115
data = None ,
115
116
json = None ,
116
117
endpoint : Optional [str ] = None ,
117
- ) -> Response :
118
+ ) -> Dict :
118
119
request_data = self ._form_payload (
119
120
url = url ,
120
121
cookies = cookies ,
@@ -138,10 +139,10 @@ def general_request(
138
139
raise ScrapingantTimeoutException ()
139
140
response_status_code = response .status_code
140
141
response_data = response .json ()
141
- parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
142
- return parsed_response
142
+ self ._check_status_code (response_status_code , response_data , url )
143
+ return response_data
143
144
144
- async def general_request_async (
145
+ async def _request_async (
145
146
self ,
146
147
url : str ,
147
148
method : str = 'GET' ,
@@ -156,7 +157,7 @@ async def general_request_async(
156
157
data = None ,
157
158
json = None ,
158
159
endpoint : Optional [str ] = None ,
159
- ) -> Response :
160
+ ) -> Dict :
160
161
import httpx
161
162
162
163
request_data = self ._form_payload (
@@ -189,5 +190,21 @@ async def general_request_async(
189
190
raise ScrapingantTimeoutException ()
190
191
response_status_code = response .status_code
191
192
response_data = response .json ()
192
- parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
193
- return parsed_response
193
+ self ._check_status_code (response_status_code , response_data , url )
194
+ return response_data
195
+
196
def general_request(self, *args, **kwargs) -> Response:
    """Scrape synchronously via the 'extended' endpoint.

    All arguments are forwarded verbatim to ``_request``; the raw JSON
    payload is converted into a ``Response``.
    """
    return self._parse_extended_response(
        self._request(*args, **kwargs, endpoint='extended')
    )
199
+
200
async def general_request_async(self, *args, **kwargs) -> Response:
    """Asynchronous counterpart of ``general_request``.

    Forwards all arguments to ``_request_async`` and parses the resulting
    JSON payload into a ``Response``.
    """
    return self._parse_extended_response(
        await self._request_async(*args, **kwargs, endpoint='extended')
    )
203
+
204
def markdown_request(self, *args, **kwargs) -> MarkdownResponse:
    """Scrape synchronously via the 'markdown' endpoint.

    All arguments are forwarded verbatim to ``_request``; the raw JSON
    payload is converted into a ``MarkdownResponse``.
    """
    return self._parse_markdown_response(
        self._request(*args, **kwargs, endpoint='markdown')
    )
207
+
208
async def markdown_request_async(self, *args, **kwargs) -> MarkdownResponse:
    """Asynchronous counterpart of ``markdown_request``.

    Forwards all arguments to ``_request_async`` and parses the resulting
    JSON payload into a ``MarkdownResponse``.
    """
    return self._parse_markdown_response(
        await self._request_async(*args, **kwargs, endpoint='markdown')
    )
0 commit comments