55
55
"Two Pointers" : "2P" ,
56
56
}
57
57
58
# Browser-like User-Agent string shared by all outgoing HTTP requests,
# so LeetCode/Google treat them as a regular desktop Chrome session.
USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
)
58
63
59
64
def refactor_readmes ():
60
65
"""Refactors README.md files in the src folder."""
@@ -164,15 +169,11 @@ def gsearch(query):
164
169
165
170
log (f"Searching Google for { query } " )
166
171
167
- user_agent = (
168
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
169
- + " (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
170
- )
171
172
for url in search (
172
173
"site:leetcode.com " + query ,
173
174
stop = 1 ,
174
175
tld = "com" ,
175
- user_agent = user_agent ,
176
+ user_agent = USER_AGENT ,
176
177
):
177
178
link = re .match (r"https:\/\/leetcode.com\/problems\/[a-z-]+\/" , url )[0 ]
178
179
def parse_json(url):
    """Fetch problem metadata for a LeetCode question via the GraphQL API.

    Args:
        url: Canonical problem URL, e.g.
            ``https://leetcode.com/problems/two-sum/``.  A trailing slash
            is required: the slug is taken from the second-to-last path
            segment.

    Returns:
        dict with keys ``"id"``, ``"title"``, ``"question"`` (HTML body),
        ``"difficulty"``, ``"hints"`` (list of strings) and ``"tags"``
        (list of topic-tag names).

    Raises:
        RuntimeError: if the API does not return a successful response
            after five attempts.
    """
    log(f"Parsing JSON data from {url}")

    graph_ql = "https://leetcode.com/graphql"
    params = {
        "operationName": "questionData",
        # "/problems/<slug>/" -> second-to-last segment is the title slug.
        "variables": {"titleSlug": url.split("/")[-2]},
        "query": """
        query questionData($titleSlug: String!) {
            question(titleSlug: $titleSlug) {
                questionId
                title
                content
                difficulty
                hints
                topicTags {
                    name
                }
            }
        }
        """,
    }

    json_data = json.dumps(params).encode("utf8")
    headers = {
        "User-Agent": USER_AGENT,
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Referer": url,
    }

    # Bounded retry instead of looping forever: a requests.Response is
    # falsy for 4xx/5xx statuses, so each bad status triggers another
    # attempt; give up with an explicit error after five tries.
    res = None
    for _ in range(5):
        res = requests.post(
            graph_ql,
            data=json_data,
            headers=headers,
            timeout=10,
        )
        if res:
            break
    else:
        raise RuntimeError(f"LeetCode GraphQL request failed for {url}")

    question: dict = res.json()["data"]["question"]

    # `qid` rather than `id` to avoid shadowing the builtin id().
    qid = question["questionId"]
    title = question["title"]
    content = question["content"]
    difficulty = question["difficulty"]
    hints = question["hints"]
    tags = [tag["name"] for tag in question["topicTags"]]

    return {
        "id": qid,
        "title": title,
        "question": content,
        "difficulty": difficulty,
        "hints": hints,
        "tags": tags,
    }
222
250
223
251
224
252
def write_readme (path , question , url ):
0 commit comments