Skip to content
This repository was archived by the owner on Mar 5, 2021. It is now read-only.

Commit 2faa5b3

Browse files
added geetest captcha
1 parent 3643dc4 commit 2faa5b3

File tree

5 files changed

+182
-4
lines changed

5 files changed

+182
-4
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.pyc
2+
.idea

README.md

+46-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ ita.solve_captcha('captcha.jpg', case_sensitive=False)
5454
``` python
5555
ita.solve_captcha('http://abc.com/your_captcha.jpg')
5656
```
57-
**Submit recaptcha details**
57+
58+
## reCAPTCHA
59+
60+
### Submit recaptcha details
5861

5962
For recaptcha submission there are two things that are required.
6063
- page_url
@@ -80,7 +83,7 @@ captcha_id = ita.submit_recaptcha(recaptcha_params)
8083
This method returns a captchaID. This ID will be used next, to retrieve the g-response, once workers have
8184
completed the captcha. This usually takes somewhere between 10 and 80 seconds.
8285

83-
**Retrieve captcha response**
86+
### Retrieve captcha response
8487

8588
Once you have the captchaID, you check for its progress, and later on retrieve the gresponse.
8689

@@ -97,6 +100,47 @@ recaptcha_response = ita.retrieve_recaptcha(captcha_id) # captcha_id i
97100
print 'Recaptcha response: {}'.format(recaptcha_response) # print google response
98101
```
99102

103+
## GeeTest
104+
105+
GeeTest is a captcha that requires 3 parameters to be solved:
106+
- domain
107+
- challenge
108+
- gt
109+
110+
The response of this captcha after completion consists of 3 codes:
111+
- challenge
112+
- validate
113+
- seccode
114+
115+
### Submit GeeTest
116+
```python
117+
geetest_params = {
118+
'domain' :'domain_here',
119+
'challenge': 'challenge_here',
120+
'gt': 'gt_here',
121+
'proxy': '126.45.34.53:345', # or 126.45.34.53:123:joe:password, optional
122+
'user_agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0' # optional
123+
}
124+
captcha_id = ita.submit_geetest(geetest_params)
125+
```
126+
127+
Just like reCAPTCHA, you'll receive a captchaID.
128+
Using the ID, you'll be able to retrieve 3 codes after completion.
129+
130+
Optionally, you can send proxy and user_agent along.
131+
132+
### Retrieve GeeTest codes
133+
```python
134+
print ('Geetest captcha ID: {}'.format(captcha_id))
135+
print ('Waiting for geetest to be solved...')
136+
while ita.in_progress():
137+
sleep(10)
138+
geetest_response = ita.retrieve_geetest(captcha_id)
139+
print (geetest_response)
140+
```
141+
142+
Response will look like this: `{'challenge': '...', 'validate': '...', 'seccode': '...'}`
143+
100144
## Other methods/variables
101145

102146
**Affiliate id**

changelog.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
14.03.2019
2+
------
3+
- added geetest captcha

imagetyperzapi2/imagetyperzapi.py

+114-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import os, json
1010
from base64 import b64encode
11+
from urllib import urlencode
1112

1213
# endpoints
1314
# -------------------------------------------------------------------------------------------
@@ -17,6 +18,8 @@
1718
BALANCE_ENDPOINT = 'http://captchatypers.com/Forms/RequestBalance.ashx'
1819
BAD_IMAGE_ENDPOINT = 'http://captchatypers.com/Forms/SetBadImage.ashx'
1920
PROXY_CHECK_ENDPOINT = 'http://captchatypers.com/captchaAPI/GetReCaptchaTextJSON.ashx'
21+
GEETEST_SUBMIT_ENDPOINT = 'http://captchatypers.com/captchaapi/UploadGeeTest.ashx'
22+
GEETEST_RETRIEVE_ENDPOINT = 'http://captchatypers.com/captchaapi/getrecaptchatext.ashx'
2023

2124
CAPTCHA_ENDPOINT_CONTENT_TOKEN = 'http://captchatypers.com/Forms/UploadFileAndGetTextNEWToken.ashx'
2225
CAPTCHA_ENDPOINT_URL_TOKEN = 'http://captchatypers.com/Forms/FileUploadAndGetTextCaptchaURLToken.ashx'
@@ -25,6 +28,8 @@
2528
BALANCE_ENDPOINT_TOKEN = 'http://captchatypers.com/Forms/RequestBalanceToken.ashx'
2629
BAD_IMAGE_ENDPOINT_TOKEN = 'http://captchatypers.com/Forms/SetBadImageToken.ashx'
2730
PROXY_CHECK_ENDPOINT_TOKEN = 'http://captchatypers.com/captchaAPI/GetReCaptchaTextTokenJSON.ashx'
31+
GEETEST_SUBMIT_ENDPOINT_TOKEN = 'http://captchatypers.com/captchaapi/UploadGeeTestToken.ashx'
32+
2833
# user agent used in requests
2934
# ---------------------------
3035
USER_AGENT = 'pythonAPI1.0'
@@ -76,6 +81,32 @@ def captcha_id(self):
7681
def response(self):
7782
return self._response
7883

84+
# Geetest class
85+
# ---------------------------------
86+
class Geetest:
    """Pairs the ID of a GeeTest captcha with the raw text of its solution."""

    def __init__(self, captcha_id):
        # The response stays an empty string until set_response() is called.
        self._response = ''
        self._captcha_id = captcha_id

    def set_response(self, response):
        """Remember the raw response text for this captcha."""
        self._response = response

    @property
    def captcha_id(self):
        """The captcha ID this object was created with."""
        return self._captcha_id

    @property
    def response(self):
        """The stored response, broken into its three codes when possible.

        A response made of exactly three ';;;'-separated fields is returned
        as a dict with the keys 'challenge', 'validate' and 'seccode' (in
        that order); any other response is returned unchanged.
        """
        fields = self._response.split(';;;')
        if len(fields) != 3:
            return self._response
        challenge, validate, seccode = fields
        return {
            'challenge': challenge,
            'validate': validate,
            'seccode': seccode,
        }
109+
79110

80111
# API class
81112
# -----------------------------------------
@@ -221,6 +252,81 @@ def submit_recaptcha(self, d):
221252

222253
return self._recaptcha.captcha_id # return the ID
223254

255+
# submit geetest captcha
256+
def submit_geetest(self, d):
    """Submit a GeeTest captcha for solving and return its captcha ID.

    d is a dict that must contain the keys 'domain', 'challenge' and 'gt'.
    Every other key it holds is forwarded to the service unchanged. The
    dict is copied before the request parameters are added, so the caller's
    object is never modified (in particular, no credentials are written
    into it).

    Returns the captcha ID, which is the text of the service's response.

    Raises Exception if a required key is missing, or with the text that
    follows 'ERROR:' when the response contains that marker (the text is
    also stored in self._error).
    """
    # validate the required parameters, reporting the first one missing
    for required in ('domain', 'challenge', 'gt'):
        if required not in d:
            raise Exception('{} is missing'.format(required))

    # work on a copy so the caller's dict does not pick up action/credentials
    params = dict(d)
    params['action'] = 'UPLOADCAPTCHA'

    # credentials and url
    if self._username:
        params['username'] = self._username
        params['password'] = self._password
        url = GEETEST_SUBMIT_ENDPOINT
    else:
        params['token'] = self._access_token
        url = GEETEST_SUBMIT_ENDPOINT_TOKEN

    # affiliate ID
    if self._affiliate_id:
        params['affiliateid'] = self._affiliate_id

    # the parameters are sent both in the query string and in the form body
    url = '{}?{}'.format(url, urlencode(params))
    response = self._session.post(url, data=params,
                                  headers=self._headers, timeout=self._timeout)
    response_text = response.text.encode('utf-8')  # get text from response

    # check if we got an error
    # -------------------------------------------------------------
    # NOTE: this used to be and-ed with `response_text.split('|') != 2`,
    # a list-vs-int comparison that is always true; the clause was dropped
    # so that every 'ERROR:' response keeps raising exactly as before.
    if 'ERROR:' in response_text:
        response_err = response_text.split('ERROR:')[1].strip()
        self._error = response_err
        raise Exception(response_err)

    # the response text is the captcha ID; keep it for later retrieval
    self._geetest = Geetest(response_text)
    return self._geetest.captcha_id
289+
290+
# retrieve geetest captcha
291+
def retrieve_geetest(self, captcha_id = None):
    """Fetch the solution of a GeeTest captcha.

    captcha_id is optional; when omitted, the ID of the last captcha
    submitted through submit_geetest() is used.

    Returns the Geetest object's response: a dict with the keys
    'challenge', 'validate' and 'seccode' when the service answered with
    three ';;;'-separated fields, otherwise the raw response text.

    Raises Exception when no captcha_id is given and nothing was submitted
    before, or with the text that follows 'ERROR:' when the response
    contains that marker. That text is stored in self._error unless it is
    'NOT_DECODED'.
    """
    # self._geetest is only seen being assigned in submit_geetest(), so it
    # may not exist yet when the caller supplies a captcha_id directly
    geetest = getattr(self, '_geetest', None)

    # if captcha id is not specified, use the ID of the last captcha submitted
    if not captcha_id:
        if not geetest:
            raise Exception('no geetest was submited previously, submit a captcha'
                            ' first or give captcha_id as argument')
        captcha_id = geetest.captcha_id

    # request parameters
    data = {
        'action': 'GETTEXT',
        'captchaid': captcha_id
    }
    if self._username:
        data['username'] = self._username
        data['password'] = self._password
    else:
        data['token'] = self._access_token

    # the same endpoint serves both credential types
    url = '{}?{}'.format(GEETEST_RETRIEVE_ENDPOINT, urlencode(data))
    response = self._session.get(url,
                                 headers=self._headers, timeout=self._timeout)
    response_text = response.text.encode('utf-8')  # get text from response

    # check if we got an error
    # -------------------------------------------------------------
    # NOTE: this used to be and-ed with `response_text.split('|') != 2`,
    # a list-vs-int comparison that is always true; the clause was dropped
    # so that every 'ERROR:' response keeps raising exactly as before.
    if 'ERROR:' in response_text:
        response_err = response_text.split('ERROR:')[1].strip()
        # if error is different than NOT_DECODED, save it to obj
        if response_err != 'NOT_DECODED':
            self._error = response_err
        raise Exception(response_err)

    # a captcha_id given without a prior submit has no object to hold the
    # response yet; create one instead of failing on a missing attribute
    if geetest is None:
        geetest = Geetest(captcha_id)
        self._geetest = geetest

    geetest.set_response(response_text)
    return geetest.response
329+
224330
# retrieve recaptcha
225331
def retrieve_recaptcha(self, captcha_id = None):
226332
# if captcha id is not specified, use the ID of the last captcha submited
@@ -264,8 +370,14 @@ def retrieve_recaptcha(self, captcha_id = None):
264370
# check if captcha is still being decoded
265371
def in_progress(self, captcha_id = None):
266372
try:
267-
self.retrieve_recaptcha(captcha_id) # retrieve captcha
268-
return False # captcha got decoded
373+
if self._geetest:
374+
# geetest
375+
self.retrieve_geetest(captcha_id)
376+
return False
377+
else:
378+
# recaptcha
379+
self.retrieve_recaptcha(captcha_id) # retrieve captcha
380+
return False # captcha got decoded
269381
except Exception, ex:
270382
if 'NOT_DECODED' in str(ex): # if NOT_DECODED in response, it's 'OK'
271383
return True

main.py

+17
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,23 @@ def test_api():
4747
recaptcha_response = ita.retrieve_recaptcha(captcha_id) # captcha_id is optional, if not given, will use last captcha id submited
4848
print 'Recaptcha response: {}'.format(recaptcha_response) # print google response
4949

50+
# GeeTest captcha
51+
# ----------------
52+
# geetest_params = {
53+
# 'domain' :'domain_here',
54+
# 'challenge': 'challenge_here',
55+
# 'gt': 'gt_here',
56+
# 'proxy': '126.45.34.53:345', # or 126.45.34.53:123:joe:password
57+
# 'user_agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0' # optional
58+
# }
59+
# captcha_id = ita.submit_geetest(geetest_params)
60+
# print ('Geetest captcha ID: {}'.format(captcha_id))
61+
# print ('Waiting for geetest to be solved...')
62+
# while ita.in_progress():
63+
# sleep(10)
64+
# geetest_response = ita.retrieve_geetest(captcha_id)
65+
# print (geetest_response) # {'challenge': '...', 'validate': '...', 'seccode': '...'}
66+
5067
# other examples
5168
# --------------------------------------------------------------------------------------
5269
# ita = ImageTypersAPI(access_token, 123) # init imagetyperz api obj with access_token and affiliate id

0 commit comments

Comments
 (0)