Skip to content

Commit

Permalink
Add some new tests for coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
VeNoMouS committed Dec 12, 2019
1 parent 6877137 commit 9fb6620
Show file tree
Hide file tree
Showing 20 changed files with 390 additions and 719 deletions.
3 changes: 3 additions & 0 deletions .codacy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
exclude_paths:
- tests/*
- README.md
4 changes: 1 addition & 3 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
[run]
source = cloudscraper
omit =
*test*
cloudscraper/interpreters/jsfuck.py
omit = tests/*,cloudscraper/interpreters/jsfuck.py
5 changes: 2 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ retry:
py.test -n auto --forked --looponfail

ci:
/bin/true
#py.test -n 8 --forked --junitxml=report.xml --collect-only
py.test -n 8 --forked --junitxml=report.xml

lint:
flake8 --ignore $(pep8-rules) cloudscraper tests
Expand All @@ -27,7 +26,7 @@ format:
autopep8 -aaa --ignore $(pep8-rules) --in-place --recursive cloudscraper tests

coverage:
py.test --cov-config .coveragerc --verbose --cov-report term --cov-report xml --cov=cloudscraper tests
py.test --cov-config=.coveragerc --verbose --cov-report=term --cov-report=xml --cov=cloudscraper tests
coveralls

clean:
Expand Down
6 changes: 2 additions & 4 deletions cloudscraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def request(self, method, url, *args, **kwargs):

resp = self.Challenge_Response(resp, **kwargs)
else:
if resp.status_code not in [302, 429, 503]:
if not resp.is_redirect and resp.status_code not in [429, 503]:
self._solveDepthCnt = 0

return resp
Expand Down Expand Up @@ -452,9 +452,7 @@ def updateAttr(obj, name, newValue):
cloudflare_kwargs['headers'] = updateAttr(
cloudflare_kwargs,
'headers',
{
'Referer': resp.url
}
{'Referer': resp.url}
)

ret = self.request(
Expand Down
8 changes: 4 additions & 4 deletions cloudscraper/user_agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import sys
import ssl


from collections import OrderedDict

# ------------------------------------------------------------------------------- #
Expand Down Expand Up @@ -111,6 +110,7 @@ def loadUserAgent(self, *args, **kwargs):

self.headers['User-Agent'] = random.SystemRandom().choice(filteredAgents[user_agent_version])

if not kwargs.get('allow_brotli', False):
if 'br' in self.headers['Accept-Encoding']:
self.headers['Accept-Encoding'] = ','.join([encoding for encoding in self.headers['Accept-Encoding'].split(',') if encoding.strip() != 'br']).strip()
if not kwargs.get('allow_brotli', False) and 'br' in self.headers['Accept-Encoding']:
self.headers['Accept-Encoding'] = ','.join([
encoding for encoding in self.headers['Accept-Encoding'].split(',') if encoding.strip() != 'br'
]).strip()
6 changes: 3 additions & 3 deletions dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ pytest >= 4.4.1
pytest-cov >= 2.6.1
pytest-xdist >= 1.28.0
pytest-forked >= 1.0.2
pytest-testmon >= 0.9.16
pytest-watch >= 4.2.0
pytest-timeout >= 1.3.3
pytest-env >= 0.6.2
responses >= 0.10.6
sure >= 1.4.11
flake8 >= 3.7.7
tox >= 3.9.0
coveralls >= 1.7.0
autopep8 >= 1.4.4
autopep8 >= 1.4.4
js2py >= 0.60
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[pytest]
addopts = -p no:warnings
timeout = 2000
env = PYTHONHASHSEED=0
6 changes: 2 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,16 @@
],
include_package_data = True,
install_requires = [
'pyopenssl >= 17.0',
'requests >= 2.9.2',
'js2py >= 0.60',
'requests_toolbelt >= 0.9.1',
'brotli >= 1.0.7'
],
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Natural Language :: English',
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
Expand Down
211 changes: 94 additions & 117 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,141 +1,118 @@
# -*- coding: utf-8 -*-

import hashlib
import responses
import pytest
import re

from requests.compat import urlencode
from collections import OrderedDict
from os import path
from io import open

try:
from urlparse import parse_qsl
except ImportError:
from urllib.parse import parse_qsl

# Fake URL; network requests are not allowed by default when using the decorator.
# NOTE(review): `url` is assigned twice back to back, so only the second value
# is in effect afterwards -- confirm which one is intended.
url = 'https://example-site.dev'
url = 'http://www.evildomain.com'

# These kwargs will be passed to tests by the decorator
# NOTE(review): `cloudscraper_kwargs` is also defined twice; both definitions
# carry the same keys and values.
cloudscraper_kwargs = dict(
delay=0.01,
debug=False
)
cloudscraper_kwargs = dict(delay=0.01, debug=False)

# Cloudflare challenge fixtures are only read from the FS once
# (fixtures() stores each file's content in this dict under its filename).
cache = {}


class ChallengeResponse(responses.Response):
    """Mocked reply that imitates Cloudflare's standard IUAM JS challenge.

    Intended to be the first response registered in a test.

    Kwargs:
        Any keyword argument replaces the matching default below.
        The request will error if it doesn't match a defined response.
    """

    def __init__(self, **kwargs):
        # Start from the challenge defaults, then let the caller's values win.
        options = {
            'method': 'GET',
            'status': 503,
            'headers': {'Server': 'cloudflare'},
            'content_type': 'text/html',
        }
        options.update(kwargs)

        super(ChallengeResponse, self).__init__(**options)


class RedirectResponse(responses.CallbackResponse):
    """Mocked redirect that follows a correctly answered challenge.

    Intended to be the second response registered in a test. When a matching
    request arrives the supplied callback is invoked first; unless it returns
    something truthy, the reply is the default redirect to the index page "/"
    of the fake URL.

    Kwargs:
        Any keyword argument replaces the matching default below.
        The request will error if it doesn't match a defined response.
    """

    def __init__(self, callback=lambda request: None, **kwargs):
        options = {
            'method': 'GET',
            'status': 302,
            'headers': {'Location': '/'},
            'content_type': 'text/html',
            'body': '',
        }
        options.update(kwargs)

        # These three are handed back by the callback rather than passed to
        # the parent constructor.
        status = options.pop('status')
        headers = options.pop('headers')
        body = options.pop('body')
        canned_reply = (status, headers, body)

        def respond(request):
            # A truthy return value from the user's callback overrides the canned reply.
            return callback(request) or canned_reply

        options['callback'] = respond

        super(RedirectResponse, self).__init__(**options)


class DefaultResponse(responses.Response):
    """Mocked final page served once the challenge has been solved.

    Intended to be the last response registered in a test; it normally
    follows a redirect.

    Kwargs:
        Any keyword argument replaces the matching default below.
        The request will error if it doesn't match a defined response.
    """

    def __init__(self, **kwargs):
        # Start from the success defaults, then let the caller's values win.
        options = {
            'method': 'GET',
            'status': 200,
            'content_type': 'text/html',
        }
        options.update(kwargs)

        super(DefaultResponse, self).__init__(**options)
# ------------------------------------------------------------------------------- #


def fixtures(filename):
    """Read and cache a challenge fixture.

    The file is looked up in the ``fixtures`` directory next to this module.
    It is read from disk on the first request only; later calls return the
    content stored in the module-level ``cache``.

    Args:
        filename (str): Name of the fixture file inside ``fixtures``.

    Returns:
        str: The decoded HTML text of the challenge fixture.
    """
    # Test membership rather than truthiness so that an empty fixture is
    # cached too instead of being re-read on every call.
    if filename not in cache:
        fixture_path = path.join(path.dirname(__file__), 'fixtures', filename)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding.
        with open(fixture_path, 'r', encoding='utf-8') as fp:
            cache[filename] = fp.read()
    return cache[filename]


# This is the page that should be received after bypassing the JS challenge.
# It is loaded through fixtures() as a module-level statement, i.e. when this
# module is imported.
requested_page = fixtures('requested_page.html')
# ------------------------------------------------------------------------------- #


# NOTE(review): this span contains two decorator factories,
# challenge_responses(filename, jschl_answer) and mockCloudflare(fixture, payload),
# whose lines appear interleaved and without indentation -- presumably the removed
# and added sides of a diff. Confirm the nesting against the repository before
# relying on statement order.
# This fancy decorator wraps tests so the responses will be mocked.
# It could be called directly e.g. challenge_responses(*args)(test_func) -> wrapper
def challenge_responses(filename, jschl_answer):
# This function is called with the test_func and returns a new wrapper.
def challenge_responses_decorator(test):
def mockCloudflare(fixture, payload):
def responses_decorator(test):
@responses.activate
def wrapper(self, interpreter):
html = fixtures(filename).decode('utf-8')

# Collect the hidden form fields (s, jschl_vc, pass) from the fixture HTML and
# append the expected answer to build the submit URL's query string.
params = OrderedDict(re.findall(r'name="(s|jschl_vc|pass)"\svalue="(\S+)"', html))
params['jschl_answer'] = jschl_answer

submit_uri = '{}/cdn-cgi/l/chk_jschl?{}'.format(url, urlencode(params))

responses.add(ChallengeResponse(url=url, body=fixtures(filename)))

def onRedirect(request):
# We don't register the last response unless the redirect occurs
responses.add(DefaultResponse(url=url, body=requested_page))

responses.add(RedirectResponse(url=submit_uri, callback=onRedirect))

return test(self, interpreter=interpreter, **cloudscraper_kwargs)
# The following causes pytest to call the test wrapper once for each interpreter.
return pytest.mark.parametrize('interpreter', ['js2py', 'nodejs'])(wrapper)

return challenge_responses_decorator
def wrapper(self):
# Callback for POST requests: decides whether the submitted form counts as solved.
def post_callback(request):
postPayload = dict(parse_qsl(request.body))
# The submitted 'r' field is replaced by its SHA-256 hex digest before the
# comparison below -- presumably `payload` carries the digest rather than the
# raw token; verify against the callers.
postPayload['r'] = hashlib.sha256(postPayload.get('r', '').encode('ascii')).hexdigest()

# Any missing or mismatching expected parameter answers with the challenge
# fixture again (HTTP 503).
for param in payload:
if param not in postPayload or postPayload[param] != payload[param]:
return (
503,
{'Server': 'cloudflare'},
fixtures(fixture)
)

# ------------------------------------------------------------------------------- #

# Every expected parameter matched: reply 200 with cookies and the body 'Solved OK'.
return (
200,
[
(
'Set-Cookie', '__cfduid=d5927a7cbaa96ec536939f93648e3c08a1576098703; Domain=.evildomain.com; path=/'
),
(
'Set-Cookie',
'__cfduid=d5927a7cbaa96ec536939f93648e3c08a1576098703; domain=.evildomain.com; path=/'
),
('Server', 'cloudflare')
],
'Solved OK'
)

# ------------------------------------------------------------------------------- #

# Callback for GET requests: always serves the challenge fixture.
def challengeCallback(request):
status_code = 503

# Fixtures whose name contains 'reCaptcha' or '1020' are served with 403 instead of 503.
if 'reCaptcha' in fixture or '1020' in fixture:
status_code = 403
return (
status_code,
[
(
'Set-Cookie',
'__cfduid=d5927a7cbaa96ec536939f93648e3c08a1576098703; Domain=.evildomain.com; path=/'
),
('Server', 'cloudflare')
],
fixtures(fixture)
)

# ------------------------------------------------------------------------------- #

# Register both callbacks on the fake URL: POST -> post_callback, GET -> challengeCallback.
responses.add_callback(
responses.POST,
url,
callback=post_callback,
content_type='text/html',
)

responses.add_callback(
responses.GET,
url,
callback=challengeCallback,
content_type='text/html',
)

# ------------------------------------------------------------------------------- #

# Run the wrapped test with the shared cloudscraper keyword arguments.
return test(self, **cloudscraper_kwargs)

# ------------------------------------------------------------------------------- #

return wrapper

# ------------------------------------------------------------------------------- #

return responses_decorator
Loading

0 comments on commit 9fb6620

Please sign in to comment.