diff --git a/AUTHORS.rst b/AUTHORS.rst
index a1b5ec4632..6ced9b5b1c 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -157,3 +157,6 @@ Patches and Suggestions
 - Martin Jul (`@mjul <https://github.com/mjul>`_)
 - Joe Alcorn (`@buttscicles <https://github.com/buttscicles>`_)
 - Syed Suhail Ahmed (`@syedsuhail <https://github.com/syedsuhail>`_)
+- Scott Sadler (`@ssadler <https://github.com/ssadler>`_)
+- Arthur Darcet (`@arthurdarcet <https://github.com/arthurdarcet>`_)
+- Ulrich Petri (`@ulope <https://github.com/ulope>`_)
diff --git a/HISTORY.rst b/HISTORY.rst
index 5dcc1c393f..254656c5bc 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,31 +3,138 @@ Release History
 ---------------
 
-2.4.3.4 (2014-11-06) Rebelmouse
-+++++++++++++++++++++++++++++++
-- small refactoring SysCallError treatment
+2.6.0 (2015-03-14)
+++++++++++++++++++
+
+**Bugfixes**
+
+- Fix handling of cookies on redirect. Previously a cookie without a host
+  value set would use the hostname for the redirected URL, exposing requests
+  users to session fixation attacks and potentially cookie stealing. This was
+  disclosed privately by Matthew Daley of `BugFuzz <https://bugfuzz.com>`_.
+  A CVE identifier has not yet been assigned for this. This affects all
+  versions of requests from v2.1.0 to v2.5.3 (inclusive on both ends).
+
+- Fix error when requests is an ``install_requires`` dependency and ``python
+  setup.py test`` is run. (#2462)
+
+- Fix error when urllib3 is unbundled and requests continues to use the
+  vendored import location.
+
+- Include fixes to ``urllib3``'s header handling.
+
+- Requests' handling of unvendored dependencies is now more restrictive.
+
+**Features and Improvements**
+
+- Support bytearrays when passed as parameters in the ``files`` argument.
+  (#2468)
+
+- Avoid data duplication when creating a request with ``str``, ``bytes``, or
+  ``bytearray`` input to the ``files`` argument.
+
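A quick illustration of the two ``files`` entries above; a minimal sketch,
with the httpbin.org URL chosen purely for illustration:

.. code-block:: python

    import requests

    # As of 2.6.0, str/bytes/bytearray values in ``files`` are handed to
    # the multipart encoder as-is instead of being wrapped in a file-like
    # object and copied back out via .read().
    report = bytearray(b'col_a,col_b\n1,2\n')
    files = {'report': ('report.csv', report, 'text/csv')}
    r = requests.post('http://httpbin.org/post', files=files)
    print(r.status_code)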
+2.5.3 (2015-02-24)
+++++++++++++++++++
+
+**Bugfixes**
+
+- Revert changes to our vendored certificate bundle. For more context see
+  (#2455, #2456, and http://bugs.python.org/issue23476)
+
+2.5.2 (2015-02-23)
+++++++++++++++++++
+
+**Features and Improvements**
+
+- Add sha256 fingerprint support. (`shazow/urllib3#540`_)
+
+- Improve the performance of headers. (`shazow/urllib3#544`_)
+
+**Bugfixes**
+
+- Copy pip's import machinery. When downstream redistributors remove
+  requests.packages.urllib3 the import machinery will continue to let those
+  same symbols work. Example usage in requests' documentation and 3rd-party
+  libraries relying on the vendored copies of urllib3 will work without
+  having to fall back to the system urllib3.
+
+- Attempt to quote parts of the URL on redirect if unquoting and then quoting
+  fails. (#2356)
+
+- Fix filename type check for multipart form-data uploads. (#2411)
+
+- Properly handle the case where a server issuing digest authentication
+  challenges provides both auth and auth-int qop-values. (#2408)
+
+- Fix a socket leak. (`shazow/urllib3#549`_)
+
+- Fix multiple ``Set-Cookie`` headers properly. (`shazow/urllib3#534`_)
+
+- Disable the built-in hostname verification. (`shazow/urllib3#526`_)
+
+- Fix the behaviour of decoding an exhausted stream. (`shazow/urllib3#535`_)
+
+**Security**
+
+- Pulled in an updated ``cacert.pem``.
+
+- Drop RC4 from the default cipher list. (`shazow/urllib3#551`_)
-2.4.3.3 (2014-11-06) Rebelmouse
-+++++++++++++++++++++++++++++++
-- SysCallError is handled as SSLError exception
+
+.. _shazow/urllib3#551: https://github.com/shazow/urllib3/pull/551
+.. _shazow/urllib3#549: https://github.com/shazow/urllib3/pull/549
+.. _shazow/urllib3#544: https://github.com/shazow/urllib3/pull/544
+.. _shazow/urllib3#540: https://github.com/shazow/urllib3/pull/540
+.. _shazow/urllib3#535: https://github.com/shazow/urllib3/pull/535
+.. _shazow/urllib3#534: https://github.com/shazow/urllib3/pull/534
+.. _shazow/urllib3#526: https://github.com/shazow/urllib3/pull/526
+
+2.5.1 (2014-12-23)
+++++++++++++++++++
+
+**Behavioural Changes**
+
+- Only catch HTTPErrors in raise_for_status (#2382)
-2.4.3.2 (2014-10-4) Rebelmouse
-+++++++++++++++++++++++++++++++
+
+**Bugfixes**
-- Raise a requests' exception when occurs 104 - Connection reset by peer
+
+- Handle LocationParseError from urllib3 (#2344)
+- Handle file-like object filenames that are not strings (#2379)
+- Unbreak HTTPDigestAuth handler. Allow new nonces to be negotiated (#2389)
+
+2.5.0 (2014-12-01)
+++++++++++++++++++
-2.4.3.1 (2014-10-24) Rebelmouse
-+++++++++++++++++++++++++++++++
+
+**Improvements**
-- Added libs to requirements.txt for SSL SNI support
+
+- Allow usage of urllib3's Retry object with HTTPAdapters (#2216)
+- The ``iter_lines`` method on a response now accepts a delimiter with which
+  to split the content (#2295)
+
+**Behavioural Changes**
+
+- Add deprecation warnings to functions in requests.utils that will be removed
+  in 3.0 (#2309)
+- Sessions used by the functional API are always closed (#2326)
+- Restrict requests to HTTP/1.1 and HTTP/1.0 (stop accepting HTTP/0.9) (#2323)
 
 **Bugfixes**
 
-- Handhshake ssl hangs forever - Gevent/Eventlet enviorments only
-- ZeroReturnerror exception is handled properly.
+- Only parse the URL once (#2353)
+- Allow Content-Length header to always be overridden (#2332)
+- Properly handle files in HTTPDigestAuth (#2333)
+- Cap redirect_cache size to prevent memory abuse (#2299)
+- Fix HTTPDigestAuth handling of redirects after authenticating successfully
+  (#2253)
+- Fix crash with custom method parameter to Session.request (#2317)
+- Fix how Link headers are parsed using the regular expression library (#2271)
+
+**Documentation**
+
+- Add more references for interlinking (#2348)
+- Update CSS for theme (#2290)
+- Update width of buttons and sidebar (#2289)
+- Replace references of Gittip with Gratipay (#2282)
+- Add link to changelog in sidebar (#2273)
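The ``iter_lines`` delimiter added in 2.5.0 above is easiest to see in code;
a minimal sketch, with httpbin.org again chosen purely for illustration:

.. code-block:: python

    import requests

    r = requests.get('http://httpbin.org/stream/3', stream=True)
    # With ``delimiter`` set, chunks are split on it instead of with
    # splitlines(); pass bytes when the content is not being decoded.
    for line in r.iter_lines(delimiter=b'\n'):
        if line:
            print(line)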
 
 2.4.3 (2014-10-06)
 ++++++++++++++++++
@@ -81,7 +188,7 @@
 - Support for connect timeouts! Timeout now accepts a tuple (connect, read)
   which is used to set individual connect and read timeouts.
 - Allow copying of PreparedRequests without headers/cookies.
 - Updated bundled urllib3 version.
-- Refactored settings loading from environment — new `Session.merge_environment_settings`.
+- Refactored settings loading from environment -- new `Session.merge_environment_settings`.
 - Handle socket errors in iter_content.
diff --git a/LICENSE b/LICENSE
index 8c5e758401..a103fc915e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2014 Kenneth Reitz
+Copyright 2015 Kenneth Reitz
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/README.rst b/README.rst
index 521ab6f209..40995fe9ed 100644
--- a/README.rst
+++ b/README.rst
@@ -1,11 +1,11 @@
 Requests: HTTP for Humans
 =========================
 
-.. image:: https://badge.fury.io/py/requests.png
-    :target: http://badge.fury.io/py/requests
+.. image:: https://img.shields.io/pypi/v/requests.svg
+    :target: https://pypi.python.org/pypi/requests
 
-.. image:: https://pypip.in/d/requests/badge.png
-    :target: https://crate.io/packages/requests/
+.. image:: https://img.shields.io/pypi/dm/requests.svg
+    :target: https://pypi.python.org/pypi/requests
 
 
 Requests is an Apache2 Licensed HTTP library, written in Python, for human
@@ -19,7 +19,7 @@ perform the simplest of tasks.
 
 Things shouldn't be this way. Not in Python.
 
-.. code-block:: pycon
+.. code-block:: python
 
     >>> r = requests.get('https://api.github.com', auth=('user', 'pass'))
     >>> r.status_code
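Before the docs template diffs that follow, one more change worth a sketch:
per the 2.5.0 entry above (and the ``requests/adapters.py`` hunks later in
this diff), ``HTTPAdapter(max_retries=...)`` accepts urllib3's ``Retry``
object. The parameter values here are arbitrary:

.. code-block:: python

    import requests
    from requests.adapters import HTTPAdapter
    # The vendored import path matches the one used in adapters.py.
    from requests.packages.urllib3.util.retry import Retry

    session = requests.Session()
    # An int still works; a Retry instance gives granular control over
    # which failures are retried and how backoff is applied.
    retries = Retry(total=3, backoff_factor=0.5,
                    status_forcelist=[502, 503, 504])
    session.mount('https://', HTTPAdapter(max_retries=retries))
    session.mount('http://', HTTPAdapter(max_retries=retries))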

diff --git a/docs/_templates/sidebarintro.html b/docs/_templates/sidebarintro.html
index e60cf4518e..11d326677e 100644
--- a/docs/_templates/sidebarintro.html
+++ b/docs/_templates/sidebarintro.html
@@ -15,15 +15,9 @@
-
-  Donate
-
-  If you love Requests, consider supporting the author on Gratipay:
-
-  [Gratipay badge link]
-
+
+  Buy Requests Pro
+
 
   Get Updates
 
@@ -46,6 +40,11 @@
   Translations
 
   Useful Links
+
+  • Certifi: Best CA Bundle
+  • Requests-Toolbelt
+
   • Requests @ GitHub
   • Requests @ PyPI
   • Issue Tracker
diff --git a/docs/_templates/sidebarlogo.html b/docs/_templates/sidebarlogo.html
index 930fdb3d41..928cd2fdb8 100644
--- a/docs/_templates/sidebarlogo.html
+++ b/docs/_templates/sidebarlogo.html
@@ -14,16 +14,9 @@ development release.

-
-  Donate
-
-  If you love Requests, consider supporting the author on Gittip:
-
-  [Gittip badge link]
-
+
+  Buy Requests Pro
+
 
   Get Updates
 
   Receive updates on new releases and upcoming projects.
 
    diff --git a/docs/_themes/kr/layout.html b/docs/_themes/kr/layout.html index cf977d8fea..a9a3cb6e64 100644 --- a/docs/_themes/kr/layout.html +++ b/docs/_themes/kr/layout.html @@ -15,6 +15,9 @@ Fork me on GitHub + + + + + {%- endblock %} diff --git a/docs/api.rst b/docs/api.rst index 7c5dae2e54..7225a83799 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -88,6 +88,12 @@ Cookies .. autofunction:: requests.utils.cookiejar_from_dict .. autofunction:: requests.utils.add_dict_to_cookiejar +.. autoclass:: requests.cookies.RequestsCookieJar + :inherited-members: + +.. autoclass:: requests.cookies.CookieConflictError + :inherited-members: + Encodings ~~~~~~~~~ diff --git a/docs/community/support.rst b/docs/community/support.rst index 7f8c332dcf..96f600a1da 100644 --- a/docs/community/support.rst +++ b/docs/community/support.rst @@ -5,12 +5,57 @@ Support If you have questions or issues about Requests, there are several options: +StackOverflow +------------- + +If your question does not contain sensitive (possibly proprietary) +information or can be properly anonymized, please ask a question on +`StackOverflow `_ +and use the tag ``python-requests``. + Send a Tweet ------------ If your question is less than 140 characters, feel free to send a tweet to -`@kennethreitz `_. +`@kennethreitz `_, +`@sigmavirus24 `_, or +`@lukasaoz `_. + +Vulnerability Disclosure +------------------------ + +If you think you have found a potential security vulnerability in requests, +please email `sigmavirus24 `_ and +`Lukasa `_ directly. **Do not file a public issue.** + +Our PGP Key fingerprints are: + +- 0161 BB7E B208 B5E0 4FDC 9F81 D9DA 0A04 9113 F853 (@sigmavirus24) + +- 90DC AE40 FEA7 4B14 9B70 662D F25F 2144 EEC1 373D (@lukasa) +If English is not your first language, please try to describe the problem and +its impact to the best of your ability. For greater detail, please use your native +language and we will try our best to translate it using online services. + +Please also include the code you used to find the problem and the shortest amount +of code necessary to reproduce it. + +Please do not disclose this to anyone else. We will retrieve a CVE identifier if +necessary and give you full credit under whatever name or alias you provide. +We will only request an identifier when we have a fix and can publish it in a release. + +We will respect your privacy and will only publicize your involvement if you grant +us permission. + +Previous CVEs +~~~~~~~~~~~~~ + +- Fixed in 2.3.0 + + - `CVE 2014-1829 `_ + + - `CVE 2014-1830 `_ File an Issue ------------- @@ -34,4 +79,9 @@ IRC The official Freenode channel for Requests is `#python-requests `_ -I'm also available as **kennethreitz** on Freenode. +The core developers of requests are on IRC throughout the day. +You can find them in ``#python-requests`` as: + +- kennethreitz +- lukasa +- sigmavirus24 diff --git a/docs/conf.py b/docs/conf.py index 4521eed4c4..4969857b34 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -46,7 +46,7 @@ # General information about the project. project = u'Requests' -copyright = u'2014. A Kenneth Reitz Project' +copyright = u'2015. 
A Kenneth Reitz Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index b1e334bb87..a2fe653b57 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -5,6 +5,7 @@ Advanced Usage This document covers some of Requests more advanced features. +.. _session-objects: Session Objects --------------- @@ -50,6 +51,8 @@ parameters. All values that are contained within a session are directly available to you. See the :ref:`Session API Docs ` to learn more. +.. _request-and-response-objects: + Request and Response Objects ---------------------------- @@ -82,6 +85,8 @@ request, and then the request's headers:: {'Accept-Encoding': 'identity, deflate, compress, gzip', 'Accept': '*/*', 'User-Agent': 'python-requests/1.2.0'} +.. _prepared-requests: + Prepared Requests ----------------- @@ -189,6 +194,7 @@ If you specify a wrong path or an invalid cert:: >>> requests.get('https://kennethreitz.com', cert='/wrong_path/server.pem') SSLError: [Errno 336265225] _ssl.c:347: error:140B0009:SSL routines:SSL_CTX_use_PrivateKey_file:PEM lib +.. _body-content-workflow: Body Content Workflow --------------------- @@ -228,6 +234,7 @@ consider using ``contextlib.closing`` (`documented here`_), like this:: .. _`documented here`: http://docs.python.org/2/library/contextlib.html#contextlib.closing +.. _keep-alive: Keep-Alive ---------- @@ -240,6 +247,7 @@ Note that connections are only released back to the pool for reuse once all body data has been read; be sure to either set ``stream`` to ``False`` or read the ``content`` property of the ``Response`` object. +.. _streaming-uploads: Streaming Uploads ----------------- @@ -251,6 +259,7 @@ file-like object for your body:: with open('massive-body', 'rb') as f: requests.post('http://some.url/streamed', data=f) +.. _chunk-encoding: Chunk-Encoded Requests ---------------------- @@ -267,6 +276,7 @@ a length) for your body:: requests.post('http://some.url/chunked', data=gen()) +.. _multipart: POST Multiple Multipart-Encoded Files ------------------------------------- @@ -290,6 +300,7 @@ To do that, just set files to a list of tuples of (form_field_name, file_info): ... } +.. _event-hooks: Event Hooks ----------- @@ -329,6 +340,7 @@ Let's print some request method arguments at runtime:: http://httpbin.org +.. _custom-auth: Custom Authentication --------------------- @@ -372,7 +384,7 @@ Streaming Requests With :class:`requests.Response.iter_lines()` you can easily iterate over streaming APIs such as the `Twitter Streaming -API `_. Simply +API `_. Simply set ``stream`` to ``True`` and iterate over the response with :class:`~requests.Response.iter_lines()`:: @@ -387,6 +399,20 @@ set ``stream`` to ``True`` and iterate over the response with if line: print(json.loads(line)) +.. warning:: + + :class:`~requests.Response.iter_lines()` is not reentrant safe. + Calling this method multiple times causes some of the received data + being lost. In case you need to call it from multiple places, use + the resulting iterator object instead:: + + lines = r.iter_lines() + # Save the first line for later or just skip it + first_line = next(lines) + for line in lines: + print(line) + +.. 
_proxies: Proxies ------- diff --git a/docs/user/quickstart.rst b/docs/user/quickstart.rst index b501b1b441..6fd3602e40 100644 --- a/docs/user/quickstart.rst +++ b/docs/user/quickstart.rst @@ -73,6 +73,13 @@ You can see that the URL has been correctly encoded by printing the URL:: Note that any dictionary key whose value is ``None`` will not be added to the URL's query string. +In order to pass a list of items as a value you must mark the key as +referring to a list like string by appending ``[]`` to the key:: + + >>> payload = {'key1': 'value1', 'key2[]': ['value2', 'value3']} + >>> r = requests.get("http://httpbin.org/get", params=payload) + >>> print(r.url) + http://httpbin.org/get?key1=value1&key2%5B%5D=value2&key2%5B%5D=value3 Response Content ---------------- @@ -268,7 +275,7 @@ In the event you are posting a very large file as a ``multipart/form-data`` request, you may want to stream the request. By default, ``requests`` does not support this, but there is a separate package which does - ``requests-toolbelt``. You should read `the toolbelt's documentation -`_ for more details about how to use it. +`_ for more details about how to use it. For sending multiple files in one request refer to the :ref:`advanced ` section. @@ -340,6 +347,15 @@ So, we can access the headers using any capitalization we want:: >>> r.headers.get('content-type') 'application/json' +It is also special in that the server could have sent the same header multiple +times with different values, but requests combines them so they can be +represented in the dictionary within a single mapping, as per +`RFC 7230 `_: + + > A recipient MAY combine multiple header fields with the same field name + > into one "field-name: field-value" pair, without changing the semantics + > of the message, by appending each subsequent field value to the combined + > field value in order, separated by a comma. Cookies ------- diff --git a/requests/__init__.py b/requests/__init__.py index 09c6efa8c9..446500bfd4 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -36,17 +36,17 @@ The other HTTP methods are supported - see `requests.api`. Full documentation is at . -:copyright: (c) 2014 by Kenneth Reitz. +:copyright: (c) 2015 by Kenneth Reitz. :license: Apache 2.0, see LICENSE for more details. 
""" __title__ = 'requests' -__version__ = '2.4.3.4' -__build__ = 0x020403 +__version__ = '2.6.0' +__build__ = 0x020503 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2014 Kenneth Reitz' +__copyright__ = 'Copyright 2015 Kenneth Reitz' # Attempt to enable urllib3's SNI support, if possible try: diff --git a/requests/adapters.py b/requests/adapters.py index abb25d11fd..02e0dd1f1d 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -11,10 +11,10 @@ import socket from .models import Response -from .packages.urllib3 import Retry from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce +from .packages.urllib3.util.retry import Retry from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, prepend_scheme_if_needed, get_auth_from_url, urldefragauth) @@ -26,9 +26,10 @@ from .packages.urllib3.exceptions import ProtocolError from .packages.urllib3.exceptions import ReadTimeoutError from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import ResponseError from .cookies import extract_cookies_to_jar from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError) + ProxyError, RetryError) from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False @@ -60,8 +61,12 @@ class HTTPAdapter(BaseAdapter): :param pool_connections: The number of urllib3 connection pools to cache. :param pool_maxsize: The maximum number of connections to save in the pool. :param int max_retries: The maximum number of retries each connection - should attempt. Note, this applies only to failed connections and - timeouts, never to requests where the server returns a response. + should attempt. Note, this applies only to failed DNS lookups, socket + connections and connection timeouts, never to requests where data has + made it to the server. By default, Requests does not retry failed + connections. If you need granular control over the conditions under + which we retry a request, import urllib3's ``Retry`` class and pass + that instead. :param pool_block: Whether the connection pool should block for connections. Usage:: @@ -77,7 +82,10 @@ class HTTPAdapter(BaseAdapter): def __init__(self, pool_connections=DEFAULT_POOLSIZE, pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES, pool_block=DEFAULT_POOLBLOCK): - self.max_retries = max_retries + if max_retries == DEFAULT_RETRIES: + self.max_retries = Retry(0, read=False) + else: + self.max_retries = Retry.from_int(max_retries) self.config = {} self.proxy_manager = {} @@ -123,7 +131,7 @@ def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool self._pool_block = block self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, - block=block, **pool_kwargs) + block=block, strict=True, **pool_kwargs) def proxy_manager_for(self, proxy, **proxy_kwargs): """Return urllib3 ProxyManager for the given proxy. 
@@ -358,7 +366,7 @@ def send(self, request, stream=False, timeout=None, verify=True, cert=None, prox assert_same_host=False, preload_content=False, decode_content=False, - retries=Retry(self.max_retries, read=False), + retries=self.max_retries, timeout=timeout ) @@ -410,6 +418,9 @@ def send(self, request, stream=False, timeout=None, verify=True, cert=None, prox if isinstance(e.reason, ConnectTimeoutError): raise ConnectTimeout(e, request=request) + if isinstance(e.reason, ResponseError): + raise RetryError(e, request=request) + raise ConnectionError(e, request=request) except _ProxyError as e: diff --git a/requests/api.py b/requests/api.py index 4eaaf9e651..98c92298eb 100644 --- a/requests/api.py +++ b/requests/api.py @@ -16,7 +16,6 @@ def request(method, url, **kwargs): """Constructs and sends a :class:`Request `. - Returns :class:`Response ` object. :param method: method for the new :class:`Request` object. :param url: URL for the new :class:`Request` object. @@ -37,6 +36,8 @@ def request(method, url, **kwargs): :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. :param stream: (optional) if ``False``, the response content will be immediately downloaded. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. + :return: :class:`Response ` object + :rtype: requests.Response Usage:: @@ -46,14 +47,21 @@ def request(method, url, **kwargs): """ session = sessions.Session() - return session.request(method=method, url=url, **kwargs) + response = session.request(method=method, url=url, **kwargs) + # By explicitly closing the session, we avoid leaving sockets open which + # can trigger a ResourceWarning in some cases, and look like a memory leak + # in others. + session.close() + return response def get(url, **kwargs): - """Sends a GET request. Returns :class:`Response` object. + """Sends a GET request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) @@ -61,10 +69,12 @@ def get(url, **kwargs): def options(url, **kwargs): - """Sends a OPTIONS request. Returns :class:`Response` object. + """Sends a OPTIONS request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) @@ -72,10 +82,12 @@ def options(url, **kwargs): def head(url, **kwargs): - """Sends a HEAD request. Returns :class:`Response` object. + """Sends a HEAD request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', False) @@ -83,44 +95,52 @@ def head(url, **kwargs): def post(url, data=None, json=None, **kwargs): - """Sends a POST request. Returns :class:`Response` object. + """Sends a POST request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. 
+ :return: :class:`Response ` object + :rtype: requests.Response """ return request('post', url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): - """Sends a PUT request. Returns :class:`Response` object. + """Sends a PUT request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('put', url, data=data, **kwargs) def patch(url, data=None, **kwargs): - """Sends a PATCH request. Returns :class:`Response` object. + """Sends a PATCH request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('patch', url, data=data, **kwargs) def delete(url, **kwargs): - """Sends a DELETE request. Returns :class:`Response` object. + """Sends a DELETE request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('delete', url, **kwargs) diff --git a/requests/auth.py b/requests/auth.py index 9b6426dc06..d1c4825176 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -17,6 +17,7 @@ from .compat import urlparse, str from .cookies import extract_cookies_to_jar from .utils import parse_dict_header, to_native_string +from .status_codes import codes CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' @@ -66,6 +67,7 @@ def __init__(self, username, password): self.nonce_count = 0 self.chal = {} self.pos = None + self.num_401_calls = 1 def build_digest_header(self, method, url): @@ -122,13 +124,15 @@ def sha_utf8(x): s += os.urandom(8) cnonce = (hashlib.sha1(s).hexdigest()[:16]) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2) if _algorithm == 'MD5-SESS': HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) if qop is None: respdig = KD(HA1, "%s:%s" % (nonce, HA2)) elif qop == 'auth' or 'auth' in qop.split(','): + noncebit = "%s:%s:%s:%s:%s" % ( + nonce, ncvalue, cnonce, 'auth', HA2 + ) respdig = KD(HA1, noncebit) else: # XXX handle auth-int. @@ -150,6 +154,11 @@ def sha_utf8(x): return 'Digest %s' % (base) + def handle_redirect(self, r, **kwargs): + """Reset num_401_calls counter on redirects.""" + if r.is_redirect: + self.num_401_calls = 1 + def handle_401(self, r, **kwargs): """Takes the given response and tries digest-auth, if needed.""" @@ -162,7 +171,7 @@ def handle_401(self, r, **kwargs): if 'digest' in s_auth.lower() and num_401_calls < 2: - setattr(self, 'num_401_calls', num_401_calls + 1) + self.num_401_calls += 1 pat = re.compile(r'digest ', flags=re.IGNORECASE) self.chal = parse_dict_header(pat.sub('', s_auth, count=1)) @@ -182,7 +191,7 @@ def handle_401(self, r, **kwargs): return _r - setattr(self, 'num_401_calls', 1) + self.num_401_calls = 1 return r def __call__(self, r): @@ -192,6 +201,11 @@ def __call__(self, r): try: self.pos = r.body.tell() except AttributeError: - pass + # In the case of HTTPDigestAuth being reused and the body of + # the previous request was a file-like object, pos has the + # file position of the previous body. Ensure it's set to + # None. 
+ self.pos = None r.register_hook('response', self.handle_401) + r.register_hook('response', self.handle_redirect) return r diff --git a/requests/compat.py b/requests/compat.py index be5a1ed6c1..70edff7849 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -21,62 +21,10 @@ #: Python 3.x? is_py3 = (_ver[0] == 3) -#: Python 3.0.x -is_py30 = (is_py3 and _ver[1] == 0) - -#: Python 3.1.x -is_py31 = (is_py3 and _ver[1] == 1) - -#: Python 3.2.x -is_py32 = (is_py3 and _ver[1] == 2) - -#: Python 3.3.x -is_py33 = (is_py3 and _ver[1] == 3) - -#: Python 3.4.x -is_py34 = (is_py3 and _ver[1] == 4) - -#: Python 2.7.x -is_py27 = (is_py2 and _ver[1] == 7) - -#: Python 2.6.x -is_py26 = (is_py2 and _ver[1] == 6) - -#: Python 2.5.x -is_py25 = (is_py2 and _ver[1] == 5) - -#: Python 2.4.x -is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice. - - -# --------- -# Platforms -# --------- - - -# Syntax sugar. -_ver = sys.version.lower() - -is_pypy = ('pypy' in _ver) -is_jython = ('jython' in _ver) -is_ironpython = ('iron' in _ver) - -# Assume CPython, if nothing else. -is_cpython = not any((is_pypy, is_jython, is_ironpython)) - -# Windows-based system. -is_windows = 'win32' in str(sys.platform).lower() - -# Standard Linux 2+ system. -is_linux = ('linux' in str(sys.platform).lower()) -is_osx = ('darwin' in str(sys.platform).lower()) -is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess. -is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. - try: import simplejson as json except (ImportError, SyntaxError): - # simplejson does not support Python 3.2, it thows a SyntaxError + # simplejson does not support Python 3.2, it throws a SyntaxError # because of u'...' Unicode literals. import json @@ -99,7 +47,6 @@ basestring = basestring numeric_types = (int, long, float) - elif is_py3: from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag from urllib.request import parse_http_list, getproxies, proxy_bypass diff --git a/requests/cookies.py b/requests/cookies.py index 831c49c6d2..6969fe5cc4 100644 --- a/requests/cookies.py +++ b/requests/cookies.py @@ -157,26 +157,28 @@ class CookieConflictError(RuntimeError): class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): - """Compatibility class; is a cookielib.CookieJar, but exposes a dict interface. + """Compatibility class; is a cookielib.CookieJar, but exposes a dict + interface. This is the CookieJar we create by default for requests and sessions that don't specify one, since some clients may expect response.cookies and session.cookies to support dict operations. - Don't use the dict interface internally; it's just for compatibility with - with external client code. All `requests` code should work out of the box - with externally provided instances of CookieJar, e.g., LWPCookieJar and - FileCookieJar. - - Caution: dictionary operations that are normally O(1) may be O(n). + Requests does not use the dict interface internally; it's just for + compatibility with external client code. All requests code should work + out of the box with externally provided instances of ``CookieJar``, e.g. + ``LWPCookieJar`` and ``FileCookieJar``. Unlike a regular CookieJar, this class is pickleable. - """ + .. warning:: dictionary operations that are normally O(1) may be O(n). 
+ """ def get(self, name, default=None, domain=None, path=None): """Dict-like get() that also supports optional domain and path args in order to resolve naming collisions from using one cookie jar over - multiple domains. Caution: operation is O(n), not O(1).""" + multiple domains. + + .. warning:: operation is O(n), not O(1).""" try: return self._find_no_duplicates(name, domain, path) except KeyError: @@ -199,37 +201,38 @@ def set(self, name, value, **kwargs): return c def iterkeys(self): - """Dict-like iterkeys() that returns an iterator of names of cookies from the jar. - See itervalues() and iteritems().""" + """Dict-like iterkeys() that returns an iterator of names of cookies + from the jar. See itervalues() and iteritems().""" for cookie in iter(self): yield cookie.name def keys(self): - """Dict-like keys() that returns a list of names of cookies from the jar. - See values() and items().""" + """Dict-like keys() that returns a list of names of cookies from the + jar. See values() and items().""" return list(self.iterkeys()) def itervalues(self): - """Dict-like itervalues() that returns an iterator of values of cookies from the jar. - See iterkeys() and iteritems().""" + """Dict-like itervalues() that returns an iterator of values of cookies + from the jar. See iterkeys() and iteritems().""" for cookie in iter(self): yield cookie.value def values(self): - """Dict-like values() that returns a list of values of cookies from the jar. - See keys() and items().""" + """Dict-like values() that returns a list of values of cookies from the + jar. See keys() and items().""" return list(self.itervalues()) def iteritems(self): - """Dict-like iteritems() that returns an iterator of name-value tuples from the jar. - See iterkeys() and itervalues().""" + """Dict-like iteritems() that returns an iterator of name-value tuples + from the jar. See iterkeys() and itervalues().""" for cookie in iter(self): yield cookie.name, cookie.value def items(self): - """Dict-like items() that returns a list of name-value tuples from the jar. - See keys() and values(). Allows client-code to call "dict(RequestsCookieJar) - and get a vanilla python dict of key value pairs.""" + """Dict-like items() that returns a list of name-value tuples from the + jar. See keys() and values(). Allows client-code to call + ``dict(RequestsCookieJar)`` and get a vanilla python dict of key value + pairs.""" return list(self.iteritems()) def list_domains(self): @@ -259,8 +262,9 @@ def multiple_domains(self): return False # there is only one domain in jar def get_dict(self, domain=None, path=None): - """Takes as an argument an optional domain and path and returns a plain old - Python dict of name-value pairs of cookies that meet the requirements.""" + """Takes as an argument an optional domain and path and returns a plain + old Python dict of name-value pairs of cookies that meet the + requirements.""" dictionary = {} for cookie in iter(self): if (domain is None or cookie.domain == domain) and (path is None @@ -269,21 +273,24 @@ def get_dict(self, domain=None, path=None): return dictionary def __getitem__(self, name): - """Dict-like __getitem__() for compatibility with client code. Throws exception - if there are more than one cookie with name. In that case, use the more - explicit get() method instead. Caution: operation is O(n), not O(1).""" + """Dict-like __getitem__() for compatibility with client code. Throws + exception if there are more than one cookie with name. In that case, + use the more explicit get() method instead. + + .. 
warning:: operation is O(n), not O(1).""" return self._find_no_duplicates(name) def __setitem__(self, name, value): - """Dict-like __setitem__ for compatibility with client code. Throws exception - if there is already a cookie of that name in the jar. In that case, use the more - explicit set() method instead.""" + """Dict-like __setitem__ for compatibility with client code. Throws + exception if there is already a cookie of that name in the jar. In that + case, use the more explicit set() method instead.""" self.set(name, value) def __delitem__(self, name): - """Deletes a cookie given a name. Wraps cookielib.CookieJar's remove_cookie_by_name().""" + """Deletes a cookie given a name. Wraps ``cookielib.CookieJar``'s + ``remove_cookie_by_name()``.""" remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): @@ -300,10 +307,11 @@ def update(self, other): super(RequestsCookieJar, self).update(other) def _find(self, name, domain=None, path=None): - """Requests uses this method internally to get cookie values. Takes as args name - and optional domain and path. Returns a cookie.value. If there are conflicting cookies, - _find arbitrarily chooses one. See _find_no_duplicates if you want an exception thrown - if there are conflicting cookies.""" + """Requests uses this method internally to get cookie values. Takes as + args name and optional domain and path. Returns a cookie.value. If + there are conflicting cookies, _find arbitrarily chooses one. See + _find_no_duplicates if you want an exception thrown if there are + conflicting cookies.""" for cookie in iter(self): if cookie.name == name: if domain is None or cookie.domain == domain: @@ -313,10 +321,11 @@ def _find(self, name, domain=None, path=None): raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) def _find_no_duplicates(self, name, domain=None, path=None): - """__get_item__ and get call _find_no_duplicates -- never used in Requests internally. - Takes as args name and optional domain and path. Returns a cookie.value. - Throws KeyError if cookie is not found and CookieConflictError if there are - multiple cookies that match name and optionally domain and path.""" + """Both ``__get_item__`` and ``get`` call this function: it's never + used elsewhere in Requests. Takes as args name and optional domain and + path. Returns a cookie.value. 
Throws KeyError if cookie is not found + and CookieConflictError if there are multiple cookies that match name + and optionally domain and path.""" toReturn = None for cookie in iter(self): if cookie.name == name: @@ -440,7 +449,7 @@ def merge_cookies(cookiejar, cookies): """ if not isinstance(cookiejar, cookielib.CookieJar): raise ValueError('You can only merge into CookieJar') - + if isinstance(cookies, dict): cookiejar = cookiejar_from_dict( cookies, cookiejar=cookiejar, overwrite=False) diff --git a/requests/exceptions.py b/requests/exceptions.py index 34c7a0dbff..89135a802e 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -90,5 +90,10 @@ class ChunkedEncodingError(RequestException): class ContentDecodingError(RequestException, BaseHTTPError): """Failed to decode response content""" + class StreamConsumedError(RequestException, TypeError): """The content for this response was already consumed""" + + +class RetryError(RequestException): + """Custom retries logic failed""" diff --git a/requests/models.py b/requests/models.py index 17e5598848..419cf0a8b5 100644 --- a/requests/models.py +++ b/requests/models.py @@ -20,11 +20,10 @@ from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url from .packages.urllib3.exceptions import ( - DecodeError, ReadTimeoutError, ProtocolError) + DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) from .exceptions import ( - HTTPError, RequestException, MissingSchema, InvalidURL, - ChunkedEncodingError, ContentDecodingError, ConnectionError, - StreamConsumedError) + HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, + ContentDecodingError, ConnectionError, StreamConsumedError) from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, @@ -144,12 +143,13 @@ def _encode_files(files, data): else: fn = guess_filename(v) or k fp = v - if isinstance(fp, str): - fp = StringIO(fp) - if isinstance(fp, bytes): - fp = BytesIO(fp) - rf = RequestField(name=k, data=fp.read(), + if isinstance(fp, (str, bytes, bytearray)): + fdata = fp + else: + fdata = fp.read() + + rf = RequestField(name=k, data=fdata, filename=fn, headers=fh) rf.make_multipart(content_type=ft) new_fields.append(rf) @@ -351,7 +351,10 @@ def prepare_url(self, url, params): return # Support for unicode domain names and paths. - scheme, auth, host, port, path, query, fragment = parse_url(url) + try: + scheme, auth, host, port, path, query, fragment = parse_url(url) + except LocationParseError as e: + raise InvalidURL(*e.args) if not scheme: raise MissingSchema("Invalid URL {0!r}: No schema supplied. " @@ -472,7 +475,7 @@ def prepare_content_length(self, body): l = super_len(body) if l: self.headers['Content-Length'] = builtin_str(l) - elif self.method not in ('GET', 'HEAD'): + elif (self.method not in ('GET', 'HEAD')) and (self.headers.get('Content-Length') is None): self.headers['Content-Length'] = '0' def prepare_auth(self, auth, url=''): @@ -615,7 +618,7 @@ def __iter__(self): def ok(self): try: self.raise_for_status() - except RequestException: + except HTTPError: return False return True @@ -682,10 +685,12 @@ def generate(): return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): + def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None, delimiter=None): """Iterates over the response data, one line at a time. 
When stream=True is set on the request, this avoids reading the content at once into memory for large responses. + + .. note:: This method is not reentrant safe. """ pending = None @@ -694,7 +699,11 @@ def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): if pending is not None: chunk = pending + chunk - lines = chunk.splitlines() + + if delimiter: + lines = chunk.split(delimiter) + else: + lines = chunk.splitlines() if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]: pending = lines.pop() diff --git a/requests/packages/__init__.py b/requests/packages/__init__.py index d62c4b7111..4dcf870f3b 100644 --- a/requests/packages/__init__.py +++ b/requests/packages/__init__.py @@ -1,3 +1,107 @@ +""" +Copyright (c) Donald Stufft, pip, and individual contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" from __future__ import absolute_import -from . import urllib3 +import sys + + +class VendorAlias(object): + + def __init__(self, package_names): + self._package_names = package_names + self._vendor_name = __name__ + self._vendor_pkg = self._vendor_name + "." + self._vendor_pkgs = [ + self._vendor_pkg + name for name in self._package_names + ] + + def find_module(self, fullname, path=None): + if fullname.startswith(self._vendor_pkg): + return self + + def load_module(self, name): + # Ensure that this only works for the vendored name + if not name.startswith(self._vendor_pkg): + raise ImportError( + "Cannot import %s, must be a subpackage of '%s'." % ( + name, self._vendor_name, + ) + ) + + if not (name == self._vendor_name or + any(name.startswith(pkg) for pkg in self._vendor_pkgs)): + raise ImportError( + "Cannot import %s, must be one of %s." % ( + name, self._vendor_pkgs + ) + ) + + # Check to see if we already have this item in sys.modules, if we do + # then simply return that. + if name in sys.modules: + return sys.modules[name] + + # Check to see if we can import the vendor name + try: + # We do this dance here because we want to try and import this + # module without hitting a recursion error because of a bunch of + # VendorAlias instances on sys.meta_path + real_meta_path = sys.meta_path[:] + try: + sys.meta_path = [ + m for m in sys.meta_path + if not isinstance(m, VendorAlias) + ] + __import__(name) + module = sys.modules[name] + finally: + # Re-add any additions to sys.meta_path that were made while + # during the import we just did, otherwise things like + # requests.packages.urllib3.poolmanager will fail. 
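+                # (sys.meta_path still refers to the filtered copy at this
+                # point, so finders registered by the nested import live
+                # only on that copy until the loop below carries them
+                # across.)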
+ for m in sys.meta_path: + if m not in real_meta_path: + real_meta_path.append(m) + + # Restore sys.meta_path with any new items. + sys.meta_path = real_meta_path + except ImportError: + # We can't import the vendor name, so we'll try to import the + # "real" name. + real_name = name[len(self._vendor_pkg):] + try: + __import__(real_name) + module = sys.modules[real_name] + except ImportError: + raise ImportError("No module named '%s'" % (name,)) + + # If we've gotten here we've found the module we're looking for, either + # as part of our vendored package, or as the real name, so we'll add + # it to sys.modules as the vendored name so that we don't have to do + # the lookup again. + sys.modules[name] = module + + # Finally, return the loaded module + return module + + +sys.meta_path.append(VendorAlias(["urllib3", "chardet"])) diff --git a/requests/packages/chardet/__init__.py b/requests/packages/chardet/__init__.py index e4f0799d62..82c2a48d29 100644 --- a/requests/packages/chardet/__init__.py +++ b/requests/packages/chardet/__init__.py @@ -15,7 +15,7 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -__version__ = "2.2.1" +__version__ = "2.3.0" from sys import version_info diff --git a/requests/packages/chardet/chardetect.py b/requests/packages/chardet/chardetect.py index ecd0163be7..ffe892f25d 100755 --- a/requests/packages/chardet/chardetect.py +++ b/requests/packages/chardet/chardetect.py @@ -12,34 +12,68 @@ If no paths are provided, it takes its input from stdin. """ + +from __future__ import absolute_import, print_function, unicode_literals + +import argparse +import sys from io import open -from sys import argv, stdin +from chardet import __version__ from chardet.universaldetector import UniversalDetector -def description_of(file, name='stdin'): - """Return a string describing the probable encoding of a file.""" +def description_of(lines, name='stdin'): + """ + Return a string describing the probable encoding of a file or + list of strings. + + :param lines: The lines to get the encoding of. + :type lines: Iterable of bytes + :param name: Name of file or collection of lines + :type name: str + """ u = UniversalDetector() - for line in file: + for line in lines: u.feed(line) u.close() result = u.result if result['encoding']: - return '%s: %s with confidence %s' % (name, - result['encoding'], - result['confidence']) + return '{0}: {1} with confidence {2}'.format(name, result['encoding'], + result['confidence']) else: - return '%s: no result' % name + return '{0}: no result'.format(name) -def main(): - if len(argv) <= 1: - print(description_of(stdin)) - else: - for path in argv[1:]: - with open(path, 'rb') as f: - print(description_of(f, path)) +def main(argv=None): + ''' + Handles command line arguments and gets things started. + + :param argv: List of arguments, as if specified on the command-line. + If None, ``sys.argv[1:]`` is used instead. 
+ :type argv: list of str + ''' + # Get command line arguments + parser = argparse.ArgumentParser( + description="Takes one or more file paths and reports their detected \ + encodings", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + conflict_handler='resolve') + parser.add_argument('input', + help='File whose encoding we would like to determine.', + type=argparse.FileType('rb'), nargs='*', + default=[sys.stdin]) + parser.add_argument('--version', action='version', + version='%(prog)s {0}'.format(__version__)) + args = parser.parse_args(argv) + + for f in args.input: + if f.isatty(): + print("You are running chardetect interactively. Press " + + "CTRL-D twice at the start of a blank line to signal the " + + "end of your input. If you want help, run chardetect " + + "--help\n", file=sys.stderr) + print(description_of(f, f.name)) if __name__ == '__main__': diff --git a/requests/packages/chardet/jpcntx.py b/requests/packages/chardet/jpcntx.py index f7f69ba4cd..59aeb6a878 100644 --- a/requests/packages/chardet/jpcntx.py +++ b/requests/packages/chardet/jpcntx.py @@ -177,6 +177,12 @@ def get_order(self, aBuf): return -1, 1 class SJISContextAnalysis(JapaneseContextAnalysis): + def __init__(self): + self.charset_name = "SHIFT_JIS" + + def get_charset_name(self): + return self.charset_name + def get_order(self, aBuf): if not aBuf: return -1, 1 @@ -184,6 +190,8 @@ def get_order(self, aBuf): first_char = wrap_ord(aBuf[0]) if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): charLen = 2 + if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): + self.charset_name = "CP932" else: charLen = 1 diff --git a/requests/packages/chardet/latin1prober.py b/requests/packages/chardet/latin1prober.py index ad695f57a7..eef3573543 100644 --- a/requests/packages/chardet/latin1prober.py +++ b/requests/packages/chardet/latin1prober.py @@ -129,11 +129,11 @@ def get_confidence(self): if total < 0.01: confidence = 0.0 else: - confidence = ((self._mFreqCounter[3] / total) - - (self._mFreqCounter[1] * 20.0 / total)) + confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) + / total) if confidence < 0.0: confidence = 0.0 # lower the confidence of latin1 so that other more accurate # detector can take priority. 
- confidence = confidence * 0.5 + confidence = confidence * 0.73 return confidence diff --git a/requests/packages/chardet/mbcssm.py b/requests/packages/chardet/mbcssm.py index 3f93cfb045..efe678ca03 100644 --- a/requests/packages/chardet/mbcssm.py +++ b/requests/packages/chardet/mbcssm.py @@ -353,7 +353,7 @@ 2,2,2,2,2,2,2,2, # 68 - 6f 2,2,2,2,2,2,2,2, # 70 - 77 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,3,3,3, # 80 - 87 + 3,3,3,3,3,2,2,3, # 80 - 87 3,3,3,3,3,3,3,3, # 88 - 8f 3,3,3,3,3,3,3,3, # 90 - 97 3,3,3,3,3,3,3,3, # 98 - 9f @@ -369,9 +369,8 @@ 2,2,2,2,2,2,2,2, # d8 - df 3,3,3,3,3,3,3,3, # e0 - e7 3,3,3,3,3,4,4,4, # e8 - ef - 4,4,4,4,4,4,4,4, # f0 - f7 - 4,4,4,4,4,0,0,0 # f8 - ff -) + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,0,0,0) # f8 - ff SJIS_st = ( @@ -571,5 +570,3 @@ 'stateTable': UTF8_st, 'charLenTable': UTF8CharLenTable, 'name': 'UTF-8'} - -# flake8: noqa diff --git a/requests/packages/chardet/sjisprober.py b/requests/packages/chardet/sjisprober.py index b173614e68..cd0e9e7078 100644 --- a/requests/packages/chardet/sjisprober.py +++ b/requests/packages/chardet/sjisprober.py @@ -47,7 +47,7 @@ def reset(self): self._mContextAnalyzer.reset() def get_charset_name(self): - return "SHIFT_JIS" + return self._mContextAnalyzer.get_charset_name() def feed(self, aBuf): aLen = len(aBuf) diff --git a/requests/packages/chardet/universaldetector.py b/requests/packages/chardet/universaldetector.py index 9a03ad3d89..476522b999 100644 --- a/requests/packages/chardet/universaldetector.py +++ b/requests/packages/chardet/universaldetector.py @@ -71,9 +71,9 @@ def feed(self, aBuf): if not self._mGotData: # If the data starts with BOM, we know it is UTF - if aBuf[:3] == codecs.BOM: + if aBuf[:3] == codecs.BOM_UTF8: # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8", 'confidence': 1.0} + self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} elif aBuf[:4] == codecs.BOM_UTF32_LE: # FF FE 00 00 UTF-32, little-endian BOM self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index 4b36b5aeeb..0660b9c83a 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -4,7 +4,7 @@ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = 'dev' +__version__ = '1.10.2' from .connectionpool import ( @@ -55,9 +55,9 @@ def add_stderr_logger(level=logging.DEBUG): del NullHandler -# Set security warning to only go off once by default. +# Set security warning to always go off by default. 
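+# ('always' makes the warning fire on every occurrence; the previous
+# 'module' filter fired at most once per module.)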
import warnings -warnings.simplefilter('module', exceptions.SecurityWarning) +warnings.simplefilter('always', exceptions.SecurityWarning) def disable_warnings(category=exceptions.HTTPWarning): """ diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index d77ebb8df7..cc424de0f4 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -1,7 +1,7 @@ from collections import Mapping, MutableMapping try: from threading import RLock -except ImportError: # Platform-specific: No threads available +except ImportError: # Platform-specific: No threads available class RLock: def __enter__(self): pass @@ -10,11 +10,11 @@ def __exit__(self, exc_type, exc_value, traceback): pass -try: # Python 2.7+ +try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict -from .packages.six import itervalues +from .packages.six import iterkeys, itervalues, PY3 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -85,8 +85,7 @@ def __iter__(self): def clear(self): with self.lock: # Copy pointers to all values, then wipe the mapping - # under Python 2, this copies the list of values twice :-| - values = list(self._container.values()) + values = list(itervalues(self._container)) self._container.clear() if self.dispose_func: @@ -95,10 +94,17 @@ def clear(self): def keys(self): with self.lock: - return self._container.keys() + return list(iterkeys(self._container)) -class HTTPHeaderDict(MutableMapping): +_dict_setitem = dict.__setitem__ +_dict_getitem = dict.__getitem__ +_dict_delitem = dict.__delitem__ +_dict_contains = dict.__contains__ +_dict_setdefault = dict.setdefault + + +class HTTPHeaderDict(dict): """ :param headers: An iterable of field-value pairs. Must not contain multiple field names @@ -130,25 +136,75 @@ class HTTPHeaderDict(MutableMapping): 'foo=bar, baz=quxx' >>> headers['Content-Length'] '7' - - If you want to access the raw headers with their original casing - for debugging purposes you can access the private ``._data`` attribute - which is a normal python ``dict`` that maps the case-insensitive key to a - list of tuples stored as (case-sensitive-original-name, value). 
Using the - structure from above as our example: - - >>> headers._data - {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], - 'content-length': [('content-length', '7')]} """ def __init__(self, headers=None, **kwargs): - self._data = {} - if headers is None: - headers = {} - self.update(headers, **kwargs) + dict.__init__(self) + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) + + def __setitem__(self, key, val): + return _dict_setitem(self, key.lower(), (key, val)) + + def __getitem__(self, key): + val = _dict_getitem(self, key.lower()) + return ', '.join(val[1:]) + + def __delitem__(self, key): + return _dict_delitem(self, key.lower()) - def add(self, key, value): + def __contains__(self, key): + return _dict_contains(self, key.lower()) + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other) + + def __ne__(self, other): + return not self.__eq__(other) + + values = MutableMapping.values + get = MutableMapping.get + update = MutableMapping.update + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): """Adds a (name, value) pair, doesn't overwrite the value if it already exists. @@ -157,43 +213,108 @@ def add(self, key, value): >>> headers['foo'] 'bar, baz' """ - self._data.setdefault(key.lower(), []).append((key, value)) + key_lower = key.lower() + new_vals = key, val + # Keep the common case aka no item present as fast as possible + vals = _dict_setdefault(self, key_lower, new_vals) + if new_vals is not vals: + # new_vals was not inserted, as there was a previous one + if isinstance(vals, list): + # If already several items got inserted, we have a list + vals.append(val) + else: + # vals should be a tuple then, i.e. only one item so far + # Need to convert the tuple to list for further extension + _dict_setitem(self, key_lower, [vals[0], vals[1], val]) + + def extend(*args, **kwargs): + """Generic import function for any type of header-like object. 
+ Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 2: + raise TypeError("update() takes at most 2 positional " + "arguments ({} given)".format(len(args))) + elif not args: + raise TypeError("update() takes at least 1 argument (0 given)") + self = args[0] + other = args[1] if len(args) >= 2 else () + + if isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) def getlist(self, key): """Returns a list of all the values for the named field. Returns an empty list if the key doesn't exist.""" - return self[key].split(', ') if key in self else [] - - def copy(self): - h = HTTPHeaderDict() - for key in self._data: - for rawkey, value in self._data[key]: - h.add(rawkey, value) - return h - - def __eq__(self, other): - if not isinstance(other, Mapping): - return False - other = HTTPHeaderDict(other) - return dict((k1, self[k1]) for k1 in self._data) == \ - dict((k2, other[k2]) for k2 in other._data) - - def __getitem__(self, key): - values = self._data[key.lower()] - return ', '.join(value[1] for value in values) - - def __setitem__(self, key, value): - self._data[key.lower()] = [(key, value)] + try: + vals = _dict_getitem(self, key.lower()) + except KeyError: + return [] + else: + if isinstance(vals, tuple): + return [vals[1]] + else: + return vals[1:] + + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist - def __delitem__(self, key): - del self._data[key.lower()] + def __repr__(self): + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) - def __len__(self): - return len(self._data) + def _copy_from(self, other): + for key in other: + val = _dict_getitem(other, key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + _dict_setitem(self, key, val) - def __iter__(self): - for headers in itervalues(self._data): - yield headers[0][0] - - def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = _dict_getitem(self, key) + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = _dict_getitem(self, key) + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message, duplicates=('set-cookie',)): # Python 2 + """Read headers from a Python 2 httplib message object.""" + ret = cls(message.items()) + # ret now contains only the last header line for each duplicate. + # Importing with all duplicates would be nice, but this would + # mean to repeat most of the raw parsing already done, when the + # message object was created. Extracting only the headers of interest + # separately, the cookies, should be faster and requires less + # extra code. 
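+        # Drop the merged last-seen value for each duplicate-prone header,
+        # then re-add every raw value so getlist() returns all of them.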
+        for key in duplicates:
+            ret.discard(key)
+            for val in message.getheaders(key):
+                ret.add(key, val)
+        return ret
diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py
index c6e1959a2f..e5de769d8c 100644
--- a/requests/packages/urllib3/connection.py
+++ b/requests/packages/urllib3/connection.py
@@ -3,6 +3,7 @@
 import socket
 from socket import timeout as SocketTimeout
 import warnings
+from .packages import six

 try:  # Python 3
     from http.client import HTTPConnection as _HTTPConnection, HTTPException
@@ -26,12 +27,20 @@ class BaseSSLError(BaseException):
         pass


+try:  # Python 3:
+    # Not a no-op, we're adding this to the namespace so it can be imported.
+    ConnectionError = ConnectionError
+except NameError:  # Python 2:
+    class ConnectionError(Exception):
+        pass
+
+
 from .exceptions import (
     ConnectTimeoutError,
     SystemTimeWarning,
+    SecurityWarning,
 )
 from .packages.ssl_match_hostname import match_hostname
-from .packages import six

 from .util.ssl_ import (
     resolve_cert_reqs,
@@ -40,8 +49,8 @@ class BaseSSLError(BaseException):
     assert_fingerprint,
 )

-from .util import connection

+from .util import connection

 port_by_scheme = {
     'http': 80,
@@ -233,8 +242,15 @@ def connect(self):
                                self.assert_fingerprint)
         elif resolved_cert_reqs != ssl.CERT_NONE \
                 and self.assert_hostname is not False:
-            match_hostname(self.sock.getpeercert(),
-                           self.assert_hostname or hostname)
+            cert = self.sock.getpeercert()
+            if not cert.get('subjectAltName', ()):
+                warnings.warn((
+                    'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. '
+                    'This feature is being removed by major browsers and deprecated by RFC 2818. '
+                    '(See https://github.com/shazow/urllib3/issues/497 for details.)'),
+                    SecurityWarning
+                )
+            match_hostname(cert, self.assert_hostname or hostname)

         self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED
                             or self.assert_fingerprint is not None)
diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py
index 9cc2a95541..0085345c43 100644
--- a/requests/packages/urllib3/connectionpool.py
+++ b/requests/packages/urllib3/connectionpool.py
@@ -32,7 +32,7 @@
     port_by_scheme, DummyConnection,
     HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
-    HTTPException, BaseSSLError,
+    HTTPException, BaseSSLError, ConnectionError
 )
 from .request import RequestMethods
 from .response import HTTPResponse
@@ -72,6 +72,21 @@ def __str__(self):
         return '%s(host=%r, port=%r)' % (type(self).__name__,
                                          self.host, self.port)

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        # Return False to re-raise any potential exceptions
+        return False
+
+    def close(self):
+        """
+        Close all pooled connections and disable the pool.
+        """
+        pass
+
+
 # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
@@ -266,6 +281,10 @@ def _validate_conn(self, conn):
         """
         pass

+    def _prepare_proxy(self, conn):
+        # Nothing to do for HTTP connections.
+        pass
+
     def _get_timeout(self, timeout):
         """ Helper that always returns a :class:`urllib3.util.Timeout` """
         if timeout is _Default:
@@ -278,6 +297,23 @@ def _get_timeout(self, timeout):
             # can be removed later
             return Timeout.from_float(timeout)

+    def _raise_timeout(self, err, url, timeout_value):
+        """Is the error actually a timeout? Will raise a ReadTimeout or pass"""
+
+        if isinstance(err, SocketTimeout):
+            raise ReadTimeoutError(self, url, "Read timed out. 
(read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ @@ -301,7 +337,12 @@ def _make_request(self, conn, method, url, timeout=_Default, conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. - self._validate_conn(conn) + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. @@ -327,32 +368,12 @@ def _make_request(self, conn, method, url, timeout=_Default, # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() - except SocketTimeout: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - except BaseSSLError as e: - # Catch possible read timeouts thrown as SSL errors. If not the - # case, rethrow the original. We need to do this because of: - # http://bugs.python.org/issue10272 - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - raise - - except SocketError as e: # Platform-specific: Python 2 - # See the above comment about EAGAIN in Python 3. In Python 2 we - # have to specifically catch it and throw the timeout error - if e.errno in _blocking_errnos: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. @@ -508,11 +529,18 @@ def urlopen(self, method, url, body=None, headers=None, retries=None, try: # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) conn = self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + # Make the request on the httplib connection object. 
httplib_response = self._make_request(conn, method, url, - timeout=timeout, + timeout=timeout_obj, body=body, headers=headers) # If we're going to release the connection in ``finally:``, then @@ -537,26 +565,36 @@ def urlopen(self, method, url, body=None, headers=None, retries=None, raise EmptyPoolError(self, "No pool connections are available.") except (BaseSSLError, CertificateError) as e: - # Release connection unconditionally because there is no way to - # close it externally in case of exception. - release_conn = True + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. + if conn: + conn.close() + conn = None raise SSLError(e) - except (TimeoutError, HTTPException, SocketError) as e: + except SSLError: + # Treat SSLError separately from BaseSSLError to preserve + # traceback. + if conn: + conn.close() + conn = None + raise + + except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: if conn: # Discard the connection for these exceptions. It will be # be replaced during the next _get_conn() call. conn.close() conn = None - stacktrace = sys.exc_info()[2] if isinstance(e, SocketError) and self.proxy: e = ProxyError('Cannot connect to proxy.', e) elif isinstance(e, (SocketError, HTTPException)): e = ProtocolError('Connection aborted.', e) - retries = retries.increment(method, url, error=e, - _pool=self, _stacktrace=stacktrace) + retries = retries.increment(method, url, error=e, _pool=self, + _stacktrace=sys.exc_info()[2]) retries.sleep() # Keep track of the error for the retry warning. @@ -668,23 +706,25 @@ def _prepare_conn(self, conn): assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - if self.proxy is not None: - # Python 2.7+ - try: - set_tunnel = conn.set_tunnel - except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = conn._set_tunnel + return conn - if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older - set_tunnel(self.host, self.port) - else: - set_tunnel(self.host, self.port, self.proxy_headers) + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel - # Establish tunnel connection early, because otherwise httplib - # would improperly set Host: header to proxy's IP:port. - conn.connect() + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) - return conn + conn.connect() def _new_conn(self): """ @@ -725,8 +765,7 @@ def _validate_conn(self, conn): warnings.warn(( 'Unverified HTTPS request is being made. ' 'Adding certificate verification is strongly advised. See: ' - 'https://urllib3.readthedocs.org/en/latest/security.html ' - '(This warning will only appear once by default.)'), + 'https://urllib3.readthedocs.org/en/latest/security.html'), InsecureRequestWarning) diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index b19dbe05fc..7eabd22d52 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -29,7 +29,7 @@ when the required modules are installed. 
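For context, this contrib module is switched on the same way upstream urllib3
documents it; a sketch (``inject_into_urllib3()`` belongs to this module but
is not shown in the hunk, so treat the call as assumed)::

    try:
        from requests.packages.urllib3.contrib import pyopenssl
        pyopenssl.inject_into_urllib3()  # route TLS through PyOpenSSL
    except ImportError:
        pass  # the optional pyOpenSSL/ndg-httpsclient/pyasn1 extras are absent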
Activating this module also has the positive side effect of disabling SSL/TLS -encryption in Python 2 (see `CRIME attack`_). +compression in Python 2 (see `CRIME attack`_). If you want to configure the default list of supported cipher suites, you can set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. @@ -71,9 +71,14 @@ # Map from urllib3 to PyOpenSSL compatible parameter-values. _openssl_versions = { ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, } + +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass + _openssl_verify = { ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, @@ -188,9 +193,12 @@ def recv(self, *args, **kwargs): if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): return b'' else: - raise ssl.SSLError(e) - except OpenSSL.SSL.ZeroReturnError: - return b'' + raise + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise except OpenSSL.SSL.WantReadError: rd, wd, ed = select.select( [self.socket], [], [], self.socket.gettimeout()) @@ -204,11 +212,21 @@ def recv(self, *args, **kwargs): def settimeout(self, timeout): return self.socket.settimeout(timeout) + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + _, wlist, _ = select.select([], [self.socket], [], + self.socket.gettimeout()) + if not wlist: + raise timeout() + continue + def sendall(self, data): - try: - return self.connection.sendall(data) - except OpenSSL.SSL.SysCallError as e: - raise ssl.SSLError(e) + while len(data): + sent = self._send_until_done(data) + data = data[sent:] def close(self): if self._makefile_refs < 1: @@ -256,6 +274,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ssl_version=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: + keyfile = keyfile or certfile # Match behaviour of the normal python ssl library ctx.use_certificate_file(certfile) if keyfile: ctx.use_privatekey_file(keyfile) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 7519ba9805..5d52301122 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -72,11 +72,8 @@ class MaxRetryError(RequestError): def __init__(self, pool, url, reason=None): self.reason = reason - message = "Max retries exceeded with url: %s" % url - if reason: - message += " (Caused by %r)" % reason - else: - message += " (Caused by redirect)" + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) RequestError.__init__(self, pool, url, message) @@ -141,6 +138,12 @@ def __init__(self, location): self.location = location +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + class SecurityWarning(HTTPWarning): "Warned when perfoming security reducing actions" pass @@ -154,3 +157,8 @@ class InsecureRequestWarning(SecurityWarning): class SystemTimeWarning(SecurityWarning): "Warned when system time is suspected to be wrong" pass + + +class InsecurePlatformWarning(SecurityWarning): + "Warned when certain SSL configuration is not available on a platform." 
+ pass diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index 515dc96219..b8d1e745d1 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -8,7 +8,7 @@ from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError +from .exceptions import LocationValueError, MaxRetryError from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry @@ -64,6 +64,14 @@ def __init__(self, num_pools=10, headers=None, **connection_pool_kw): self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + def _new_pool(self, scheme, host, port): """ Create a new :class:`ConnectionPool` based on host, port and scheme. @@ -167,7 +175,14 @@ def urlopen(self, method, url, redirect=True, **kw): if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - kw['retries'] = retries.increment(method, redirect_location) + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + + kw['retries'] = retries kw['redirect'] = redirect log.info("Redirecting %s -> %s" % (url, redirect_location)) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index 51fe2386b7..b08d6c9274 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -118,18 +118,24 @@ def request_encode_body(self, method, url, fields=None, headers=None, which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. 
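The reworked ``fields``/``body`` handling below can be exercised through any
``RequestMethods`` subclass; a sketch (``PoolManager`` inherits
``request_encode_body``, and the URL is only an example)::

    from requests.packages.urllib3 import PoolManager

    http = PoolManager()
    r = http.request_encode_body('POST', 'http://httpbin.org/post',
                                 fields={'key': 'value'})  # multipart by default
    # supplying both fields= and body= now raises TypeError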
""" - if encode_multipart: - body, content_type = encode_multipart_formdata( - fields or {}, boundary=multipart_boundary) - else: - body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') - if headers is None: headers = self.headers - headers_ = {'Content-Type': content_type} - headers_.update(headers) + extra_kw = {'headers': {}} + + if fields: + if 'body' in urlopen_kw: + raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.') + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) - return self.urlopen(method, url, body=body, headers=headers_, - **urlopen_kw) + return self.urlopen(method, url, **extra_kw) diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index e69de95733..34cd3d7057 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -4,12 +4,11 @@ from ._collections import HTTPHeaderDict from .exceptions import ProtocolError, DecodeError, ReadTimeoutError -from .packages.six import string_types as basestring, binary_type +from .packages.six import string_types as basestring, binary_type, PY3 from .connection import HTTPException, BaseSSLError from .util.response import is_fp_closed - class DeflateDecoder(object): def __init__(self): @@ -21,6 +20,9 @@ def __getattr__(self, name): return getattr(self._obj, name) def decompress(self, data): + if not data: + return data + if not self._first_try: return self._obj.decompress(data) @@ -36,9 +38,23 @@ def decompress(self, data): self._data = None +class GzipDecoder(object): + + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + return self._obj.decompress(data) + + def _get_decoder(mode): if mode == 'gzip': - return zlib.decompressobj(16 + zlib.MAX_WBITS) + return GzipDecoder() return DeflateDecoder() @@ -76,9 +92,10 @@ def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = HTTPHeaderDict() - if headers: - self.headers.update(headers) + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) self.status = status self.version = version self.reason = reason @@ -202,7 +219,7 @@ def read(self, amt=None, decode_content=None, cache_content=False): except BaseSSLError as e: # FIXME: Is there a better way to differentiate between SSLErrors? - if not 'read operation timed out' in str(e): # Defensive: + if 'read operation timed out' not in str(e): # Defensive: # This shouldn't happen but just in case we're missing an edge # case, let's avoid swallowing SSL errors. raise @@ -267,14 +284,16 @@ def from_httplib(ResponseCls, r, **response_kw): Remaining parameters are passed to the HTTPResponse constructor, along with ``original_response=r``. 
""" - - headers = HTTPHeaderDict() - for k, v in r.getheaders(): - headers.add(k, v) + headers = r.msg + if not isinstance(headers, HTTPHeaderDict): + if PY3: # Python 3 + headers = HTTPHeaderDict(headers.items()) + else: # Python 2 + headers = HTTPHeaderDict.from_httplib(headers) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) - return ResponseCls(body=r, + resp = ResponseCls(body=r, headers=headers, status=r.status, version=r.version, @@ -282,6 +301,7 @@ def from_httplib(ResponseCls, r, **response_kw): strict=strict, original_response=r, **response_kw) + return resp # Backwards-compatibility methods for httplib.HTTPResponse def getheaders(self): diff --git a/requests/packages/urllib3/util/connection.py b/requests/packages/urllib3/util/connection.py index 2156993a0c..859aec6ee6 100644 --- a/requests/packages/urllib3/util/connection.py +++ b/requests/packages/urllib3/util/connection.py @@ -82,6 +82,7 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, err = _ if sock is not None: sock.close() + sock = None if err is not None: raise err diff --git a/requests/packages/urllib3/util/retry.py b/requests/packages/urllib3/util/retry.py index eb560dfc08..7e0959df37 100644 --- a/requests/packages/urllib3/util/retry.py +++ b/requests/packages/urllib3/util/retry.py @@ -2,10 +2,11 @@ import logging from ..exceptions import ( - ProtocolError, ConnectTimeoutError, - ReadTimeoutError, MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, ) from ..packages import six @@ -36,7 +37,6 @@ class Retry(object): Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless retries are disabled, in which case the causing exception will be raised. - :param int total: Total number of retries to allow. Takes precedence over other counts. @@ -184,13 +184,13 @@ def _is_connection_error(self, err): return isinstance(err, ConnectTimeoutError) def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we can't - assume that the server did not process any of it. + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. """ return isinstance(err, (ReadTimeoutError, ProtocolError)) def is_forced_retry(self, method, status_code): - """ Is this method/response retryable? (Based on method/codes whitelists) + """ Is this method/status code retryable? (Based on method/codes whitelists) """ if self.method_whitelist and method.upper() not in self.method_whitelist: return False @@ -198,8 +198,7 @@ def is_forced_retry(self, method, status_code): return self.status_forcelist and status_code in self.status_forcelist def is_exhausted(self): - """ Are we out of retries? - """ + """ Are we out of retries? """ retry_counts = (self.total, self.connect, self.read, self.redirect) retry_counts = list(filter(None, retry_counts)) if not retry_counts: @@ -230,6 +229,7 @@ def increment(self, method=None, url=None, response=None, error=None, _pool=None connect = self.connect read = self.read redirect = self.redirect + cause = 'unknown' if error and self._is_connection_error(error): # Connect retry? @@ -251,10 +251,16 @@ def increment(self, method=None, url=None, response=None, error=None, _pool=None # Redirect retry? if redirect is not None: redirect -= 1 + cause = 'too many redirects' else: - # FIXME: Nothing changed, scenario doesn't make sense. 
+ # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) new_retry = self.new( total=total, @@ -262,7 +268,7 @@ def increment(self, method=None, url=None, response=None, error=None, _pool=None _observed_errors=_observed_errors) if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error) + raise MaxRetryError(_pool, url, error or ResponseError(cause)) log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) diff --git a/requests/packages/urllib3/util/ssl_.py b/requests/packages/urllib3/util/ssl_.py index 9cfe2d2afb..e7e7dfae18 100644 --- a/requests/packages/urllib3/util/ssl_.py +++ b/requests/packages/urllib3/util/ssl_.py @@ -1,21 +1,96 @@ from binascii import hexlify, unhexlify -from hashlib import md5, sha1 +from hashlib import md5, sha1, sha256 -from ..exceptions import SSLError +from ..exceptions import SSLError, InsecurePlatformWarning -try: # Test for SSL features - SSLContext = None - HAS_SNI = False +SSLContext = None +HAS_SNI = False +create_default_context = None + +import errno +import ssl +import warnings - import ssl +try: # Test for SSL features from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? from ssl import HAS_SNI # Has SNI? except ImportError: pass +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +try: + from ssl import _DEFAULT_CIPHERS +except ImportError: + _DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:' + '!eNULL:!MD5' + ) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = sys.version_info >= (2, 7) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, location): + self.ca_certs = location + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. 
For more information, see ' + 'https://urllib3.readthedocs.org/en/latest/security.html' + '#insecureplatformwarning.', + InsecurePlatformWarning + ) + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + def assert_fingerprint(cert, fingerprint): """ Checks if given fingerprint matches the supplied certificate. @@ -30,7 +105,8 @@ def assert_fingerprint(cert, fingerprint): # this digest. hashfunc_map = { 16: md5, - 20: sha1 + 20: sha1, + 32: sha256, } fingerprint = fingerprint.replace(':', '').lower() @@ -91,42 +167,100 @@ def resolve_ssl_version(candidate): return candidate -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError +def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. 
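The rewritten ``ssl_wrap_socket`` below accepts one of these pre-built
contexts directly; a sketch (``sock`` stands in for an already-connected TCP
socket and is assumed, not shown)::

    from requests.packages.urllib3.util import ssl_

    ctx = ssl_.create_urllib3_context()  # SSLv2/SSLv3 and compression disabled
    wrapped = ssl_.ssl_wrap_socket(sock, server_hostname='example.com',
                                   ssl_context=ctx)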
+ :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or _DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None): + """ + All arguments except for server_hostname and ssl_context have the same + meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + """ + context = ssl_context + if context is None: + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) + raise + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) diff --git a/requests/packages/urllib3/util/url.py b/requests/packages/urllib3/util/url.py index 487d456cf8..b2ec834fe7 100644 --- a/requests/packages/urllib3/util/url.py +++ b/requests/packages/urllib3/util/url.py @@ -40,6 +40,48 @@ def netloc(self): return '%s:%d' % (self.host, self.port) return self.host + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). 
+ + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url def split_first(s, delims): """ @@ -84,7 +126,7 @@ def parse_url(url): Example:: >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) >>> parse_url('/foo?bar') @@ -162,7 +204,6 @@ def parse_url(url): return Url(scheme, auth, host, port, path, query, fragment) - def get_host(url): """ Deprecated. Use :func:`.parse_url` instead. diff --git a/requests/sessions.py b/requests/sessions.py index d701ff2ef8..ef3f22bc5c 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -13,7 +13,7 @@ from datetime import datetime from .auth import _basic_auth_str -from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str +from .compat import cookielib, OrderedDict, urljoin, urlparse from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT @@ -21,6 +21,7 @@ from .utils import to_key_val_list, default_headers, to_native_string from .exceptions import ( TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) +from .packages.urllib3._collections import RecentlyUsedContainer from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter @@ -35,6 +36,8 @@ # formerly defined here, reexposed here for backward compatibility from .models import REDIRECT_STATI +REDIRECT_CACHE_SIZE = 1000 + def merge_setting(request_setting, session_setting, dict_class=OrderedDict): """ @@ -128,7 +131,7 @@ def resolve_redirects(self, resp, req, stream=False, timeout=None, # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. - if not urlparse(url).netloc: + if not parsed.netloc: url = urljoin(resp.url, requote_uri(url)) else: url = requote_uri(url) @@ -168,7 +171,10 @@ def resolve_redirects(self, resp, req, stream=False, timeout=None, except KeyError: pass - extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw) + # Extract any cookies sent on the response to the cookiejar + # in the new request. Because we've mutated our copied prepared + # request, use the old one that we haven't yet touched. 
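Before the sessions changes continue: the ``Url.url`` property added in the
hunk above round-trips with ``parse_url``, e.g.::

    from requests.packages.urllib3.util.url import parse_url

    u = parse_url('http://google.com/mail/')
    u.url   # 'http://google.com/mail/'
    str(u)  # identical -- __str__ now delegates to .url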
+            extract_cookies_to_jar(prepared_request._cookies, req, resp.raw)
             prepared_request._cookies.update(self.cookies)
             prepared_request.prepare_cookies(prepared_request._cookies)
@@ -273,7 +279,7 @@ class Session(SessionRedirectMixin):
     __attrs__ = [
         'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
         'cert', 'prefetch', 'adapters', 'stream', 'trust_env',
-        'max_redirects', 'redirect_cache'
+        'max_redirects',
     ]

     def __init__(self):
@@ -327,7 +333,8 @@ def __init__(self):
         self.mount('https://', HTTPAdapter())
         self.mount('http://', HTTPAdapter())

-        self.redirect_cache = {}
+        # Only store 1000 redirects to prevent using infinite memory
+        self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE)

     def __enter__(self):
         return self
@@ -425,7 +432,7 @@ def request(self, method, url,
             If Tuple, ('cert', 'key') pair.
         """

-        method = builtin_str(method)
+        method = to_native_string(method)

         # Create the Request.
         req = Request(
@@ -658,12 +665,19 @@ def mount(self, prefix, adapter):
             self.adapters[key] = self.adapters.pop(key)

     def __getstate__(self):
-        return dict((attr, getattr(self, attr, None)) for attr in self.__attrs__)
+        state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__)
+        state['redirect_cache'] = dict(self.redirect_cache)
+        return state

     def __setstate__(self, state):
+        redirect_cache = state.pop('redirect_cache', {})
         for attr, value in state.items():
             setattr(self, attr, value)

+        self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE)
+        for redirect, to in redirect_cache.items():
+            self.redirect_cache[redirect] = to
+

 def session():
     """Returns a :class:`Session` for context-management."""
diff --git a/requests/utils.py b/requests/utils.py
index 182348daad..8fba62dd82 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -19,12 +19,14 @@
 import sys
 import socket
 import struct
+import warnings

 from . import __version__
 from . import certs
 from .compat import parse_http_list as _parse_list_header
 from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
-                     builtin_str, getproxies, proxy_bypass, urlunparse)
+                     builtin_str, getproxies, proxy_bypass, urlunparse,
+                     basestring)
 from .cookies import RequestsCookieJar, cookiejar_from_dict
 from .structures import CaseInsensitiveDict
 from .exceptions import InvalidURL
@@ -114,7 +116,8 @@ def get_netrc_auth(url):
 def guess_filename(obj):
     """Tries to guess the filename of the given object."""
     name = getattr(obj, 'name', None)
-    if name and name[0] != '<' and name[-1] != '>':
+    if (name and isinstance(name, basestring) and name[0] != '<' and
+            name[-1] != '>'):
         return os.path.basename(name)
@@ -287,6 +290,11 @@ def get_encodings_from_content(content):

     :param content: bytestring to extract encodings from.
     """
+    warnings.warn((
+        'In requests 3.0, get_encodings_from_content will be removed. For '
+        'more information, please see the discussion on issue #2266. (This'
+        ' warning should only appear once.)'),
+        DeprecationWarning)

     charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
     pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
@@ -354,6 +362,11 @@ def get_unicode_from_response(r):
     2. fall back and replace all unicode characters

     """
+    warnings.warn((
+        'In requests 3.0, get_unicode_from_response will be removed. For '
+        'more information, please see the discussion on issue #2266. 
(This' + ' warning should only appear once.)'), + DeprecationWarning) tried_encodings = [] @@ -407,10 +420,18 @@ def requote_uri(uri): This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. """ - # Unquote only the unreserved characters - # Then quote only illegal characters (do not quote reserved, unreserved, - # or '%') - return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") + safe_with_percent = "!#$%&'()*+,/:;=?@[]~" + safe_without_percent = "!#$&'()*+,/:;=?@[]~" + try: + # Unquote only the unreserved characters + # Then quote only illegal characters (do not quote reserved, + # unreserved, or '%') + return quote(unquote_unreserved(uri), safe=safe_with_percent) + except InvalidURL: + # We couldn't unquote the given URI, so let's try quoting it, but + # there may be unquoted '%'s in the URI. We need to make sure they're + # properly quoted so they do not cause issues elsewhere. + return quote(uri, safe=safe_without_percent) def address_in_network(ip, net): diff --git a/setup.py b/setup.py index 813fc87979..4637291fc7 100755 --- a/setup.py +++ b/setup.py @@ -1,10 +1,9 @@ #!/usr/bin/env python import os +import re import sys -import requests - from codecs import open try: @@ -29,6 +28,18 @@ requires = [] +version = '' +with open('requests/__init__.py', 'r') as fd: + reg = re.compile(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]') + for line in fd: + m = reg.match(line) + if m: + version = m.group(1) + break + +if not version: + raise RuntimeError('Cannot find version information') + with open('README.rst', 'r', 'utf-8') as f: readme = f.read() with open('HISTORY.rst', 'r', 'utf-8') as f: @@ -36,7 +47,7 @@ setup( name='requests', - version=requests.__version__, + version=version, description='Python HTTP for Humans.', long_description=readme + '\n\n' + history, author='Kenneth Reitz', @@ -60,7 +71,6 @@ 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4' - ), extras_require={ 'security': ['pyOpenSSL', 'ndg-httpsclient', 'pyasn1'], diff --git a/test_requests.py b/test_requests.py index 0d93893b9d..15406a22fc 100755 --- a/test_requests.py +++ b/test_requests.py @@ -20,7 +20,7 @@ from requests.cookies import cookiejar_from_dict, morsel_to_cookie from requests.exceptions import (ConnectionError, ConnectTimeout, InvalidSchema, InvalidURL, MissingSchema, - ReadTimeout, Timeout) + ReadTimeout, Timeout, RetryError) from requests.models import PreparedRequest from requests.structures import CaseInsensitiveDict from requests.sessions import SessionRedirectMixin @@ -103,6 +103,14 @@ def test_no_content_length(self): head_req = requests.Request('HEAD', httpbin('head')).prepare() assert 'Content-Length' not in head_req.headers + def test_override_content_length(self): + headers = { + 'Content-Length': 'not zero' + } + r = requests.Request('POST', httpbin('post'), headers=headers).prepare() + assert 'Content-Length' in r.headers + assert r.headers['Content-Length'] == 'not zero' + def test_path_is_not_double_encoded(self): request = requests.Request('GET', "http://0.0.0.0/get/test case").prepare() @@ -250,7 +258,7 @@ def test_headers_on_session_with_None_are_not_sent(self): """Do not send headers in Session.headers with None values.""" ses = requests.Session() ses.headers['Accept-Encoding'] = None - req = requests.Request('GET', 'http://httpbin.org/get') + req = requests.Request('GET', httpbin('get')) prep = ses.prepare_request(req) assert 'Accept-Encoding' not in 
prep.headers @@ -293,13 +301,20 @@ def test_BASICAUTH_TUPLE_HTTP_200_OK_GET(self): r = s.get(url) assert r.status_code == 200 - def test_connection_error(self): + def test_connection_error_invalid_domain(self): """Connecting to an unknown domain should raise a ConnectionError""" with pytest.raises(ConnectionError): - requests.get("http://fooobarbangbazbing.httpbin.org") + requests.get("http://doesnotexist.google.com") + def test_connection_error_invalid_port(self): + """Connecting to an invalid port should raise a ConnectionError""" with pytest.raises(ConnectionError): - requests.get("http://httpbin.org:1") + requests.get("http://httpbin.org:1", timeout=1) + + def test_LocationParseError(self): + """Inputing a URL that cannot be parsed should raise an InvalidURL error""" + with pytest.raises(InvalidURL): + requests.get("http://fe80::5054:ff:fe5a:fc0") def test_basicauth_with_netrc(self): auth = ('user', 'pass') @@ -920,6 +935,27 @@ def test_can_send_nonstring_objects_with_files(self): assert 'multipart/form-data' in p.headers['Content-Type'] + def test_can_send_bytes_bytearray_objects_with_files(self): + # Test bytes: + data = {'a': 'this is a string'} + files = {'b': b'foo'} + r = requests.Request('POST', httpbin('post'), data=data, files=files) + p = r.prepare() + assert 'multipart/form-data' in p.headers['Content-Type'] + # Test bytearrays: + files = {'b': bytearray(b'foo')} + r = requests.Request('POST', httpbin('post'), data=data, files=files) + p = r.prepare() + assert 'multipart/form-data' in p.headers['Content-Type'] + + def test_can_send_file_object_with_non_string_filename(self): + f = io.BytesIO() + f.name = 2 + r = requests.Request('POST', httpbin('post'), files={'f': f}) + p = r.prepare() + + assert 'multipart/form-data' in p.headers['Content-Type'] + def test_autoset_header_values_are_native(self): data = 'this is a string' length = '16' @@ -1000,12 +1036,12 @@ def test_basic_auth_str_is_always_native(self): assert s == "Basic dGVzdDp0ZXN0" def test_requests_history_is_saved(self): - r = requests.get('https://httpbin.org/redirect/5') + r = requests.get(httpbin('redirect/5')) total = r.history[-1].history i = 0 for item in r.history: assert item.history == total[0:i] - i=i+1 + i = i + 1 def test_json_param_post_content_type_works(self): r = requests.post( @@ -1016,6 +1052,23 @@ def test_json_param_post_content_type_works(self): assert 'application/json' in r.request.headers['Content-Type'] assert {'life': 42} == r.json()['json'] + def test_response_iter_lines(self): + r = requests.get(httpbin('stream/4'), stream=True) + assert r.status_code == 200 + + it = r.iter_lines() + next(it) + assert len(list(it)) == 3 + + @pytest.mark.xfail + def test_response_iter_lines_reentrant(self): + """Response.iter_lines() is not reentrant safe""" + r = requests.get(httpbin('stream/4'), stream=True) + assert r.status_code == 200 + + next(r.iter_lines()) + assert len(list(r.iter_lines())) == 3 + class TestContentEncodingDetection(unittest.TestCase): @@ -1244,6 +1297,32 @@ def test_get_environ_proxies(self): 'http://localhost.localdomain:5000/v1.0/') == {} assert get_environ_proxies('http://www.requests.com/') != {} + def test_guess_filename_when_int(self): + from requests.utils import guess_filename + assert None is guess_filename(1) + + def test_guess_filename_when_filename_is_an_int(self): + from requests.utils import guess_filename + fake = type('Fake', (object,), {'name': 1})() + assert None is guess_filename(fake) + + def test_guess_filename_with_file_like_obj(self): + from requests.utils 
import guess_filename + from requests import compat + fake = type('Fake', (object,), {'name': b'value'})() + guessed_name = guess_filename(fake) + assert b'value' == guessed_name + assert isinstance(guessed_name, compat.bytes) + + def test_guess_filename_with_unicode_name(self): + from requests.utils import guess_filename + from requests import compat + filename = b'value'.decode('utf-8') + fake = type('Fake', (object,), {'name': filename})() + guessed_name = guess_filename(fake) + assert filename == guessed_name + assert isinstance(guessed_name, compat.str) + def test_is_ipv4_address(self): from requests.utils import is_ipv4_address assert is_ipv4_address('8.8.8.8') @@ -1280,6 +1359,22 @@ def test_get_auth_from_url(self): assert username == percent_encoding_test_chars assert password == percent_encoding_test_chars + def test_requote_uri_with_unquoted_percents(self): + """Ensure we handle unquoted percent signs in redirects. + + See: https://github.com/kennethreitz/requests/issues/2356 + """ + from requests.utils import requote_uri + bad_uri = 'http://example.com/fiz?buz=%ppicture' + quoted = 'http://example.com/fiz?buz=%25ppicture' + assert quoted == requote_uri(bad_uri) + + def test_requote_uri_properly_requotes(self): + """Ensure requoting doesn't break expectations.""" + from requests.utils import requote_uri + quoted = 'http://example.com/fiz?buz=%25ppicture' + assert quoted == requote_uri(quoted) + class TestMorselToCookieExpires(unittest.TestCase): @@ -1342,7 +1437,7 @@ def test_max_age_invalid_str(self): class TestTimeout: def test_stream_timeout(self): try: - requests.get('https://httpbin.org/delay/10', timeout=2.0) + requests.get(httpbin('delay/10'), timeout=2.0) except requests.exceptions.Timeout as e: assert 'Read timed out' in e.args[0].args[0] @@ -1389,6 +1484,11 @@ def test_total_timeout_connect(self): except ConnectTimeout: pass + def test_encoded_methods(self): + """See: https://github.com/kennethreitz/requests/issues/2316""" + r = requests.request(b'GET', httpbin('get')) + assert r.ok + SendCall = collections.namedtuple('SendCall', ('args', 'kwargs')) @@ -1437,7 +1537,7 @@ class TestRedirects: def test_requests_are_updated_each_time(self): session = RedirectSession([303, 307]) - prep = requests.Request('POST', 'http://httpbin.org/post').prepare() + prep = requests.Request('POST', httpbin('post')).prepare() r0 = session.send(prep) assert r0.request.method == 'POST' assert session.calls[-1] == SendCall((r0.request,), {}) @@ -1507,13 +1607,33 @@ def test_prepared_request_complete_copy(): ) assert_copy(p, p.copy()) + def test_prepare_unicode_url(): p = PreparedRequest() p.prepare( method='GET', - url=u('http://www.example.com/üniçø∂é') + url=u('http://www.example.com/üniçø∂é'), + hooks=[] ) assert_copy(p, p.copy()) + +def test_urllib3_retries(): + from requests.packages.urllib3.util import Retry + s = requests.Session() + s.mount('http://', HTTPAdapter(max_retries=Retry( + total=2, status_forcelist=[500] + ))) + + with pytest.raises(RetryError): + s.get(httpbin('status/500')) + +def test_vendor_aliases(): + from requests.packages import urllib3 + from requests.packages import chardet + + with pytest.raises(ImportError): + from requests.packages import webbrowser + if __name__ == '__main__': unittest.main()
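The two ``requote_uri`` tests above pin down the new fallback behaviour; in
short::

    from requests.utils import requote_uri

    requote_uri('http://example.com/fiz?buz=%ppicture')
    # -> 'http://example.com/fiz?buz=%25ppicture' (a stray '%' is now escaped)
    requote_uri('http://example.com/fiz?buz=%25ppicture')
    # -> unchanged; an already-quoted URL passes through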