Skip to content

Commit

Permalink
Fix encoding detection
Browse files Browse the repository at this point in the history
  • Loading branch information
deanishe committed Aug 17, 2014
1 parent 5eb6468 commit ba4353d
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 10 deletions.
2 changes: 1 addition & 1 deletion TODO
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ General:
trying to achieve. They would just be a duplication of the comments in the
source code in any case.
- Add `setup.py` so `Alfred-Workflow` can be added to PyPi and installed with `pip`
- Add explicit `save()` method to `Settings`
- Add explicit `save()` method to `Settings` @done(14-08-17 00:08)

background.py:
- Add `stop_process()` function.
Expand Down
Binary file modified alfred-workflow.zip
Binary file not shown.
51 changes: 51 additions & 0 deletions tests/test_http_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: fileencoding=utf-8
"""
test_http_encoding.py
Created by [email protected] on 2014-08-17.
Copyright (c) 2014 [email protected]
MIT Licence. See http://opensource.org/licenses/MIT
"""

from __future__ import print_function


import unittest

from workflow import web


def setUp():
    """Module-level ``setUp``; performs no work."""
    return None


def tearDown():
    """Module-level ``tearDown``; performs no work."""
    return None


class WebEncodingTests(unittest.TestCase):
def setUp(self):
self.urls = [
# URL, encoding
('http://www.baidu.com/s?wd=lager', 'utf-8'),
('http://httpbin.org/xml', 'us-ascii'),
('http://httpbin.org/get', 'utf-8'),
('https://deanishe.net/no-encoding.xml', 'utf-8')
]

def tearDown(self):
pass

def test_encoding(self):
"""Find response encoding"""
for url, encoding in self.urls:
r = web.get(url)
self.assertEqual(r.encoding, encoding)


# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
2 changes: 1 addition & 1 deletion workflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def main(wf):
"""

__version__ = '1.8.1'
__version__ = '1.8.2'


from .workflow import Workflow, PasswordNotFound, KeychainError
Expand Down
46 changes: 38 additions & 8 deletions workflow/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def __init__(self, request):
self.request = request
self.url = None
self.raw = None
self.encoding = None
self._encoding = None
self.error = None
self.status_code = None
self.reason = None
Expand Down Expand Up @@ -181,7 +181,6 @@ def __init__(self, request):
self.mimetype = headers.gettype()
for key in headers.keys():
self.headers[key.lower()] = headers.get(key)
self.encoding = self._get_encoding()

def json(self):
"""Decode response contents as JSON.
Expand All @@ -193,6 +192,19 @@ def json(self):

return json.loads(self.content, self.encoding or 'utf-8')

@property
def encoding(self):
    """Text encoding of the response, determined on first access.

    The value produced by ``_get_encoding()`` is stored in
    ``self._encoding`` and reused for as long as that stored value
    is truthy; a falsy stored value triggers detection again.

    :returns: ``str`` or ``None``

    """

    cached = self._encoding
    if cached:
        return cached

    self._encoding = self._get_encoding()
    return self._encoding

@property
def content(self):
"""Return raw content of response (i.e. bytes)
Expand Down Expand Up @@ -274,27 +286,45 @@ def _get_encoding(self):
"""

# HTTP Content-Type header
headers = self.raw.info()
# _, params = cgi.parse_header(self.headers.get('content-type'))
encoding = headers.getparam('charset')
encoding = None

if headers.getparam('charset'):
encoding = headers.getparam('charset')

# HTTP Content-Type header
for param in headers.getplist():
if param.startswith('charset='):
encoding = param[8:]
break

# Encoding declared in document should override HTTP headers
if self.mimetype == 'text/html': # sniff HTML headers
m = re.search("""<meta.+charset=["']{0,1}(.+)["'].*>""",
m = re.search("""<meta.+charset=["']{0,1}(.+?)["'].*>""",
self.content)
if m:
encoding = m.group(1)

elif ((self.mimetype.startswith('application/') or
self.mimetype.startswith('text/')) and
'xml' in self.mimetype):
m = re.search("""<?xml.+encoding=["'](.+?)["'].*>""",
m = re.search("""<?xml.+encoding=["'](.+?)["'][^>]*\?>""",
self.content)
if m:
encoding = m.group(1)
elif self.mimetype == 'application/json' and not encoding:

# Format defaults
if self.mimetype == 'application/json' and not encoding:
# The default encoding for JSON
encoding = 'utf-8'

elif self.mimetype == 'application/xml' and not encoding:
# The default for 'application/xml'
encoding = 'utf-8'

if encoding:
encoding = encoding.lower()

return encoding


Expand Down

0 comments on commit ba4353d

Please sign in to comment.