@@ -0,0 +1,245 @@
+import re
+import smartypants
+def amp(text):
+ """Wraps apersands in HTML with ```` so they can be
+ styled with CSS. Apersands are also normalized to ``&``. Requires
+ ampersands to have whitespace or an `` `` on both sides.
+ >>> amp('One & two')
+ u'One & two'
+ >>> amp('One & two')
+ u'One & two'
+ >>> amp('One & two')
+ u'One & two'
+ >>> amp('One & two')
+ u'One & two'
+ It won't mess up & that are already wrapped, in entities or URLs
+ >>> amp('One & two')
+ u'One & two'
+ >>> amp('“this” & that')
+ u'“this” & that'
+ It should ignore standalone amps that are in attributes
+ >>> amp('xyz')
+ u'xyz'
+ """
+ text = unicode(text)
+ # tag_pattern from http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx
+ # it kinda sucks but it fixes the standalone amps in attributes bug
+ tag_pattern = '?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)/?>'
+ amp_finder = re.compile(r"(\s| )(&|&|&\#38;)(\s| )")
+ intra_tag_finder = re.compile(r'(?PCAPS
more CAPS")
+ u'CAPS
more CAPS'
+ >>> caps("A message from 2KU2 with digits")
+ u'A message from 2KU2 with digits'
+ >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
+ u'Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.'
+ All caps with with apostrophes in them shouldn't break. Only handles dump apostrophes though.
+ >>> caps("JIMMY'S")
+ u'JIMMY\\'S'
+ >>> caps("D.O.T.HE34TRFID")
+ u'D.O.T.HE34TRFID'
+ """
+ text = unicode(text)
+ tokens = smartypants._tokenize(text)
+ result = []
+ in_skipped_tag = False
+ cap_finder = re.compile(r"""(
+ (\b[A-Z\d]* # Group 2: Any amount of caps and digits
+ [A-Z]\d*[A-Z] # A cap string much at least include two caps (but they can have digits between them)
+ [A-Z\d']*\b) # Any amount of caps and digits or dumb apostsrophes
+ | (\b[A-Z]+\.\s? # OR: Group 3: Some caps, followed by a '.' and an optional space
+ (?:[A-Z]+\.\s?)+) # Followed by the same thing at least once more
+ (?:\s|\b|$))
+ """, re.VERBOSE)
+ def _cap_wrapper(matchobj):
+ """This is necessary to keep dotted cap strings to pick up extra spaces"""
+ if matchobj.group(2):
+ return """%s""" % matchobj.group(2)
+ else:
+ if matchobj.group(3)[-1] == " ":
+ caps = matchobj.group(3)[:-1]
+ tail = ' '
+ else:
+ caps = matchobj.group(3)
+ tail = ''
+ return """%s%s""" % (caps, tail)
+ tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)
+ for token in tokens:
+ if token[0] == "tag":
+ # Don't mess with tags.
+ result.append(token[1])
+ close_match = tags_to_skip_regex.match(token[1])
+ if close_match and close_match.group(1) == None:
+ in_skipped_tag = True
+ else:
+ in_skipped_tag = False
+ else:
+ if in_skipped_tag:
+ result.append(token[1])
+ else:
+ result.append(cap_finder.sub(_cap_wrapper, token[1]))
+ output = "".join(result)
+ return output
def initial_quotes(text):
    """Wrap a leading quote mark in a ``<span>`` so it can be styled with CSS.

    A double quote is wrapped in ``<span class="dquo">`` and a single quote
    in ``<span class="quo">``.  A quote counts as "initial" when it is the
    first thing at the start of the string or directly after an opening
    block tag (``p``, ``h1``-``h6``, ``li``, ``dt``, ``dd``), optionally
    preceded by whitespace and by opening inline tags (``a``, ``em``,
    ``span``, ``strong``, ``i``, ``b``).

    Only left quotes and straight primes are recognised: the plain double
    and single quote characters, the entities ``&ldquo;`` / ``&#8220;`` and
    ``&lsquo;`` / ``&#8216;``, and the literal U+201C / U+2018 characters.

    :param text: markup to process; it is coerced to text first.
    :returns: the text with every initial quote wrapped in a span.

    >>> print(initial_quotes('<h1>"With primes"</h1>'))
    <h1><span class="dquo">"</span>With primes"</h1>
    >>> print(initial_quotes("<h1>'With single primes'</h1>"))
    <h1><span class="quo">'</span>With single primes'</h1>
    >>> print(initial_quotes('<h1><a href="#">"With primes and a link"</a></h1>'))
    <h1><a href="#"><span class="dquo">"</span>With primes and a link"</a></h1>
    >>> print(initial_quotes('<h1>&#8220;With smartypanted quotes&#8221;</h1>'))
    <h1><span class="dquo">&#8220;</span>With smartypanted quotes&#8221;</h1>
    """
    # Coerce to the text type of whichever interpreter is running.
    try:
        to_text = unicode  # Python 2
    except NameError:
        to_text = str  # Python 3: str is already the text type
    text = to_text(text)

    # The literal curly quotes are injected through escapes so this source
    # stays pure ASCII; "#" is backslash-escaped because re.VERBOSE would
    # otherwise treat it as the start of a comment.
    quote_finder = re.compile(
        r"""((<(p|h[1-6]|li|dt|dd)[^>]*>|^)      # an opening block tag, or the start of the string
            \s*                                  # optional white space
            (<(a|em|span|strong|i|b)[^>]*>\s*)*) # any number of opening inline tags
            (("|&ldquo;|&\#8220;|%(ldquo)s)      # group 7: double left quote or prime
            |('|&lsquo;|&\#8216;|%(lsquo)s))     # group 8: single left quote or prime
        """ % {"ldquo": u"\u201c", "lsquo": u"\u2018"},
        re.VERBOSE)

    def _quote_wrapper(matchobj):
        """Re-emit the leading markup followed by the span-wrapped quote."""
        double_quote = matchobj.group(7)
        if double_quote:
            classname, quote = "dquo", double_quote
        else:
            classname, quote = "quo", matchobj.group(8)
        # Three values need three placeholders: prefix, class name, quote.
        return '%s<span class="%s">%s</span>' % (
            matchobj.group(1), classname, quote)

    return quote_finder.sub(_quote_wrapper, text)
def smartquotes(text):
    """Curl the quotes in ``text`` by running it through SmartyPants.

    The input is coerced to ``unicode`` and handed to
    ``smartypants.smartyPants``; whatever that call returns is passed
    back untouched.  For example, the straight quotes around ``Green`` in
    ``The "Green" man`` come back as a left/right curly pair.
    """
    return smartypants.smartyPants(unicode(text))
def typogrify(text):
    """The super typography filter.

    Coerces ``text`` to ``unicode`` and then pipes it through every
    filter in this module, each one receiving the previous one's output.
    The filters run in this order: amp, widont, smartquotes, caps,
    initial_quotes.
    """
    result = unicode(text)
    # Order matters: each filter sees the markup produced by the previous one.
    for apply_filter in (amp, widont, smartquotes, caps, initial_quotes):
        result = apply_filter(result)
    return result
+def widont(text):
+ """Replaces the space between the last two words in a string with `` ``
+ Works in these block tags ``(h1-h6, p, li, dd, dt)`` and also accounts for
+ potential closing inline elements ``a, em, strong, span, b, i``
+ >>> widont('A very simple test')
+ u'A very simple test'
+ Single word items shouldn't be changed
+ >>> widont('Test')
+ u'Test'
+ >>> widont(' Test')
+ u' Test'
+ >>> widont('
In a couple of paragraphs
paragraph two
') + u'In a couple of paragraphs
paragraph two
' + + >>> widont('Some text nearly ends with a link.
') + u'Some text nearly ends with a link.
' + + >>> widont('Neither do PREs') + u'
Neither do PREs' + + >>> widont('
But divs with paragraphs do!
But divs with paragraphs do!