Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions slugify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

slugify_url = Slugify()
slugify_url.to_lower = True
slugify_url.stop_words = ('a', 'an', 'the')
slugify_url.max_length = 200
#slugify_url.stop_words = ('a', 'an', 'the')
slugify_url.max_length = 100
slugify_url.min_length = 25
slugify_url.separator = '-'
slugify_url.extract_phrase = True

slugify_filename = Slugify()
slugify_filename.separator = '_'
Expand Down
54 changes: 52 additions & 2 deletions slugify/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from unidecode import unidecode
import regex as re
from django.utils.text import Truncator


re.DEFAULT_VERSION = re.V1 # Version 1 behaviour: nested sets and set operations are supported
Expand Down Expand Up @@ -65,7 +66,10 @@ class Slugify(object):
_stop_words = ()

def __init__(self, pretranslate=None, translate=unidecode, safe_chars='', stop_words=(),
to_lower=False, max_length=None, separator=u'-', capitalize=False):
to_lower=False, max_length=2000, min_length=25, separator=u'-', capitalize=False, extract_phrase=False, truncate_words=False):
"""Init next parametesr taking in account URL format recommendations:
to_lower = True, max_length = 2000, separator = '-'
"""

self.pretranslate = pretranslate
self.translate = translate
Expand All @@ -74,8 +78,11 @@ def __init__(self, pretranslate=None, translate=unidecode, safe_chars='', stop_w

self.to_lower = to_lower
self.max_length = max_length
self.min_length = min_length
self.separator = separator
self.capitalize = capitalize
self.extract_phrase = extract_phrase
self.truncate_words = truncate_words # Is allowed cut words in the middle?

def pretranslate_dict_to_function(self, convert_dict):

Expand Down Expand Up @@ -141,7 +148,45 @@ def sanitize(self, text):
text = text.replace("'", '').strip() # remove '
return filter(None, self.sanitize_re.split(text)) # split by unwanted characters

def avoid_truncated_word(self, text, max_length=None):
    """Shorten *text* to the length limit without splitting a word.

    The text is cut at the limit; if the cut falls inside a word, that
    partial word is dropped so the result ends on a complete word. Text
    that already fits is returned unchanged.

    Args:
        text: The raw (not yet sanitized) text to shorten.
        max_length: Optional limit for this call. When omitted (``None``),
            ``self.max_length`` is used.

    Returns:
        The shortened text, never longer than the limit. If no limit is
        set (``None``, zero or negative) the text is returned as is. If
        the very first word is already longer than the limit, the hard
        cut is kept, since an over-long word is preferable to an empty
        result.
    """
    limit = self.max_length if max_length is None else max_length

    # No usable limit, or the text already fits: nothing to do.
    if not limit or limit < 0 or len(text) <= limit:
        return text

    head = text[:limit]

    # The cut is clean when whitespace sits on either side of it: the last
    # word in `head` is then complete and must be kept.
    if head[-1].isspace() or text[limit].isspace():
        return head.rstrip()

    # The cut landed inside a word: drop that partial word.
    pieces = head.rsplit(None, 1)
    if len(pieces) < 2:
        # There is only one word and it does not fit; keep the hard cut.
        return head
    return pieces[0]

def phrase(self, text):
    """Pick the most meaningful leading fragment of *text* using punctuation.

    The text is first shortened with ``self.avoid_truncated_word`` (this
    has to happen here, before sanitizing removes the punctuation). Then
    the marks ``.``, ``;``, ``,`` and ``:`` are tried in that order. For
    each mark the fragment running from the start of the text to the LAST
    occurrence of the mark (mark included) is considered, and the first
    fragment that is at least ``self.min_length`` characters long wins.

    Args:
        text: The raw (not yet sanitized) text.

    Returns:
        The selected fragment, or the whole shortened text when no
        fragment is long enough.
    """
    text = self.avoid_truncated_word(text)

    # A missing minimum length means "any fragment is long enough".
    min_length = self.min_length or 0

    # Only the first line is searched: a fragment never spans a line break.
    first_line = text.split(u'\n', 1)[0]

    for mark in (u'.', u';', u',', u':'):
        end = first_line.rfind(mark) + 1  # 0 when the mark is absent
        if end and end >= min_length:
            return text[:end]
    return text


def __call__(self, text, **kwargs):
"""Use:
slugify_url = Slugify()
slugify_url.extract_phrase = True # set parameters
...
slugify_url("Text to slugify")
"""

max_length = kwargs.get('max_length', self.max_length)
separator = kwargs.get('separator', self.separator)
Expand All @@ -166,7 +211,12 @@ def __call__(self, text, **kwargs):

text = u''.join(text_parts)

words = self.sanitize(text)
if self.extract_phrase:
text = self.phrase(text) # calls self.avoid_truncated_word()
elif not self.truncate_words:
text = self.avoid_truncated_word(text)

words = self.sanitize(text) # leave only secure chars
text = join_words(words, separator, max_length)

if text and kwargs.get('capitalize', self.capitalize):
Expand Down
45 changes: 44 additions & 1 deletion slugify/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_slugify_unicode(self):
class PredefinedSlugifyTestCase(unittest.TestCase):

def test_slugify_url(self):
self.assertEqual(slugify_url('The Über article'), 'uber-article')
self.assertEqual(slugify_url('The Über article'), 'the-uber-article')

def test_slugify_filename(self):
self.assertEqual(slugify_filename(u'Дrаft №2.txt'), u'Draft_2.txt')
Expand Down Expand Up @@ -254,5 +254,48 @@ def test_deprecated_get_slugify(self):
self.assertIn("'slugify.get_slugify' is deprecated", str(warning[-1].message))


class PhraseSlugifyTestCase(unittest.TestCase):
    """Check phrase extraction through the predefined ``slugify_url`` instance.

    The expected slugs below were written for this configuration:

        slugify_url = Slugify()
        slugify_url.to_lower = True
        #slugify_url.stop_words = ('a', 'an', 'the')
        slugify_url.max_length = 100
        slugify_url.min_length = 25
        slugify_url.separator = '-'
        slugify_url.extract_phrase = True
    """

    def test_slugify_phrase_url(self):
        # Each entry pairs an input text with the slug it must produce.
        cases = (
            # Long sentences that exceed max_length.
            (
                "Someone must have slandered Josef K., for one morning, without having done anything truly wrong, he was arrested.",
                "someone-must-have-slandered-josef-k-for-one-morning-without-having-done-anything-truly-wrong",
            ),
            (
                "The Miss Lonelyhearts of the New York Post-Dispatch (Are you in trouble?—Do-you-need-advice?—Write-to-Miss-Lonelyhearts-and-she-will-help-you) sat at his desk and stared at a piece of white cardboard. —Nathanael West, Miss Lonelyhearts",
                "the-miss-lonelyhearts-of-the-new-york-post-dispatch-are-you-in",
            ),
            (
                " I wish either my father or my mother, or indeed both of them, as they were in duty both equally bound to it, had minded what they were about when they begot me; had they duly considered how much depended upon what they were then doing;—that not only the production of a rational Being was concerned in it, but that possibly the happy formation and temperature of his body, perhaps his genius and the very cast of his mind;—and, for aught they knew to the contrary, even the fortunes of his whole house might take their turn from the humours and dispositions which were then uppermost:—Had they duly weighed and considered all this, and proceeded accordingly,—I am verily persuaded I should have made a quite different figure in the world, from that, in which the reader is likely to see me. —Laurence Sterne, Tristram Shandy (1759–1767)",
                "i-wish-either-my-father-or-my-mother-or-indeed-both-of-them",
            ),
            # A sentence that fits within max_length.
            (
                "En un lugar de la Mancha, de cuyo nombre no quiero acordarme, vivía un caballero.",
                "en-un-lugar-de-la-mancha-de-cuyo-nombre-no-quiero-acordarme-vivia-un-caballero",
            ),
            # Colon: the phrase is longer than min_length, so the text is cut at the mark.
            (
                "Este era el nombre del caballero: Don Quijote",
                "este-era-el-nombre-del-caballero",
            ),
            # Colon: the phrase is shorter than min_length, so the text is not cut.
            (
                "Su nombre: Don Quijote",
                "su-nombre-don-quijote",
            ),
            # Semicolon: the phrase is longer than min_length, so the text is cut at the mark.
            (
                "Este era el nombre del caballero; Don Quijote",
                "este-era-el-nombre-del-caballero",
            ),
        )

        for source, expected in cases:
            self.assertEqual(slugify_url(source), expected)

if __name__ == '__main__':
unittest.main()