Skip to content

Commit

Permalink
Merge branch 'release/v1.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
deanishe committed May 11, 2014
2 parents 4bdaa1c + e7ea627 commit 1b52ea7
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 106 deletions.
1 change: 1 addition & 0 deletions TODO
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ web.py:
workflow.py:
- automatically add `lib` and/or `packages` subdirectory to `sys.path`?
- move "magic" args to class-level dictionary, so authors can add their own.
- optimise `filter` by memoising expensive operations, e.g. `pattern`/`search` generation @done(14-05-11 23:13)
ui.py:
Possibly provide access to dialog boxes, notifications w/out having to use AppleScript
- Yes/no dialog
Expand Down
Binary file modified alfred-workflow.zip
Binary file not shown.
19 changes: 11 additions & 8 deletions doc/howto.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,10 @@ description of the algorithm and match flags).

**Note:** By default, :meth:`Workflow.filter() <workflow.workflow.Workflow.filter>`
will match and return anything that contains all the characters in ``query``
in the same order, regardless of case. It's very likely that you'll want to set
the standard a little higher. See :ref:`restricting-results` for info on how
to do that.
in the same order, regardless of case. Not only can this lead to unacceptable
performance when working with thousands of results, but it's also very likely
that you'll want to set the standard a little higher.
See :ref:`restricting-results` for info on how to do that.

To use :meth:`Workflow.filter() <workflow.workflow.Workflow.filter>`, pass it
a query, a list of items to filter and sort, and if your list contains items
Expand Down Expand Up @@ -376,15 +377,17 @@ You can set match rules using bitwise operators, so ``|`` to combine them or
**Note:** ``MATCH_ALLCHARS`` is particularly slow and provides the
worst matches. You should consider excluding it, especially if you're calling
:meth:`Workflow.filter() <workflow.workflow.Workflow.filter>` with > 5000 items.
:meth:`Workflow.filter() <workflow.workflow.Workflow.filter>` with more than a
few hundred items or if you expect multi-word queries.

Diacritic folding
-----------------

By default, :meth:`Workflow.filter() <workflow.workflow.Workflow.filter>` will fold non-ASCII characters
to ASCII equivalents (e.g. *é* -> *e*, *ü* -> *u*) if the ``query`` contains
only ASCII characters. This behaviour can be turned off by passing
``fold_diacritics=False`` to :meth:`Workflow.filter() <workflow.workflow.Workflow.filter>`.
By default, :meth:`Workflow.filter() <workflow.workflow.Workflow.filter>`
will fold non-ASCII characters to ASCII equivalents (e.g. *é* -> *e*, *ü* -> *u*)
if the ``query`` contains only ASCII characters. This behaviour can be turned
off by passing ``fold_diacritics=False`` to
:meth:`Workflow.filter() <workflow.workflow.Workflow.filter>`.

**Note:** To keep the library small, only a subset of European languages is
supported. The `Unidecode <https://pypi.python.org/pypi/Unidecode>`_ library
Expand Down
2 changes: 1 addition & 1 deletion workflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def main(wf):
"""

__version__ = '1.4.4'
__version__ = '1.5'

from .workflow import Workflow, PasswordNotFound, KeychainError
from .workflow import (ICON_ERROR, ICON_WARNING, ICON_NOTE, ICON_INFO,
Expand Down
10 changes: 6 additions & 4 deletions workflow/background.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def _background(stdin='/dev/null', stdout='/dev/null',
pid = os.fork()
if pid > 0:
sys.exit(0) # Exit first parent.
except OSError, e:
except OSError as e:
log.critical("fork #1 failed: (%d) %s\n" % (e.errno, e.strerror))
sys.exit(1)
# Decouple from parent environment.
Expand All @@ -171,7 +171,7 @@ def _background(stdin='/dev/null', stdout='/dev/null',
pid = os.fork()
if pid > 0:
sys.exit(0) # Exit second parent.
except OSError, e:
except OSError as e:
log.critical("fork #2 failed: (%d) %s\n" % (e.errno, e.strerror))
sys.exit(1)
# Now I am a daemon!
Expand Down Expand Up @@ -201,7 +201,7 @@ def run_in_background(name, args, **kwargs):
"""

if is_running(name):
log.info('Task `{}` is already running')
log.info('Task `{}` is already running'.format(name))
return

argcache = _arg_cache(name)
Expand All @@ -211,7 +211,9 @@ def run_in_background(name, args, **kwargs):
pickle.dump({'args': args, 'kwargs': kwargs}, file)

# Call this script
retcode = subprocess.call(['/usr/bin/python', __file__, name])
cmd = ['/usr/bin/python', __file__, name]
log.debug('Calling {!r} ...'.format(cmd))
retcode = subprocess.call(cmd)
if retcode: # pragma: no cover
log.error('Failed to call task in background')
else:
Expand Down
227 changes: 134 additions & 93 deletions workflow/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ def __init__(self, default_settings=None, input_encoding='utf-8',
self._info_loaded = False
self._logger = None
self._items = []
self._search_pattern_cache = {}
if libraries:
sys.path = libraries + sys.path

Expand Down Expand Up @@ -1074,114 +1075,41 @@ def filter(self, query, items, key=lambda x: x, ascending=False,
"""

results = {}
query = query.lower()
queryset = set(query)
# Remove preceding/trailing spaces
query = query.strip()

# Use user override if there is one
fold_diacritics = self.settings.get('__workflows_diacritic_folding',
fold_diacritics)

if not isascii(query):
fold_diacritics = False

# Build pattern: include all characters
pattern = []
for c in query:
# pattern.append('[^{0}]*{0}'.format(re.escape(c)))
pattern.append('.*?{0}'.format(re.escape(c)))
pattern = ''.join(pattern)
search = re.compile(pattern, re.IGNORECASE).search
# print('filter: searching %d items' % len(items))
results = {}

for i, item in enumerate(items):
rule = None
skip = False
score = 0
value = key(item)

if fold_diacritics:
value = self.fold_to_ascii(value)

# pre-filter any items that do not contain all characters
# of ``query`` to save on running several more expensive tests
if not queryset <= set(value.lower()):
words = [s.strip() for s in query.split(' ')]
value = key(item).strip()
if value == '':
continue
for word in words:
if word == '':
continue
s, r = self._filter_item(value, word, match_on,
fold_diacritics)

if not s: # Skip items that don't match part of the query
skip = True
score += s

# item starts with query
if (match_on & MATCH_STARTSWITH and
value.lower().startswith(query)):
score = 100.0 - (len(value) / len(query))
rule = MATCH_STARTSWITH

if not score and match_on & MATCH_CAPITALS:
# query matches capitalised letters in item,
# e.g. of = OmniFocus
initials = ''.join([c for c in value if c in INITIALS])
if initials.lower().startswith(query):
score = 100.0 - (len(initials) / len(query))
rule = MATCH_CAPITALS

if not score:
if (match_on & MATCH_ATOM or
match_on & MATCH_INITIALS_CONTAIN or
match_on & MATCH_INITIALS_STARTSWITH):
# split the item into "atoms", i.e. words separated by
# spaces or other non-word characters
atoms = [s.lower() for s in split_on_delimiters(value)]
# print('atoms : %s --> %s' % (value, atoms))
# initials of the atoms
initials = ''.join([s[0] for s in atoms if s])

if match_on & MATCH_ATOM:
# is `query` one of the atoms in item?
# similar to substring, but scores more highly, as it's
# a word within the item
if query in atoms:
score = 100.0 - (len(value) / len(query))
rule = MATCH_ATOM

if not score:
# `query` matches start (or all) of the initials of the
# atoms, e.g. ``himym`` matches "How I Met Your Mother"
# *and* "how i met your mother" (the ``capitals`` rule only
# matches the former)
if (match_on & MATCH_INITIALS_STARTSWITH and
initials.startswith(query)):
score = 100.0 - (len(initials) / len(query))
rule = MATCH_INITIALS_STARTSWITH

# `query` is a substring of initials, e.g. ``doh`` matches
# "The Dukes of Hazzard"
elif (match_on & MATCH_INITIALS_CONTAIN and
query in initials):
score = 95.0 - (len(initials) / len(query))
rule = MATCH_INITIALS_CONTAIN

if not score:
# `query` is a substring of item
if match_on & MATCH_SUBSTRING and query in value.lower():
score = 90.0 - (len(value) / len(query))
rule = MATCH_SUBSTRING

if not score:
# finally, assign a score based on how close together the
# characters in `query` are in item.
if match_on & MATCH_ALLCHARS:
match = search(value)
if match:
score = 100.0 / ((1 + match.start()) *
(match.end() - match.start() + 1))
rule = MATCH_ALLCHARS

if min_score and score < min_score:
if skip:
continue

if score > 0:
if score:
# use "reversed" `score` (i.e. highest becomes lowest) and
# `value` as sort key. This means items with the same score
# will be sorted in alphabetical not reverse alphabetical order
results[(100.0 / score, value.lower(), i)] = (item, score,
rule)
results[(100.0 / score, value.lower(), score)] = (item, score,
r)

# sort on keys, then discard the keys
keys = sorted(results.keys(), reverse=ascending)
Expand All @@ -1190,12 +1118,125 @@ def filter(self, query, items, key=lambda x: x, ascending=False,
if max_results and len(results) > max_results:
results = results[:max_results]

if min_score:
results = [r for r in results if r[1] > min_score]

# return list of ``(item, score, rule)``
if include_score:
return results
# just return list of items
return [t[0] for t in results]

def _filter_item(self, value, query, match_on, fold_diacritics):
    """Score ``value`` against ``query`` using the rules in ``match_on``.

    The rules are tried in a fixed order and the first one that yields
    a non-zero score wins: ``MATCH_STARTSWITH``, ``MATCH_CAPITALS``,
    ``MATCH_ATOM``, ``MATCH_INITIALS_STARTSWITH``,
    ``MATCH_INITIALS_CONTAIN``, ``MATCH_SUBSTRING`` and finally
    ``MATCH_ALLCHARS``.

    :param value: the text to test.
    :param query: a single search term. It is lowercased before any
        comparison is made. An empty ``query`` never matches.
    :param match_on: bitmask of ``MATCH_*`` flags selecting which rules
        may be applied.
    :param fold_diacritics: if true, ``value`` is passed through
        ``self.fold_to_ascii()`` before matching. Forced to ``False``
        when ``isascii(query)`` is false.
    :returns: ``(score, rule)`` where ``rule`` is the ``MATCH_*`` flag
        that produced ``score``, or ``(0, None)`` if nothing matched.

    """

    query = query.lower()

    # Every scoring rule except ``MATCH_ALLCHARS`` divides by
    # ``len(query)``, so an empty query would raise ``ZeroDivisionError``.
    # An empty query carries no information: treat it as "no match".
    if not query:
        return (0, None)

    if not isascii(query):
        fold_diacritics = False

    if fold_diacritics:
        value = self.fold_to_ascii(value)

    # Lowercase once; several rules below compare against it
    value_lower = value.lower()

    # pre-filter any items that do not contain all characters
    # of ``query`` to save on running several more expensive tests
    if not set(query) <= set(value_lower):
        return (0, None)

    rule = None
    score = 0

    # NOTE: the ``100.0 - (length / len(query))`` scores reach zero or
    # go negative once the measured text is 100+ times longer than
    # ``query``. A zero score falls through to the next rule; a negative
    # one is discarded by the final ``score > 0`` check.

    # item starts with query
    if match_on & MATCH_STARTSWITH and value_lower.startswith(query):
        score = 100.0 - (len(value) / len(query))
        rule = MATCH_STARTSWITH

    if not score and match_on & MATCH_CAPITALS:
        # query matches capitalised letters in item,
        # e.g. of = OmniFocus
        initials = ''.join([c for c in value if c in INITIALS])
        if initials.lower().startswith(query):
            score = 100.0 - (len(initials) / len(query))
            rule = MATCH_CAPITALS

    if not score:
        if (match_on & MATCH_ATOM or
                match_on & MATCH_INITIALS_CONTAIN or
                match_on & MATCH_INITIALS_STARTSWITH):
            # split the item into "atoms", i.e. words separated by
            # spaces or other non-word characters
            atoms = [s.lower() for s in split_on_delimiters(value)]
            # initials of the atoms (replaces any initials computed
            # for the ``MATCH_CAPITALS`` rule above)
            initials = ''.join([s[0] for s in atoms if s])

        if match_on & MATCH_ATOM:
            # is `query` one of the atoms in item?
            # similar to substring, but scores more highly, as it's
            # a word within the item
            if query in atoms:
                score = 100.0 - (len(value) / len(query))
                rule = MATCH_ATOM

    if not score:
        # `query` matches start (or all) of the initials of the
        # atoms, e.g. ``himym`` matches "How I Met Your Mother"
        # *and* "how i met your mother" (the ``capitals`` rule only
        # matches the former)
        if (match_on & MATCH_INITIALS_STARTSWITH and
                initials.startswith(query)):
            score = 100.0 - (len(initials) / len(query))
            rule = MATCH_INITIALS_STARTSWITH

        # `query` is a substring of initials, e.g. ``doh`` matches
        # "The Dukes of Hazzard"
        elif (match_on & MATCH_INITIALS_CONTAIN and
                query in initials):
            score = 95.0 - (len(initials) / len(query))
            rule = MATCH_INITIALS_CONTAIN

    if not score:
        # `query` is a substring of item
        if match_on & MATCH_SUBSTRING and query in value_lower:
            score = 90.0 - (len(value) / len(query))
            rule = MATCH_SUBSTRING

    if not score:
        # finally, assign a score based on how close together the
        # characters in `query` are in item.
        if match_on & MATCH_ALLCHARS:
            search = self._search_for_query(query)
            match = search(value)
            if match:
                score = 100.0 / ((1 + match.start()) *
                                 (match.end() - match.start() + 1))
                rule = MATCH_ALLCHARS

    if score > 0:
        return (score, rule)
    return (0, None)

def _search_for_query(self, query):
    """Return a cached ``search`` callable for ``query``.

    The compiled pattern matches the characters of ``query`` in order,
    case-insensitively, with any (non-greedy) run of other characters
    allowed before each one. Each character is passed through
    ``re.escape()``, so regex metacharacters in ``query`` are matched
    literally.

    Compiled ``search`` methods are memoised per ``query`` in
    ``self._search_pattern_cache`` so the pattern is built only once
    per distinct query.

    :param query: the search term to build the pattern from.
    :returns: the bound ``search`` method of the compiled pattern.

    """

    search = self._search_pattern_cache.get(query)
    if search is not None:
        return search

    # Build pattern: include all characters
    pattern = ''.join('.*?{0}'.format(re.escape(c)) for c in query)
    search = re.compile(pattern, re.IGNORECASE).search

    self._search_pattern_cache[query] = search
    return search

def run(self, func):
"""Call `func` to run your workflow
Expand Down

0 comments on commit 1b52ea7

Please sign in to comment.