Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lem.asd
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"log4cl"
"split-sequence"
"str"
"mk-string-metrics" ;; fuzzy-match algorithm.
"dexador"
"cl-mustache"
;; "lem-encodings"
Expand Down Expand Up @@ -129,6 +130,7 @@
(:file "input")
(:file "overlay")
(:file "streams")
(:file "fuzzy-match")
(:file "completion")
(:file "typeout")
(:file "cursors")
Expand Down
49 changes: 29 additions & 20 deletions src/completion.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,37 @@
:while pos))))

(defun completion (name elements &key (test #'search) separator key rank)
"Perform completion on ELEMENTS matching NAME. Returns matching elements,
"Perform completion on ELEMENTS matching NAME. Returns matching elements,
optionally sorted by RANK function."
(labels ((apply-key (elt) (if key (funcall key elt) elt))
(test-with-separator (elt)
(let* ((elt (apply-key elt))
(parts1 (explode-string name separator))
(parts2 (explode-string elt separator)))
(and (<= (length parts1) (length parts2))
(loop :for p1 :in parts1
:for p2 :in parts2
:always (funcall test p1 p2)))))
(test-without-separator (elt)
(funcall test name (apply-key elt))))
(let ((filtered-elements
(remove-if-not (if separator
#'test-with-separator
#'test-without-separator)
elements)))
(if rank
(sort filtered-elements #'< :key (lambda (elt) (funcall rank name (apply-key elt))))
filtered-elements))))
(declare (ignorable rank key test separator))
(assert (stringp name))
(assert (listp elements))
;; (assert (stringp (first elements)))

;; (labels ((apply-key (elt) (if key (funcall key elt) elt))
;; (test-with-separator (elt)
;; (let* ((elt (apply-key elt))
;; (parts1 (explode-string name separator))
;; (parts2 (explode-string elt separator)))
;; (and (<= (length parts1) (length parts2))
;; (loop :for p1 :in parts1
;; :for p2 :in parts2
;; :always (funcall test p1 p2)))))
;; (test-without-separator (elt)
;; (funcall test name (apply-key elt))))
;; (let ((filtered-elements
;; (remove-if-not (if separator
;; #'test-with-separator
;; #'test-without-separator)
;; elements)))
;; (if rank
;; (sort filtered-elements #'< :key (lambda (elt) (funcall rank name (apply-key elt))))
;; filtered-elements)))

;; devel
(lem/fuzzy-match:fuzzy-match name elements :key key :threshold 0.1)

)
(defun string-completion-rank (name elt)
(cond
; Exact match
Expand Down
25 changes: 18 additions & 7 deletions src/ext/prompt-window.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -490,13 +490,9 @@
command))))

(filter-items (items)
(if (find #\- string)
(completion-hyphen string
items
:key #'lem/completion-mode:completion-item-label)
(completion string
(completion string
items
:key #'lem/completion-mode:completion-item-label))))
:key #'lem/completion-mode:completion-item-label)))

(let* ((all-items (collect-items (sort (all-command-names) #'string<)))
(candidate-items (collect-items candidates))
Expand All @@ -505,7 +501,22 @@
:test #'equal
:key #'lem/completion-mode:completion-item-label
:from-end t)))
(filter-items items))))
;; completion-items: Return issue
;; (filter-items items)

;; only strings: ok, but not taking on "proj ws" grrr
;; (completion string
;; (subseq ;; TODO vince ONGOING
;; (remove-duplicates
;; (append candidates (all-command-names))
;; :from-end t :test #'equal)
;; 0 200)
;; )

;; same as above but with completion-item objects: display issue after Return.
(completion string items :key #'lem/completion-mode:completion-item-label)

)))

(setf *prompt-file-completion-function* 'prompt-file-completion)
(setf *prompt-buffer-completion-function* 'prompt-buffer-completion)
Expand Down
206 changes: 206 additions & 0 deletions src/fuzzy-match.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
;;; Imported fuzzy-match.

(defpackage :lem/fuzzy-match
(:use :cl)
(:export :fuzzy-match
:*threshold*)
(:documentation "fuzzy-match a list of strings or a list of objects from an input string. Ignore dashes and spaces, give priority to objects starting with the search string."))

(in-package :lem/fuzzy-match)

(defvar *threshold* 0.1
;; devel: Learn good value.
"The score threshold below which candidates are discarded.")

(defparameter *debug* nil
"If t, print scores.")

;; yep we might use structures or objects or dicts, but so be it.
(defun item (plist)
(getf plist :item))

(defun input (plist)
(getf plist :input))

(defun score (plist)
(getf plist :score))

(defun substring-norm (substrings string &key (substring-length 2))
"Return the norm of SUBSTRINGS with regard to STRING.
The norm is closer to 1 if
- substrings start near the beginning of STRING;
- substrings length are closer to the length of STRING.

Only substrings of SUBSTRING-LENGTH characters or more are considered."
;; TODO: Remove duplicates in SUBSTRINGS? Repeats could mean we insist more on it.
(let ((position-factor 1.0)
(length-factor 1.0)
(long-substrings (remove-if (lambda (s) (> substring-length (length s)))
substrings)))
(if long-substrings
(/ (apply #'+
(mapcar (lambda (s)
(let ((position (search s string)))
(if (not position)
0
(/ (+
(* position-factor
(/ 1
;; We use the sqrt to slow down the
;; decrease rate, we want the a
;; position of 10-15 still be >0.1.
(sqrt (1+ position))))
(* length-factor
(/ (min (length s) (length string))
(length string))))
(+ position-factor length-factor)))))
long-substrings))
(length long-substrings))
0)))

(defun to-unicode (input)
"Convert INPUT to (simple-array character) type."
(if (typep input 'base-string)
(coerce input `(simple-array character (,(length input))))
input))

;; IDEA: Make score functions customizable, e.g. for global history.

(defun score-candidate (input candidate)
"Return a CANDIDATE's score for INPUT.
A higher score means the candidate comes first."
;; The Jaccard metric seems to provide much better results than, say,
;; Damerau-Levensthein but it's much slower.
(assert (stringp candidate))
(+ (* 1.0 (mk-string-metrics:norm-damerau-levenshtein candidate input))
(* 1.0 (substring-norm (str:split " " input) candidate))))

(defun score-sort-candidates (input pairs)
"Score and sort PAIRS, the pair closest to INPUT in the levenshtein distance comes first.
PAIRS is a list of (search-string real-item)."
;; WARNING: mk-string-metrics works on low-level arrays and might not get
;; the text encoding right. We need to make sure the candidates and the
;; input are of the same encoding.
(let ((input (to-unicode input))
(candidates (mapcar (lambda (elt)
(list :input (to-unicode (input elt))
:item (item elt)))
pairs)))
(flet ((score-pair (pair)
(list :score (score-candidate input (input pair))
:input (input pair)
:item (item pair)))
(sort-candidate (triplet1 triplet2)
(> (getf triplet1 :score)
(getf triplet2 :score))))
(stable-sort (mapcar #'score-pair candidates)
#'sort-candidate))))

;; devel: add type declarations.
(defun find-exactly-matching-substrings (input candidates &key (substring-length 2))
"Return the list of input substrings that match at least one candidate.
The substrings must be SUBSTRING-LENGTH characters long or more."
(assert (stringp input))
(assert (stringp (first candidates)))
(let ((input-strings (delete-if (lambda (s) (< (length s) substring-length))
(str:split " " input :omit-nulls t))))
(when input-strings
(delete-duplicates
(loop for candidate in candidates
append (remove-if
(lambda (i)
(not (search i candidate)))
input-strings))
:test #'string=))))

(defun keep-exact-matches-in-candidates (input pairs)
"Filter out non-exact matches from candidates.
If any input substring (split by whitespace) matches exactly (but not necessarily a whole word),
then all candidates that are not exactly matched by at least one substring are removed."
(let ((exactly-matching-substrings (find-exactly-matching-substrings
input
(mapcar #'input pairs))))
(if exactly-matching-substrings
(remove-if (lambda (item)
(not (loop for i in exactly-matching-substrings
always (search i (input item)))))
pairs)
pairs)))

(declaim (inline filter-by-threshold))
(defun filter-by-threshold (items &key (threshold *threshold*))
"Keep items (plist with :string and :score keys) only if score is >= than THRESHOLD.
If THRESHOLD isn't a number, return all items."
(cond
((numberp threshold)
(loop :for item :in items
:if (>= (score item) threshold)
:collect item))
(t
items)))

(defun fuzzy-match (input candidates &key (key #'identity) (threshold *threshold*))
"From the user input and a list of candidates, return a filtered list of
candidates that have all the input words in them, and sort this list to have the
'most relevant' first.

KEY is a function to get the candidates' string representation. It will be funcall'ed.

THRESHOLD, a float between 0 and 1, is the minimal score for a matching result. If a match scores below it, it is discarded.

The match is case-sensitive if INPUT contains at least one uppercase character."
;; To sort by the display value, we store all the candidates in a
;; (display-value real-value) list or pairs.
(unless key
;; when callers use :key nil
(setf key #'identity))
(let ((pairs (mapcar (lambda (elt)
(list :input (funcall key elt)
:item elt))
candidates)))

;; slight cleanup.
(setf input (str:replace-all " " " " input)) ;; unbreakable whitespace

;; (setf input (str:replace-all " " "-" input))
;; (setf input (ppcre:regex-replace-all "-+" input "-" ))


;; Prefer to work with all downcased candidate strings,
;; except if the input contains one uppercase character.
;; DEVEL: have a parameter?
(when (str:downcasep input)
(setf pairs
(mapcar (lambda (elt)
(setf (getf elt :input)
(string-downcase (getf elt :input)))
elt)
pairs)))

(if (str:emptyp input)
candidates
(let* ((pairs (keep-exact-matches-in-candidates input pairs))
;; score and sort
(score-items (score-sort-candidates input pairs))
;; filter low quality results
;; devel: have adaptive threshold on the result length?
(score-items (filter-by-threshold score-items :threshold threshold)))
(when *debug*
(print (subseq score-items 0 10)))
(mapcar #'item score-items)))))

#+(or)
;; test with objects, other than list of strings.
(progn
(defstruct candidate
(string)
(stuff))

(defparameter *objects*
(list (make-candidate :string "project-switch")
(make-candidate :string "bananas")))

(equalp (first *objects*)
(first
(fuzzy-match "proj ws" *objects* :key #'candidate-string)))
)
Loading