diff --git a/lem.asd b/lem.asd index 09702b818..f90c7e3d0 100644 --- a/lem.asd +++ b/lem.asd @@ -30,6 +30,7 @@ "log4cl" "split-sequence" "str" + "mk-string-metrics" ;; fuzzy-match algorithm. "dexador" "cl-mustache" ;; "lem-encodings" @@ -129,6 +130,7 @@ (:file "input") (:file "overlay") (:file "streams") + (:file "fuzzy-match") (:file "completion") (:file "typeout") (:file "cursors") diff --git a/src/completion.lisp b/src/completion.lisp index 1be0f0504..1ad7bb838 100644 --- a/src/completion.lisp +++ b/src/completion.lisp @@ -28,28 +28,37 @@ :while pos)))) (defun completion (name elements &key (test #'search) separator key rank) - "Perform completion on ELEMENTS matching NAME. Returns matching elements, + "Perform completion on ELEMENTS matching NAME. Returns matching elements, optionally sorted by RANK function." - (labels ((apply-key (elt) (if key (funcall key elt) elt)) - (test-with-separator (elt) - (let* ((elt (apply-key elt)) - (parts1 (explode-string name separator)) - (parts2 (explode-string elt separator))) - (and (<= (length parts1) (length parts2)) - (loop :for p1 :in parts1 - :for p2 :in parts2 - :always (funcall test p1 p2))))) - (test-without-separator (elt) - (funcall test name (apply-key elt)))) - (let ((filtered-elements - (remove-if-not (if separator - #'test-with-separator - #'test-without-separator) - elements))) - (if rank - (sort filtered-elements #'< :key (lambda (elt) (funcall rank name (apply-key elt)))) - filtered-elements)))) + (declare (ignorable rank key test separator)) + (assert (stringp name)) + (assert (listp elements)) + ;; (assert (stringp (first elements))) + ;; (labels ((apply-key (elt) (if key (funcall key elt) elt)) + ;; (test-with-separator (elt) + ;; (let* ((elt (apply-key elt)) + ;; (parts1 (explode-string name separator)) + ;; (parts2 (explode-string elt separator))) + ;; (and (<= (length parts1) (length parts2)) + ;; (loop :for p1 :in parts1 + ;; :for p2 :in parts2 + ;; :always (funcall test p1 p2))))) + ;; (test-without-separator (elt) + ;; (funcall test name (apply-key elt)))) + ;; (let ((filtered-elements + ;; (remove-if-not (if separator + ;; #'test-with-separator + ;; #'test-without-separator) + ;; elements))) + ;; (if rank + ;; (sort filtered-elements #'< :key (lambda (elt) (funcall rank name (apply-key elt)))) + ;; filtered-elements))) + + ;; devel + (lem/fuzzy-match:fuzzy-match name elements :key key :threshold 0.1) + + ) (defun string-completion-rank (name elt) (cond ; Exact match diff --git a/src/ext/prompt-window.lisp b/src/ext/prompt-window.lisp index 7e4ed6f41..1f20bf187 100644 --- a/src/ext/prompt-window.lisp +++ b/src/ext/prompt-window.lisp @@ -490,13 +490,9 @@ command)))) (filter-items (items) - (if (find #\- string) - (completion-hyphen string - items - :key #'lem/completion-mode:completion-item-label) - (completion string + (completion string items - :key #'lem/completion-mode:completion-item-label)))) + :key #'lem/completion-mode:completion-item-label))) (let* ((all-items (collect-items (sort (all-command-names) #'string<))) (candidate-items (collect-items candidates)) @@ -505,7 +501,22 @@ :test #'equal :key #'lem/completion-mode:completion-item-label :from-end t))) - (filter-items items)))) + ;; completion-items: Return issue + ;; (filter-items items) + + ;; only strings: ok, but not taking on "proj ws" grrr + ;; (completion string + ;; (subseq ;; TODO vince ONGOING + ;; (remove-duplicates + ;; (append candidates (all-command-names)) + ;; :from-end t :test #'equal) + ;; 0 200) + ;; ) + + ;; same as above but with completion-item objects: display issue after Return. + (completion string items :key #'lem/completion-mode:completion-item-label) + + ))) (setf *prompt-file-completion-function* 'prompt-file-completion) (setf *prompt-buffer-completion-function* 'prompt-buffer-completion) diff --git a/src/fuzzy-match.lisp b/src/fuzzy-match.lisp new file mode 100644 index 000000000..d12adb167 --- /dev/null +++ b/src/fuzzy-match.lisp @@ -0,0 +1,206 @@ +;;; Imported fuzzy-match. + +(defpackage :lem/fuzzy-match + (:use :cl) + (:export :fuzzy-match + :*threshold*) + (:documentation "fuzzy-match a list of strings or a list of objects from an input string. Ignore dashes and spaces, give priority to objects starting with the search string.")) + +(in-package :lem/fuzzy-match) + +(defvar *threshold* 0.1 + ;; devel: Learn good value. + "The score threshold below which candidates are discarded.") + +(defparameter *debug* nil + "If t, print scores.") + +;; yep we might use structures or objects or dicts, but so be it. +(defun item (plist) + (getf plist :item)) + +(defun input (plist) + (getf plist :input)) + +(defun score (plist) + (getf plist :score)) + +(defun substring-norm (substrings string &key (substring-length 2)) + "Return the norm of SUBSTRINGS with regard to STRING. +The norm is closer to 1 if +- substrings start near the beginning of STRING; +- substrings length are closer to the length of STRING. + +Only substrings of SUBSTRING-LENGTH characters or more are considered." + ;; TODO: Remove duplicates in SUBSTRINGS? Repeats could mean we insist more on it. + (let ((position-factor 1.0) + (length-factor 1.0) + (long-substrings (remove-if (lambda (s) (> substring-length (length s))) + substrings))) + (if long-substrings + (/ (apply #'+ + (mapcar (lambda (s) + (let ((position (search s string))) + (if (not position) + 0 + (/ (+ + (* position-factor + (/ 1 + ;; We use the sqrt to slow down the + ;; decrease rate, we want the a + ;; position of 10-15 still be >0.1. + (sqrt (1+ position)))) + (* length-factor + (/ (min (length s) (length string)) + (length string)))) + (+ position-factor length-factor))))) + long-substrings)) + (length long-substrings)) + 0))) + +(defun to-unicode (input) + "Convert INPUT to (simple-array character) type." + (if (typep input 'base-string) + (coerce input `(simple-array character (,(length input)))) + input)) + +;; IDEA: Make score functions customizable, e.g. for global history. + +(defun score-candidate (input candidate) + "Return a CANDIDATE's score for INPUT. +A higher score means the candidate comes first." + ;; The Jaccard metric seems to provide much better results than, say, + ;; Damerau-Levensthein but it's much slower. + (assert (stringp candidate)) + (+ (* 1.0 (mk-string-metrics:norm-damerau-levenshtein candidate input)) + (* 1.0 (substring-norm (str:split " " input) candidate)))) + +(defun score-sort-candidates (input pairs) + "Score and sort PAIRS, the pair closest to INPUT in the levenshtein distance comes first. +PAIRS is a list of (search-string real-item)." + ;; WARNING: mk-string-metrics works on low-level arrays and might not get + ;; the text encoding right. We need to make sure the candidates and the + ;; input are of the same encoding. + (let ((input (to-unicode input)) + (candidates (mapcar (lambda (elt) + (list :input (to-unicode (input elt)) + :item (item elt))) + pairs))) + (flet ((score-pair (pair) + (list :score (score-candidate input (input pair)) + :input (input pair) + :item (item pair))) + (sort-candidate (triplet1 triplet2) + (> (getf triplet1 :score) + (getf triplet2 :score)))) + (stable-sort (mapcar #'score-pair candidates) + #'sort-candidate)))) + +;; devel: add type declarations. +(defun find-exactly-matching-substrings (input candidates &key (substring-length 2)) + "Return the list of input substrings that match at least one candidate. +The substrings must be SUBSTRING-LENGTH characters long or more." + (assert (stringp input)) + (assert (stringp (first candidates))) + (let ((input-strings (delete-if (lambda (s) (< (length s) substring-length)) + (str:split " " input :omit-nulls t)))) + (when input-strings + (delete-duplicates + (loop for candidate in candidates + append (remove-if + (lambda (i) + (not (search i candidate))) + input-strings)) + :test #'string=)))) + +(defun keep-exact-matches-in-candidates (input pairs) + "Filter out non-exact matches from candidates. +If any input substring (split by whitespace) matches exactly (but not necessarily a whole word), +then all candidates that are not exactly matched by at least one substring are removed." + (let ((exactly-matching-substrings (find-exactly-matching-substrings + input + (mapcar #'input pairs)))) + (if exactly-matching-substrings + (remove-if (lambda (item) + (not (loop for i in exactly-matching-substrings + always (search i (input item))))) + pairs) + pairs))) + +(declaim (inline filter-by-threshold)) +(defun filter-by-threshold (items &key (threshold *threshold*)) + "Keep items (plist with :string and :score keys) only if score is >= than THRESHOLD. + If THRESHOLD isn't a number, return all items." + (cond + ((numberp threshold) + (loop :for item :in items + :if (>= (score item) threshold) + :collect item)) + (t + items))) + +(defun fuzzy-match (input candidates &key (key #'identity) (threshold *threshold*)) + "From the user input and a list of candidates, return a filtered list of +candidates that have all the input words in them, and sort this list to have the +'most relevant' first. + +KEY is a function to get the candidates' string representation. It will be funcall'ed. + +THRESHOLD, a float between 0 and 1, is the minimal score for a matching result. If a match scores below it, it is discarded. + +The match is case-sensitive if INPUT contains at least one uppercase character." + ;; To sort by the display value, we store all the candidates in a + ;; (display-value real-value) list or pairs. + (unless key + ;; when callers use :key nil + (setf key #'identity)) + (let ((pairs (mapcar (lambda (elt) + (list :input (funcall key elt) + :item elt)) + candidates))) + + ;; slight cleanup. + (setf input (str:replace-all " " " " input)) ;; unbreakable whitespace + + ;; (setf input (str:replace-all " " "-" input)) + ;; (setf input (ppcre:regex-replace-all "-+" input "-" )) + + + ;; Prefer to work with all downcased candidate strings, + ;; except if the input contains one uppercase character. + ;; DEVEL: have a parameter? + (when (str:downcasep input) + (setf pairs + (mapcar (lambda (elt) + (setf (getf elt :input) + (string-downcase (getf elt :input))) + elt) + pairs))) + + (if (str:emptyp input) + candidates + (let* ((pairs (keep-exact-matches-in-candidates input pairs)) + ;; score and sort + (score-items (score-sort-candidates input pairs)) + ;; filter low quality results + ;; devel: have adaptive threshold on the result length? + (score-items (filter-by-threshold score-items :threshold threshold))) + (when *debug* + (print (subseq score-items 0 10))) + (mapcar #'item score-items))))) + +#+(or) +;; test with objects, other than list of strings. +(progn + (defstruct candidate + (string) + (stuff)) + + (defparameter *objects* + (list (make-candidate :string "project-switch") + (make-candidate :string "bananas"))) + + (equalp (first *objects*) + (first + (fuzzy-match "proj ws" *objects* :key #'candidate-string))) + )