Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions api.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,24 @@ the possible return values of CODE-BLOCK."
(:method (chars)
(canonical-composition (normalization-form-k-d chars))))

(defgeneric identifier-case-fold (c)
(:documentation "Return case folding for a character or list of characters that represent an identifier (NFKC_Casefold).")
(:method ((char character))
(identifier-case-fold (char-code char)))
(:method ((code-point integer))
(let ((rule (gethash code-point *nfkc-casefold-mappings*)))
(if rule (copy-list (first rule)) (list code-point))))
(:method ((chars list))
(normalization-form-c
(loop for c in chars
nconc (identifier-case-fold c)))))

(defun identifier-case-fold-mapping (c &key want-code-point-p)
(let ((mapped (identifier-case-fold c)))
(if want-code-point-p
(mapcar #'code-char mapped)
mapped)))

(defun binary-properties ()
"Returns a sorted list of all binary properties known to CL-UNICODE.
These are the allowed second arguments \(modulo canonicalization) to
Expand Down
9,829 changes: 9,829 additions & 0 deletions build/data/DerivedNormalizationProps.txt

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions build/dump.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ hash-tables.lisp using DUMP-HASH-TABLE."
(dump-hash-table '*jamo-short-names* out)
(dump-hash-table '*property-aliases* out)
(dump-hash-table '*composition-mappings* out)
(dump-hash-table '*nfc-quick-check-mappings* out)
(dump-hash-table '*nfkc-quick-check-mappings* out)
(dump-hash-table '*nfkc-casefold-mappings* out)
(dump-hash-table '*nfd-quick-check-mappings* out)
(dump-hash-table '*nfkd-quick-check-mappings* out)
;; finally add code which adds the computed Hangul syllable names
;; at load time
(print '(add-hangul-names) out)))
Expand Down
57 changes: 47 additions & 10 deletions build/read.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,17 @@ script to the corresponding entries in *CHAR-DATABASE*."
when char-info
do (setf (decomposition-mapping* char-info) mapping)))

(defun add-binary-property (code-point-range property)
(pushnew property *binary-properties* :test #'eq)
(with-code-point-range (code-point code-point-range)
(let ((char-info (aref *char-database* code-point)))
(unless char-info
;; this file actually contains some information for
;; unassigned (but reserved) code points, like e.g. #xfff0
(setf char-info (make-instance 'char-info :code-point code-point)
(aref *char-database* code-point) char-info))
(push property (binary-props* char-info)))))

(defun read-binary-properties ()
"Parses the file \"PropList.txt\" and adds information about binary
properties to the corresponding entries in *CHAR-DATABASE*."
Expand All @@ -211,15 +222,7 @@ properties to the corresponding entries in *CHAR-DATABASE*."
;; point not being mentioned in UnicodeData.txt - see also the
;; initform for GENERAL-CATEGORY in the definition of CHAR-INFO
(unless (eq property '#.(property-symbol "NoncharacterCodePoint"))
(pushnew property *binary-properties* :test #'eq)
(with-code-point-range (code-point code-point-range)
(let ((char-info (aref *char-database* code-point)))
(unless char-info
;; this file actually contains some information for
;; unassigned (but reserved) code points, like e.g. #xfff0
(setf char-info (make-instance 'char-info :code-point code-point)
(aref *char-database* code-point) char-info))
(push property (binary-props* char-info)))))))
(add-binary-property code-point-range property))))

(defun read-derived-age ()
"Parses the file \"DerivedAge.txt\" and adds information about the
Expand All @@ -239,6 +242,39 @@ mirroring glyphs to the corresponding entries in *CHAR-DATABASE*."
(when char-info
(setf (bidi-mirroring-glyph* char-info) mirroring-glyph))))))

(defun read-normalization-props ()
"Parses the file \"DerivedNormalizationProps.txt\" and adds Quick_Check binary properties
to the corresponding entries in *CHAR-DATABASE*."
(dolist
(hash (list *nfd-quick-check-mappings* *nfkd-quick-check-mappings* *nfd-quick-check-mappings*
*nfd-quick-check-mappings* *nfkc-casefold-mappings*))
(clrhash hash))
(with-unicode-file ("DerivedNormalizationProps.txt" contents)
(destructuring-bind (code-range name &rest other) contents
(let ((property (parse-value name 'symbol nil))
(range (parse-code-point code-range)))
(case property
((#.(property-symbol "FullCompositionExclusion")
#.(property-symbol "ChangesWhenNFKCCasefolded"))
(add-binary-property range property))
(#.(property-symbol "NFDQC")
(with-code-point-range (code-point range)
(pushnew (property-symbol (first other)) (gethash code-point *nfd-quick-check-mappings*))))
(#.(property-symbol "NFKDQC")
(with-code-point-range (code-point range)
(pushnew (property-symbol (first other)) (gethash code-point *nfkd-quick-check-mappings*))))
(#.(property-symbol "NFCQC")
(with-code-point-range (code-point range)
(pushnew (property-symbol (first other)) (gethash code-point *nfd-quick-check-mappings*))))
(#.(property-symbol "NFKCQC")
(with-code-point-range (code-point range)
(pushnew (property-symbol (first other)) (gethash code-point *nfd-quick-check-mappings*))))
(#.(property-symbol "NFKCCF")
(with-code-point-range (code-point range)
(pushnew (parse-value (first other) 'hex-list nil) (gethash code-point *nfkc-casefold-mappings*)))))))))



(defun read-jamo ()
"Parses the file \"Jamo.txt\" and stores information about Jamo
short names in the *JAMO-SHORT-NAMES* hash table. This information is
Expand Down Expand Up @@ -346,7 +382,8 @@ source code files for CL-UNICODE."
(read-special-casing)
(read-case-folding-mapping)
(set-default-bidi-classes)
(add-hangul-decomposition))
(add-hangul-decomposition)
(read-normalization-props))

(defun build-name-mappings ()
"Initializes and fills the hash tables which map code points to
Expand Down
2 changes: 1 addition & 1 deletion build/util.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ initialized with NILs."
certain values which might otherwise not be seen when the Unicode
files are parsed."
(clrhash *canonical-names*)
(dolist (name '("Cn" "AL" "R" "L" "Decimal" "Digit" "Numeric" "BidiMirrored" "NoncharacterCodePoint"))
(dolist (name '("Cn" "AL" "R" "L" "Decimal" "Digit" "Numeric" "BidiMirrored" "NoncharacterCodePoint" "M"))
(register-property-symbol name)))

(defun extract-variables (bindings)
Expand Down
4 changes: 2 additions & 2 deletions derived.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@
("Z" "Zs" "Zl" "Zp")
("C" "Cc" "Cf" "Cs" "Co" "Cn")
("Math" "Sm" "OtherMath")
("Alphabetic" "L" "Nl" "OtherAlphabetic")
("Lowercase" "Ll" "OtherLowercase")
("Uppercase" "Lu" "OtherUppercase")
("Alphabetic" "Lowercase" "Uppercase" "Lt" "Lm" "Lo" "Nl" "OtherAlphabetic")
("Cased" "Lowercase" "Uppercase" "Lt")
("CaseIgnorable" "Mn" "Me" "Cf" "Lm" "Sk"
,(lambda (c) (find (word-break c)
Expand All @@ -68,7 +68,7 @@
"MidNumLet")
:test 'equal)))
("GraphemeExtend" "Me" "Mn" "OtherGraphemeExtend")
("GraphemeBase" ("C" "Zl" "Zp" "GraphemeExtend"))
("GraphemeBase" ("Cc" "Cf" "Cs" "Co" "Cn" "Zl" "Zp" "GraphemeExtend"))
("IDStart" "L" "Nl" "OtherIDStart" ("PatternSyntax" "PatternWhiteSpace"))
("IDContinue" "IDStart" "Mn" "Mc" "Nd" "Pc" "OtherIDContinue" ("PatternSyntax" "PatternWhiteSpace"))
("XIDStart" "IDStart" (,@+xid-difference+ #xe33 #xeb3 (#xff9e . #xff9f)))
Expand Down
4 changes: 3 additions & 1 deletion packages.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@
:normalization-form-c
:normalization-form-d
:normalization-form-k-c
:normalization-form-k-d))
:normalization-form-k-d
:identifier-case-fold
:identifier-case-fold-mapping))

(defpackage :cl-unicode-names
(:use))
15 changes: 15 additions & 0 deletions specials.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ corresponding character \(unless all of them are NIL).")
(defvar *compatibility-formatting-tags* nil
"A list of Character Decomposition compatibility formatting tags.")

(defvar *nfc-quick-check-mappings* (make-hash-table)
"A hash table that maps code points to NFC_Quick_Check properties.")

(defvar *nfkc-quick-check-mappings* (make-hash-table)
"A hash table that maps code points to NFKC_Quick_Check properties.")

(defvar *nfd-quick-check-mappings* (make-hash-table)
"A hash table that maps code points to NFD_Quick_Check properties.")

(defvar *nfkd-quick-check-mappings* (make-hash-table)
"A hash table that maps code points to NFKD_Quick_Check properties.")

(defvar *nfkc-casefold-mappings* (make-hash-table)
"A hash table that maps code points to NFKC_Casfold mapping rules.")

(defvar *scripts* nil
"A list of all property symbols which denote scripts.")

Expand Down