Skip to content

Commit fa7c5c8

Browse files
mpilgremMikolaj
authored and committed
Fix haskell#9507 Describe accurately acceptable package names
Also extinguishes false positive in typo detection. Also aims to make corresponding changes to buildinfo-reference-generator, including the `Cabal-described` package. Adds `alphanumNotDigit` and now specifies `alphanum` by inserting digits into `alphanumNotDigit`. The description of `alpha-num-not-digit` is added to the introduction to the section on 'Non-terminals' to accommodate the limitation of buildinfo-reference-generator that only a single 'formula' can follow a single paragraph of narrative for each non-terminal production.
1 parent e094726 commit fa7c5c8

File tree

9 files changed

+67
-30
lines changed

9 files changed

+67
-30
lines changed

.typos.toml

+5
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
[default]
22
extend-ignore-re = ["(?s)(#|//)\\s*spellchecker:off.*?\\n\\s*(#|//)\\s*spellchecker:on"]
3+
4+
[default.extend-words]
5+
# Extinguish false positive in cabal-package-description-file.rst. 'Nd' is a
6+
# Unicode category, not a misspelling of 'And'.
7+
nd = "nd"

Cabal-described/src/Distribution/Described.hs

+5-2
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,8 @@ reUnqualComponent = RENamed "unqual-name" $
168168
-- currently the parser accepts "csAlphaNum `difference` "0123456789"
169169
-- which is larger set than CS.alpha
170170
--
171-
-- Hackage rejects non ANSI names, so it's not so relevant.
172-
<> RECharSet CS.alpha
171+
-- Hackage, however, rejects non-ASCII names.
172+
<> RECharSet csAlphaNumNotDigit
173173
<> REMunch reEps (RECharSet csAlphaNum)
174174

175175
reDot :: GrammarRegex a
@@ -194,6 +194,9 @@ csAlpha = CS.alpha
194194
csAlphaNum :: CS.CharSet
195195
csAlphaNum = CS.alphanum
196196

197+
csAlphaNumNotDigit :: CS.CharSet
198+
csAlphaNumNotDigit = CS.alphanumNotDigit
199+
197200
csUpper :: CS.CharSet
198201
csUpper = CS.upper
199202

Cabal-described/src/Distribution/Utils/CharSet.hs

+10-3
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,17 @@ module Distribution.Utils.CharSet (
2727
-- * Special lists
2828
alpha,
2929
alphanum,
30+
alphanumNotDigit,
3031
upper,
3132
) where
3233

33-
import Data.Char (chr, isAlpha, isAlphaNum, isUpper, ord)
34+
import Data.Char (chr, isAlpha, isAlphaNum, isDigit, isUpper, ord)
3435
import Data.List (foldl', sortBy)
3536
import Data.Monoid (Monoid (..))
3637
import Data.String (IsString (..))
3738
import Distribution.Compat.Semigroup (Semigroup (..))
3839
import Prelude
39-
(Bool (..), Bounded (..), Char, Enum (..), Eq (..), Int, Maybe (..), Num (..), Ord (..), Show (..), String, concatMap, flip, fst, otherwise, showParen,
40+
(Bool (..), Bounded (..), Char, Enum (..), Eq (..), Int, Maybe (..), Num (..), Ord (..), Show (..), String, (&&), concatMap, flip, fst, not, otherwise, showParen,
4041
showString, uncurry, ($), (.))
4142

4243
#if MIN_VERSION_containers(0,5,0)
@@ -229,10 +230,16 @@ alpha :: CharSet
229230
alpha = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlpha c ]
230231
{-# NOINLINE alpha #-}
231232

233+
-- | Note: this set varies depending on @base@ version.
234+
--
235+
alphanumNotDigit :: CharSet
236+
alphanumNotDigit = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlphaNum c && not (isDigit c) ]
237+
{-# NOINLINE alphanumNotDigit #-}
238+
232239
-- | Note: this set varies depending on @base@ version.
233240
--
234241
alphanum :: CharSet
235-
alphanum = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlphaNum c ]
242+
alphanum = foldl' (flip insert) alphanumNotDigit ['0' .. '9' ]
236243
{-# NOINLINE alphanum #-}
237244

238245
-- | Note: this set varies depending on @base@ version.

Cabal-described/src/Distribution/Utils/GrammarRegex.hs

+4-3
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,10 @@ mathtt d = "\\mathtt{" <<>> d <<>> "}"
194194

195195
charsetDoc :: CS.CharSet -> PP.Doc
196196
charsetDoc acs
197-
| acs == CS.alpha = terminalDoc "alpha"
198-
| acs == CS.alphanum = terminalDoc "alpha-num"
199-
| acs == CS.upper = terminalDoc "upper"
197+
| acs == CS.alpha = terminalDoc "alpha"
198+
| acs == CS.alphanum = terminalDoc "alpha-num"
199+
| acs == CS.alphanumNotDigit = terminalDoc "alpha-num-not-digit"
200+
| acs == CS.upper = terminalDoc "upper"
200201
charsetDoc acs = case CS.toIntervalList acs of
201202
[] -> "\\emptyset"
202203
[(x,y)] | x == y -> inquotes $ mathtt $ charDoc x

buildinfo-reference-generator/src/Main.hs

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ main = do
5252
"String as in Haskell; it's recommended to avoid using Haskell-specific escapes."
5353
, zproduction "unqual-name" reUnqualComponent $ unwords
5454
[ "Unqualified component names are used for package names, component names etc. but not flag names."
55-
, "Unqualified component name consist of components separated by dash, each component is non-empty alphanumeric string, with at least one alphabetic character."
56-
, "In other words, component may not look like a number."
55+
, "An unqualified component name consists of components separated by a hyphen, each component is a non-empty alphanumeric string, with at least one character that is not the digits ``0`` to ``9``."
56+
, "In other words, a component may not look like a number."
5757
]
5858

5959
, zproduction "module-name" (describe (Proxy :: Proxy ModuleName))

buildinfo-reference-generator/template.zinza

+7-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Field syntax is described as they are in the latest cabal file format version.
1212

1313
.. math::
1414

15-
\mathord{"}\mathtt{example}\mathord{"}
15+
\mathord{``}\mathtt{example}\mathord{"}
1616

1717
* non-terminals are type set in italic:
1818

@@ -25,13 +25,13 @@ Field syntax is described as they are in the latest cabal file format version.
2525

2626
.. math::
2727

28-
[ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ]
28+
[ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]
2929

3030
Character set complements have :math:`c` superscript:
3131

3232
.. math::
3333

34-
[ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ]^c
34+
[ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c
3535

3636
* repetition is type set using regular expression inspired notation.
3737
Superscripts tell how many times to repeat:
@@ -125,7 +125,10 @@ Optional comma separated
125125
Non-terminals
126126
-------------
127127

128-
In the syntax definitions below the following non-terminal symbols are used:
128+
In the syntax definitions below the following non-terminal symbols are used. In addition:
129+
130+
.. math::
131+
{\mathop{\mathit{alpha\text{-}num\text{-}not\text{-}digit}}} = {\mathop{\mathit{alpha\text{-}num}}}\cap{[ \mathord{``}\mathtt{0}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c}
129132

130133
{% for production in productions %}
131134
{{ production.name }}

doc/buildinfo-fields-reference.rst

+9-6
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Field syntax is described as they are in the latest cabal file format version.
1212

1313
.. math::
1414
15-
\mathord{"}\mathtt{example}\mathord{"}
15+
\mathord{``}\mathtt{example}\mathord{"}
1616
1717
* non-terminals are type set in italic:
1818

@@ -25,13 +25,13 @@ Field syntax is described as they are in the latest cabal file format version.
2525

2626
.. math::
2727
28-
[ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ]
28+
[ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]
2929
3030
Character set complements have :math:`c` superscript:
3131

3232
.. math::
3333
34-
[ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ]^c
34+
[ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c
3535
3636
* repetition is type set using regular expression inspired notation.
3737
Superscripts tell how many times to repeat:
@@ -125,7 +125,10 @@ Optional comma separated
125125
Non-terminals
126126
-------------
127127

128-
In the syntax definitions below the following non-terminal symbols are used:
128+
In the syntax definitions below the following non-terminal symbols are used. In addition:
129+
130+
.. math::
131+
{\mathop{\mathit{alpha\text{-}num\text{-}not\text{-}digit}}} = {\mathop{\mathit{alpha\text{-}num}}}\cap{[ \mathord{``}\mathtt{0}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c}
129132
130133
hs-string
131134
String as in Haskell; it's recommended to avoid using Haskell-specific escapes.
@@ -134,10 +137,10 @@ hs-string
134137
\mathop{\mathord{``}\mathtt{\text{"}}\mathord{"}}{\left\{ {[\mathop{\mathord{``}\mathtt{\text{"}}\mathord{"}}\mathop{\mathord{``}\mathtt{\text{\\}}\mathord{"}}]^c}\mid\left\{ \begin{gathered}\mathop{\mathord{``}\mathtt{\text{\\}\text{&}}\mathord{"}}\\\mathop{\mathord{``}\mathtt{\text{\\}\text{\\}}\mathord{"}}\\\left\{ \mathop{\mathord{``}\mathtt{\text{\\}n}\mathord{"}}\mid\mathop{\mathit{escapes}} \right\}\\\mathop{\mathord{``}\mathtt{\text{\\}}\mathord{"}}[\mathop{\mathord{``}\mathtt{0}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{9}\mathord{"}}]\\\mathop{\mathord{``}\mathtt{\text{\\}o}\mathord{"}}[\mathop{\mathord{``}\mathtt{0}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{7}\mathord{"}}]\\\mathop{\mathord{``}\mathtt{\text{\\}x}\mathord{"}}[\mathop{\mathord{``}\mathtt{0}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{9}\mathord{"}}\mathop{\mathord{``}\mathtt{A}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{F}\mathord{"}}\mathop{\mathord{``}\mathtt{a}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{f}\mathord{"}}]\\\left\{ \mathop{\mathord{``}\mathtt{\text{\\}\text{^}\text{@}}\mathord{"}}\mid\mathop{\mathit{control}} \right\}\\\left\{ \mathop{\mathord{``}\mathtt{\text{\\}NUL}\mathord{"}}\mid\mathop{\mathit{ascii}} \right\}\end{gathered} \right\} \right\}}^\ast_{}\mathop{\mathord{``}\mathtt{\text{"}}\mathord{"}}
135138
136139
unqual-name
137-
Unqualified component names are used for package names, component names etc. but not flag names. Unqualified component name consist of components separated by dash, each component is non-empty alphanumeric string, with at least one alphabetic character. In other words, component may not look like a number.
140+
Unqualified component names are used for package names, component names etc. but not flag names. An unqualified component name consists of components separated by a hyphen, each component is a non-empty alphanumeric string, with at least one character that is not the digits ``0`` to ``9``. In other words, a component may not look like a number.
138141

139142
.. math::
140-
{\left({\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\mathop{\mathit{alpha}}{\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\right)}^+_{\mathop{\mathord{``}\mathtt{\text{-}}\mathord{"}}}
143+
{\left({\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\mathop{\mathit{alpha\text{-}num\text{-}not\text{-}digit}}{\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\right)}^+_{\mathop{\mathord{``}\mathtt{\text{-}}\mathord{"}}}
141144
142145
module-name
143146
Haskell module name as recognized by Cabal parser.

doc/cabal-package-description-file.rst

+24-9
Original file line numberDiff line numberDiff line change
@@ -313,24 +313,39 @@ describe the package as a whole:
313313
tools require the package-name specified for this field to match
314314
the package description's file-name :file:`{package-name}.cabal`.
315315

316-
Package names are case-sensitive and must match the regular expression
317-
(i.e. alphanumeric "words" separated by dashes; each alphanumeric
318-
word must contain at least one letter):
319-
``[[:digit:]]*[[:alpha:]][[:alnum:]]*(-[[:digit:]]*[[:alpha:]][[:alnum:]]*)*``.
316+
A valid package name comprises an alphanumeric 'word'; or two or more
317+
such words separated by a hyphen character (``-``). A word cannot be
318+
composed only of the digits ``0`` to ``9``.
320319

321-
Or, expressed in ABNF_:
320+
An alphanumeric character belongs to one of the Unicode Letter categories
321+
(Lu (uppercase), Ll (lowercase), Lt (titlecase), Lm (modifier), or
322+
Lo (other)) or Number categories (Nd (decimal), Nl (letter), or No (other)).
323+
324+
Package names are case-sensitive.
325+
326+
Expressed as a regular expression:
327+
328+
``[0-9]*[\p{L}\p{N}-[0-9]][\p{L}\p{N}]*(-[0-9]*[\p{L}\p{N}-[0-9]][\p{L}\p{N}]*)*``
329+
330+
Expressed in ABNF_:
322331

323332
.. code-block:: abnf
324333
325334
package-name = package-name-part *("-" package-name-part)
326-
package-name-part = *DIGIT UALPHA *UALNUM
335+
package-name-part = *DIGIT UALPHANUM-NOT-DIGIT *UALNUM
336+
337+
DIGIT = %x30-39 ; 0-9
327338
328-
UALNUM = UALPHA / DIGIT
329-
UALPHA = ... ; set of alphabetic Unicode code-points
339+
UALNUM = UALPHANUM-NOT-DIGIT / DIGIT
340+
UALPHANUM-NOT-DIGIT = ... ; set of Unicode code-points in Letter or
341+
; Number categories, other than the DIGIT
342+
; code-points
330343
331344
.. note::
332345

333-
Hackage restricts package names to the ASCII subset.
346+
Hackage will not accept package names that use alphanumeric characters
347+
other than ``A`` to ``Z``, ``a`` to ``z``, and ``0`` to ``9``
348+
(the ASCII subset).
334349

335350
.. pkg-field:: version: numbers (required)
336351

doc/package-concepts.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Package names and versions
4343
All packages have a name, e.g. "HUnit". Package names are assumed to be
4444
unique. Cabal package names may contain letters, numbers and hyphens,
4545
but not spaces and may also not contain a hyphenated section consisting of
46-
only numbers. The namespace for Cabal packages is flat, not
46+
only the digits ``0`` to ``9``. The namespace for Cabal packages is flat, not
4747
hierarchical.
4848

4949
Packages also have a version, e.g. "1.1". This matches the typical way in

0 commit comments

Comments
 (0)