diff --git a/.golangci.yml b/.golangci.yml index 1337d6b..0c1a71e 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -2,3 +2,9 @@ version: "2" linters: disable: - unused + settings: + ireturn: + allow: + - error + - stdlib + - github.com/git-pkgs/spdx.Expression diff --git a/category.go b/category.go index 7141523..d3e2870 100644 --- a/category.go +++ b/category.go @@ -52,7 +52,9 @@ func initCategoryMap() { return } - categoryMap = make(map[string]Category, len(licenseData)*2) + // Each entry may have a primary SPDX key, alternative keys, and a license_key. + estimatedKeys := 3 + categoryMap = make(map[string]Category, len(licenseData)*estimatedKeys) for _, entry := range licenseData { cat := Category(entry.Category) if cat == "" { diff --git a/normalize.go b/normalize.go index 002a79d..f8d03e4 100644 --- a/normalize.go +++ b/normalize.go @@ -160,9 +160,9 @@ type transform func(string) string var transforms = []transform{ // Uppercase - func(s string) string { return strings.ToUpper(s) }, + strings.ToUpper, // Trim whitespace - func(s string) string { return strings.TrimSpace(s) }, + strings.TrimSpace, // Remove dots (M.I.T. -> MIT) func(s string) string { return strings.ReplaceAll(s, ".", "") }, // Remove all whitespace (Apache- 2.0 -> Apache-2.0) @@ -246,7 +246,7 @@ var transforms = []transform{ if result != s && !strings.HasPrefix(result, "CC-") { result = "CC-" + result if !reCCVersion.MatchString(result) { - result = result + "-4.0" + result += "-4.0" } } return result diff --git a/parse.go b/parse.go index 2e36564..060c23d 100644 --- a/parse.go +++ b/parse.go @@ -160,6 +160,10 @@ const ( tokenOpenParen tokenCloseParen tokenEOF + + opAND = "AND" + opOR = "OR" + opWITH = "WITH" ) type token struct { @@ -222,12 +226,12 @@ func (l *lexer) next() (token, error) { upper := strings.ToUpper(word) switch upper { - case "AND": - return token{typ: tokenAnd, value: "AND"}, nil - case "OR": - return token{typ: tokenOr, value: "OR"}, nil - case "WITH": - return token{typ: tokenWith, value: "WITH"}, nil + case opAND: + return token{typ: tokenAnd, value: opAND}, nil + case opOR: + return token{typ: tokenOr, value: opOR}, nil + case opWITH: + return token{typ: tokenWith, value: opWITH}, nil } // Check for DocumentRef or LicenseRef diff --git a/parse_lax.go b/parse_lax.go index bc15314..d4cd784 100644 --- a/parse_lax.go +++ b/parse_lax.go @@ -44,7 +44,7 @@ func tokenizeForNormalization(expr string) []tokenForNorm { word := current.String() upper := strings.ToUpper(word) switch upper { - case "AND", "OR", "WITH": + case opAND, opOR, opWITH: tokens = append(tokens, tokenForNorm{value: upper, isOp: true}) default: tokens = append(tokens, tokenForNorm{value: word}) @@ -76,109 +76,116 @@ func tokenizeForNormalization(expr string) []tokenForNorm { return tokens } -// normalizeTokens processes tokens and normalizes informal license names. -func normalizeTokens(tokens []tokenForNorm) (string, error) { - var result strings.Builder - var licenseWords []string - expectException := false // true if we just saw WITH - - flushLicense := func() error { - if len(licenseWords) == 0 { - return nil - } +// tokenNormalizer holds state for normalizing a stream of tokens. +type tokenNormalizer struct { + result strings.Builder + licenseWords []string + expectException bool +} - normalized, err := normalizeLicenseWords(licenseWords) - if err != nil { - return err - } +func (n *tokenNormalizer) flushPending() error { + if n.expectException { + return n.flushException() + } + return n.flushLicense() +} - if result.Len() > 0 && !strings.HasSuffix(result.String(), "(") { - result.WriteString(" ") - } - result.WriteString(normalized) - licenseWords = nil +func (n *tokenNormalizer) flushLicense() error { + if len(n.licenseWords) == 0 { return nil } - flushException := func() error { - if len(licenseWords) == 0 { - return nil - } + normalized, err := normalizeLicenseWords(n.licenseWords) + if err != nil { + return err + } - // Exception should be a single valid exception ID - exc := strings.Join(licenseWords, "-") - if lookupException(exc) == "" { - // Try the original form - exc = strings.Join(licenseWords, " ") - if lookupException(exc) == "" { - return &LicenseError{License: exc, Err: ErrInvalidException} - } - } + if n.result.Len() > 0 && !strings.HasSuffix(n.result.String(), "(") { + n.result.WriteString(" ") + } + n.result.WriteString(normalized) + n.licenseWords = nil + return nil +} - result.WriteString(" ") - result.WriteString(lookupException(exc)) - licenseWords = nil +func (n *tokenNormalizer) flushException() error { + if len(n.licenseWords) == 0 { return nil } - for _, tok := range tokens { - if tok.isOp { - if expectException { - if err := flushException(); err != nil { - return "", err - } - expectException = false - } else { - if err := flushLicense(); err != nil { - return "", err - } - } - result.WriteString(" ") - result.WriteString(tok.value) - if tok.value == "WITH" { - expectException = true - } - } else if tok.isParen { - if expectException { - if err := flushException(); err != nil { - return "", err - } - expectException = false - } else { - if err := flushLicense(); err != nil { - return "", err - } - } - if tok.value == "(" { - if result.Len() > 0 && !strings.HasSuffix(result.String(), "(") && !strings.HasSuffix(result.String(), " ") { - result.WriteString(" ") - } - result.WriteString("(") - } else { - result.WriteString(")") - } - } else if tok.isPlus { - // Plus attaches to previous license word - if len(licenseWords) > 0 { - licenseWords[len(licenseWords)-1] += "+" - } - } else { - // License word (or exception word if expectException) - licenseWords = append(licenseWords, tok.value) + // Exception should be a single valid exception ID + exc := strings.Join(n.licenseWords, "-") + if lookupException(exc) == "" { + // Try the original form + exc = strings.Join(n.licenseWords, " ") + if lookupException(exc) == "" { + return &LicenseError{License: exc, Err: ErrInvalidException} } } - if expectException { - if err := flushException(); err != nil { - return "", err + n.result.WriteString(" ") + n.result.WriteString(lookupException(exc)) + n.licenseWords = nil + return nil +} + +func (n *tokenNormalizer) handleOp(tok tokenForNorm) error { + if err := n.flushPending(); err != nil { + return err + } + n.expectException = false + n.result.WriteString(" ") + n.result.WriteString(tok.value) + if tok.value == opWITH { + n.expectException = true + } + return nil +} + +func (n *tokenNormalizer) handleParen(tok tokenForNorm) error { + if err := n.flushPending(); err != nil { + return err + } + n.expectException = false + if tok.value == "(" { + if n.result.Len() > 0 && !strings.HasSuffix(n.result.String(), "(") && !strings.HasSuffix(n.result.String(), " ") { + n.result.WriteString(" ") } + n.result.WriteString("(") } else { - if err := flushLicense(); err != nil { + n.result.WriteString(")") + } + return nil +} + +// normalizeTokens processes tokens and normalizes informal license names. +func normalizeTokens(tokens []tokenForNorm) (string, error) { + n := &tokenNormalizer{} + + for _, tok := range tokens { + var err error + switch { + case tok.isOp: + err = n.handleOp(tok) + case tok.isParen: + err = n.handleParen(tok) + case tok.isPlus: + if len(n.licenseWords) > 0 { + n.licenseWords[len(n.licenseWords)-1] += "+" + } + default: + n.licenseWords = append(n.licenseWords, tok.value) + } + if err != nil { return "", err } } - return strings.TrimSpace(result.String()), nil + if err := n.flushPending(); err != nil { + return "", err + } + + return strings.TrimSpace(n.result.String()), nil } // normalizeLicenseWords takes a slice of words that should form a license name diff --git a/spdx_correct_test.go b/spdx_correct_test.go index 8b79d89..6cfcfdc 100644 --- a/spdx_correct_test.go +++ b/spdx_correct_test.go @@ -5,7 +5,7 @@ import "testing" // TestSPDXCorrectCompatibility tests our implementation against spdx-correct.js test cases. // spdx-correct.js is maintained by a license lawyer and is the de facto standard. // Source: https://github.com/jslicense/spdx-correct.js/blob/main/test.js -func TestSPDXCorrectCompatibility(t *testing.T) { +func TestSPDXCorrectCompatibility(t *testing.T) { //nolint:maintidx // large table-driven test by design // These are the test cases from spdx-correct.js with upgrade: true (default) cases := map[string]string{ // BSD variants