|
| 1 | +# spdx |
| 2 | + |
| 3 | +Go library for SPDX license expression parsing, normalization, and validation. |
| 4 | + |
| 5 | +The library normalizes informal license strings found in the real world (like "Apache 2" or "MIT License") to valid SPDX identifiers (like "Apache-2.0" or "MIT"). This is useful when working with package metadata from registries, where license fields often contain non-standard values. |
| 6 | + |
| 7 | +## Installation |
| 8 | + |
| 9 | +```bash |
| 10 | +go get github.com/git-pkgs/spdx |
| 11 | +``` |
| 12 | + |
| 13 | +## Usage |
| 14 | + |
| 15 | +### Normalize informal license strings |
| 16 | + |
| 17 | +```go |
| 18 | +import "github.com/git-pkgs/spdx" |
| 19 | + |
| 20 | +// Normalize converts informal strings to valid SPDX identifiers |
| 21 | +id, err := spdx.Normalize("Apache 2") // "Apache-2.0" |
| 22 | +id, err := spdx.Normalize("MIT License") // "MIT" |
| 23 | +id, err := spdx.Normalize("GPL v3") // "GPL-3.0-or-later" |
| 24 | +id, err := spdx.Normalize("GNU General Public License") // "GPL-3.0-or-later" |
| 25 | +id, err := spdx.Normalize("BSD 3-Clause") // "BSD-3-Clause" |
| 26 | +id, err := spdx.Normalize("CC BY 4.0") // "CC-BY-4.0" |
| 27 | +``` |
| 28 | + |
| 29 | +### Parse and normalize expressions |
| 30 | + |
| 31 | +```go |
| 32 | +// Parse handles both strict SPDX IDs and informal license names |
| 33 | +expr, err := spdx.Parse("MIT OR Apache-2.0") |
| 34 | +fmt.Println(expr.String()) // "MIT OR Apache-2.0" |
| 35 | + |
| 36 | +expr, err := spdx.Parse("Apache 2 OR MIT License") |
| 37 | +fmt.Println(expr.String()) // "Apache-2.0 OR MIT" |
| 38 | + |
| 39 | +expr, err := spdx.Parse("GPL v3 AND BSD 3-Clause") |
| 40 | +fmt.Println(expr.String()) // "GPL-3.0-or-later AND BSD-3-Clause" |
| 41 | + |
| 42 | +// Handles operator precedence (AND binds tighter than OR) |
| 43 | +expr, err := spdx.Parse("MIT OR GPL-2.0-only AND Apache-2.0") |
| 44 | +fmt.Println(expr.String()) // "MIT OR (GPL-2.0-only AND Apache-2.0)" |
| 45 | + |
| 46 | +// ParseStrict requires valid SPDX IDs (no fuzzy normalization) |
| 47 | +expr, err := spdx.ParseStrict("MIT OR Apache-2.0") // succeeds |
| 48 | +expr, err := spdx.ParseStrict("Apache 2 OR MIT") // fails |
| 49 | +``` |
| 50 | + |
| 51 | +### Validate licenses |
| 52 | + |
| 53 | +```go |
| 54 | +// Check if a string is valid SPDX |
| 55 | +spdx.Valid("MIT OR Apache-2.0") // true |
| 56 | +spdx.Valid("FAKEYLICENSE") // false |
| 57 | + |
| 58 | +// Check if a single identifier is valid |
| 59 | +spdx.ValidLicense("MIT") // true |
| 60 | +spdx.ValidLicense("Apache 2") // false (informal, not valid SPDX) |
| 61 | + |
| 62 | +// Validate multiple licenses at once |
| 63 | +valid, invalid := spdx.ValidateLicenses([]string{"MIT", "Apache-2.0", "FAKE"}) |
| 64 | +// valid: false, invalid: ["FAKE"] |
| 65 | +``` |
| 66 | + |
| 67 | +### Check license compatibility |
| 68 | + |
| 69 | +```go |
| 70 | +// Check if allowed licenses satisfy an expression |
| 71 | +satisfied, err := spdx.Satisfies("MIT OR Apache-2.0", []string{"MIT"}) |
| 72 | +// true |
| 73 | + |
| 74 | +satisfied, err := spdx.Satisfies("MIT AND Apache-2.0", []string{"MIT"}) |
| 75 | +// false (both required) |
| 76 | +``` |
| 77 | + |
| 78 | +### Extract licenses from expressions |
| 79 | + |
| 80 | +```go |
| 81 | +licenses, err := spdx.ExtractLicenses("(MIT AND GPL-2.0-only) OR Apache-2.0") |
| 82 | +// ["Apache-2.0", "GPL-2.0-only", "MIT"] |
| 83 | +``` |
| 84 | + |
| 85 | +### Get license categories |
| 86 | + |
| 87 | +Categories are sourced from [scancode-licensedb](https://scancode-licensedb.aboutcode.org/) (OSS licenses only) and updated weekly. |
| 88 | + |
| 89 | +```go |
| 90 | +// Get the category for a license |
| 91 | +cat := spdx.LicenseCategory("MIT") // spdx.CategoryPermissive |
| 92 | +cat := spdx.LicenseCategory("GPL-3.0-only") // spdx.CategoryCopyleft |
| 93 | +cat := spdx.LicenseCategory("MPL-2.0") // spdx.CategoryCopyleftLimited |
| 94 | +cat := spdx.LicenseCategory("Unlicense") // spdx.CategoryPublicDomain |
| 95 | + |
| 96 | +// Check license type |
| 97 | +spdx.IsPermissive("MIT") // true |
| 98 | +spdx.IsPermissive("GPL-3.0") // false |
| 99 | +spdx.IsCopyleft("GPL-3.0-only") // true |
| 100 | +spdx.IsCopyleft("LGPL-2.1") // true (weak copyleft) |
| 101 | + |
| 102 | +// Get categories for an expression |
| 103 | +cats, err := spdx.ExpressionCategories("MIT OR GPL-3.0-only") |
| 104 | +// []Category{CategoryPermissive, CategoryCopyleft} |
| 105 | + |
| 106 | +// Check expressions for copyleft |
| 107 | +spdx.HasCopyleft("MIT OR Apache-2.0") // false |
| 108 | +spdx.HasCopyleft("MIT OR GPL-3.0-only") // true |
| 109 | +spdx.IsFullyPermissive("MIT OR Apache-2.0") // true |
| 110 | +spdx.IsFullyPermissive("MIT OR GPL-3.0") // false |
| 111 | + |
| 112 | +// Get detailed license info |
| 113 | +info := spdx.GetLicenseInfo("MIT") |
| 114 | +// info.Category: CategoryPermissive |
| 115 | +// info.IsException: false |
| 116 | +// info.IsDeprecated: false |
| 117 | +``` |
| 118 | + |
| 119 | +Available categories: |
| 120 | +- `CategoryPermissive` - MIT, Apache-2.0, BSD-* |
| 121 | +- `CategoryCopyleft` - GPL-*, AGPL-* |
| 122 | +- `CategoryCopyleftLimited` - LGPL-*, MPL-*, EPL-* |
| 123 | +- `CategoryPublicDomain` - Unlicense, CC0-1.0 |
| 124 | +- `CategoryCommercial` - Commercial licenses |
| 125 | +- `CategoryProprietaryFree` - Free but proprietary |
| 126 | +- `CategorySourceAvailable` - Source-available licenses |
| 127 | +- `CategoryPatentLicense` - Patent grants |
| 128 | +- `CategoryFreeRestricted` - Free with restrictions |
| 129 | +- `CategoryCLA` - Contributor agreements |
| 130 | +- `CategoryUnstated` - No license stated |
| 131 | + |
| 132 | +## Normalization examples |
| 133 | + |
| 134 | +The library handles many common variations found in package registries: |
| 135 | + |
| 136 | +| Input | Output | |
| 137 | +|-------|--------| |
| 138 | +| Apache 2 | Apache-2.0 | |
| 139 | +| Apache License 2.0 | Apache-2.0 | |
| 140 | +| Apache License, Version 2.0 | Apache-2.0 | |
| 141 | +| MIT License | MIT | |
| 142 | +| M.I.T. | MIT | |
| 143 | +| GPL v3 | GPL-3.0-or-later | |
| 144 | +| GNU General Public License v3 | GPL-3.0-or-later | |
| 145 | +| LGPL 2.1 | LGPL-2.1-only | |
| 146 | +| BSD 3-Clause | BSD-3-Clause | |
| 147 | +| 3-Clause BSD | BSD-3-Clause | |
| 148 | +| Simplified BSD | BSD-2-Clause | |
| 149 | +| MPL 2.0 | MPL-2.0 | |
| 150 | +| Mozilla Public License | MPL-2.0 | |
| 151 | +| CC BY 4.0 | CC-BY-4.0 | |
| 152 | +| Attribution-NonCommercial | CC-BY-NC-4.0 | |
| 153 | +| Unlicense | Unlicense | |
| 154 | +| WTFPL | WTFPL | |
| 155 | + |
| 156 | +## Performance |
| 157 | + |
| 158 | +The library is designed for processing large numbers of licenses. Benchmark results: |
| 159 | + |
| 160 | +``` |
| 161 | +BenchmarkNormalize-8 49116 24381 ns/op (~5µs per license) |
| 162 | +BenchmarkNormalizeBatch-8 372 3271336 ns/op (~3.3µs per license at scale) |
| 163 | +BenchmarkParse-8 236752 5263 ns/op (includes normalization) |
| 164 | +BenchmarkValid-8 789087 1506 ns/op (strict validation) |
| 165 | +``` |
| 166 | + |
| 167 | +## Prior art |
| 168 | + |
| 169 | +This library combines approaches from several existing implementations: |
| 170 | + |
| 171 | +- [librariesio/spdx](https://github.com/librariesio/spdx) (Ruby) - Expression parsing and case normalization |
| 172 | +- [jslicense/spdx-correct.js](https://github.com/jslicense/spdx-correct.js) (JavaScript) - Fuzzy matching transforms and test cases |
| 173 | +- [EmbarkStudios/spdx](https://github.com/EmbarkStudios/spdx) (Rust) - Performance-oriented design |
| 174 | +- [github/go-spdx](https://github.com/github/go-spdx) (Go) - SPDX license list and Satisfies implementation |
| 175 | +- [aboutcode-org/scancode-licensedb](https://github.com/aboutcode-org/scancode-licensedb) - License categories and metadata |
| 176 | + |
| 177 | +## License |
| 178 | + |
| 179 | +MIT |
0 commit comments