From 07cd2adbc10a9f38b122120a51628d7ed3f46ffb Mon Sep 17 00:00:00 2001
From: Peter Rabbitson <ribasushi@protocol.ai>
Date: Thu, 21 May 2020 20:27:53 +0200
Subject: [PATCH 1/3] Base36 byte-encoding specification

Uses the alphabet 0-9a-z case insensitively. The prefix k/K is chosen
to limit future clashes with English words, based on
https://en.wikipedia.org/wiki/Letter_frequency
---
 multibase.csv   |  2 ++
 rfcs/Base36.md  | 40 ++++++++++++++++++++++++++++++++++++++++
 tests/test1.csv |  2 ++
 tests/test2.csv |  2 ++
 tests/test3.csv |  2 ++
 tests/test4.csv |  2 ++
 tests/test5.csv |  2 ++
 tests/test6.csv |  2 ++
 8 files changed, 54 insertions(+)
 create mode 100644 rfcs/Base36.md

diff --git a/multibase.csv b/multibase.csv
index d980df7..5f69d41 100644
--- a/multibase.csv
+++ b/multibase.csv
@@ -14,6 +14,8 @@ base32upper,       B,    rfc4648 no padding,
 base32pad,         c,    rfc4648 with padding,                                     candidate
 base32padupper,    C,    rfc4648 with padding,                                     candidate
 base32z,           h,    z-base-32 (used by Tahoe-LAFS),                           draft
+base36upper,       K,    base36 [0-9a-z] case-insensitive no padding,              default
+base36,            k,    base36 [0-9a-z] case-insensitive no padding,              default
 base58flickr,      Z,    base58 flicker,                                           candidate
 base58btc,         z,    base58 bitcoin,                                           default
 base64,            m,    rfc4648 no padding,                                       default
diff --git a/rfcs/Base36.md b/rfcs/Base36.md
new file mode 100644
index 0000000..cd4ff57
--- /dev/null
+++ b/rfcs/Base36.md
@@ -0,0 +1,40 @@
+# Base36
+
+The multibase base36 prefix is the character `k` or `K`. The digit-alphabet
+consists of 0..9 and then the case-insensitive range a..z for the values 10..35.
+
+## Encoding
+
+A byte array is encoded to base36 by:
+
+1. Counting the number of leading 0 bytes (Z).
+2. Interpreting the rest of the byte array as a big-endian unsigned integer (N).
+3. Concatenating a length Z string of '0' characters with the decimal
+   representation of N.
+
+A byte array is encoded to multibase base36 by prefixing its base36 encoding
+with the character `k` (or `K` for the uppercase `base36upper` variant).
+
+## Decoding
+
+A multibase base36 encoded string is decoded by first dropping the multibase
+prefix (which must be `k` or `K`).
+
+The remaining characters are then converted to a byte array by:
+
+1. Counting the number of leading '0' characters (Z).
+2. Interpreting the rest of the character sequence as a base36 unsigned integer
+   (N).
+3. Concatenating a length Z array of NULL (0x00) bytes with N encoded as a
+   big-endian unsigned integer.
+
+## Examples
+
+Byte Array <-> Base36 Multibase:
+
+| Bytes | ==  | LC Base36 | OR | UC base36 |
+|---|---|---|---|---|
+| `[0x00, 0x01]`       | == | `"k01"`   | | `"K01"`   |
+| `[0x00, 0x00, 0xff]` | == | `"k0073"` | | `"K0073"` |
+| `[0x01, 0x00]`       | == | `"k74"`   | | `"K74"`   |
+| `[0x00, 0x01, 0x00]` | == | `"k074"`  | | `"K074"`  |
diff --git a/tests/test1.csv b/tests/test1.csv
index 44d6003..97e807c 100644
--- a/tests/test1.csv
+++ b/tests/test1.csv
@@ -13,6 +13,8 @@ base32padupper, "CIRSWGZLOORZGC3DJPJSSAZLWMVZHS5DINFXGOIJB"
 base32hexpad, "t8him6pbeehp62r39f9ii0pbmclp7it38d5n6e891"
 base32hexpadupper, "T8HIM6PBEEHP62R39F9II0PBMCLP7IT38D5N6E891"
 base32z, "het1sg3mqqt3gn5djxj11y3msci3817depfzgqejb"
+base36, "k343ixo7d49hqj1ium15pgy1wzww5fxrid21td7l"
+base36upper, "K343IXO7D49HQJ1IUM15PGY1WZWW5FXRID21TD7L"
 base58flickr, "Ztwe7gVTeK8wswS1gf8hrgAua9fcw9reboD"
 base58btc, "zUXE7GvtEk8XTXs1GF8HSGbVA9FCX9SEBPe"
 base64, "mRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchIQ"
diff --git a/tests/test2.csv b/tests/test2.csv
index 97ecccb..3b95ebf 100644
--- a/tests/test2.csv
+++ b/tests/test2.csv
@@ -13,6 +13,8 @@ base32padupper, "CPFSXGIDNMFXGSIBB"
 base32hexpad, "tf5in683dc5n6i811"
 base32hexpadupper, "TF5IN683DC5N6I811"
 base32z, "hxf1zgedpcfzg1ebb"
+base36, "k2lcpzo5yikidynfl"
+base36upper, "K2LCPZO5YIKIDYNFL"
 base58flickr, "Z7Pznk19XTTzBtx"
 base58btc, "z7paNL19xttacUY"
 base64, "meWVzIG1hbmkgIQ"
diff --git a/tests/test3.csv b/tests/test3.csv
index 4bfbc5e..8ddea2b 100644
--- a/tests/test3.csv
+++ b/tests/test3.csv
@@ -13,6 +13,8 @@ base32padupper, "CNBSWY3DPEB3W64TMMQ======"
 base32hexpad, "td1imor3f41rmusjccg======"
 base32hexpadupper, "TD1IMOR3F41RMUSJCCG======"
 base32z, "hpb1sa5dxrb5s6hucco"
+base36, "kfuvrsivvnfrbjwajo"
+base36upper, "KFUVRSIVVNFRBJWAJO"
 base58flickr, "ZrTu1dk6cWsRYjYu"
 base58btc, "zStV1DL6CwTryKyV"
 base64, "maGVsbG8gd29ybGQ"
diff --git a/tests/test4.csv b/tests/test4.csv
index e02f128..7fd4fc3 100644
--- a/tests/test4.csv
+++ b/tests/test4.csv
@@ -13,6 +13,8 @@ base32padupper, "CAB4WK4ZANVQW42JAEE======"
 base32hexpad, "t01smasp0dlgmsq9044======"
 base32hexpadupper, "T01SMASP0DLGMSQ9044======"
 base32z, "hybhskh3ypiosh4jyrr"
+base36, "k02lcpzo5yikidynfl"
+base36upper, "K02LCPZO5YIKIDYNFL"
 base58flickr, "Z17Pznk19XTTzBtx"
 base58btc, "z17paNL19xttacUY"
 base64, "mAHllcyBtYW5pICE"
diff --git a/tests/test5.csv b/tests/test5.csv
index 9f70104..44e6b26 100644
--- a/tests/test5.csv
+++ b/tests/test5.csv
@@ -13,6 +13,8 @@ base32padupper, "CAAAHSZLTEBWWC3TJEAQQ===="
 base32hexpad, "t0007ipbj41mm2rj940gg===="
 base32hexpadupper, "T0007IPBJ41MM2RJ940GG===="
 base32z, "hyyy813murbssn5ujryoo"
+base36, "k002lcpzo5yikidynfl"
+base36upper, "K002LCPZO5YIKIDYNFL"
 base58flickr, "Z117Pznk19XTTzBtx"
 base58btc, "z117paNL19xttacUY"
 base64, "mAAB5ZXMgbWFuaSAh"
diff --git a/tests/test6.csv b/tests/test6.csv
index a10b180..3037d9c 100644
--- a/tests/test6.csv
+++ b/tests/test6.csv
@@ -9,3 +9,5 @@ base32pad, "cnbswy3dpeB3W64TMMQ======"
 base32padupper, "Cnbswy3dpeB3W64TMMQ======"
 base32hexpad, "td1imor3f41RMUSJCCG======"
 base32hexpadupper, "Td1imor3f41RMUSJCCG======"
+base36, "kfUvrsIvVnfRbjWaJo"
+base36upper, "KfUVrSIVVnFRbJWAJo"

From 3b7419fd5dfb75922c642ccc870eae99cd7fdc10 Mon Sep 17 00:00:00 2001
From: Peter Rabbitson <ribasushi@protocol.ai>
Date: Fri, 22 May 2020 03:04:20 +0200
Subject: [PATCH 2/3] More docs /o\

---
 README.md     | 26 +++++++++++++++-----------
 multibase.csv | 22 +++++++++++-----------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index c8badf5..8bd1565 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 > Self identifying base encodings
 
 Multibase is a protocol for disambiguating the encoding of base-encoded (e.g.,
-base32, base64, base58, etc.) binary appearing in text.
+base32, base36, base64, base58, etc.) binary appearing in text.
 
 When text is encoded as bytes, we can usually use a one-size-fits-all encoding
 (UTF-8) because we're always encoding to the same set of 256 bytes (+/- the NUL
@@ -63,17 +63,19 @@ base8,             7,    octal,
 base10,            9,    decimal,                                                  draft
 base16,            f,    hexadecimal,                                              default
 base16upper,       F,    hexadecimal,                                              default
-base32hex,         v,    rfc4648 no padding - highest char,                        candidate
-base32hexupper,    V,    rfc4648 no padding - highest char,                        candidate
-base32hexpad,      t,    rfc4648 with padding,                                     candidate
-base32hexpadupper, T,    rfc4648 with padding,                                     candidate
-base32,            b,    rfc4648 no padding,                                       default
-base32upper,       B,    rfc4648 no padding,                                       default
-base32pad,         c,    rfc4648 with padding,                                     candidate
-base32padupper,    C,    rfc4648 with padding,                                     candidate
+base32hex,         v,    rfc4648 case-insensitive - no padding - highest char,     candidate
+base32hexupper,    V,    rfc4648 case-insensitive - no padding - highest char,     candidate
+base32hexpad,      t,    rfc4648 case-insensitive - with padding,                  candidate
+base32hexpadupper, T,    rfc4648 case-insensitive - with padding,                  candidate
+base32,            b,    rfc4648 case-insensitive - no padding,                    default
+base32upper,       B,    rfc4648 case-insensitive - no padding,                    default
+base32pad,         c,    rfc4648 case-insensitive - with padding,                  candidate
+base32padupper,    C,    rfc4648 case-insensitive - with padding,                  candidate
 base32z,           h,    z-base-32 (used by Tahoe-LAFS),                           draft
-base58flickr,      Z,    base58 flicker,                                           candidate
+base36,            k,    base36 [0-9a-z] case-insensitive - no padding,            default
+base36upper,       K,    base36 [0-9a-z] case-insensitive - no padding,            default
 base58btc,         z,    base58 bitcoin,                                           default
+base58flickr,      Z,    base58 flicker,                                           candidate
 base64,            m,    rfc4648 no padding,                                       default
 base64pad,         M,    rfc4648 with padding - MIME encoding,                     candidate
 base64url,         u,    rfc4648 no padding,                                       default
@@ -107,6 +109,7 @@ Consider the following encodings of the same binary string:
 ```
 4D756C74696261736520697320617765736F6D6521205C6F2F # base16 (hex)
 JV2WY5DJMJQXGZJANFZSAYLXMVZW63LFEEQFY3ZP           # base32
+3IY8QKL64VUGCX009XWUHKF6GBBTS3TVRXFRA5R            # base36
 YAjKoNbau5KiqmHPmSxYCvn66dA1vLmwbt                 # base58
 TXVsdGliYXNlIGlzIGF3ZXNvbWUhIFxvLw==               # base64
 ```
@@ -116,11 +119,12 @@ And consider the same encodings with their multibase prefix
 ```
 F4D756C74696261736520697320617765736F6D6521205C6F2F # base16 F
 BJV2WY5DJMJQXGZJANFZSAYLXMVZW63LFEEQFY3ZP           # base32 B
+K3IY8QKL64VUGCX009XWUHKF6GBBTS3TVRXFRA5R            # base36 K
 zYAjKoNbau5KiqmHPmSxYCvn66dA1vLmwbt                 # base58 z
 MTXVsdGliYXNlIGlzIGF3ZXNvbWUhIFxvLw==               # base64 M
 ```
 
-The base prefixes used are: `F, B, z, M`.
+The base prefixes used are: `F, B, K, z, M`.
 
 
 ## FAQ
diff --git a/multibase.csv b/multibase.csv
index 5f69d41..865b404 100644
--- a/multibase.csv
+++ b/multibase.csv
@@ -5,19 +5,19 @@ base8,             7,    octal,
 base10,            9,    decimal,                                                  draft
 base16,            f,    hexadecimal,                                              default
 base16upper,       F,    hexadecimal,                                              default
-base32hex,         v,    rfc4648 no padding - highest char,                        candidate
-base32hexupper,    V,    rfc4648 no padding - highest char,                        candidate
-base32hexpad,      t,    rfc4648 with padding,                                     candidate
-base32hexpadupper, T,    rfc4648 with padding,                                     candidate
-base32,            b,    rfc4648 no padding,                                       default
-base32upper,       B,    rfc4648 no padding,                                       default
-base32pad,         c,    rfc4648 with padding,                                     candidate
-base32padupper,    C,    rfc4648 with padding,                                     candidate
+base32hex,         v,    rfc4648 case-insensitive - no padding - highest char,     candidate
+base32hexupper,    V,    rfc4648 case-insensitive - no padding - highest char,     candidate
+base32hexpad,      t,    rfc4648 case-insensitive - with padding,                  candidate
+base32hexpadupper, T,    rfc4648 case-insensitive - with padding,                  candidate
+base32,            b,    rfc4648 case-insensitive - no padding,                    default
+base32upper,       B,    rfc4648 case-insensitive - no padding,                    default
+base32pad,         c,    rfc4648 case-insensitive - with padding,                  candidate
+base32padupper,    C,    rfc4648 case-insensitive - with padding,                  candidate
 base32z,           h,    z-base-32 (used by Tahoe-LAFS),                           draft
-base36upper,       K,    base36 [0-9a-z] case-insensitive no padding,              default
-base36,            k,    base36 [0-9a-z] case-insensitive no padding,              default
-base58flickr,      Z,    base58 flicker,                                           candidate
+base36,            k,    base36 [0-9a-z] case-insensitive - no padding,            default
+base36upper,       K,    base36 [0-9a-z] case-insensitive - no padding,            default
 base58btc,         z,    base58 bitcoin,                                           default
+base58flickr,      Z,    base58 flicker,                                           candidate
 base64,            m,    rfc4648 no padding,                                       default
 base64pad,         M,    rfc4648 with padding - MIME encoding,                     candidate
 base64url,         u,    rfc4648 no padding,                                       default

From f378d3427fe125057facdbac936c4215cc777920 Mon Sep 17 00:00:00 2001
From: Peter Rabbitson <ribasushi@protocol.ai>
Date: Fri, 22 May 2020 04:51:28 +0200
Subject: [PATCH 3/3] Nits

---
 README.md      | 4 ++--
 multibase.csv  | 4 ++--
 rfcs/Base36.md | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 8bd1565..220789f 100644
--- a/README.md
+++ b/README.md
@@ -72,8 +72,8 @@ base32upper,       B,    rfc4648 case-insensitive - no padding,
 base32pad,         c,    rfc4648 case-insensitive - with padding,                  candidate
 base32padupper,    C,    rfc4648 case-insensitive - with padding,                  candidate
 base32z,           h,    z-base-32 (used by Tahoe-LAFS),                           draft
-base36,            k,    base36 [0-9a-z] case-insensitive - no padding,            default
-base36upper,       K,    base36 [0-9a-z] case-insensitive - no padding,            default
+base36,            k,    base36 [0-9a-z] case-insensitive - no padding,            draft
+base36upper,       K,    base36 [0-9a-z] case-insensitive - no padding,            draft
 base58btc,         z,    base58 bitcoin,                                           default
 base58flickr,      Z,    base58 flicker,                                           candidate
 base64,            m,    rfc4648 no padding,                                       default
diff --git a/multibase.csv b/multibase.csv
index 865b404..3b5abe2 100644
--- a/multibase.csv
+++ b/multibase.csv
@@ -14,8 +14,8 @@ base32upper,       B,    rfc4648 case-insensitive - no padding,
 base32pad,         c,    rfc4648 case-insensitive - with padding,                  candidate
 base32padupper,    C,    rfc4648 case-insensitive - with padding,                  candidate
 base32z,           h,    z-base-32 (used by Tahoe-LAFS),                           draft
-base36,            k,    base36 [0-9a-z] case-insensitive - no padding,            default
-base36upper,       K,    base36 [0-9a-z] case-insensitive - no padding,            default
+base36,            k,    base36 [0-9a-z] case-insensitive - no padding,            draft
+base36upper,       K,    base36 [0-9a-z] case-insensitive - no padding,            draft
 base58btc,         z,    base58 bitcoin,                                           default
 base58flickr,      Z,    base58 flicker,                                           candidate
 base64,            m,    rfc4648 no padding,                                       default
diff --git a/rfcs/Base36.md b/rfcs/Base36.md
index cd4ff57..b050bfd 100644
--- a/rfcs/Base36.md
+++ b/rfcs/Base36.md
@@ -9,7 +9,7 @@ A byte array is encoded to base36 by:
 
 1. Counting the number of leading 0 bytes (Z).
 2. Interpreting the rest of the byte array as a big-endian unsigned integer (N).
-3. Concatenating a length Z string of '0' characters with the decimal
+3. Concatenating a length Z string of '0' characters with the base36
    representation of N.
 
 A byte array is encoded to multibase base36 by prefixing its base36 encoding
@@ -32,7 +32,7 @@ The remaining characters are then converted to a byte array by:
 
 Byte Array <-> Base36 Multibase:
 
-| Bytes | ==  | LC Base36 | OR | UC base36 |
+| Bytes | ==  | LC Base36 | OR | UC Base36 |
 |---|---|---|---|---|
 | `[0x00, 0x01]`       | == | `"k01"`   | | `"K01"`   |
 | `[0x00, 0x00, 0xff]` | == | `"k0073"` | | `"K0073"` |