Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "feature",
"description": "Added byte strings and byte text blocks to the IDL to support encoding human readable text as blob values",
"pull_requests": []
}
56 changes: 52 additions & 4 deletions docs/source-2.0/spec/idl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,15 @@ string support defined in :rfc:`7405`.

.. productionlist:: smithy
ControlSection :*(`ControlStatement`)
ControlStatement :"$" `NodeObjectKey` [`SP`] ":" [`SP`] `NodeValue` `BR`
ControlStatement :"$" `ControlKey` [`SP`] ":" [`SP`] `NodeValue` `BR`
ControlKey :`QuotedText` / `Identifier`

.. rubric:: Metadata

.. productionlist:: smithy
MetadataSection :*(`MetadataStatement`)
MetadataStatement :%s"metadata" `SP` `NodeObjectKey` [`SP`] "=" [`SP`] `NodeValue` `BR`
MetadataStatement :%s"metadata" `SP` `MetadataKey` [`SP`] "=" [`SP`] `NodeValue` `BR`
MetadataKey :`QuotedText` / `Identifier`

.. rubric:: Node values

Expand All @@ -136,7 +138,7 @@ string support defined in :rfc:`7405`.
NodeArray :"[" [`WS`] *(`NodeValue` [`WS`]) "]"
NodeObject :"{" [`WS`] [`NodeObjectKvp` *(`WS` `NodeObjectKvp`)] [`WS`] "}"
NodeObjectKvp :`NodeObjectKey` [`WS`] ":" [`WS`] `NodeValue`
NodeObjectKey :`QuotedText` / `Identifier`
NodeObjectKey :`QuotedText` / `ByteString` / `Identifier`
Number :[`Minus`] `Int` [`Frac`] [`Exp`]
DecimalPoint :%x2E ; .
DigitOneToNine :%x31-39 ; 1-9
Expand All @@ -148,7 +150,8 @@ string support defined in :rfc:`7405`.
Plus :%x2B ; +
Zero :%x30 ; 0
NodeKeyword :%s"true" / %s"false" / %s"null"
NodeStringValue :`ShapeId` / `TextBlock` / `QuotedText`
NodeStringValue :`ShapeId` / `TextBlock` / `ByteTextBlock` / `QuotedText` / `ByteString`
ByteString :"b" `QuotedText`
QuotedText :DQUOTE *`QuotedChar` DQUOTE
QuotedChar :%x09 ; tab
:/ %x20-21 ; space - "!"
Expand All @@ -162,6 +165,7 @@ string support defined in :rfc:`7405`.
UnicodeEscape :%s"u" `Hex` `Hex` `Hex` `Hex`
Hex :DIGIT / %x41-46 / %x61-66
Escape :%x5C ; backslash
ByteTextBlock : "b" `TextBlock`
TextBlock :`ThreeDquotes` [`SP`] `NL` *`TextBlockContent` `ThreeDquotes`
TextBlockContent :`QuotedChar` / (1*2DQUOTE 1*`QuotedChar`)
ThreeDquotes :DQUOTE DQUOTE DQUOTE
Expand Down Expand Up @@ -2398,4 +2402,48 @@ example is interpreted as ``Foo\nBaz Bam``:
Baz \
Bam"""

Byte Strings
============

The byte string and byte text block productions are used to encode
human-readable strings as if they were binary values. They are equivalent to a
standard string containing the base64-encoded representation of the UTF-8
bytes that make up the string.

These values are parsed into the :ref:`semantic model <semantic-model>` in the
same manner as their standard counterparts.

The following values are all equivalent:

.. tab:: Smithy

.. code-block:: smithy

        $version: "2"
metadata foo = {
byteString: b"Hello\nWorld"
byteTextBlock: b"""
Hello
World"""
string: "SGVsbG8KV29ybGQ="
textBlock: """
SGVsbG8KV29ybGQ="""
}

.. tab:: JSON

.. code-block:: json

{
"smithy": "2",
"metadata": {
"foo": {
"byteString": "SGVsbG8KV29ybGQ=",
"byteTextBlock": "SGVsbG8KV29ybGQ=",
"string": "SGVsbG8KV29ybGQ=",
"textBlock": "SGVsbG8KV29ybGQ="
}
}
}

.. _CommonMark: https://spec.commonmark.org/
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ public IdlToken next() {
return parseString();
case '/':
return parseComment();
case 'b':
if (parser.peek(1) == '"') {
return parseByteString();
}
return parseIdentifier();
case '-':
case '0':
case '1':
Expand Down Expand Up @@ -215,7 +220,6 @@ public IdlToken next() {
case 'Z':
case '_':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
Expand Down Expand Up @@ -388,6 +392,35 @@ private IdlToken parseString() {
}
}

// Tokenizes a byte string: a 'b' prefix immediately followed by quoted text
// (grammar: ByteString : "b" QuotedText). If the opening turns out to be
// b""" (three consecutive quotes), dispatches to parseByteTextBlock instead.
// Returns BYTE_STRING (or BYTE_TEXT_BLOCK via the dispatch), or ERROR with
// currentTokenError set when the quoted text is malformed.
private IdlToken parseByteString() {
parser.expect('b');
parser.expect('"'); // skip first quote.

if (parser.peek() == '"') {
parser.skip(); // skip second quote.
if (parser.peek() == '"') { // A third consecutive quote is a BYTE_TEXT_BLOCK.
parser.skip();
return parseByteTextBlock();
} else {
// Empty byte string: b"" — two quotes with nothing between them.
currentTokenEnd = parser.position();
currentTokenStringSlice = "";
return currentTokenType = IdlToken.BYTE_STRING;
}
}

try {
// Parse the contents of a byte string.
// false = single-quoted form; the closing DQUOTE terminates the slice.
currentTokenStringSlice = parseQuotedTextAndTextBlock(false);
currentTokenEnd = parser.position();
return currentTokenType = IdlToken.BYTE_STRING;
} catch (RuntimeException e) {
// Record the failure position and message rather than propagating,
// so the tokenizer can surface an ERROR token to the caller.
currentTokenEnd = parser.position();
currentTokenError = "Error parsing byte string: " + e.getMessage();
return currentTokenType = IdlToken.ERROR;
}
}

private IdlToken parseTextBlock() {
try {
currentTokenStringSlice = parseQuotedTextAndTextBlock(true);
Expand All @@ -400,14 +433,26 @@ private IdlToken parseTextBlock() {
}
}

// Parses both quoted_text and text_block
// Tokenizes the remainder of a byte text block (grammar:
// ByteTextBlock : "b" TextBlock). Called from parseByteString after the
// leading b""" has already been consumed; mirrors parseTextBlock but
// yields BYTE_TEXT_BLOCK so downstream parsers can base64-encode the value.
private IdlToken parseByteTextBlock() {
try {
// true = triple-quoted form; the slice ends at the closing """.
currentTokenStringSlice = parseQuotedTextAndTextBlock(true);
currentTokenEnd = parser.position();
return currentTokenType = IdlToken.BYTE_TEXT_BLOCK;
} catch (RuntimeException e) {
// Convert the parse failure into an ERROR token with context.
currentTokenEnd = parser.position();
currentTokenError = "Error parsing byte text block: " + e.getMessage();
return currentTokenType = IdlToken.ERROR;
}
}

// Parses quoted_text, byte_string, text_block, and byte_text_block body
private CharSequence parseQuotedTextAndTextBlock(boolean triple) {
int start = parser.position();

while (!parser.eof()) {
char next = parser.peek();
if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) {
// Found closing quotes of quoted_text and/or text_block
// Found closing quotes
break;
}
parser.skip();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import software.amazon.smithy.model.validation.Severity;
import software.amazon.smithy.model.validation.ValidationEvent;
import software.amazon.smithy.utils.Pair;
import software.amazon.smithy.utils.StringUtils;

/**
* Parses Node values from a {@link IdlInternalTokenizer}.
Expand Down Expand Up @@ -53,25 +54,36 @@ static Node expectAndSkipNode(IdlModelLoader loader) {
static Node expectAndSkipNode(IdlModelLoader loader, SourceLocation location) {
IdlInternalTokenizer tokenizer = loader.getTokenizer();
IdlToken token = tokenizer.expect(IdlToken.STRING,
IdlToken.BYTE_STRING,
IdlToken.TEXT_BLOCK,
IdlToken.BYTE_TEXT_BLOCK,
IdlToken.NUMBER,
IdlToken.IDENTIFIER,
IdlToken.LBRACE,
IdlToken.LBRACKET);

switch (token) {
case STRING:
case TEXT_BLOCK:
Node result = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location);
case TEXT_BLOCK: {
String value = tokenizer.getCurrentTokenStringSlice().toString();
tokenizer.next();
return result;
case IDENTIFIER:
return new StringNode(value, location);
}
case BYTE_STRING:
case BYTE_TEXT_BLOCK: {
String value = StringUtils.base64Encode(tokenizer.getCurrentTokenStringSlice());
tokenizer.next();
return new StringNode(value, location);
}
case IDENTIFIER: {
String shapeId = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer));
return createIdentifier(loader, shapeId, location);
case NUMBER:
Number number = tokenizer.getCurrentTokenNumberValue();
}
case NUMBER: {
Number value = tokenizer.getCurrentTokenNumberValue();
tokenizer.next();
return new NumberNode(number, location);
return new NumberNode(value, location);
}
case LBRACE:
return parseObjectNode(loader, location);
case LBRACKET:
Expand Down Expand Up @@ -191,7 +203,9 @@ private static ObjectNode parseObjectNode(IdlModelLoader loader, SourceLocation
ObjectNode.Builder builder = ObjectNode.builder().sourceLocation(location);

while (tokenizer.hasNext()) {
if (tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.IDENTIFIER) == IdlToken.RBRACE) {
IdlToken token =
tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.BYTE_STRING, IdlToken.IDENTIFIER);
if (token == IdlToken.RBRACE) {
break;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ private static boolean isValidNormalCharacter(char c, boolean isTextBlock) {
|| c == '\n'
|| c == '\r'
|| (c >= 0x20 && c <= 0x21) // space - "!"
|| (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block
|| (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block and byte_text_block
|| (c >= 0x23 && c <= 0x5b) // "#" - "["
|| c >= 0x5d; // "]"+
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ public boolean isWhitespace() {
DOC_COMMENT("///"),
AT("@"),
STRING("\""),
BYTE_STRING("b\""),
TEXT_BLOCK("\"\"\""),
BYTE_TEXT_BLOCK("b\"\"\""),
COLON(":"),
WALRUS(":="),
IDENTIFIER(""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import software.amazon.smithy.model.node.StringNode;
import software.amazon.smithy.model.traits.DocumentationTrait;
import software.amazon.smithy.model.traits.Trait;
import software.amazon.smithy.utils.StringUtils;

final class IdlTraitParser {

Expand Down Expand Up @@ -158,29 +159,53 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo
tokenizer.expect(IdlToken.LBRACE,
IdlToken.LBRACKET,
IdlToken.TEXT_BLOCK,
IdlToken.BYTE_TEXT_BLOCK,
IdlToken.STRING,
IdlToken.BYTE_STRING,
IdlToken.NUMBER,
IdlToken.IDENTIFIER);

switch (tokenizer.getCurrentToken()) {
case LBRACE:
case LBRACKET:
case LBRACKET: {
Node result = IdlNodeParser.expectAndSkipNode(loader, location);
tokenizer.skipWsAndDocs();
return result;
case TEXT_BLOCK:
Node textBlockResult = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location);
}
case TEXT_BLOCK: {
String value = tokenizer.getCurrentTokenStringSlice().toString();
tokenizer.next();
tokenizer.skipWsAndDocs();
return textBlockResult;
case NUMBER:
Number number = tokenizer.getCurrentTokenNumberValue();
return new StringNode(value, location);
}
case BYTE_TEXT_BLOCK: {
String value = StringUtils.base64Encode(tokenizer.getCurrentTokenStringSlice().toString());
tokenizer.next();
tokenizer.skipWsAndDocs();
return new StringNode(value, location);
}
case NUMBER: {
Number value = tokenizer.getCurrentTokenNumberValue();
tokenizer.next();
tokenizer.skipWsAndDocs();
return new NumberNode(number, location);
case STRING:
String stringValue = tokenizer.getCurrentTokenStringSlice().toString();
StringNode stringNode = new StringNode(stringValue, location);
return new NumberNode(value, location);
}
case STRING: {
String value = tokenizer.getCurrentTokenStringSlice().toString();
StringNode stringNode = new StringNode(value, location);
tokenizer.next();
tokenizer.skipWsAndDocs();
if (tokenizer.getCurrentToken() == IdlToken.COLON) {
tokenizer.next();
tokenizer.skipWsAndDocs();
return parseStructuredTrait(loader, stringNode);
} else {
return stringNode;
}
}
case BYTE_STRING: {
String value = StringUtils.base64Encode(tokenizer.getCurrentTokenStringSlice().toString());
StringNode stringNode = new StringNode(value, location);
tokenizer.next();
tokenizer.skipWsAndDocs();
if (tokenizer.getCurrentToken() == IdlToken.COLON) {
Expand All @@ -190,8 +215,9 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo
} else {
return stringNode;
}
}
case IDENTIFIER:
default:
default: {
// Handle: `foo`, `foo$bar`, `foo.bar#baz`, `foo.bar#baz$bam`, `foo: bam`
String identifier = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer));
tokenizer.skipWsAndDocs();
Expand All @@ -203,6 +229,7 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo
tokenizer.skipWsAndDocs();
return parseStructuredTrait(loader, new StringNode(identifier, location));
}
}
}
}

Expand All @@ -228,7 +255,7 @@ private static ObjectNode parseStructuredTrait(IdlModelLoader loader, StringNode
tokenizer.skipWsAndDocs();

while (tokenizer.getCurrentToken() != IdlToken.RPAREN) {
tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING);
tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING, IdlToken.BYTE_STRING);
String key = loader.internString(tokenizer.getCurrentTokenStringSlice());
StringNode keyNode = new StringNode(key, tokenizer.getCurrentTokenLocation());
tokenizer.next();
Expand Down
Loading
Loading