Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "feature",
"description": "Added byte strings and byte text blocks to the IDL to support encoding human readable text as blob values",
"pull_requests": []
}
56 changes: 52 additions & 4 deletions docs/source-2.0/spec/idl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,15 @@ string support defined in :rfc:`7405`.

.. productionlist:: smithy
ControlSection :*(`ControlStatement`)
ControlStatement :"$" `NodeObjectKey` [`SP`] ":" [`SP`] `NodeValue` `BR`
ControlStatement :"$" `ControlKey` [`SP`] ":" [`SP`] `NodeValue` `BR`
ControlKey :`QuotedText` / `Identifier`

.. rubric:: Metadata

.. productionlist:: smithy
MetadataSection :*(`MetadataStatement`)
MetadataStatement :%s"metadata" `SP` `NodeObjectKey` [`SP`] "=" [`SP`] `NodeValue` `BR`
MetadataStatement :%s"metadata" `SP` `MetadataKey` [`SP`] "=" [`SP`] `NodeValue` `BR`
MetadataKey :`QuotedText` / `Identifier`

.. rubric:: Node values

Expand All @@ -136,7 +138,7 @@ string support defined in :rfc:`7405`.
NodeArray :"[" [`WS`] *(`NodeValue` [`WS`]) "]"
NodeObject :"{" [`WS`] [`NodeObjectKvp` *(`WS` `NodeObjectKvp`)] [`WS`] "}"
NodeObjectKvp :`NodeObjectKey` [`WS`] ":" [`WS`] `NodeValue`
NodeObjectKey :`QuotedText` / `Identifier`
NodeObjectKey :`QuotedText` / `ByteString` / `Identifier`
Number :[`Minus`] `Int` [`Frac`] [`Exp`]
DecimalPoint :%x2E ; .
DigitOneToNine :%x31-39 ; 1-9
Expand All @@ -148,7 +150,8 @@ string support defined in :rfc:`7405`.
Plus :%x2B ; +
Zero :%x30 ; 0
NodeKeyword :%s"true" / %s"false" / %s"null"
NodeStringValue :`ShapeId` / `TextBlock` / `QuotedText`
NodeStringValue :`ShapeId` / `TextBlock` / `ByteTextBlock` / `QuotedText` / `ByteString`
ByteString :"b" `QuotedText`
QuotedText :DQUOTE *`QuotedChar` DQUOTE
QuotedChar :%x09 ; tab
:/ %x20-21 ; space - "!"
Expand All @@ -162,6 +165,7 @@ string support defined in :rfc:`7405`.
UnicodeEscape :%s"u" `Hex` `Hex` `Hex` `Hex`
Hex :DIGIT / %x41-46 / %x61-66
Escape :%x5C ; backslash
ByteTextBlock : "b" `TextBlock`
TextBlock :`ThreeDquotes` [`SP`] `NL` *`TextBlockContent` `ThreeDquotes`
TextBlockContent :`QuotedChar` / (1*2DQUOTE 1*`QuotedChar`)
ThreeDquotes :DQUOTE DQUOTE DQUOTE
Expand Down Expand Up @@ -2398,4 +2402,48 @@ example is interpreted as ``Foo\nBaz Bam``:
Baz \
Bam"""

Byte Strings
============

The byte string and byte text block productions are used to encode
human-readable strings as if they were binary values. They are equivalent to a
standard string containing the base64-encoded representation of the UTF-8
bytes that make up the string.

These values are parsed into the :ref:`semantic model <semantic-model>` in the
same manner as their standard counterparts.

The following values are all equivalent:

.. tab:: Smithy

.. code-block:: smithy

        $version: "2"
metadata foo = {
byteString: b"Hello\nWorld"
byteTextBlock: b"""
Hello
World"""
string: "SGVsbG8KV29ybGQ="
textBlock: """
SGVsbG8KV29ybGQ="""
}

.. tab:: JSON

.. code-block:: json

{
"smithy": "2",
"metadata": {
"foo": {
"byteString": "SGVsbG8KV29ybGQ=",
"byteTextBlock": "SGVsbG8KV29ybGQ=",
"string": "SGVsbG8KV29ybGQ=",
"textBlock": "SGVsbG8KV29ybGQ="
}
}
}

.. _CommonMark: https://spec.commonmark.org/
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ public IdlToken next() {
return parseString();
case '/':
return parseComment();
case 'b':
if (parser.peek(1) == '"') {
return parseByteString();
}
return parseIdentifier();
case '-':
case '0':
case '1':
Expand Down Expand Up @@ -215,7 +220,6 @@ public IdlToken next() {
case 'Z':
case '_':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
Expand Down Expand Up @@ -388,6 +392,35 @@ private IdlToken parseString() {
}
}

// Tokenizes a byte string: a 'b' prefix immediately followed by quoted text
// (grammar: ByteString : "b" QuotedText). If the opening turns out to be
// b""" (three consecutive quotes), dispatches to parseByteTextBlock instead.
// Returns BYTE_STRING (or BYTE_TEXT_BLOCK via the dispatch), or ERROR with
// currentTokenError set when the quoted text is malformed.
private IdlToken parseByteString() {
parser.expect('b');
parser.expect('"'); // skip first quote.

if (parser.peek() == '"') {
parser.skip(); // skip second quote.
if (parser.peek() == '"') { // A third consecutive quote is a BYTE_TEXT_BLOCK.
parser.skip();
return parseByteTextBlock();
} else {
// Empty byte string: b"" — two quotes with nothing between them.
currentTokenEnd = parser.position();
currentTokenStringSlice = "";
return currentTokenType = IdlToken.BYTE_STRING;
}
}

try {
// Parse the contents of a byte string.
// false = single-quoted form; the closing DQUOTE terminates the slice.
currentTokenStringSlice = parseQuotedTextAndTextBlock(false);
currentTokenEnd = parser.position();
return currentTokenType = IdlToken.BYTE_STRING;
} catch (RuntimeException e) {
// Record the failure position and message rather than propagating,
// so the tokenizer can surface an ERROR token to the caller.
currentTokenEnd = parser.position();
currentTokenError = "Error parsing byte string: " + e.getMessage();
return currentTokenType = IdlToken.ERROR;
}
}

private IdlToken parseTextBlock() {
try {
currentTokenStringSlice = parseQuotedTextAndTextBlock(true);
Expand All @@ -400,14 +433,26 @@ private IdlToken parseTextBlock() {
}
}

// Parses both quoted_text and text_block
// Tokenizes the remainder of a byte text block (grammar:
// ByteTextBlock : "b" TextBlock). Called from parseByteString after the
// leading b""" has already been consumed; mirrors parseTextBlock but
// yields BYTE_TEXT_BLOCK so downstream parsers can base64-encode the value.
private IdlToken parseByteTextBlock() {
try {
// true = triple-quoted form; the slice ends at the closing """.
currentTokenStringSlice = parseQuotedTextAndTextBlock(true);
currentTokenEnd = parser.position();
return currentTokenType = IdlToken.BYTE_TEXT_BLOCK;
} catch (RuntimeException e) {
// Convert the parse failure into an ERROR token with context.
currentTokenEnd = parser.position();
currentTokenError = "Error parsing byte text block: " + e.getMessage();
return currentTokenType = IdlToken.ERROR;
}
}

// Parses quoted_text, byte_string, text_block, and byte_text_block body
private CharSequence parseQuotedTextAndTextBlock(boolean triple) {
int start = parser.position();

while (!parser.eof()) {
char next = parser.peek();
if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) {
// Found closing quotes of quoted_text and/or text_block
// Found closing quotes
break;
}
parser.skip();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import software.amazon.smithy.model.validation.Severity;
import software.amazon.smithy.model.validation.ValidationEvent;
import software.amazon.smithy.utils.Pair;
import software.amazon.smithy.utils.StringUtils;

/**
* Parses Node values from a {@link IdlInternalTokenizer}.
Expand Down Expand Up @@ -53,25 +54,36 @@ static Node expectAndSkipNode(IdlModelLoader loader) {
static Node expectAndSkipNode(IdlModelLoader loader, SourceLocation location) {
IdlInternalTokenizer tokenizer = loader.getTokenizer();
IdlToken token = tokenizer.expect(IdlToken.STRING,
IdlToken.BYTE_STRING,
IdlToken.TEXT_BLOCK,
IdlToken.BYTE_TEXT_BLOCK,
IdlToken.NUMBER,
IdlToken.IDENTIFIER,
IdlToken.LBRACE,
IdlToken.LBRACKET);

switch (token) {
case STRING:
case TEXT_BLOCK:
Node result = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location);
case TEXT_BLOCK: {
String value = tokenizer.getCurrentTokenStringSlice().toString();
tokenizer.next();
return result;
case IDENTIFIER:
return new StringNode(value, location);
}
case BYTE_STRING:
case BYTE_TEXT_BLOCK: {
String value = StringUtils.base64Encode(tokenizer.getCurrentTokenStringSlice());
tokenizer.next();
return new StringNode(value, location);
}
case IDENTIFIER: {
String shapeId = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer));
return createIdentifier(loader, shapeId, location);
case NUMBER:
Number number = tokenizer.getCurrentTokenNumberValue();
}
case NUMBER: {
Number value = tokenizer.getCurrentTokenNumberValue();
tokenizer.next();
return new NumberNode(number, location);
return new NumberNode(value, location);
}
case LBRACE:
return parseObjectNode(loader, location);
case LBRACKET:
Expand Down Expand Up @@ -191,7 +203,9 @@ private static ObjectNode parseObjectNode(IdlModelLoader loader, SourceLocation
ObjectNode.Builder builder = ObjectNode.builder().sourceLocation(location);

while (tokenizer.hasNext()) {
if (tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.IDENTIFIER) == IdlToken.RBRACE) {
IdlToken token =
tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.BYTE_STRING, IdlToken.IDENTIFIER);
if (token == IdlToken.RBRACE) {
break;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ private static boolean isValidNormalCharacter(char c, boolean isTextBlock) {
|| c == '\n'
|| c == '\r'
|| (c >= 0x20 && c <= 0x21) // space - "!"
|| (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block
|| (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block and byte_text_block
|| (c >= 0x23 && c <= 0x5b) // "#" - "["
|| c >= 0x5d; // "]"+
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ public boolean isWhitespace() {
DOC_COMMENT("///"),
AT("@"),
STRING("\""),
BYTE_STRING("b\""),
TEXT_BLOCK("\"\"\""),
BYTE_TEXT_BLOCK("b\"\"\""),
COLON(":"),
WALRUS(":="),
IDENTIFIER(""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import software.amazon.smithy.model.node.StringNode;
import software.amazon.smithy.model.traits.DocumentationTrait;
import software.amazon.smithy.model.traits.Trait;
import software.amazon.smithy.utils.StringUtils;

final class IdlTraitParser {

Expand Down Expand Up @@ -158,29 +159,53 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo
tokenizer.expect(IdlToken.LBRACE,
IdlToken.LBRACKET,
IdlToken.TEXT_BLOCK,
IdlToken.BYTE_TEXT_BLOCK,
IdlToken.STRING,
IdlToken.BYTE_STRING,
IdlToken.NUMBER,
IdlToken.IDENTIFIER);

switch (tokenizer.getCurrentToken()) {
case LBRACE:
case LBRACKET:
case LBRACKET: {
Node result = IdlNodeParser.expectAndSkipNode(loader, location);
tokenizer.skipWsAndDocs();
return result;
case TEXT_BLOCK:
Node textBlockResult = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location);
}
case TEXT_BLOCK: {
String value = tokenizer.getCurrentTokenStringSlice().toString();
tokenizer.next();
tokenizer.skipWsAndDocs();
return textBlockResult;
case NUMBER:
Number number = tokenizer.getCurrentTokenNumberValue();
return new StringNode(value, location);
}
case BYTE_TEXT_BLOCK: {
String value = StringUtils.base64Encode(tokenizer.getCurrentTokenStringSlice().toString());
tokenizer.next();
tokenizer.skipWsAndDocs();
return new StringNode(value, location);
}
case NUMBER: {
Number value = tokenizer.getCurrentTokenNumberValue();
tokenizer.next();
tokenizer.skipWsAndDocs();
return new NumberNode(number, location);
case STRING:
String stringValue = tokenizer.getCurrentTokenStringSlice().toString();
StringNode stringNode = new StringNode(stringValue, location);
return new NumberNode(value, location);
}
case STRING: {
String value = tokenizer.getCurrentTokenStringSlice().toString();
StringNode stringNode = new StringNode(value, location);
tokenizer.next();
tokenizer.skipWsAndDocs();
if (tokenizer.getCurrentToken() == IdlToken.COLON) {
tokenizer.next();
tokenizer.skipWsAndDocs();
return parseStructuredTrait(loader, stringNode);
} else {
return stringNode;
}
}
case BYTE_STRING: {
String value = StringUtils.base64Encode(tokenizer.getCurrentTokenStringSlice().toString());
StringNode stringNode = new StringNode(value, location);
tokenizer.next();
tokenizer.skipWsAndDocs();
if (tokenizer.getCurrentToken() == IdlToken.COLON) {
Expand All @@ -190,8 +215,9 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo
} else {
return stringNode;
}
}
case IDENTIFIER:
default:
default: {
// Handle: `foo`, `foo$bar`, `foo.bar#baz`, `foo.bar#baz$bam`, `foo: bam`
String identifier = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer));
tokenizer.skipWsAndDocs();
Expand All @@ -203,6 +229,7 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo
tokenizer.skipWsAndDocs();
return parseStructuredTrait(loader, new StringNode(identifier, location));
}
}
}
}

Expand All @@ -228,7 +255,7 @@ private static ObjectNode parseStructuredTrait(IdlModelLoader loader, StringNode
tokenizer.skipWsAndDocs();

while (tokenizer.getCurrentToken() != IdlToken.RPAREN) {
tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING);
tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING, IdlToken.BYTE_STRING);
String key = loader.internString(tokenizer.getCurrentTokenStringSlice());
StringNode keyNode = new StringNode(key, tokenizer.getCurrentTokenLocation());
tokenizer.next();
Expand Down
Loading
Loading