smithy-lang · brandondahler · Nov 14, 2025 · Nov 17, 2025 · Nov 20, 2025
@@ -0,0 +1,7 @@
+{
+  "type": "feature",
+  "description": "Added byte strings and byte text blocks to the IDL to support encoding human readable text as blob values",
+  "pull_requests": [
+    "[#2853](https://github.com/smithy-lang/smithy/pull/2853)"
+  ]
+}
@@ -117,13 +117,15 @@ string support defined in :rfc:`7405`.
 
 .. productionlist:: smithy
     ControlSection   :*(`ControlStatement`)
-    ControlStatement :"$" `NodeObjectKey` [`SP`] ":" [`SP`] `NodeValue` `BR`
+    ControlStatement :"$" `ControlKey` [`SP`] ":" [`SP`] `NodeValue` `BR`
+    ControlKey       :`QuotedText` / `Identifier`
 
 .. rubric:: Metadata
 
 .. productionlist:: smithy
     MetadataSection   :*(`MetadataStatement`)
-    MetadataStatement :%s"metadata" `SP` `NodeObjectKey` [`SP`] "=" [`SP`] `NodeValue` `BR`
+    MetadataStatement :%s"metadata" `SP` `MetadataKey` [`SP`] "=" [`SP`] `NodeValue` `BR`
+    MetadataKey       :`QuotedText` / `Identifier`
 
 .. rubric:: Node values
 
@@ -136,7 +138,7 @@ string support defined in :rfc:`7405`.
     NodeArray           :"[" [`WS`] *(`NodeValue` [`WS`]) "]"
     NodeObject          :"{" [`WS`] [`NodeObjectKvp` *(`WS` `NodeObjectKvp`)] [`WS`] "}"
     NodeObjectKvp       :`NodeObjectKey` [`WS`] ":" [`WS`] `NodeValue`
-    NodeObjectKey       :`QuotedText` / `Identifier`
+    NodeObjectKey       :`QuotedText` / `ByteString` / `Identifier`
     Number              :[`Minus`] `Int` [`Frac`] [`Exp`]
     DecimalPoint        :%x2E ; .
     DigitOneToNine      :%x31-39 ; 1-9
@@ -148,7 +150,8 @@ string support defined in :rfc:`7405`.
     Plus                :%x2B ; +
     Zero                :%x30 ; 0
     NodeKeyword         :%s"true" / %s"false" / %s"null"
-    NodeStringValue     :`ShapeId` / `TextBlock` / `QuotedText`
+    NodeStringValue     :`ShapeId` / `TextBlock` / `ByteTextBlock` / `QuotedText` / `ByteString`
+    ByteString          :%s"b" `QuotedText`
     QuotedText          :DQUOTE *`QuotedChar` DQUOTE
     QuotedChar          :%x09        ; tab
                         :/ %x20-21     ; space - "!"
@@ -162,6 +165,7 @@ string support defined in :rfc:`7405`.
     UnicodeEscape       :%s"u" `Hex` `Hex` `Hex` `Hex`
     Hex                 :DIGIT / %x41-46 / %x61-66
     Escape              :%x5C ; backslash
+    ByteTextBlock       :%s"b" `TextBlock`
     TextBlock           :`ThreeDquotes` [`SP`] `NL` *`TextBlockContent` `ThreeDquotes`
     TextBlockContent    :`QuotedChar` / (1*2DQUOTE 1*`QuotedChar`)
     ThreeDquotes        :DQUOTE DQUOTE DQUOTE
@@ -2398,4 +2402,68 @@ example is interpreted as ``Foo\nBaz Bam``:
     Baz \
     Bam"""
 
+Byte Strings
+============
+
+The byte string and byte text block productions are used to encode binary
+values as human-readable strings. These offer an alternative to having to
+embed opaque base64 strings in places where binary values are required.
+
+Byte strings follow the same high-level parsing logic as standard strings.
+The escape sequences, line normalization, and incidental whitespace behaviors
+that exist in standard strings also work the same way in byte strings.
+Converting a valid standard string into a byte string is equivalent to encoding
+the original string into its UTF-8 bytes and then base64 encoding those bytes.
+
+The following values are all logically equivalent after parsing:
+
+.. tab:: Smithy
+
+    .. code-block:: smithy
+
+        version: "2"
+        metadata foo = {
+            byteString: b"Hello\nWorld"
+            byteTextBlock: b"""
+                Hello
+                World"""
+            string: "SGVsbG8KV29ybGQ="
+            textBlock: """
+                SGVsbG8KV29ybGQ="""
+        }
+
+.. tab:: JSON
+
+    .. code-block:: json
+
+        {
+            "smithy": "2",
+            "metadata": {
+                "foo": {
+                    "byteString": "SGVsbG8KV29ybGQ=",
+                    "byteTextBlock": "SGVsbG8KV29ybGQ=",
+                    "string": "SGVsbG8KV29ybGQ=",
+                    "textBlock": "SGVsbG8KV29ybGQ="
+                }
+            }
+        }
+
+In addition to the :ref:`string escape characters <string-escape-characters>`,
+byte strings support additional escape characters to make encoding arbitrary
+byte sequences possible:
+
+.. list-table::
+    :header-rows: 1
+    :widths: 20 30 50
+
+    * - Byte value
+      - Escape
+      - Meaning
+    * - ``00``
+      - ``\0``
+      - NULL byte
+    * - ``HH``
+      - ``\xHH``
+      - 2-digit hexadecimal byte value
+
 .. _CommonMark: https://spec.commonmark.org/
@@ -21,6 +21,7 @@ class DefaultTokenizer implements IdlTokenizer {
     private int currentTokenColumn = -1;
     private Number currentTokenNumber;
     private CharSequence currentTokenStringSlice;
+    private byte[] currentTokenBytes;
     private String currentTokenError;
 
     DefaultTokenizer(String filename, CharSequence model) {
@@ -97,6 +98,17 @@ public final CharSequence getCurrentTokenStringSlice() {
         }
     }
 
+    @Override
+    public final byte[] getCurrentTokenBytes() {
+        getCurrentToken();
+        if (currentTokenBytes == null) {
+            throw syntax("The current token must be a byte string but found: "
+                    + currentTokenType.getDebug(getCurrentTokenLexeme()), getCurrentTokenLocation());
+        }
+
+        return currentTokenBytes;
+    }
+
     @Override
     public final Number getCurrentTokenNumberValue() {
         getCurrentToken();
@@ -125,6 +137,7 @@ public final boolean hasNext() {
     @Override
     public IdlToken next() {
         currentTokenStringSlice = null;
+        currentTokenBytes = null;
         currentTokenNumber = null;
         currentTokenColumn = parser.column();
         currentTokenLine = parser.line();
@@ -175,6 +188,11 @@ public IdlToken next() {
                 return parseString();
             case '/':
                 return parseComment();
+            case 'b':
+                if (parser.peek(1) == '"') {
+                    return parseByteString();
+                }
+                return parseIdentifier();
             case '-':
             case '0':
             case '1':
@@ -215,7 +233,6 @@ public IdlToken next() {
             case 'Z':
             case '_':
             case 'a':
-            case 'b':
             case 'c':
             case 'd':
             case 'e':
@@ -388,6 +405,35 @@ private IdlToken parseString() {
         }
     }
 
+    private IdlToken parseByteString() {
+        parser.expect('b');
+        parser.expect('"'); // skip first quote.
+
+        if (parser.peek() == '"') {
+            parser.skip(); // skip second quote.
+            if (parser.peek() == '"') { // A third consecutive quote is a BYTE_TEXT_BLOCK.
+                parser.skip();
+                return parseByteTextBlock();
+            } else {
+                // Empty byte string.
+                currentTokenEnd = parser.position();
+                currentTokenBytes = new byte[0];
+                return currentTokenType = IdlToken.BYTE_STRING;
+            }
+        }
+
+        try {
+            // Parse the contents of a byte string.
+            currentTokenBytes = parseByteStringAndTextBlock(false);
+            currentTokenEnd = parser.position();
+            return currentTokenType = IdlToken.BYTE_STRING;
+        } catch (RuntimeException e) {
+            currentTokenEnd = parser.position();
+            currentTokenError = "Error parsing byte string: " + e.getMessage();
+            return currentTokenType = IdlToken.ERROR;
+        }
+    }
+
     private IdlToken parseTextBlock() {
         try {
             currentTokenStringSlice = parseQuotedTextAndTextBlock(true);
@@ -400,14 +446,26 @@ private IdlToken parseTextBlock() {
         }
     }
 
-    // Parses both quoted_text and text_block
+    private IdlToken parseByteTextBlock() {
+        try {
+            currentTokenBytes = parseByteStringAndTextBlock(true);
+            currentTokenEnd = parser.position();
+            return currentTokenType = IdlToken.BYTE_TEXT_BLOCK;
+        } catch (RuntimeException e) {
+            currentTokenEnd = parser.position();
+            currentTokenError = "Error parsing byte text block: " + e.getMessage();
+            return currentTokenType = IdlToken.ERROR;
+        }
+    }
+
+    // Parses quoted_text and text_block body
     private CharSequence parseQuotedTextAndTextBlock(boolean triple) {
         int start = parser.position();
 
         while (!parser.eof()) {
             char next = parser.peek();
             if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) {
-                // Found closing quotes of quoted_text and/or text_block
+                // Found closing quotes
                 break;
             }
             parser.skip();
@@ -427,4 +485,32 @@ private CharSequence parseQuotedTextAndTextBlock(boolean triple) {
 
         return IdlStringLexer.scanStringContents(result, triple);
     }
+
+    // Parses byte_string and byte_text_block body
+    private byte[] parseByteStringAndTextBlock(boolean triple) {
+        int start = parser.position();
+
+        while (!parser.eof()) {
+            char next = parser.peek();
+            if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) {
+                // Found closing quotes
+                break;
+            }
+            parser.skip();
+            if (next == '\\') {
+                parser.skip();
+            }
+        }
+
+        // Strip the ending '"'.
+        CharSequence result = parser.borrowSliceFrom(start);
+        parser.expect('"');
+
+        if (triple) {
+            parser.expect('"');
+            parser.expect('"');
+        }
+
+        return IdlStringLexer.scanByteStringContents(result, triple);
+    }
 }
@@ -4,6 +4,7 @@
  */
 package software.amazon.smithy.model.loader;
 
+import java.util.Base64;
 import java.util.function.Consumer;
 import software.amazon.smithy.model.SourceLocation;
 import software.amazon.smithy.model.node.ArrayNode;
@@ -53,25 +54,36 @@ static Node expectAndSkipNode(IdlModelLoader loader) {
     static Node expectAndSkipNode(IdlModelLoader loader, SourceLocation location) {
         IdlInternalTokenizer tokenizer = loader.getTokenizer();
         IdlToken token = tokenizer.expect(IdlToken.STRING,
+                IdlToken.BYTE_STRING,
                 IdlToken.TEXT_BLOCK,
+                IdlToken.BYTE_TEXT_BLOCK,
                 IdlToken.NUMBER,
                 IdlToken.IDENTIFIER,
                 IdlToken.LBRACE,
                 IdlToken.LBRACKET);
 
         switch (token) {
             case STRING:
-            case TEXT_BLOCK:
-                Node result = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location);
+            case TEXT_BLOCK: {
+                String value = tokenizer.getCurrentTokenStringSlice().toString();
                 tokenizer.next();
-                return result;
-            case IDENTIFIER:
+                return new StringNode(value, location);
+            }
+            case BYTE_STRING:
+            case BYTE_TEXT_BLOCK: {
+                String value = Base64.getEncoder().encodeToString(tokenizer.getCurrentTokenBytes());
+                tokenizer.next();
+                return new StringNode(value, location);
+            }
+            case IDENTIFIER: {
                 String shapeId = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer));
                 return createIdentifier(loader, shapeId, location);
-            case NUMBER:
-                Number number = tokenizer.getCurrentTokenNumberValue();
+            }
+            case NUMBER: {
+                Number value = tokenizer.getCurrentTokenNumberValue();
                 tokenizer.next();
-                return new NumberNode(number, location);
+                return new NumberNode(value, location);
+            }
             case LBRACE:
                 return parseObjectNode(loader, location);
             case LBRACKET:
@@ -191,7 +203,9 @@ private static ObjectNode parseObjectNode(IdlModelLoader loader, SourceLocation
         ObjectNode.Builder builder = ObjectNode.builder().sourceLocation(location);
 
         while (tokenizer.hasNext()) {
-            if (tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.IDENTIFIER) == IdlToken.RBRACE) {
+            IdlToken token =
+                    tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.BYTE_STRING, IdlToken.IDENTIFIER);
+            if (token == IdlToken.RBRACE) {
                 break;
             }