okikio
diff --git a/Diff for: ‎.gitpod.yml
-17 b/Diff for: ‎.gitpod.yml
-17
diff --git a/Diff for: ‎byte_methods.ts
+61-22 b/Diff for: ‎byte_methods.ts
+61-22
diff --git a/Diff for: ‎constants.ts
+108-3 b/Diff for: ‎constants.ts
+108-3
diff --git a/Diff for: ‎iterable.ts
+45-2 b/Diff for: ‎iterable.ts
+45-2
@@ -9,23 +9,6 @@ ports:
   - port: 3001
     onOpen: ignore
 
-github:
-  prebuilds:
-    # enable for the master/default branch (defaults to true)
-    master: true
-# enable for all branches in this repo (defaults to false)
-    branches: true
-# enable for pull requests coming from this repo (defaults to true)
-    pullRequests: true
-# enable for pull requests coming from forks (defaults to false)
-    pullRequestsFromForks: true
-# add a "Review in Gitpod" button as a comment to pull requests (defaults to true)
-    addComment: true
-# add a "Review in Gitpod" button to pull requests (defaults to false)
-    addBadge: false
-# add a label once the prebuild is ready to pull requests (defaults to false)
-    addLabel: prebuilt-in-gitpod
-
 # List the start up tasks. You can start them in parallel in multiple terminals. See https://www.gitpod.io/docs/44_config_start_tasks/
 tasks:  
   - init: >
 
@@ -1,3 +1,31 @@
+/**
+ * @module
+ * Provides utility functions for working with UTF-8 encoded characters in TypeScript.
+ * It includes methods for determining the byte length of UTF-8 characters, converting bytes to Unicode code points,
+ * extracting code points from buffers, and dealing with UTF-16 code units in strings.
+ * 
+ * @example
+ * ```ts
+ * import { getByteLength, bytesToCodePoint, bytesToCodePointFromBuffer, codePointAt } from 'jsr:@okikio/codepoint-iterator/byte_methods';
+ * 
+ * // Determine the byte length of a UTF-8 encoded character
+ * const leadByte = 0xF0; // Leading byte of a 4-byte UTF-8 character
+ * console.log(getByteLength(leadByte)); // Expected output: 4
+ * 
+ * // Convert a sequence of UTF-8 bytes to a Unicode code point
+ * const bytes = [0xF0, 0x9F, 0x92, 0xA9]; // UTF-8 encoded representation of the 💩 emoji
+ * console.log(bytesToCodePoint(4, bytes)); // Expected output: 128169 (code point for 💩)
+ * 
+ * // Extract a Unicode code point from a buffer
+ * const buffer = new Uint8Array([0xF0, 0x9F, 0x92, 0xA9]);
+ * console.log(bytesToCodePointFromBuffer(4, buffer, 0)); // Expected output: 128169
+ * 
+ * // Calculate the Unicode code point of a character in a string
+ * const str = '🌍';
+ * console.log(codePointAt(str, 0)); // Expected output: 127757 (code point for 🌍)
+ * ```
+ */
+
 import { 
   BITS_FOR_2B,
   BITS_FOR_3B,
@@ -18,12 +46,15 @@ import {
 /**
  * Calculates the number of bytes required to represent a single UTF-8 character.
  * 
- * UTF-8 can be represented by 1 to 4 bytes. 
+ * Determines the byte length of a UTF-8 encoded character based on its leading byte.
+ * This is crucial for correctly interpreting or encoding text in UTF-8, 
+ * where characters may vary in byte length from 1 to 4 bytes.
+ * 
  * This function given the byte value of the leading byte for the utf-8 character 
  * calculates how many more bytes are required to represent the utf-8 character,
  * this allows emojis and other symbols to be represented in utf-8.
  *
- * @param byte - The lead byte of a UTF-8 character.
+ * @param byte The lead byte of a UTF-8 character.
  * @returns The number of bytes in a Uint8Array required to represent the UTF-8 character (the number of bytes ranges from 1 to 4).
  */
 export function getByteLength(byte: number): number {
@@ -37,8 +68,8 @@ export function getByteLength(byte: number): number {
 }
 
 /**
- * UTF-8 bytes to codepoint.
- * Calculates the Unicode code point from the bytes of a UTF-8 character.
+ * Converts a sequence of bytes into a Unicode code point. This function is a key part of 
+ * decoding UTF-8 encoded text, as it translates the raw bytes back into the characters they represent.
  * 
  * UTF-8 can be represented by 1 to 4 bytes. 
  * This function given the byte length of the utf-8 character 
@@ -48,10 +79,10 @@ export function getByteLength(byte: number): number {
  * Due to the dynamic length of utf-8 characters, 
  * it's faster to just grab the bytes from the Uint8Array and then calculate its codepoint
  * than trying to decode said Uint8Array into a string and then converting 
- * said string into codepoints.
+ * said string into codepoints. 
  *
  * @param byteLength The number of bytes in a Uint8Array required to represent a single UTF-8 character (the number of bytes ranges from 1 to 4).
- * @param [bytes] - An array of length `byteLength` bytes that make up the UTF-8 character.
+ * @param bytes An array of length `byteLength` bytes that make up the UTF-8 character.
  * @returns The Unicode code point of the UTF-8 character.
  */
 export function bytesToCodePoint(byteLength: number, [byte1, byte2, byte3, byte4]: number[]): number {
@@ -79,16 +110,20 @@ export function bytesToCodePoint(byteLength: number, [byte1, byte2, byte3, byte4
       MASK_FOR_1B & byte4
 
     // 1-byte UTF-8 sequence (fallback)
+    // Default to 1-byte sequence if length is unexpected
     : byte1
   );
 }
 
-/**
- * Calculates the Unicode code point from a given buffer using indexed access.
- * @param byteLength - The number of bytes representing the code point.
- * @param buffer - The Uint8Array buffer containing the bytes.
- * @param head - The starting index of the code point in the buffer.
- * @returns The calculated Unicode code point.
+/** 
+ * Extracts a Unicode code point from a given buffer starting at a specified index. 
+ * This method is useful for parsing a stream or array of data where UTF-8 characters 
+ * are embedded within a larger set of binary data.
+ * 
+ * @param byteLength The byte length of the UTF-8 encoded character to extract.
+ * @param buffer The buffer (array or Uint8Array) containing the UTF-8 data.
+ * @param head The index in the buffer where the UTF-8 encoded character starts.
+ * @returns The Unicode code point extracted from the buffer.
  */
 export function bytesToCodePointFromBuffer<T extends number = number>(
   byteLength: number,
@@ -121,23 +156,26 @@ export function bytesToCodePointFromBuffer<T extends number = number>(
         MASK_FOR_1B & buffer[(head + 3) % bufferSize]
       );
     default:
+      // Default case for unexpected byteLength
       return buffer[head];
   }
 }
 
 /**
  * Extracts the Unicode code point and its size in UTF-16 code units from a string at a given position.
- * @param str - The input string.
- * @param index - The position in the string to extract the code point from.
- * @returns A number represent the code point in UTF-16 code units.
+ * 
+ * Calculates the Unicode code point of a character at a specific index in a string, 
+ * taking into account UTF-16 encoding which may represent characters using one or two code units (surrogates).
+ * This function is particularly useful for strings containing emoji or other characters 
+ * that may be represented as surrogate pairs in JavaScript.
+ * 
+ * @param str The string to extract the code point from.
+ * @param index The index of the character within the string.
+ * @returns The Unicode code point of the character (combining a surrogate pair when one is present), or `undefined` if `index` is out of bounds.
  */
-export function codePointAt(str: string, index: number): number {
+export function codePointAt(str: string, index: number): number | undefined {
   const size = str.length;
-
-  // Account for out-of-bounds indices:
-  if (index < 0 || index >= size) {
-    return undefined;
-  }
+  if (index < 0 || index >= size) return undefined; // Guard clause for out-of-bounds index
 
   // Get the first code unit
   const first = str.charCodeAt(index);
@@ -174,9 +212,10 @@ export function codePointAt(str: string, index: number): number {
       // Use bitwise shift instead of multiplication and addition
       // Bitwise left shift (<< 10) is used here as an efficient way to multiply by 2^10 (or 2**10) (or 1024).
       // This is equivalent to the expression (first - 0xD800) * 0x400, since 0x400 in decimal is 1024.
-      return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
+      return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000; // Calculate and return surrogate pair code point
     }
   }
 
+  // Return the code unit if not a surrogate pair
   return first;
 }
@@ -1,24 +1,129 @@
+/**
+ * @module
+ * This module defines constants used for UTF-8 character encoding, 
+ * covering 1-byte to 5-byte sequences, including their leading bits 
+ * and masks for identifying and extracting the encoded character bits.
+ * 
+ * Defines constants for UTF-8 encoding operations, including lead bytes, masks, and bits required for different byte sequences.
+ * These constants are essential for encoding and decoding UTF-8 characters, from simple ASCII to complex symbols and emojis.
+ * 
+ * @example
+ * Imagine encoding the character '𝄞' (the G Clef symbol in music), which requires a 4-byte UTF-8 sequence.
+ * 
+ * 1. Identify the lead byte for a 4-byte sequence: `LEAD_FOR_4B` (1111 0000 in binary)
+ * 2. The mask for extracting significant bits from the first byte in a 4-byte sequence: `MASK_FOR_4B` (0000 0111 in binary)
+ * 3. To decode '𝄞', the bits taken from the first byte are shifted by `BITS_FOR_4B` (18), which places them at the highest positions (bits 19 -> 21) of the code point.
+ * 
+ * The process involves:
+ * - Using `LEAD_FOR_4B` to start the encoding sequence.
+ * - Applying `MASK_FOR_4B` to extract the first few significant bits of the character.
+ * - Shifting by `BITS_FOR_4B`, `BITS_FOR_3B`, and `BITS_FOR_2B` to position the remaining bits correctly.
+ * 
+ * For a 2-byte character like 'Ω' (Omega):
+ * - Start with `LEAD_FOR_2B` (1100 0000 in binary) to indicate a 2-byte sequence.
+ * - Use `MASK_FOR_2B` (0001 1111 in binary) for the first byte's significant bits.
+ * - The shift amount is `BITS_FOR_2B` (6 bits for positions 7 to 12).
+ * 
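+ * A 1-byte ASCII character, such as 'A':
+ * - Is stored as-is in a single byte whose high bit is clear, so no mask or shift is needed. `LEAD_FOR_1B` (1000 0000 in binary) is the first byte value outside the ASCII range, and `MASK_FOR_1B` (0011 1111 in binary) extracts the 6 payload bits from the continuation bytes of longer sequences.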
+ */
+
 // 1-byte encoding
+/** 
+ * High-bit marker (`0x80`), the first byte value outside the single-byte (ASCII) range of UTF-8.
+ * A byte below this value has its high bit clear and is a complete 1-byte character.
+ * 
+ * @example `1000 0000`
+ */
 export const LEAD_FOR_1B = 0x80; // 1000 0000
+
+/** 
+ * Mask for extracting the 6 payload bits from a continuation byte (`10xx xxxx`) of a multi-byte sequence.
+ * 
+ * @example `0011 1111`
+ */
 export const MASK_FOR_1B = 0x3F; // 0011 1111
 
 // 2-byte encoding
-export const BITS_FOR_2B = 6; // bits 7 -> 12
+/** 
+ * Bit-shift amount (6) used to position payload bits; first needed for 2-byte sequences, i.e. characters beyond the ASCII range.
+ * 
+ * @example highest bits 7 -> 12
+ */
+export const BITS_FOR_2B = 6; // highest bits 7 -> 12
+
+/** 
+ * Leading bits for a 2-byte sequence, indicating the start of a 2-byte encoded character.
+ * 
+ * @example `1100 0000`
+ */
 export const LEAD_FOR_2B = 0xC0; // 1100 0000
+
+/** 
+ * Mask for extracting the significant bits from a 2-byte encoded character.
+ * 
+ * @example `0001 1111`
+ */
 export const MASK_FOR_2B = 0x1F; // 0001 1111
 
 // 3-byte encoding
-export const BITS_FOR_3B = 12; // bits 13 -> 18
+/** 
+ * Bit-shift amount (12) used to position payload bits; first needed for 3-byte sequences, typically used for characters in many non-Western alphabets.
+ * 
+ * @example highest bits 13 -> 18
+ */
+export const BITS_FOR_3B = 12; // highest bits 13 -> 18
+
+/** 
+ * Leading bits for a 3-byte sequence, indicating the start of a 3-byte encoded character.
+ * 
+ * @example `1110 0000`
+ */
 export const LEAD_FOR_3B = 0xE0; // 1110 0000
+
+/** 
+ * Mask for extracting the significant bits from a 3-byte encoded character.
+ * 
+ * @example `0000 1111`
+ */
 export const MASK_FOR_3B = 0x0F; // 0000 1111
 
 // 4-byte encoding
+/** 
+ * Bit-shift amount (18) used to position payload bits; only needed for 4-byte sequences, used for characters that are less common in daily use.
+ * 
+ * @example highest bits 19 -> 21
+ */
 export const BITS_FOR_4B = 18; // highest bits 19 -> 21
+
+/** 
+ * Leading bits for a 4-byte sequence, indicating the start of a 4-byte encoded character.
+ * 
+ * @example `1111 0000`
+ */
 export const LEAD_FOR_4B = 0xF0; // 1111 0000
+
+/** 
+ * Mask for extracting the significant bits from a 4-byte encoded character.
+ * 
+ * @example `0000 0111`
+ */
 export const MASK_FOR_4B = 0x07; // 0000 0111
 
 // 5-byte encoding
+/** 
+ * Leading bits for a 5-byte sequence. This is not officially used in UTF-8 encoding 
+ * and is included for completeness.
+ * 
+ * @example `1111 1000`
+ */
 export const LEAD_FOR_5B = 0xF8; // 1111 1000
 
-// The maximum number of bytes required to represent a UTF-8 character
+// UTF-8 encoding specifics
+/** 
+ * The maximum number of bytes required to represent any UTF-8 character. 
+ * This constant defines the upper limit for UTF-8 encoded character size.
+ * 
+ * @example 4
+ */
 export const UTF8_MAX_BYTE_LENGTH = 4;
@@ -1,11 +1,54 @@
 /**
- * Converts ReadableStream into async iterables
+ * @module
+ * Provides extensions for `ReadableStream` to enhance its usability in JavaScript environments.
+ * This module includes a function to convert a `ReadableStream` into an asynchronous iterable,
+ * allowing for easier consumption of streamed data in a more modern and convenient syntax.
+ * 
+ * This is particularly useful in environments or scenarios where `ReadableStream` does not natively support async iteration.
+ * 
+ * @example
+ * ```ts
+ * // Assuming you have a function that returns a ReadableStream, e.g., fetching some data
+ * async function fetchDataAsStream() {
+ *   const response = await fetch('https://example.com/data');
+ *   return response.body; // This is a ReadableStream
+ * }
+ * 
+ * // Utilize `getIterableStream` to consume the ReadableStream as an async iterable
+ * async function processStreamData() {
+ *   const stream = await fetchDataAsStream();
+ *   for await (const chunk of getIterableStream(stream)) {
+ *     console.log(chunk); // Process each chunk of data as it's read from the stream
+ *   }
+ * }
+ * 
+ * processStreamData();
+ * ```
+ * Consuming a `ReadableStream` of data (e.g., from a network response) using the `getIterableStream` function,
+ * enabling the use of an async for-loop to process the data in chunks as it's received.
+ */
+
+/**
+ * Converts a `ReadableStream` into an async iterable. This allows for easier consumption
+ * of stream data using asynchronous iteration, providing a more modern approach to handling streamed data.
  * 
  * Ideally this would already be built into ReadableStream, 
  * but it's currently not so this should help tide over til
  * js runtimes support async iterables for ReadableStreams.
  * 
- * @param stream ReadableStream to convert into async iterable
+ * @param stream The `ReadableStream` to be converted into an async iterable. This stream can contain any type of data, typically `Uint8Array` for binary data.
+ * @returns An `AsyncIterable` that yields data chunks from the `ReadableStream` as they are read.
+ * @template T The type of data chunks contained within the `ReadableStream`, defaulting to `Uint8Array`.
+ * 
+ * @example
+ * ```ts
+ * const responseStream = fetch('https://example.com/data').then(res => res.body);
+ * for await (const chunk of getIterableStream(await responseStream)) {
+ *   console.log(new TextDecoder().decode(chunk)); // Assuming the stream is text data
+ * }
+ * ```
+ * Converting a `ReadableStream` from a fetch request into an async iterable,
+ * and then asynchronously iterating over each chunk of data, decoding and logging the text content.
  */
 export async function* getIterableStream<T = Uint8Array>(stream: ReadableStream<T>): AsyncIterable<T> {
   const reader = stream.getReader();