diff --git a/emojiasm/bytecode.py b/emojiasm/bytecode.py
index a52496b..ab135c6 100644
--- a/emojiasm/bytecode.py
+++ b/emojiasm/bytecode.py
@@ -16,7 +16,7 @@
     Tier 2: Numeric + output (PRINT/PRINTLN but no INPUT; GPU with output buffer)
     Tier 3: Full features (INPUT/strings require CPU fallback)
 
-Max stack depth capped at 128 entries in device memory (KB #147).
+Max stack depth capped at 256 entries in device memory (KB #147).
 """
 
 from __future__ import annotations
@@ -88,7 +88,7 @@
 _MAX_OPERAND = (1 << 24) - 1
 
 # Maximum stack depth allowed on GPU (KB #147)
-_GPU_MAX_STACK = 128
+_GPU_MAX_STACK = 256
 
 
 # ── Output dataclass ─────────────────────────────────────────────────────
@@ -299,7 +299,7 @@ def _analyze_max_stack_depth(program: Program) -> int:
     """Conservative max stack depth via instruction walk.
 
     Walks each function linearly (ignoring branches — conservative because
-    we take the max across all instruction positions). Caps at 128 per
+    we take the max across all instruction positions). Caps at 256 per
     KB #147 GPU memory budget.
     """
     max_depth = 0
diff --git a/emojiasm/gpu.py b/emojiasm/gpu.py
index ed34ec6..2f7b1f1 100644
--- a/emojiasm/gpu.py
+++ b/emojiasm/gpu.py
@@ -20,7 +20,7 @@
 
 # ── Constants ────────────────────────────────────────────────────────────
 
-DEFAULT_STACK_DEPTH = 128
+DEFAULT_STACK_DEPTH = 256
 DEFAULT_MAX_STEPS = 1_000_000
 DEFAULT_THREADGROUP_SIZE = 256
 
diff --git a/emojiasm/metal/vm.metal b/emojiasm/metal/vm.metal
index a921c57..7c22ff5 100644
--- a/emojiasm/metal/vm.metal
+++ b/emojiasm/metal/vm.metal
@@ -91,10 +91,10 @@ constant uint32_t STATUS_TIMEOUT    = 3;
 // ── Fixed-size limits ───────────────────────────────────────────────────
 
 // Call stack depth (thread-local, small enough for registers per KB #146)
-constant int CALL_STACK_DEPTH = 16;
+constant int CALL_STACK_DEPTH = 32;
 
 // Memory cells per thread (thread-local array)
-constant int NUM_MEMORY_CELLS = 32;
+constant int NUM_MEMORY_CELLS = 128;
 
 // ── Output buffer entry (Tier 2 output capture) ────────────────────────
 
diff --git a/emojiasm/transpiler.py b/emojiasm/transpiler.py
index d6b1234..67bcc35 100644
--- a/emojiasm/transpiler.py
+++ b/emojiasm/transpiler.py
@@ -17,7 +17,7 @@
 from __future__ import annotations
 
 import ast
-from .opcodes import Op
+from .opcodes import Op, EMOJI_TO_OP
 from .parser import Program, Function, Instruction
 from .disasm import disassemble
 
@@ -31,21 +31,87 @@ def __init__(self, message: str, lineno: int = 0):
         super().__init__(f"TranspileError{loc}: {message}")
 
 
-# Emoji pool for variable memory cells (50 characters)
+# Emoji pool for variable memory cells (200+ characters)
 EMOJI_POOL = list(
+    # Original 50 (backward compatible, do not reorder)
     "🔢📊🎯⭐🌟💎🔥🌊🌈🍎"
     "🍊🍋🍇🍓🍒🥝🥑🌽🥕🍄"
     "🐱🐶🐸🦊🐻🐼🐨🐯🦁🐮"
     "🐷🐵🐔🐧🦅🦆🦉🐝🐛🦋"
     "🌻🌺🌸🌼🌹🍀🌿🌴🌵🎄"
+    # Animals (additional)
+    "🐭🐹🐰🐴🐗🐺🦄🐌🐞🐜"
+    "🐢🐍🐙🐠🐟🐬🐳🐋🐊🐅"
+    "🐆🐘🐪🐫🐃🐂🐄🐎🐖🐏"
+    "🐑🐐🐕🐩🐇🐁🐀🐿🦔🦇"
+    "🦎🦖🦕🦑🦞🦀🐡🦈🦍🦧"
+    "🦛🦏🦒🦘🦙🦌🦃🦚🦜🦢"
+    "🦩🦝🦨🦡🦫🦦🦥🐈🐓🦗"
+    # Food and drink
+    "🍐🍌🍉🍈🍑🥭🍍🥥🍅🍆"
+    "🥦🥬🥒🧄🧅🥔🍠🥐🥯🍞"
+    "🥖🥨🧀🥚🍳🧈🥞🧇🥓🥩"
+    "🍗🍖🍔🍟🍕🥪🥙🧆🥗🥘"
+    "🥫🍝🍜🍲🍛🍣🍱🥟🍤🍙"
+    "🍚🍘🍥🥠🥮🍢🍡🍧🍨🍦"
+    "🥧🧁🍰🎂🍮🍭🍬🍫🍿🍩"
+    "🍪🌰🥜🍯"
+    # Sports and activities
+    "⚽🏀🏈⚾🥎🎾🏐🏉🥏🎱"
+    "🏓🏸🏒🏑🥍🏏🥅🏹🎣🥊"
+    "🥋🎽🛹🛷🥌🎿🏂"
+    # Vehicles and transport
+    "🚗🚕🚙🚌🏎🚓🚑🚒🚐🛻"
+    "🚚🚛🚜🏍🛵🚲🛴🚔🚍🚘"
+    "🚖🚡🚠🚟🚃🚋🚞🚝🚄🚅"
+    "🚈🚂🚆🛶🚤🛥🚢"
 )
 
-# Emoji pool for function names
+# Emoji pool for function names (50+ entries; list() splits ❤️ into "❤" + U+FE0F)
 FUNC_EMOJI_POOL = list(
+    # Original entries (backward compatible, do not reorder)
     "🔲🔳🟥🟦🟩🟨🟧🟪🟫⬛"
     "⬜❤️💙💚💛🧡💜🤎🖤🤍"
+    # Colored circles and shapes
+    "🔴🟠🟡🟢🔵🟣🟤⚫⚪🔶"
+    "🔷🔸🔹🔺🔻💠🔘"
+    # Zodiac and symbols
+    "♈♉♊♋♌♍♎♏♐♑♒♓⛎"
 )
 
+def _validate_emoji_pools() -> None:
+    """Fail fast if an emoji pool has duplicates or the two pools overlap.
+
+    Called at module load time. Raises RuntimeError if:
+    - FUNC_EMOJI_POOL has duplicate entries
+    - EMOJI_POOL has duplicate entries
+    - FUNC_EMOJI_POOL and EMOJI_POOL overlap
+
+    Note: Collisions with opcode emoji (EMOJI_TO_OP) are acceptable and are not
+    checked: the parser distinguishes opcode context from STORE/LOAD cell names.
+    """
+    func_dupes = len(FUNC_EMOJI_POOL) - len(set(FUNC_EMOJI_POOL))  # surplus entries; 0 when unique
+    if func_dupes:
+        raise RuntimeError(
+            f"FUNC_EMOJI_POOL has {func_dupes} duplicate entries"
+        )
+
+    var_dupes = len(EMOJI_POOL) - len(set(EMOJI_POOL))  # surplus entries; 0 when unique
+    if var_dupes:
+        raise RuntimeError(
+            f"EMOJI_POOL has {var_dupes} duplicate entries"
+        )
+
+    overlap = set(FUNC_EMOJI_POOL) & set(EMOJI_POOL)  # entries present in both pools
+    if overlap:
+        raise RuntimeError(
+            f"FUNC_EMOJI_POOL and EMOJI_POOL overlap: {overlap}"
+        )
+
+
+_validate_emoji_pools()
+
+
 # Operator mappings
 _BINOP_MAP = {
     ast.Add: Op.ADD,
diff --git a/specs/tier4-capacity/.progress.md b/specs/tier4-capacity/.progress.md
new file mode 100644
index 0000000..5c9f993
--- /dev/null
+++ b/specs/tier4-capacity/.progress.md
@@ -0,0 +1,37 @@
+# tier4-capacity
+
+## Original Goal
+
+Implement EmojiASM issue #30: Tier 4 VM and GPU kernel capacity limits. Raise the hard limits that constrain what transpiled Python programs can do. Expand GPU memory cells (32->128+), variable pool (50->200+), function pool (20->50+), GPU call stack depth (16->32+), GPU stack depth (128->256+). These are hard walls that cause silent failures or crashes.
+
+## Completed Tasks
+- [x] 1.1 Increase GPU memory cells from 32 to 128
+- [x] 1.2 Increase GPU call stack depth from 16 to 32
+- [x] 1.3 Increase GPU stack depth from 128 to 256
+- [x] 1.4 Expand variable emoji pool from 50 to 200+
+- [x] 1.5 Expand function emoji pool from 20 to 50+
+- [x] 2.1 Update docstrings and comments for new limits
+- [x] 2.2 Add collision validation utility
+- [x] 3.1 Test expanded variable pool limits
+- [x] 3.2 Test expanded function pool and GPU kernel limits
+
+## Current Task
+Awaiting next task
+
+## Learnings
+
+- Existing `EMOJI_POOL` has collisions with reserved opcodes: `🔢` (MOD) and `📊` (DIRECTIVE_DATA). These work because transpiler uses them as STORE/LOAD args (memory cell names), not as instruction opcodes in source text. Parser context distinguishes them.
+- KB #147 confirms 256-entry stack feasible on Apple Silicon GPU (1KB device memory per thread).
+- KB #185 estimates ~6.5KB/thread at current sizes. With increases, ~9.7KB/thread -- still under 10KB budget.
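+- `FUNC_EMOJI_POOL` has 21 entries (not 20 as stated in the issue) because `❤️` is two code points (U+2764 followed by the U+FE0F variation selector) and `list()` splits the string per code point, so the pool holds a bare U+FE0F entry in addition to the 20 visible emoji.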
+- `stack_depth` is already parameterized as a kernel argument in `gpu.py` -- only the default needs changing. Memory cells and call stack are compile-time Metal constants.
+- The `_split_kernel_source()` function in `gpu.py` patches scalar reference parameters to pointer dereferences for MLX compatibility -- no changes needed for our constant-only changes.
+- `_build_memory_map()` in `bytecode.py` maps emoji cell names to integer indices 0..N-1 -- the 24-bit operand field supports up to 16M cells, so 128 is no concern.
+- Two existing tests (`test_capped_at_128` in test_bytecode.py and `test_default_stack_depth` in test_gpu_kernel.py) hardcoded old constant values and needed updating along with the constant changes.
+- Expanded EMOJI_POOL to 258 entries (animals, food, sports, vehicles). Only 1 opcode collision (`🔢` = MOD), which is expected and harmless. Python `list()` splits a string per code point, so each single-code-point emoji (multi-byte in UTF-8) becomes exactly one pool entry; multi-code-point sequences would be split apart.
+- Expanded FUNC_EMOJI_POOL to 51 entries using colored circles/shapes (🔴🟠🟡🟢🔵🟣🟤⚫⚪🔶🔷🔸🔹🔺🔻💠🔘) and zodiac symbols (♈♉♊♋♌♍♎♏♐♑♒♓⛎). All are single-codepoint emoji safe for list() splitting. No collisions with EMOJI_POOL or opcodes.
+- Only bytecode.py had stale "128" references (module docstring and `_analyze_max_stack_depth` docstring). transpiler.py comments were already updated in tasks 1.4/1.5. vm.metal comments don't reference specific old numeric values.
+- `_validate_emoji_pools()` runs at module load time, checking for duplicates within each pool and cross-pool overlap. Opcode collisions are intentionally allowed since parser context distinguishes them.
+
+## Next
+Task 1.6: POC Checkpoint -- verify all existing tests pass
diff --git a/specs/tier4-capacity/design.md b/specs/tier4-capacity/design.md
new file mode 100644
index 0000000..fb72457
--- /dev/null
+++ b/specs/tier4-capacity/design.md
@@ -0,0 +1,134 @@
+---
+spec: tier4-capacity
+phase: design
+created: 2026-03-08
+generated: auto
+---
+
+# Design: tier4-capacity
+
+## Overview
+
+Increase five hard-coded capacity limits across three layers: Metal kernel constants, Python bytecode compiler caps, and transpiler emoji pools. No architectural changes -- just constant adjustments and expanding string lists.
+
+## Architecture
+
+```
+Transpiler (transpiler.py)          Bytecode (bytecode.py)           GPU Kernel (vm.metal)
+  EMOJI_POOL: 50 -> 200+             _GPU_MAX_STACK: 128 -> 256       NUM_MEMORY_CELLS: 32 -> 128
+  FUNC_EMOJI_POOL: 20 -> 50+                                          CALL_STACK_DEPTH: 16 -> 32
+                                    GPU Interface (gpu.py)             stack_depth: 128 -> 256
+                                      DEFAULT_STACK_DEPTH: 128 -> 256
+```
+
+## Components
+
+### Component A: Metal Kernel Constants (`emojiasm/metal/vm.metal`)
+**Purpose**: Define per-thread resource sizes for GPU execution
+**Changes**:
+- L94: `CALL_STACK_DEPTH` 16 -> 32
+- L97: `NUM_MEMORY_CELLS` 32 -> 128
+
+**Impact on thread-local memory**:
+- `call_stack[32]`: 32 * 4B = 128B (was 64B)
+- `memory[128]`: 128 * 4B = 512B (was 128B)
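+- Net increase: ~448B of thread-local memory per thread (~9.7KB per thread in total including the 1KB device stack, per the Per-Thread Memory Budget table below; still within the 10KB budget, see NFR-2 and KB #185)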
+
+### Component B: Bytecode Compiler Cap (`emojiasm/bytecode.py`)
+**Purpose**: Cap static stack analysis for GPU programs
+**Changes**:
+- L91: `_GPU_MAX_STACK` 128 -> 256
+
+### Component C: GPU Interface Default (`emojiasm/gpu.py`)
+**Purpose**: Set default per-instance stack size passed to kernel
+**Changes**:
+- L23: `DEFAULT_STACK_DEPTH` 128 -> 256
+
+**Impact on device memory**:
+- Stacks buffer: `n * 256 * 4B` = 1KB per thread (was 512B)
+- For 10K threads: 10MB (was 5MB), well within GPU memory
+
+### Component D: Variable Emoji Pool (`emojiasm/transpiler.py`)
+**Purpose**: Map Python variable names to unique emoji memory cell identifiers
+**Changes**:
+- Expand `EMOJI_POOL` from 50 to 200+ characters
+- Use emoji from multiple Unicode blocks: food, animals, objects, nature, sports, vehicles, flags
+
+**Emoji selection criteria**:
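+1. Must NOT appear in `EMOJI_TO_OP` (opcodes.py), except the pre-existing `🔢` (MOD), which is only used as a STORE/LOAD argument
+2. Must NOT appear in directive constants (`DIRECTIVE_FUNC`, `DIRECTIVE_LABEL`, etc.), except the pre-existing `📊` (DATA directive), for the same reason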
+3. Must NOT appear in `FUNC_EMOJI_POOL`
+4. Should be single-codepoint or stable multi-codepoint sequences
+5. Prefer visually distinct emoji
+
+### Component E: Function Emoji Pool (`emojiasm/transpiler.py`)
+**Purpose**: Map Python function names to unique emoji identifiers
+**Changes**:
+- Expand `FUNC_EMOJI_POOL` from 20 to 50+ characters
+- Add more colored shapes, symbols, and distinct emoji
+
+**Selection criteria**: Same collision avoidance as Component D, plus must not overlap with `EMOJI_POOL`.
+
+## Data Flow
+
+1. Python source -> Transpiler assigns variables from `EMOJI_POOL` (up to 200+)
+2. Transpiler assigns functions from `FUNC_EMOJI_POOL` (up to 50+)
+3. Program -> Bytecode compiler maps emoji cells to integer indices 0..N-1
+4. Bytecode `_analyze_max_stack_depth()` caps at 256 (was 128)
+5. `gpu_run()` allocates stacks buffer with `n * 256` entries
+6. Metal kernel uses `memory[128]`, `call_stack[32]`, dynamic `stack_depth=256`
+
+## Technical Decisions
+
+| Decision | Options | Choice | Rationale |
+|----------|---------|--------|-----------|
+| Memory cells count | 64, 128, 256 | 128 | Matches the previous (128-entry) stack depth; 512B thread-local fits register budget per KB #147 |
+| Call stack depth | 24, 32, 64 | 32 | Supports fib(20); 128B minimal overhead |
+| Stack depth | 192, 256, 512 | 256 | Per KB #147 max feasible; 1KB device memory per thread |
+| Variable pool size | 150, 200, 300 | 200+ | Covers complex programs; more emoji available if needed |
+| Function pool size | 40, 50, 80 | 50+ | Covers modular programs; 50 functions is generous |
+| Configurability | Constants vs params | Constants (except stack_depth) | Memory cells and call stack are compile-time Metal constants; stack_depth already parameterized |
+
+## File Structure
+
+| File | Action | Purpose |
+|------|--------|---------|
+| `emojiasm/metal/vm.metal` | Modify | Update `CALL_STACK_DEPTH` and `NUM_MEMORY_CELLS` |
+| `emojiasm/bytecode.py` | Modify | Update `_GPU_MAX_STACK` |
+| `emojiasm/gpu.py` | Modify | Update `DEFAULT_STACK_DEPTH` |
+| `emojiasm/transpiler.py` | Modify | Expand `EMOJI_POOL` and `FUNC_EMOJI_POOL` |
+| `tests/test_gpu_kernel.py` | Modify | Add/update tests for new limits |
+| `tests/test_transpiler.py` | Modify | Add tests for expanded pools |
+| `tests/test_emojiasm.py` | No change | Existing tests should pass as-is |
+
+## Error Handling
+
+| Error | Handling | User Impact |
+|-------|----------|-------------|
+| Variable pool exceeded (>200) | `TranspileError` with count | Same as before, higher limit |
+| Function pool exceeded (>50) | `TranspileError` with count | Same as before, higher limit |
+| GPU memory cell OOB (>128) | `STATUS_ERROR` in kernel | Same error path, higher limit |
+| GPU call stack overflow (>32) | `STATUS_ERROR` in kernel | Same error path, higher limit |
+| GPU stack overflow (>256) | `STATUS_ERROR` in kernel | Same error path, higher limit |
+
+## Existing Patterns to Follow
+
+- `vm.metal` L92-97: `constant int` declarations for limits
+- `transpiler.py` L35-47: Emoji pools as `list()` of concatenated string literals
+- `transpiler.py` L139-141: Pool exhaustion raises `TranspileError`
+- `bytecode.py` L91: `_GPU_MAX_STACK` caps analysis
+- `gpu.py` L23: `DEFAULT_STACK_DEPTH` constant
+
+## Per-Thread Memory Budget (Updated)
+
+| Resource | Old Size | New Size |
+|----------|----------|----------|
+| Operand stack (device) | 512B | 1024B |
+| Call stack (thread-local) | 64B | 128B |
+| Memory cells (thread-local) | 128B | 512B |
+| Arrays (thread-local) | 8KB | 8KB (unchanged) |
+| PRNG state (thread-local) | 24B | 24B |
+| **Total thread-local** | **~8.2KB** | **~8.7KB** |
+| **Total with device stack** | **~8.7KB** | **~9.7KB** |
+
+Within 10KB budget per NFR-2. At 9.7KB/thread, 64MB supports ~6,500 concurrent VMs.
diff --git a/specs/tier4-capacity/requirements.md b/specs/tier4-capacity/requirements.md
new file mode 100644
index 0000000..1643645
--- /dev/null
+++ b/specs/tier4-capacity/requirements.md
@@ -0,0 +1,85 @@
+---
+spec: tier4-capacity
+phase: requirements
+created: 2026-03-08
+generated: auto
+---
+
+# Requirements: tier4-capacity
+
+## Summary
+
+Raise hard capacity limits in the GPU Metal kernel, bytecode compiler, and Python transpiler to support larger, more complex programs. All limits are currently too low for non-trivial transpiled Python with nested loops, many variables, and recursive functions.
+
+## User Stories
+
+### US-1: Transpile programs with many variables
+As a developer, I want to transpile Python programs with up to 200 variables so that complex algorithms with nested loops and temporaries don't hit the 50-variable limit.
+
+**Acceptance Criteria**:
+- AC-1.1: `EMOJI_POOL` contains at least 200 unique emoji characters
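+- AC-1.2: No emoji in `EMOJI_POOL` collides with opcodes in `EMOJI_TO_OP` or directives in a way that creates parsing ambiguity (the pre-existing `🔢`/`📊` entries are accepted because they appear only as STORE/LOAD arguments)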
+- AC-1.3: A transpiled program using 100+ variables compiles and runs correctly
+
+### US-2: Transpile programs with many functions
+As a developer, I want to define up to 50 functions in transpiled Python so that modular programs with helper functions don't hit the 20-function limit.
+
+**Acceptance Criteria**:
+- AC-2.1: `FUNC_EMOJI_POOL` contains at least 50 unique emoji characters
+- AC-2.2: No emoji in `FUNC_EMOJI_POOL` collides with variable pool or opcodes
+- AC-2.3: A transpiled program with 30+ functions compiles and runs correctly
+
+### US-3: GPU programs with many memory cells
+As a developer, I want GPU programs to access up to 128 memory cells so that transpiled programs with many variables execute on GPU without memory cell overflow.
+
+**Acceptance Criteria**:
+- AC-3.1: `NUM_MEMORY_CELLS` in `vm.metal` is at least 128
+- AC-3.2: STORE/LOAD to cell indices 0-127 work correctly in GPU execution
+- AC-3.3: Bytecode operand encoding supports cell indices up to 127
+
+### US-4: Deeper GPU recursion
+As a developer, I want GPU programs to recurse up to 32 levels deep so that recursive algorithms like `fib(20)` don't overflow the call stack.
+
+**Acceptance Criteria**:
+- AC-4.1: `CALL_STACK_DEPTH` in `vm.metal` is at least 32
+- AC-4.2: A recursive function calling 20+ levels deep completes without error on GPU
+
+### US-5: Larger GPU stack
+As a developer, I want GPU programs to use a 256-entry stack so that complex expressions and save/restore patterns around recursive calls don't overflow.
+
+**Acceptance Criteria**:
+- AC-5.1: `DEFAULT_STACK_DEPTH` in `gpu.py` is at least 256
+- AC-5.2: `_GPU_MAX_STACK` in `bytecode.py` matches the new default
+- AC-5.3: Stacks buffer in `gpu_run()` is sized correctly for the new depth
+
+## Functional Requirements
+
+| ID | Requirement | Priority | Source |
+|----|-------------|----------|--------|
+| FR-1 | Expand `EMOJI_POOL` to 200+ emoji | Must | US-1 |
+| FR-2 | Expand `FUNC_EMOJI_POOL` to 50+ emoji | Must | US-2 |
+| FR-3 | Increase `NUM_MEMORY_CELLS` to 128 | Must | US-3 |
+| FR-4 | Increase `CALL_STACK_DEPTH` to 32 | Must | US-4 |
+| FR-5 | Increase `DEFAULT_STACK_DEPTH` and `_GPU_MAX_STACK` to 256 | Must | US-5 |
+| FR-6 | Ensure no emoji collisions between pools and opcode/directive sets | Must | US-1, US-2 |
+| FR-7 | All existing tests continue to pass | Must | All |
+
+## Non-Functional Requirements
+
+| ID | Requirement | Category |
+|----|-------------|----------|
+| NFR-1 | GPU occupancy should remain above 50% with new limits | Performance |
+| NFR-2 | Per-thread memory budget should stay under 10KB | Performance |
+| NFR-3 | Emoji pools should use visually distinct, common emoji | Usability |
+
+## Out of Scope
+
+- Runtime-configurable memory cell count (would require kernel recompilation)
+- Dynamic memory allocation on GPU
+- Expanding CPU VM limits (already at 4096 stack, dict-based memory)
+- Array capacity changes (`MAX_ARRAYS`, `MAX_ARRAY_SIZE` in vm.metal)
+
+## Dependencies
+
+- Unicode emoji availability in Python strings
+- Metal shader compiler support for larger thread-local arrays
diff --git a/specs/tier4-capacity/research.md b/specs/tier4-capacity/research.md
new file mode 100644
index 0000000..e1211f9
--- /dev/null
+++ b/specs/tier4-capacity/research.md
@@ -0,0 +1,66 @@
+---
+spec: tier4-capacity
+phase: research
+created: 2026-03-08
+generated: auto
+---
+
+# Research: tier4-capacity
+
+## Executive Summary
+
+Raising capacity limits across the GPU kernel, transpiler pools, and bytecode module. All changes are low-risk constant/pool adjustments. The main trade-off is GPU occupancy vs capacity -- KB #147 confirms 256-entry stack is feasible, and KB #185 shows ~6.5KB/thread budget supports ~10K concurrent VMs.
+
+## Codebase Analysis
+
+### Current Limits (Exact Locations)
+
+| Limit | Current | File | Line |
+|-------|---------|------|------|
+| GPU memory cells | 32 | `emojiasm/metal/vm.metal` | L97 `NUM_MEMORY_CELLS = 32` |
+| GPU call stack depth | 16 | `emojiasm/metal/vm.metal` | L94 `CALL_STACK_DEPTH = 16` |
+| GPU stack depth | 128 | `emojiasm/gpu.py` | L23 `DEFAULT_STACK_DEPTH = 128` |
+| GPU stack cap (bytecode) | 128 | `emojiasm/bytecode.py` | L91 `_GPU_MAX_STACK = 128` |
+| Variable emoji pool | 50 | `emojiasm/transpiler.py` | L35-41 `EMOJI_POOL` |
+| Function emoji pool | 20 | `emojiasm/transpiler.py` | L44-47 `FUNC_EMOJI_POOL` |
+| CPU VM max_stack | 4096 | `emojiasm/vm.py` | L21 `stack_size=4096` |
+
+### Existing Patterns
+
+- `vm.metal` uses `constant int` declarations for compile-time limits (L94, L97)
+- `gpu.py` passes `stack_depth` as a kernel parameter (already configurable per-dispatch)
+- `bytecode.py` caps `_analyze_max_stack_depth()` at `_GPU_MAX_STACK`
+- `transpiler.py` `VarManager` raises `TranspileError` when pool exhausted (L139-141)
+- `FUNC_EMOJI_POOL` checked at L544-545 during function registration
+
+### Dependencies
+
+- MLX `mx.fast.metal_kernel()` -- stacks buffer sized as `n * stack_depth`
+- Metal compiler -- `constant int` values baked into kernel at compile time
+- `_split_kernel_source()` in `gpu.py` -- patches scalar refs to pointer derefs
+- Tests: `test_gpu_kernel.py` (source checks), `test_transpiler.py` (transpile+run)
+
+### Constraints
+
+- Metal thread-local arrays: larger `memory[]` and `call_stack[]` consume more registers, reducing occupancy
+- KB #147: 256-entry stack feasible, 64-entry thread-private arrays start impacting occupancy
+- KB #185: ~6.5KB/thread budget at current sizes; doubling stack+memory stays under ~8KB
+- 24-bit operand field in bytecode: max 16M memory cells (not a concern at 128)
+- `EMOJI_POOL` must avoid collisions with opcodes (`EMOJI_TO_OP`) and directives
+
+## Feasibility Assessment
+
+| Aspect | Assessment | Notes |
+|--------|------------|-------|
+| Technical Viability | High | Pure constant changes + expanding string lists |
+| Effort Estimate | S | ~2-3 hours including tests |
+| Risk Level | Low | No architectural changes; only raising limits |
+
+## Recommendations
+
+1. Increase `NUM_MEMORY_CELLS` to 128 (matches the current 128-entry stack depth, 512B thread-local)
+2. Increase `CALL_STACK_DEPTH` to 32 (supports `fib(20)` and deeper recursion)
+3. Increase `DEFAULT_STACK_DEPTH` and `_GPU_MAX_STACK` to 256 (per KB #147)
+4. Expand `EMOJI_POOL` to 200+ using Unicode emoji blocks (animals, food, objects, symbols)
+5. Expand `FUNC_EMOJI_POOL` to 50+ using colored shapes, hearts, flags
+6. Keep limits as constants (not runtime-configurable) for simplicity; `stack_depth` already parameterized
diff --git a/specs/tier4-capacity/tasks.md b/specs/tier4-capacity/tasks.md
new file mode 100644
index 0000000..fbdc4c4
--- /dev/null
+++ b/specs/tier4-capacity/tasks.md
@@ -0,0 +1,137 @@
+---
+spec: tier4-capacity
+phase: tasks
+total_tasks: 12
+created: 2026-03-08
+generated: auto
+---
+
+# Tasks: tier4-capacity
+
+## Phase 1: Make It Work (POC)
+
+Focus: Change all constants and expand pools. Verify existing tests still pass.
+
+- [x] 1.1 Increase GPU memory cells from 32 to 128
+  - **Do**: In `emojiasm/metal/vm.metal`, change `constant int NUM_MEMORY_CELLS = 32;` to `constant int NUM_MEMORY_CELLS = 128;`
+  - **Files**: `emojiasm/metal/vm.metal`
+  - **Done when**: The constant reads 128; no other code changes needed since all usage is via `NUM_MEMORY_CELLS`
+  - **Verify**: `grep "NUM_MEMORY_CELLS = 128" emojiasm/metal/vm.metal`
+  - **Commit**: `feat(gpu): increase memory cells from 32 to 128`
+  - _Requirements: FR-3_
+  - _Design: Component A_
+
+- [x] 1.2 Increase GPU call stack depth from 16 to 32
+  - **Do**: In `emojiasm/metal/vm.metal`, change `constant int CALL_STACK_DEPTH = 16;` to `constant int CALL_STACK_DEPTH = 32;`
+  - **Files**: `emojiasm/metal/vm.metal`
+  - **Done when**: The constant reads 32
+  - **Verify**: `grep "CALL_STACK_DEPTH = 32" emojiasm/metal/vm.metal`
+  - **Commit**: `feat(gpu): increase call stack depth from 16 to 32`
+  - _Requirements: FR-4_
+  - _Design: Component A_
+
+- [x] 1.3 Increase GPU stack depth from 128 to 256
+  - **Do**:
+    1. In `emojiasm/gpu.py`, change `DEFAULT_STACK_DEPTH = 128` to `DEFAULT_STACK_DEPTH = 256`
+    2. In `emojiasm/bytecode.py`, change `_GPU_MAX_STACK = 128` to `_GPU_MAX_STACK = 256`
+  - **Files**: `emojiasm/gpu.py`, `emojiasm/bytecode.py`
+  - **Done when**: Both constants read 256
+  - **Verify**: `grep "DEFAULT_STACK_DEPTH = 256" emojiasm/gpu.py && grep "_GPU_MAX_STACK = 256" emojiasm/bytecode.py`
+  - **Commit**: `feat(gpu): increase stack depth from 128 to 256`
+  - _Requirements: FR-5_
+  - _Design: Components B, C_
+
+- [x] 1.4 Expand variable emoji pool from 50 to 200+
+  - **Do**: In `emojiasm/transpiler.py`, replace `EMOJI_POOL` with an expanded list of 200+ emoji. Use emoji from these Unicode blocks: food/drink, animals, nature, sports, vehicles, objects, symbols. Verify no collisions with `EMOJI_TO_OP` keys or directive constants by running the collision check script. Keep the existing 50 emoji as the first 50 entries (preserves backward compatibility for any serialized programs).
+  - **Files**: `emojiasm/transpiler.py`
+  - **Done when**: `len(EMOJI_POOL) >= 200` and no collision with opcodes/directives
+  - **Verify**: `python3 -c "from emojiasm.transpiler import EMOJI_POOL; from emojiasm.opcodes import EMOJI_TO_OP; print(f'Pool size: {len(EMOJI_POOL)}'); assert len(EMOJI_POOL) >= 200; assert len(set(EMOJI_POOL)) == len(EMOJI_POOL), 'duplicates'; collisions = [e for e in EMOJI_POOL if e in EMOJI_TO_OP]; print(f'Opcode collisions: {len(collisions)} (ok if used only as memory cell names)')"`
+  - **Commit**: `feat(transpiler): expand variable emoji pool to 200+`
+  - _Requirements: FR-1, FR-6_
+  - _Design: Component D_
+
+- [x] 1.5 Expand function emoji pool from 20 to 50+
+  - **Do**: In `emojiasm/transpiler.py`, replace `FUNC_EMOJI_POOL` with an expanded list of 50+ emoji. Use colored circles, squares, diamonds, and other shape/symbol emoji. Ensure no overlap with `EMOJI_POOL`, `EMOJI_TO_OP`, or directives.
+  - **Files**: `emojiasm/transpiler.py`
+  - **Done when**: `len(FUNC_EMOJI_POOL) >= 50` and no collision with variable pool or opcodes
+  - **Verify**: `python3 -c "from emojiasm.transpiler import EMOJI_POOL, FUNC_EMOJI_POOL; from emojiasm.opcodes import EMOJI_TO_OP; print(f'Func pool: {len(FUNC_EMOJI_POOL)}'); assert len(FUNC_EMOJI_POOL) >= 50; assert len(set(FUNC_EMOJI_POOL)) == len(FUNC_EMOJI_POOL), 'duplicates'; assert not set(FUNC_EMOJI_POOL) & set(EMOJI_POOL), 'cross-collision with var pool'"`
+  - **Commit**: `feat(transpiler): expand function emoji pool to 50+`
+  - _Requirements: FR-2, FR-6_
+  - _Design: Component E_
+
+- [ ] 1.6 POC Checkpoint -- verify all existing tests pass
+  - **Do**: Run the full test suite to ensure no regressions from constant changes and pool expansion
+  - **Done when**: All tests pass (pytest exit code 0)
+  - **Verify**: `pytest`
+  - **Commit**: `feat(tier4): complete POC for capacity limit increases`
+
+## Phase 2: Refactoring
+
+- [x] 2.1 Update docstrings and comments for new limits
+  - **Do**:
+    1. In `emojiasm/transpiler.py`, update the comment `# Emoji pool for variable memory cells (50 characters)` to reflect new count
+    2. Update the comment `# Emoji pool for function names` similarly
+    3. In `emojiasm/bytecode.py`, update the docstring mentioning "Max stack depth capped at 128"
+    4. In `emojiasm/metal/vm.metal`, update any comments referencing old limit values
+  - **Files**: `emojiasm/transpiler.py`, `emojiasm/bytecode.py`, `emojiasm/metal/vm.metal`
+  - **Done when**: All comments/docstrings reference correct new values
+  - **Verify**: `grep -n "50 characters\|capped at 128\|32 cells\|16 entries" emojiasm/transpiler.py emojiasm/bytecode.py emojiasm/metal/vm.metal` should return no hits
+  - **Commit**: `docs: update comments for new capacity limits`
+  - _Design: All components_
+
+- [x] 2.2 Add collision validation utility
+  - **Do**: Add a `_validate_emoji_pools()` function in `transpiler.py` that checks for collisions between `EMOJI_POOL`, `FUNC_EMOJI_POOL`, and `EMOJI_TO_OP`/directives. Call it at module load (once) and raise `RuntimeError` if collisions found that would cause parsing ambiguity. Note: collisions with opcodes used only as memory cell names (STORE/LOAD args) are acceptable since the parser distinguishes these contexts.
+  - **Files**: `emojiasm/transpiler.py`
+  - **Done when**: Function exists and runs at import time without error
+  - **Verify**: `python3 -c "import emojiasm.transpiler; print('Import OK, no collisions')"`
+  - **Commit**: `refactor(transpiler): add emoji pool collision validation`
+  - _Design: Component D, E_
+
+## Phase 3: Testing
+
+- [x] 3.1 Test expanded variable pool limits
+  - **Do**: Add test in `tests/test_transpiler.py`:
+    1. `test_many_variables`: Transpile+run a program that uses 100+ unique variables with assignments and reads
+    2. `test_variable_pool_size`: Assert `len(EMOJI_POOL) >= 200`
+    3. `test_variable_pool_no_duplicates`: Assert all entries unique
+  - **Files**: `tests/test_transpiler.py`
+  - **Done when**: New tests pass
+  - **Verify**: `pytest tests/test_transpiler.py -k "many_variables or variable_pool" -v`
+  - **Commit**: `test(transpiler): add tests for expanded variable pool`
+  - _Requirements: AC-1.1, AC-1.2, AC-1.3_
+
+- [x] 3.2 Test expanded function pool and GPU kernel limits
+  - **Do**: Add tests:
+    1. In `tests/test_transpiler.py`: `test_many_functions` -- transpile+run a program with 30+ `def` statements
+    2. In `tests/test_transpiler.py`: `test_function_pool_size` -- assert `len(FUNC_EMOJI_POOL) >= 50`
+    3. In `tests/test_gpu_kernel.py`: `test_memory_cells_128` -- verify kernel source has `NUM_MEMORY_CELLS = 128`
+    4. In `tests/test_gpu_kernel.py`: `test_call_stack_depth_32` -- verify kernel source has `CALL_STACK_DEPTH = 32`
+    5. In `tests/test_gpu_kernel.py`: `test_default_stack_depth_256` -- verify `DEFAULT_STACK_DEPTH == 256`
+  - **Files**: `tests/test_transpiler.py`, `tests/test_gpu_kernel.py`
+  - **Done when**: All new tests pass
+  - **Verify**: `pytest tests/test_transpiler.py tests/test_gpu_kernel.py -k "many_functions or function_pool or memory_cells_128 or call_stack_depth_32 or stack_depth_256" -v`
+  - **Commit**: `test: add tests for expanded pools and GPU limits`
+  - _Requirements: AC-2.1, AC-2.3, AC-3.1, AC-4.1, AC-5.1_
+
+## Phase 4: Quality Gates
+
+- [ ] 4.1 Local quality check
+  - **Do**: Run all quality checks:
+    1. `pytest` -- full test suite
+    2. `python3 -m py_compile emojiasm/transpiler.py` -- syntax check
+    3. `python3 -m py_compile emojiasm/gpu.py` -- syntax check
+    4. `python3 -m py_compile emojiasm/bytecode.py` -- syntax check
+  - **Verify**: All commands exit 0
+  - **Done when**: All quality checks pass
+  - **Commit**: `fix(tier4): address any lint/type issues` (if needed)
+
+- [ ] 4.2 Create PR and verify CI
+  - **Do**: Push branch, create PR with `gh pr create` referencing issue #30
+  - **Verify**: `gh pr checks --watch` all green
+  - **Done when**: PR ready for review
+
+## Notes
+
+- **POC shortcuts taken**: No configurability for memory cells or call stack (compile-time Metal constants); collision validation deferred to Phase 2
+- **Existing collisions**: `EMOJI_POOL` already has `🔢` (MOD opcode) and `📊` (DATA directive) -- these work fine because they're used as STORE/LOAD arguments, not parsed as opcodes
+- **Production TODOs**: Consider making memory cell count a kernel parameter (requires kernel recompilation) in future iteration
diff --git a/tests/test_bytecode.py b/tests/test_bytecode.py
index 3647b0b..ad18d89 100644
--- a/tests/test_bytecode.py
+++ b/tests/test_bytecode.py
@@ -457,14 +457,14 @@ def test_dup_increases_depth(self):
         assert depth == 3  # PUSH(1) + DUP(2) + DUP(3)
 
     def test_capped_at_128(self):
-        """Stack depth should never exceed GPU max of 128 (KB #147)."""
-        # Create a program with 200 PUSHes
-        lines = ["📜 🏠"] + ["📥 1"] * 200 + ["🛑"]
+        """Stack depth never exceeds the GPU max of 256 (KB #147); the 128 in the test name is historical."""
+        # Create a program with 300 PUSHes (more than the 256 cap)
+        lines = ["📜 🏠"] + ["📥 1"] * 300 + ["🛑"]
         src = "\n".join(lines)
         prog = _parse(src)
         depth = _analyze_max_stack_depth(prog)
         assert depth == _GPU_MAX_STACK
-        assert depth == 128
+        assert depth == 256
 
     def test_max_stack_depth_in_gpu_program(self):
         src = "📥 6\n📥 7\n✖️\n🖨️\n🛑"
diff --git a/tests/test_gpu_kernel.py b/tests/test_gpu_kernel.py
index 7041a4f..c7c6136 100644
--- a/tests/test_gpu_kernel.py
+++ b/tests/test_gpu_kernel.py
@@ -259,7 +259,7 @@ def test_random_seeds_with_thread_id(self):
 
 class TestConstants:
     def test_default_stack_depth(self):
-        assert DEFAULT_STACK_DEPTH == 128
+        assert DEFAULT_STACK_DEPTH == 256
 
     def test_default_max_steps(self):
         assert DEFAULT_MAX_STEPS == 1_000_000
@@ -430,3 +430,20 @@ def test_math_gpu_opcodes_match_bytecode(self):
                 f"Mismatch for {gpu_name}: GPU=0x{GPU_OPCODES[gpu_name]:02X} "
                 f"vs bytecode=0x{OP_MAP[op]:02X}"
             )
+
+
+# ── Expanded capacity limits ─────────────────────────────────────────────
+
+
+class TestCapacityLimits:
+    """Verify GPU kernel capacity constants match expanded limits."""
+
+    def test_memory_cells_128(self):
+        """Kernel source must define NUM_MEMORY_CELLS as exactly 128."""
+        src = get_kernel_source()
+        assert re.search(r"\bNUM_MEMORY_CELLS\s*=\s*128\b", src) is not None  # \b rejects 1280 etc.
+
+    def test_call_stack_depth_32(self):
+        """Kernel source must define CALL_STACK_DEPTH as exactly 32."""
+        src = get_kernel_source()
+        assert re.search(r"\bCALL_STACK_DEPTH\s*=\s*32\b", src) is not None  # \b rejects 320 etc.
diff --git a/tests/test_transpiler.py b/tests/test_transpiler.py
index f737ce2..debf239 100644
--- a/tests/test_transpiler.py
+++ b/tests/test_transpiler.py
@@ -1,7 +1,10 @@
 """Tests for the Python-to-EmojiASM transpiler."""
 
 import pytest
-from emojiasm.transpiler import transpile, transpile_to_source, TranspileError
+from emojiasm.transpiler import (
+    transpile, transpile_to_source, TranspileError,
+    EMOJI_POOL, FUNC_EMOJI_POOL,
+)
 from emojiasm.vm import VM
 
 
@@ -888,3 +891,38 @@ def test_source_map_multiline(self):
         assert "x = 42" in source_set
         assert "y = 10" in source_set
         assert "print(x + y)" in source_set
+
+
+# ── Expanded pool limits ─────────────────────────────────────────────────
+
+
+class TestVariablePool:  # checks for the expanded EMOJI_POOL (previously 50 entries)
+    def test_variable_pool_size(self):
+        """EMOJI_POOL must have at least 200 entries."""
+        assert len(EMOJI_POOL) >= 200
+
+    def test_variable_pool_no_duplicates(self):
+        """All entries in EMOJI_POOL must be unique."""
+        assert len(set(EMOJI_POOL)) == len(EMOJI_POOL)
+
+    def test_many_variables(self):
+        """Transpile+run a program using 100 unique variables (v0..v99)."""
+        # Generate: v0 = 0\nv1 = 1\n...\nv99 = 99\nprint(v0 + v99)
+        lines = [f"v{i} = {i}" for i in range(100)]  # 100 names: twice the old 50-entry pool
+        lines.append("print(v0 + v99)")
+        src = "\n".join(lines)
+        assert run_py(src).strip() == "99"  # 0 + 99
+
+
+class TestFunctionPool:  # checks for the expanded FUNC_EMOJI_POOL
+    def test_function_pool_size(self):
+        """FUNC_EMOJI_POOL must have at least 50 entries."""
+        assert len(FUNC_EMOJI_POOL) >= 50
+
+    def test_many_functions(self):
+        """Transpile+run a program with 30 def statements; only f29 is actually called."""
+        # Generate: def f0(): return 0\ndef f1(): return 1\n...\ndef f29(): return 29\nprint(f29())
+        lines = [f"def f{i}():\n    return {i}" for i in range(30)]
+        lines.append("print(f29())")  # f0..f28 are defined but never invoked
+        src = "\n".join(lines)
+        assert run_py(src).strip() == "29"