Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions emojiasm/bytecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
Tier 2: Numeric + output (PRINT/PRINTLN but no INPUT; GPU with output buffer)
Tier 3: Full features (INPUT/strings require CPU fallback)

Max stack depth capped at 128 entries in device memory (KB #147).
Max stack depth capped at 256 entries in device memory (KB #147).
"""

from __future__ import annotations
Expand Down Expand Up @@ -88,7 +88,7 @@
_MAX_OPERAND = (1 << 24) - 1

# Maximum stack depth allowed on GPU (KB #147)
_GPU_MAX_STACK = 128
_GPU_MAX_STACK = 256


# ── Output dataclass ─────────────────────────────────────────────────────
Expand Down Expand Up @@ -299,7 +299,7 @@ def _analyze_max_stack_depth(program: Program) -> int:
"""Conservative max stack depth via instruction walk.

Walks each function linearly (ignoring branches — conservative because
we take the max across all instruction positions). Caps at 128 per
we take the max across all instruction positions). Caps at 256 per
KB #147 GPU memory budget.
"""
max_depth = 0
Expand Down
2 changes: 1 addition & 1 deletion emojiasm/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# ── Constants ────────────────────────────────────────────────────────────

DEFAULT_STACK_DEPTH = 128
DEFAULT_STACK_DEPTH = 256
DEFAULT_MAX_STEPS = 1_000_000
DEFAULT_THREADGROUP_SIZE = 256

Expand Down
4 changes: 2 additions & 2 deletions emojiasm/metal/vm.metal
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ constant uint32_t STATUS_TIMEOUT = 3;
// ── Fixed-size limits ───────────────────────────────────────────────────

// Call stack depth (thread-local, small enough for registers per KB #146)
constant int CALL_STACK_DEPTH = 16;
constant int CALL_STACK_DEPTH = 32;

// Memory cells per thread (thread-local array)
constant int NUM_MEMORY_CELLS = 32;
constant int NUM_MEMORY_CELLS = 128;

// ── Output buffer entry (Tier 2 output capture) ────────────────────────

Expand Down
72 changes: 69 additions & 3 deletions emojiasm/transpiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from __future__ import annotations

import ast
from .opcodes import Op
from .opcodes import Op, EMOJI_TO_OP
from .parser import Program, Function, Instruction
from .disasm import disassemble

Expand All @@ -31,21 +31,87 @@ def __init__(self, message: str, lineno: int = 0):
super().__init__(f"TranspileError{loc}: {message}")


# Emoji pool for variable memory cells (50 characters)
# Emoji pool for variable memory cells (200+ characters)
# Pool of emoji used as memory-cell names for transpiled variables.
# list() over the concatenated literals yields one entry per Unicode code
# point, so every emoji below has to be a single code point: a variation
# selector or ZWJ sequence would be split into several separate entries.
# NOTE(review): entry order presumably determines which emoji a variable
# gets, which would explain the "do not reorder" constraint — confirm
# against the allocation code before inserting anywhere but the end.
EMOJI_POOL = list(
# Original 50 (backward compatible, do not reorder)
"🔢📊🎯⭐🌟💎🔥🌊🌈🍎"
"🍊🍋🍇🍓🍒🥝🥑🌽🥕🍄"
"🐱🐶🐸🦊🐻🐼🐨🐯🦁🐮"
"🐷🐵🐔🐧🦅🦆🦉🐝🐛🦋"
"🌻🌺🌸🌼🌹🍀🌿🌴🌵🎄"
# Animals (additional)
"🐭🐹🐰🐴🐗🐺🦄🐌🐞🐜"
"🐢🐍🐙🐠🐟🐬🐳🐋🐊🐅"
"🐆🐘🐪🐫🐃🐂🐄🐎🐖🐏"
"🐑🐐🐕🐩🐇🐁🐀🐿🦔🦇"
"🦎🦖🦕🦑🦞🦀🐡🦈🦍🦧"
"🦛🦏🦒🦘🦙🦌🦃🦚🦜🦢"
"🦩🦝🦨🦡🦫🦦🦥🐈🐓🦗"
# Food and drink
"🍐🍌🍉🍈🍑🥭🍍🥥🍅🍆"
"🥦🥬🥒🧄🧅🥔🍠🥐🥯🍞"
"🥖🥨🧀🥚🍳🧈🥞🧇🥓🥩"
"🍗🍖🍔🍟🍕🥪🥙🧆🥗🥘"
"🥫🍝🍜🍲🍛🍣🍱🥟🍤🍙"
"🍚🍘🍥🥠🥮🍢🍡🍧🍨🍦"
"🥧🧁🍰🎂🍮🍭🍬🍫🍿🍩"
"🍪🌰🥜🍯"
# Sports and activities
"⚽🏀🏈⚾🥎🎾🏐🏉🥏🎱"
"🏓🏸🏒🏑🥍🏏🥅🏹🎣🥊"
"🥋🎽🛹🛷🥌🎿🏂"
# Vehicles and transport
"🚗🚕🚙🚌🏎🚓🚑🚒🚐🛻"
"🚚🚛🚜🏍🛵🚲🛴🚔🚍🚘"
"🚖🚡🚠🚟🚃🚋🚞🚝🚄🚅"
"🚈🚂🚆🛶🚤🛥🚢"
)

# Emoji pool for function names
# Emoji pool for function names (50+ characters)
# Pool of emoji used as names for transpiled functions.
# list() splits the concatenated literals into one entry per Unicode code
# point, not per rendered glyph.
# NOTE(review): "❤️" in the second original row is two code points (U+2764
# followed by the U+FE0F variation selector), so it contributes two entries
# and the invisible U+FE0F becomes a pool entry of its own. Removing it would
# shift every later entry, so confirm the backward-compatibility impact
# before changing it.
FUNC_EMOJI_POOL = list(
# Original entries (backward compatible, do not reorder)
"🔲🔳🟥🟦🟩🟨🟧🟪🟫⬛"
"⬜❤️💙💚💛🧡💜🤎🖤🤍"
# Colored circles and shapes
"🔴🟠🟡🟢🔵🟣🟤⚫⚪🔶"
"🔷🔸🔹🔺🔻💠🔘"
# Zodiac and symbols
"♈♉♊♋♌♍♎♏♐♑♒♓⛎"
)

def _find_pool_duplicates(pool: list[str]) -> list[str]:
    """Return every surplus occurrence in *pool*, in encounter order.

    The length of the result equals ``len(pool) - len(set(pool))``.
    """
    seen = set()
    surplus = []
    for entry in pool:
        if entry in seen:
            surplus.append(entry)
        else:
            seen.add(entry)
    return surplus


def _describe_pool_entries(entries: list[str]) -> str:
    """Render entries as ``'x' (U+XXXX)`` so invisible code points show up."""
    described = []
    for entry in entries:
        code_points = "+".join(f"U+{ord(ch):04X}" for ch in entry)
        described.append(f"{entry!r} ({code_points})")
    return ", ".join(described)


def _validate_emoji_pools() -> None:
    """Check emoji pools for duplicates and cross-pool collisions.

    Called at module load time. The checks run in this order and the first
    failure wins:

    1. FUNC_EMOJI_POOL has duplicate entries
    2. EMOJI_POOL has duplicate entries
    3. FUNC_EMOJI_POOL and EMOJI_POOL overlap

    Note: Collisions with opcode emoji (EMOJI_TO_OP) are acceptable because
    the parser distinguishes opcode context from STORE/LOAD memory cell names.

    Raises:
        RuntimeError: If any check fails. The message names the offending
            entries together with their code points, so that entries which
            do not render visibly (e.g. a stray variation selector) can
            still be identified.
    """
    for pool_name, pool in (
        ("FUNC_EMOJI_POOL", FUNC_EMOJI_POOL),
        ("EMOJI_POOL", EMOJI_POOL),
    ):
        surplus = _find_pool_duplicates(pool)
        if surplus:
            raise RuntimeError(
                f"{pool_name} has {len(surplus)} duplicate entries: "
                f"{_describe_pool_entries(surplus)}"
            )

    # Sorted so the message is deterministic; set iteration order is not.
    overlap = sorted(set(FUNC_EMOJI_POOL) & set(EMOJI_POOL))
    if overlap:
        raise RuntimeError(
            "FUNC_EMOJI_POOL and EMOJI_POOL overlap: "
            f"{_describe_pool_entries(overlap)}"
        )


_validate_emoji_pools()


# Operator mappings
_BINOP_MAP = {
ast.Add: Op.ADD,
Expand Down
37 changes: 37 additions & 0 deletions specs/tier4-capacity/.progress.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# tier4-capacity

## Original Goal

Implement EmojiASM issue #30: Tier 4 VM and GPU kernel capacity limits. Raise the hard limits that constrain what transpiled Python programs can do. Expand GPU memory cells (32->128+), variable pool (50->200+), function pool (20->50+), GPU call stack depth (16->32+), GPU stack depth (128->256+). These are hard walls that cause silent failures or crashes.

## Completed Tasks
- [x] 1.1 Increase GPU memory cells from 32 to 128
- [x] 1.2 Increase GPU call stack depth from 16 to 32
- [x] 1.3 Increase GPU stack depth from 128 to 256
- [x] 1.4 Expand variable emoji pool from 50 to 200+
- [x] 1.5 Expand function emoji pool from 20 to 50+
- [x] 2.1 Update docstrings and comments for new limits
- [x] 2.2 Add collision validation utility
- [x] 3.1 Test expanded variable pool limits
- [x] 3.2 Test expanded function pool and GPU kernel limits

## Current Task
Awaiting next task

## Learnings

- Existing `EMOJI_POOL` has collisions with reserved opcodes: `🔢` (MOD) and `📊` (DIRECTIVE_DATA). These work because transpiler uses them as STORE/LOAD args (memory cell names), not as instruction opcodes in source text. Parser context distinguishes them.
- KB #147 confirms 256-entry stack feasible on Apple Silicon GPU (1KB device memory per thread).
- KB #185 estimates ~6.5KB/thread at current sizes. With increases, ~9.7KB/thread -- still under 10KB budget.
- `FUNC_EMOJI_POOL` has 21 entries (not 20 as stated in the issue) because `❤️` is two code points (U+2764 followed by the U+FE0F variation selector), so `list()` splits it into two entries and its 10-emoji row yields 11 entries.
- `stack_depth` is already parameterized as a kernel argument in `gpu.py` -- only the default needs changing. Memory cells and call stack are compile-time Metal constants.
- The `_split_kernel_source()` function in `gpu.py` patches scalar reference parameters to pointer dereferences for MLX compatibility -- no changes needed for our constant-only changes.
- `_build_memory_map()` in `bytecode.py` maps emoji cell names to integer indices 0..N-1 -- the 24-bit operand field supports up to 16M cells, so 128 is no concern.
- Two existing tests (`test_capped_at_128` in test_bytecode.py and `test_default_stack_depth` in test_gpu_kernel.py) hardcoded old constant values and needed updating along with the constant changes.
- Expanded EMOJI_POOL to 258 entries (animals, food, sports, vehicles). Only 1 opcode collision (`🔢` = MOD), which is expected and harmless. Python `list()` splits a string into one entry per Unicode code point, so the pool is split correctly only because every emoji in it is a single code point.
- Expanded FUNC_EMOJI_POOL to 51 entries using colored circles/shapes (🔴🟠🟡🟢🔵🟣🟤⚫⚪🔶🔷🔸🔹🔺🔻💠🔘) and zodiac symbols (♈♉♊♋♌♍♎♏♐♑♒♓⛎). All are single-codepoint emoji safe for list() splitting. No collisions with EMOJI_POOL or opcodes.
- Only bytecode.py had stale "128" references (module docstring and `_analyze_max_stack_depth` docstring). transpiler.py comments were already updated in tasks 1.4/1.5. vm.metal comments don't reference specific old numeric values.
- `_validate_emoji_pools()` runs at module load time, checking for duplicates within each pool and cross-pool overlap. Opcode collisions are intentionally allowed since parser context distinguishes them.

## Next
Task 1.6: POC Checkpoint -- verify all existing tests pass
134 changes: 134 additions & 0 deletions specs/tier4-capacity/design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
---
spec: tier4-capacity
phase: design
created: 2026-03-08
generated: auto
---

# Design: tier4-capacity

## Overview

Increase five hard-coded capacity limits across three layers: Metal kernel constants, Python bytecode compiler caps, and transpiler emoji pools. No architectural changes -- just constant adjustments and expanding string lists.

## Architecture

```
Transpiler (transpiler.py) Bytecode (bytecode.py) GPU Kernel (vm.metal)
EMOJI_POOL: 50 -> 200+ _GPU_MAX_STACK: 128 -> 256 NUM_MEMORY_CELLS: 32 -> 128
FUNC_EMOJI_POOL: 20 -> 50+ CALL_STACK_DEPTH: 16 -> 32
GPU Interface (gpu.py) stack_depth: 128 -> 256
DEFAULT_STACK_DEPTH: 128 -> 256
```

## Components

### Component A: Metal Kernel Constants (`emojiasm/metal/vm.metal`)
**Purpose**: Define per-thread resource sizes for GPU execution
**Changes**:
- L94: `CALL_STACK_DEPTH` 16 -> 32
- L97: `NUM_MEMORY_CELLS` 32 -> 128

**Impact on thread-local memory**:
- `call_stack[32]`: 32 * 4B = 128B (was 64B)
- `memory[128]`: 128 * 4B = 512B (was 128B)
- Net increase: ~448B per thread (~9.7KB total including the device operand stack, as itemised in the Per-Thread Memory Budget table below; still within the 10KB budget per KB #185)

### Component B: Bytecode Compiler Cap (`emojiasm/bytecode.py`)
**Purpose**: Cap static stack analysis for GPU programs
**Changes**:
- L91: `_GPU_MAX_STACK` 128 -> 256

### Component C: GPU Interface Default (`emojiasm/gpu.py`)
**Purpose**: Set default per-instance stack size passed to kernel
**Changes**:
- L23: `DEFAULT_STACK_DEPTH` 128 -> 256

**Impact on device memory**:
- Stacks buffer: `n * 256 * 4B` = 1KB per thread (was 512B)
- For 10K threads: 10MB (was 5MB), well within GPU memory

### Component D: Variable Emoji Pool (`emojiasm/transpiler.py`)
**Purpose**: Map Python variable names to unique emoji memory cell identifiers
**Changes**:
- Expand `EMOJI_POOL` from 50 to 200+ characters
- Use emoji from multiple Unicode blocks: food, animals, objects, nature, sports, vehicles, flags

**Emoji selection criteria**:
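1. Newly added emoji must NOT appear in `EMOJI_TO_OP` (opcodes.py); the pre-existing `🔢` (MOD) entry in the original 50 is the one retained exception, kept for backward compatibility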
2. Must NOT appear in directive constants (`DIRECTIVE_FUNC`, `DIRECTIVE_LABEL`, etc.)
3. Must NOT appear in `FUNC_EMOJI_POOL`
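4. Must be single-codepoint: the pools are built with `list()` over string literals, which splits multi-codepoint sequences (variation selectors, ZWJ sequences, flags) into separate entries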
5. Prefer visually distinct emoji

### Component E: Function Emoji Pool (`emojiasm/transpiler.py`)
**Purpose**: Map Python function names to unique emoji identifiers
**Changes**:
- Expand `FUNC_EMOJI_POOL` from 20 to 50+ characters
- Add more colored shapes, symbols, and distinct emoji

**Selection criteria**: Same collision avoidance as Component D, plus must not overlap with `EMOJI_POOL`.

## Data Flow

1. Python source -> Transpiler assigns variables from `EMOJI_POOL` (up to 200+)
2. Transpiler assigns functions from `FUNC_EMOJI_POOL` (up to 50+)
3. Program -> Bytecode compiler maps emoji cells to integer indices 0..N-1
4. Bytecode `_analyze_max_stack_depth()` caps at 256 (was 128)
5. `gpu_run()` allocates stacks buffer with `n * 256` entries
6. Metal kernel uses `memory[128]`, `call_stack[32]`, dynamic `stack_depth=256`

## Technical Decisions

| Decision | Options | Choice | Rationale |
|----------|---------|--------|-----------|
| Memory cells count | 64, 128, 256 | 128 | Matches stack depth; 512B thread-local fits register budget per KB #147 |
| Call stack depth | 24, 32, 64 | 32 | Supports fib(20); 128B minimal overhead |
| Stack depth | 192, 256, 512 | 256 | Per KB #147 max feasible; 1KB device memory per thread |
| Variable pool size | 150, 200, 300 | 200+ | Covers complex programs; more emoji available if needed |
| Function pool size | 40, 50, 80 | 50+ | Covers modular programs; 50 functions is generous |
| Configurability | Constants vs params | Constants (except stack_depth) | Memory cells and call stack are compile-time Metal constants; stack_depth already parameterized |

## File Structure

| File | Action | Purpose |
|------|--------|---------|
| `emojiasm/metal/vm.metal` | Modify | Update `CALL_STACK_DEPTH` and `NUM_MEMORY_CELLS` |
| `emojiasm/bytecode.py` | Modify | Update `_GPU_MAX_STACK` |
| `emojiasm/gpu.py` | Modify | Update `DEFAULT_STACK_DEPTH` |
| `emojiasm/transpiler.py` | Modify | Expand `EMOJI_POOL` and `FUNC_EMOJI_POOL` |
| `tests/test_gpu_kernel.py` | Modify | Add/update tests for new limits |
| `tests/test_transpiler.py` | Modify | Add tests for expanded pools |
| `tests/test_emojiasm.py` | No change | Existing tests should pass as-is |

## Error Handling

| Error | Handling | User Impact |
|-------|----------|-------------|
| Variable pool exceeded (>200) | `TranspileError` with count | Same as before, higher limit |
| Function pool exceeded (>50) | `TranspileError` with count | Same as before, higher limit |
| GPU memory cell OOB (>128) | `STATUS_ERROR` in kernel | Same error path, higher limit |
| GPU call stack overflow (>32) | `STATUS_ERROR` in kernel | Same error path, higher limit |
| GPU stack overflow (>256) | `STATUS_ERROR` in kernel | Same error path, higher limit |

## Existing Patterns to Follow

- `vm.metal` L92-97: `constant int` declarations for limits
- `transpiler.py` L35-47: Emoji pools as `list()` of concatenated string literals
- `transpiler.py` L139-141: Pool exhaustion raises `TranspileError`
- `bytecode.py` L91: `_GPU_MAX_STACK` caps analysis
- `gpu.py` L23: `DEFAULT_STACK_DEPTH` constant

## Per-Thread Memory Budget (Updated)

| Resource | Old Size | New Size |
|----------|----------|----------|
| Operand stack (device) | 512B | 1024B |
| Call stack (thread-local) | 64B | 128B |
| Memory cells (thread-local) | 128B | 512B |
| Arrays (thread-local) | 8KB | 8KB (unchanged) |
| PRNG state (thread-local) | 24B | 24B |
| **Total thread-local** | **~8.2KB** | **~8.7KB** |
| **Total with device stack** | **~8.7KB** | **~9.7KB** |

Within 10KB budget per NFR-2. At 9.7KB/thread, 64MB supports ~6,500 concurrent VMs.
Loading