50 commits
- f475640 update 0.13.0 (ceci3, Dec 31, 2025)
- 25e1cd4 update readme (ceci3, Dec 31, 2025)
- 8e62ce2 polish code (ceci3, Jan 4, 2026)
- cddd6f9 comment gems attention (ceci3, Jan 7, 2026)
- 2864b24 add qwen3 next (ceci3, Jan 9, 2026)
- 80a7487 Add a dispatch mechanism. (xin2an, Jan 9, 2026)
- 4a749f0 Adjusting the Vendor multi-backend structure (xin2an, Jan 12, 2026)
- 129c125 Merge remote-tracking branch 'remotes/pr_author/update_v0130' into di… (xin2an, Jan 12, 2026)
- 754927d Add ascend support (xin2an, Jan 19, 2026)
- 4dd4ec0 Merge branch 'main' into dispatch_add_v0130 (xin2an, Jan 19, 2026)
- 575621d Delete unnecessary files. (xin2an, Jan 19, 2026)
- 9e85536 Modify copyright information (xin2an, Jan 19, 2026)
- 676cdd2 Modify the directory name (xin2an, Jan 19, 2026)
- ab5f772 Make modifications and adjustments based on the PR (pull request) fee… (xin2an, Jan 20, 2026)
- b5cc8d7 Adjust the code. (xin2an, Jan 20, 2026)
- 635c768 Place the attention mechanism in the `dispatch` directory. (xin2an, Jan 20, 2026)
- 3a086d8 Fixed bugs, added functionality to read configuration files in dispatch. (xin2an, Jan 23, 2026)
- ec58589 Merge branch 'main' into dispatch_add_v0130 (xin2an, Jan 23, 2026)
- a02a3a0 Modify the code based on PR feedback. (xin2an, Jan 24, 2026)
- 7322bfe Cancel the use of attention_backend in flagems (xin2an, Jan 24, 2026)
- 896a968 remove chinese (xin2an, Jan 25, 2026)
- d9cbf15 Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Jan 27, 2026)
- a8a736b Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Jan 29, 2026)
- de3a6b7 Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Jan 30, 2026)
- 1d090d2 Enable FlagGems attention backend with CUDA availability check (xin2an, Feb 3, 2026)
- 27d5405 Merge branch 'main' into dispatch_add_v0130 (xin2an, Feb 3, 2026)
- 3dd3242 [New Feature] Add platform-specific operator config for Ascend/CUDA (xin2an, Feb 4, 2026)
- 97f6e39 Update copyright year from 2025 to 2026 (xin2an, Feb 4, 2026)
- ff51f4c Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Feb 5, 2026)
- 0fb006a flaggems_blacklist > flagos_blacklist, add utils.py file (xin2an, Feb 5, 2026)
- 57f596d Delete TRITON_ATTN (xin2an, Feb 5, 2026)
- 965702d Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Feb 5, 2026)
- bbb4673 Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Feb 10, 2026)
- 511c8c5 Fix CUDA backend vendor detection to exclude CUDA-alike devices (MACA… (xin2an, Feb 10, 2026)
- 2b1bfe9 Fix attention_backend bug (xin2an, Feb 10, 2026)
- cdf2443 Modify CUDA backend vendor detection and add PTG configuration. (xin2an, Feb 10, 2026)
- c759f50 Add metax backend (xin2an, Feb 10, 2026)
- 467df01 delete auto_register.py (xin2an, Feb 10, 2026)
- a15dd97 Revised according to feedback (xin2an, Feb 11, 2026)
- 415eb15 instance > obj (xin2an, Feb 11, 2026)
- 45da20c Revised according to feedback (xin2an, Feb 11, 2026)
- 0037d27 Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Feb 11, 2026)
- 23137a3 The VLLM_FL_PLATFORM environment variable was deleted. The default co… (xin2an, Feb 11, 2026)
- a7fb4c4 Decouple op implementations from Backend classes and add dispatch_met… (xin2an, Feb 11, 2026)
- 06d3adb Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Feb 12, 2026)
- bae4991 Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Feb 24, 2026)
- 90fb24e Prevent CUDA detection for 'iluvatar' vendor (xin2an, Feb 28, 2026)
- 1167515 The Ascend platform adds a new Flagos blacklist operator. (xin2an, Feb 28, 2026)
- 6548741 Merge branch 'dispatch_add_v0130' of https://github.com/xin2an/vllm-p… (xin2an, Feb 28, 2026)
- fe0ed5b Merge branch 'flagos-ai:main' into dispatch_add_v0130 (xin2an, Mar 3, 2026)
15 changes: 4 additions & 11 deletions vllm_fl/dispatch/README.md
@@ -227,14 +227,11 @@ The system automatically detects hardware and loads the corresponding configuration

| Platform | Config File | Auto-Detection |
|----------|-------------|----------------|
| Ascend NPU | `config/ascend.yaml` | `torch.npu.is_available()` |
| NVIDIA GPU | `config/cuda.yaml` | `torch.cuda.is_available()` |
| Ascend NPU | `config/ascend.yaml` | `platform.vendor_name == 'ascend'` |
| NVIDIA GPU | `config/nvidia.yaml` | `platform.vendor_name == 'nvidia'` |
| METAX GPU | `config/metax.yaml` | `platform.vendor_name == 'metax'` |

You can force a specific platform using `VLLM_FL_PLATFORM` environment variable:
```bash
export VLLM_FL_PLATFORM=ascend # Force Ascend config
export VLLM_FL_PLATFORM=cuda # Force CUDA config
```
Platform detection is automatic based on `current_platform.vendor_name`.

### User-Specified Configuration File (YAML)

@@ -314,7 +311,6 @@ Environment variables can override specific items from platform config. If not s
|----------|---------|-------------|
| `VLLM_FL_PREFER_ENABLED` | `true` | Global switch. Set `false` to disable all dispatch features |
| `VLLM_FL_CONFIG` | (none) | Path to YAML config file (complete override) |
| `VLLM_FL_PLATFORM` | (auto) | Force platform: `ascend`, `cuda` |

#### Backend Selection

@@ -388,9 +384,6 @@ export VLLM_FL_PER_OP="rms_norm=vendor|flagos|reference"
# Use completely custom config file
export VLLM_FL_CONFIG=/path/to/my_config.yaml

# Force specific platform
export VLLM_FL_PLATFORM=ascend

# Enable debug logging
export VLLM_FL_LOG_LEVEL=DEBUG
```
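The platform table above maps `current_platform.vendor_name` to a config file. A minimal sketch of that selection step (a hypothetical helper, not the actual vllm_fl code; only the vendor names and file paths come from the table):

```python
# Hypothetical sketch: map a detected vendor name to its platform
# config file, mirroring the table in the README diff above.
_VENDOR_CONFIGS = {
    "ascend": "config/ascend.yaml",
    "nvidia": "config/nvidia.yaml",
    "metax": "config/metax.yaml",
}

def select_config(vendor_name: str) -> str:
    """Return the platform config path for a detected vendor."""
    try:
        return _VENDOR_CONFIGS[vendor_name]
    except KeyError:
        raise ValueError(f"unsupported vendor: {vendor_name!r}")

print(select_config("metax"))  # config/metax.yaml
```

With `VLLM_FL_PLATFORM` removed, a lookup like this (plus the `VLLM_FL_CONFIG` full-override path) is the whole selection story.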
14 changes: 14 additions & 0 deletions vllm_fl/dispatch/__init__.py
@@ -96,6 +96,7 @@
)
from .manager import OpManager, get_default_manager, reset_default_manager
from .ops import VLLMFLBackendBase
from .method_dispatch import dispatch_method
from .discovery import (
discover_plugins,
get_discovered_plugins,
@@ -106,6 +107,16 @@
from .logger_manager import get_logger, set_log_level


def call_method_op(op_name: str, instance, *args, **kwargs):
"""
Call an operator as a bound method on *instance*.

The resolved backend function receives *instance* as ``self``,
allowing it to freely access instance attributes.
"""
return get_default_manager().call_as_method(op_name, instance, *args, **kwargs)


def call_op(op_name: str, *args, **kwargs):
"""
Convenience function to call an operator through the default manager.
@@ -163,6 +174,9 @@ def resolve_op(op_name: str):
"reset_default_manager",
# Backend base
"VLLMFLBackendBase",
# Method dispatch
"dispatch_method",
"call_method_op",
# Plugin discovery
"discover_plugins",
"get_discovered_plugins",
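The new `call_method_op` resolves an operator and passes the calling instance through as `self`. A self-contained sketch of that pattern (toy registry; only the `call_method_op` signature mirrors the diff, everything else is illustrative):

```python
# Toy registry illustrating method-style dispatch; only the
# call_method_op signature mirrors the real vllm_fl helper.
_OPS: dict = {}

def register_op(name: str):
    """Decorator that records a function as the implementation of *name*."""
    def deco(fn):
        _OPS[name] = fn
        return fn
    return deco

def call_method_op(op_name: str, instance, *args, **kwargs):
    # The resolved function receives *instance* as its first argument,
    # so it can read instance attributes exactly like a bound method.
    return _OPS[op_name](instance, *args, **kwargs)

@register_op("scale")
def scale_impl(self, xs):
    # A free function, yet it reads self.factor like a method would.
    return [v * self.factor for v in xs]

class Layer:
    def __init__(self, factor):
        self.factor = factor

print(call_method_op("scale", Layer(2), [1, 2, 3]))  # [2, 4, 6]
```

This is why the impl functions in this PR take a leading `self`/`obj` parameter even though they are plain module-level functions.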
78 changes: 1 addition & 77 deletions vllm_fl/dispatch/backends/flaggems/flaggems.py
@@ -8,7 +8,7 @@

from __future__ import annotations

from typing import Optional, Union
from typing import Optional

import torch

@@ -42,82 +42,6 @@ def is_available(self) -> bool:

# ==================== Operator Implementations ====================

def silu_and_mul(self, obj, x: torch.Tensor) -> torch.Tensor:
"""
SiLU activation followed by element-wise multiplication.

Args:
obj: The calling obj (for interface consistency)
x: Input tensor of shape [..., 2*d]

Returns:
Output tensor of shape [..., d]
"""
from .impl.activation import silu_and_mul_flaggems

return silu_and_mul_flaggems(obj, x)

def rms_norm(
self,
obj,
x: torch.Tensor,
residual: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
"""
RMS normalization.

Args:
obj: The calling obj (e.g., RMSNorm layer)
x: Input tensor
residual: Optional residual tensor

Returns:
Normalized tensor, or tuple of (normalized, residual) if residual is provided
"""
from .impl.normalization import rms_norm_flaggems

return rms_norm_flaggems(obj, x, residual)

def rotary_embedding(
self,
obj,
query: torch.Tensor,
key: torch.Tensor,
cos: torch.Tensor,
sin: torch.Tensor,
position_ids: torch.Tensor,
rotary_interleaved: bool = False,
inplace: bool = True,
) -> tuple[torch.Tensor, torch.Tensor]:
"""
Apply rotary position embedding.

Args:
obj: The calling obj (for interface consistency)
query: Query tensor
key: Key tensor
cos: Cosine cache
sin: Sine cache
position_ids: Position indices
rotary_interleaved: Whether to use interleaved rotary
inplace: Whether to modify tensors in-place

Returns:
Tuple of (embedded_query, embedded_key)
"""
from .impl.rotary import rotary_embedding_flaggems

return rotary_embedding_flaggems(
obj,
query,
key,
cos,
sin,
position_ids,
rotary_interleaved=rotary_interleaved,
inplace=inplace,
)

def attention_backend(self, use_mla: bool = False) -> str:
"""
Get the attention backend class path for FlagGems.
4 changes: 2 additions & 2 deletions vllm_fl/dispatch/backends/flaggems/impl/activation.py
@@ -9,12 +9,12 @@
import torch


def silu_and_mul_flaggems(obj, x: torch.Tensor) -> torch.Tensor:
def silu_and_mul_flaggems(self, x: torch.Tensor) -> torch.Tensor:
"""
SiLU activation followed by element-wise multiplication using FlagGems.

Args:
obj: The calling obj (for interface consistency)
self: The calling instance (for interface consistency)
x: Input tensor of shape [..., 2*d]

Returns:
10 changes: 5 additions & 5 deletions vllm_fl/dispatch/backends/flaggems/impl/normalization.py
@@ -12,15 +12,15 @@


def rms_norm_flaggems(
obj,
self,
x: torch.Tensor,
residual: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
"""
RMS normalization using FlagGems.

Args:
obj: The calling obj (e.g., RMSNorm layer)
self: The calling instance (e.g., RMSNorm layer)
x: Input tensor
residual: Optional residual tensor

@@ -29,8 +29,8 @@ def rms_norm_flaggems(
"""
from flag_gems.modules.normalization import gems_rms_forward

# Get weight and epsilon from obj
weight = obj.weight
epsilon = obj.variance_epsilon
# Get weight and epsilon from self
weight = self.weight
epsilon = self.variance_epsilon

return gems_rms_forward(x, residual, weight, epsilon)
4 changes: 2 additions & 2 deletions vllm_fl/dispatch/backends/flaggems/impl/rotary.py
@@ -10,7 +10,7 @@


def rotary_embedding_flaggems(
obj,
self,
query: torch.Tensor,
key: torch.Tensor,
cos: torch.Tensor,
@@ -23,7 +23,7 @@
Apply rotary position embedding using FlagGems.

Args:
obj: The calling obj (for interface consistency)
self: The calling instance (for interface consistency)
query: Query tensor
key: Key tensor
cos: Cosine cache
11 changes: 7 additions & 4 deletions vllm_fl/dispatch/backends/flaggems/register_ops.py
@@ -34,6 +34,9 @@ def register_builtins(registry) -> None:
registry: Registry to register into
"""
from .flaggems import FlagGemsBackend
from .impl.activation import silu_and_mul_flaggems
from .impl.normalization import rms_norm_flaggems
from .impl.rotary import rotary_embedding_flaggems

backend = FlagGemsBackend()
is_avail = backend.is_available
@@ -44,7 +47,7 @@
op_name="silu_and_mul",
impl_id="default.flagos",
kind=BackendImplKind.DEFAULT,
fn=_bind_is_available(backend.silu_and_mul, is_avail),
fn=_bind_is_available(silu_and_mul_flaggems, is_avail),
vendor=None,
priority=BackendPriority.DEFAULT,
),
@@ -53,7 +56,7 @@
op_name="rms_norm",
impl_id="default.flagos",
kind=BackendImplKind.DEFAULT,
fn=_bind_is_available(backend.rms_norm, is_avail),
fn=_bind_is_available(rms_norm_flaggems, is_avail),
vendor=None,
priority=BackendPriority.DEFAULT,
),
@@ -62,11 +65,11 @@
op_name="rotary_embedding",
impl_id="default.flagos",
kind=BackendImplKind.DEFAULT,
fn=_bind_is_available(backend.rotary_embedding, is_avail),
fn=_bind_is_available(rotary_embedding_flaggems, is_avail),
vendor=None,
priority=BackendPriority.DEFAULT,
),
# Attention Backend
# Attention Backend (no instance binding needed)
OpImpl(
op_name="attention_backend",
impl_id="default.flagos",
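The registration above binds each implementation to an availability probe via `_bind_is_available`, so dispatch can fall past backends that are not usable on the current host. A simplified, self-contained sketch of that gating (the `OpImpl`, `register`, and `resolve` here are toy stand-ins, not the real vllm_fl classes):

```python
from dataclasses import dataclass
from typing import Callable

@dataclass
class OpImpl:
    """Toy stand-in for the real OpImpl record."""
    op_name: str
    fn: Callable
    priority: int = 0

def _bind_is_available(fn, is_avail):
    # Attach the backend's availability probe to the implementation so
    # the resolver can skip backends that cannot run on this host.
    fn.is_available = is_avail
    return fn

_registry: dict = {}

def register(impl: OpImpl) -> None:
    _registry.setdefault(impl.op_name, []).append(impl)

def resolve(op_name: str) -> Callable:
    # Pick the highest-priority implementation whose backend is available.
    for impl in sorted(_registry[op_name], key=lambda i: -i.priority):
        if impl.fn.is_available():
            return impl.fn
    raise RuntimeError(f"no available backend for {op_name!r}")

register(OpImpl("rms_norm", _bind_is_available(lambda: "flagos", lambda: False), priority=10))
register(OpImpl("rms_norm", _bind_is_available(lambda: "reference", lambda: True), priority=0))
print(resolve("rms_norm")())  # reference
```

Registering the free functions (`rms_norm_flaggems`, …) instead of bound methods, as this hunk does, keeps the registry decoupled from any backend instance.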
4 changes: 2 additions & 2 deletions vllm_fl/dispatch/backends/reference/impl/activation.py
@@ -10,12 +10,12 @@
import torch.nn.functional as F


def silu_and_mul_torch(obj, x: torch.Tensor) -> torch.Tensor:
def silu_and_mul_torch(self, x: torch.Tensor) -> torch.Tensor:
"""
SiLU activation followed by element-wise multiplication using PyTorch.

Args:
obj: The calling obj (for interface consistency)
self: The calling instance (for interface consistency)
x: Input tensor of shape [..., 2*d]

Returns:
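Per its docstring, `silu_and_mul` splits the last dimension in half and multiplies SiLU of the first half by the second. The same math on a plain Python list (an illustrative stand-in; the real code operates on tensors):

```python
import math

def silu_and_mul(x):
    """SiLU(x[:d]) * x[d:] for a flat list of length 2*d -- a pure-Python
    stand-in for the tensor version in the diff above."""
    d = len(x) // 2
    gate, up = x[:d], x[d:]
    # SiLU(v) = v * sigmoid(v) = v / (1 + exp(-v))
    silu = [v / (1.0 + math.exp(-v)) for v in gate]
    return [s * u for s, u in zip(silu, up)]

out = silu_and_mul([0.0, 1.0, 2.0, 3.0])  # gate = [0, 1], up = [2, 3]
print(out[0])  # 0.0  (SiLU(0) = 0)
```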
10 changes: 5 additions & 5 deletions vllm_fl/dispatch/backends/reference/impl/normalization.py
@@ -12,24 +12,24 @@


def rms_norm_torch(
obj,
self,
x: torch.Tensor,
residual: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
"""
RMS normalization using PyTorch.

Args:
obj: The calling obj (e.g., RMSNorm layer)
self: The calling instance (e.g., RMSNorm layer)
x: Input tensor
residual: Optional residual tensor

Returns:
Normalized tensor, or tuple of (normalized, residual) if residual is provided
"""
# Get weight and epsilon from obj
weight = obj.weight
epsilon = obj.variance_epsilon
# Get weight and epsilon from self
weight = self.weight
epsilon = self.variance_epsilon

if residual is not None:
x = x + residual
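The hunk above cuts off right after the residual is added. The full reference semantics can be sketched in plain Python (illustrative; the real code uses tensors and reads `weight`/`variance_epsilon` from the layer instance):

```python
import math

def rms_norm(x, weight, eps, residual=None):
    """Plain-Python RMS norm with the same residual semantics as the
    reference impl: add the residual first, then normalize."""
    if residual is not None:
        x = [a + b for a, b in zip(x, residual)]
        residual = x  # the updated residual is returned alongside the output
    # Normalize by the root mean square (epsilon inside the sqrt), then
    # scale by the learned weight.
    rms = math.sqrt(sum(v * v for v in x) / len(x) + eps)
    out = [v / rms * w for v, w in zip(x, weight)]
    return out if residual is None else (out, residual)

# Without residual: returns just the normalized values.
print(rms_norm([3.0, 4.0], [1.0, 1.0], 0.0))
# With residual: returns (normalized, updated_residual).
out, res = rms_norm([1.0], [1.0], 0.0, residual=[1.0])
print(res)  # [2.0]
```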
4 changes: 2 additions & 2 deletions vllm_fl/dispatch/backends/reference/impl/rotary.py
@@ -10,7 +10,7 @@


def rotary_embedding_torch(
obj,
self,
query: torch.Tensor,
key: torch.Tensor,
cos: torch.Tensor,
@@ -23,7 +23,7 @@
Apply rotary position embedding using PyTorch.

Args:
obj: The calling obj (for interface consistency)
self: The calling instance (for interface consistency)
query: Query tensor [batch, num_heads, seq_len, head_dim] or [seq_len, num_heads, head_dim]
key: Key tensor [batch, num_heads, seq_len, head_dim] or [seq_len, num_heads, head_dim]
cos: Cosine cache [max_seq_len, rotary_dim] where rotary_dim = head_dim or head_dim // 2
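The rotary reference rotates each `(x1, x2)` pair by the cached `cos`/`sin` angles. A plain-Python sketch of the non-interleaved (half-split) layout that the `rotary_interleaved=False` default implies (illustrative stand-in for the tensor version):

```python
def apply_rotary(vec, cos, sin):
    """Rotate one head vector with precomputed cos/sin, using the
    non-interleaved half-split layout: pair element i of the first half
    with element i of the second half. Illustrative, not the vllm_fl code."""
    d = len(vec) // 2
    x1, x2 = vec[:d], vec[d:]
    # Standard 2-D rotation applied to each (x1[i], x2[i]) pair.
    out_first = [a * c - b * s for a, b, c, s in zip(x1, x2, cos, sin)]
    out_second = [b * c + a * s for a, b, c, s in zip(x1, x2, cos, sin)]
    return out_first + out_second

# Rotating by 90 degrees (cos=0, sin=1) maps the pair (1, 0) to (0, 1).
print(apply_rotary([1.0, 0.0], [0.0], [1.0]))  # [0.0, 1.0]
```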