Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-FL project
#
# vLLM-FL C++ extensions - Root CMakeLists.txt

cmake_minimum_required(VERSION 3.26)
project(vllm_fl_extensions LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# =============================================================================
# Vendor Selection (REQUIRED - no auto-detection)
# =============================================================================

# The vendor may come from the environment when not passed on the command
# line; an explicit -DVLLM_VENDOR=... always wins over the environment.
if(NOT DEFINED VLLM_VENDOR)
  if(DEFINED ENV{VLLM_VENDOR})
    set(VLLM_VENDOR "$ENV{VLLM_VENDOR}")
  endif()
endif()

if(NOT VLLM_VENDOR)
  message(FATAL_ERROR
    "VLLM_VENDOR is required but not specified.\n"
    "Please set VLLM_VENDOR environment variable or cmake option:\n"
    "  export VLLM_VENDOR=cuda    # For NVIDIA CUDA\n"
    "  export VLLM_VENDOR=ascend  # For Huawei Ascend\n"
    "\n"
    "Or pass to cmake:\n"
    "  cmake -DVLLM_VENDOR=cuda .."
  )
endif()

# Each supported vendor name is also the subdirectory added below.
set(SUPPORTED_VENDORS cuda ascend)
if(NOT VLLM_VENDOR IN_LIST SUPPORTED_VENDORS)
  message(FATAL_ERROR
    "Unsupported vendor: ${VLLM_VENDOR}\n"
    "Supported vendors: ${SUPPORTED_VENDORS}"
  )
endif()

message(STATUS "==============================================")
message(STATUS "vLLM-FL Extensions: ${VLLM_VENDOR}")
message(STATUS "==============================================")

# =============================================================================
# Find Python
# =============================================================================

# Honour an interpreter handed in by the Python-side build driver.
if(VLLM_PYTHON_EXECUTABLE)
  set(Python_EXECUTABLE "${VLLM_PYTHON_EXECUTABLE}")
endif()

find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
message(STATUS "Python: ${Python_EXECUTABLE} (${Python_VERSION})")

# =============================================================================
# Find PyTorch
# =============================================================================

# Ask the selected interpreter where torch's CMake config lives, and fail
# loudly if the import itself fails (previously the error was swallowed and
# only surfaced later as a confusing "Torch not found").
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import torch; print(torch.utils.cmake_prefix_path)"
  OUTPUT_VARIABLE TORCH_CMAKE_PREFIX
  RESULT_VARIABLE TORCH_QUERY_RESULT
  ERROR_VARIABLE TORCH_QUERY_ERROR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT TORCH_QUERY_RESULT EQUAL 0)
  message(FATAL_ERROR
    "Failed to query PyTorch from ${Python_EXECUTABLE}:\n"
    "${TORCH_QUERY_ERROR}"
  )
endif()
list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PREFIX}")

find_package(Torch REQUIRED)
message(STATUS "PyTorch: ${Torch_VERSION}")

# =============================================================================
# Include directories
# =============================================================================

# Directory-scoped on purpose: every vendor subdirectory needs to be able to
# #include "registration.h" from this directory.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# =============================================================================
# Build Vendor Backend
# =============================================================================

add_subdirectory(${VLLM_VENDOR})
65 changes: 65 additions & 0 deletions csrc/ascend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-FL project
#
# Ascend backend for vLLM-FL

# Locate the Ascend CANN toolkit: honour ASCEND_TOOLKIT_HOME, falling back
# to the conventional system install prefix.
set(ASCEND_TOOLKIT_PATH "$ENV{ASCEND_TOOLKIT_HOME}")
if(NOT ASCEND_TOOLKIT_PATH)
  set(ASCEND_TOOLKIT_PATH "/usr/local/Ascend/ascend-toolkit/latest")
endif()

# Probe for a known header rather than just the directory, so a partial or
# stale install is also rejected. Warning (not error): the root CMakeLists
# selected this vendor explicitly, but we still degrade gracefully.
if(NOT EXISTS "${ASCEND_TOOLKIT_PATH}/include/acl/acl.h")
  message(WARNING "Ascend CANN not found at ${ASCEND_TOOLKIT_PATH}. Skipping.")
  return()
endif()

message(STATUS "Ascend CANN: ${ASCEND_TOOLKIT_PATH}")

# =============================================================================
# Source files
# =============================================================================

set(VLLM_FL_ASCEND_SRCS
  weak_ref_tensor.cpp
  torch_bindings.cpp
)

# =============================================================================
# Define extension target
# =============================================================================

# Create Python extension module named _C
# This will be importable as: import vllm_fl._C
Python_add_library(_C MODULE WITH_SOABI ${VLLM_FL_ASCEND_SRCS})

# Set TORCH_EXTENSION_NAME so TORCH_LIBRARY_EXPAND works.
# Pass the definition bare: target_compile_definitions adds the -D itself,
# so the quoted "-D..." form is redundant and discouraged.
target_compile_definitions(_C PRIVATE TORCH_EXTENSION_NAME=_C)

# Include directories (".." exposes csrc/registration.h).
target_include_directories(_C PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/..
  ${ASCEND_TOOLKIT_PATH}/include
  ${TORCH_INCLUDE_DIRS}
)

# Link search paths for libtorch and the CANN runtime libraries.
get_filename_component(TORCH_LIB_DIR "${TORCH_LIBRARY}" DIRECTORY)
target_link_directories(_C PRIVATE
  ${TORCH_LIB_DIR}
  ${ASCEND_TOOLKIT_PATH}/lib64
)

# NOTE(review): weak_ref_tensor.cpp calls at_npu::native::from_blob, which
# is provided by the torch_npu extension — this target likely also needs
# torch_npu's include path and libtorch_npu on the link line; verify in an
# Ascend build environment.
target_link_libraries(_C PRIVATE ${TORCH_LIBRARIES})

# C++ settings
set_target_properties(_C PROPERTIES
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON
)

# =============================================================================
# Install to vllm_fl package directory
# =============================================================================

install(TARGETS _C LIBRARY DESTINATION vllm_fl COMPONENT _C)
25 changes: 25 additions & 0 deletions csrc/ascend/torch_bindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright contributors to the vLLM-FL project
//
// Ascend torch bindings for vLLM-FL operators

#include <torch/torch.h>
#include <torch/library.h>

#include "registration.h"

namespace vllm_fl {

// Forward declaration of the Ascend implementation (weak_ref_tensor.cpp).
// The signature must match the definition exactly: the definition takes a
// non-const reference, so the previous `const torch::Tensor&` declaration
// made `&vllm_fl::weak_ref_tensor_ascend` below refer to a never-defined
// overload (undefined symbol at link/load time).
torch::Tensor weak_ref_tensor_ascend(torch::Tensor& tensor);

}  // namespace vllm_fl

// Register extension for Python import (expands to the PyInit__C hook).
REGISTER_EXTENSION(TORCH_EXTENSION_NAME)

// Define operators using the extension name. Ascend/NPU tensors dispatch
// through the PrivateUse1 key.
TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
  ops.def("weak_ref_tensor(Tensor input) -> Tensor");
  ops.impl("weak_ref_tensor", c10::kPrivateUse1, &vllm_fl::weak_ref_tensor_ascend);
}
25 changes: 25 additions & 0 deletions csrc/ascend/weak_ref_tensor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) 2026 BAAI. All rights reserved.
// Ascend weak_ref_tensor implementation

#include <torch/torch.h>

// NOTE(review): at_npu::native::from_blob is declared by the torch_npu
// extension headers, which are not included here — this file presumably
// needs the torch_npu include (and the target a libtorch_npu link);
// confirm against the Ascend build environment.

namespace vllm_fl {

// Create a non-owning ("weak") alias of an NPU tensor: the result shares
// the input's storage via its raw data pointer and does NOT extend the
// input's lifetime. The caller must keep `tensor` alive while the alias
// is in use.
//
// Throws std::runtime_error if the tensor is not on the NPU (PrivateUse1)
// device.
torch::Tensor weak_ref_tensor_ascend(torch::Tensor& tensor) {
  // PrivateUse1 is the dispatch key Ascend/NPU tensors live under.
  if (!tensor.is_privateuseone()) {
    throw std::runtime_error("Tensor must be on NPU device");
  }
  // Get the raw data pointer plus the metadata needed to re-view it.
  void* data_ptr = tensor.data_ptr();
  std::vector<int64_t> sizes = tensor.sizes().vec();
  std::vector<int64_t> strides = tensor.strides().vec();
  auto options = tensor.options();
  // No deleter is supplied, so the new tensor does not own the memory.
  // (The original had a stray extra `}` after this function that made the
  // translation unit fail to parse; it is removed here.)
  return at_npu::native::from_blob(data_ptr, sizes, strides, options);
}

}  // namespace vllm_fl
63 changes: 63 additions & 0 deletions csrc/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-FL project
#
# CUDA backend for vLLM-FL

find_package(CUDAToolkit REQUIRED)

# Default GPU architectures (Volta through Hopper). This must be set BEFORE
# the target is created: CMAKE_CUDA_ARCHITECTURES only initializes the
# CUDA_ARCHITECTURES property of targets defined after it. The original
# placement (after Python_add_library) had no effect on _C.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "70;75;80;86;89;90")
endif()

enable_language(CUDA)

message(STATUS "CUDA Toolkit: ${CUDAToolkit_VERSION}")

# =============================================================================
# Source files
# =============================================================================

set(VLLM_FL_CUDA_SRCS
  weak_ref_tensor.cu
  torch_bindings.cpp
)

# =============================================================================
# Define extension target
# =============================================================================

# Create Python extension module named _C
# This will be importable as: import vllm_fl._C
Python_add_library(_C MODULE WITH_SOABI ${VLLM_FL_CUDA_SRCS})

# Set TORCH_EXTENSION_NAME so TORCH_LIBRARY_EXPAND works.
# Pass the definition bare: target_compile_definitions adds -D itself.
target_compile_definitions(_C PRIVATE TORCH_EXTENSION_NAME=_C)

# Include directories (".." exposes csrc/registration.h).
target_include_directories(_C PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/..
  ${CUDAToolkit_INCLUDE_DIRS}
  ${TORCH_INCLUDE_DIRS}
)

# Link libraries. ${TORCH_LIBRARIES} (matching the ascend backend) pulls in
# torch plus its required companion libraries.
target_link_libraries(_C PRIVATE
  ${TORCH_LIBRARIES}
  CUDA::cudart
  CUDA::cuda_driver
)

# CUDA settings
set_target_properties(_C PROPERTIES
  CUDA_STANDARD 17
  CUDA_STANDARD_REQUIRED ON
)

# Quote the generator expression and join the flags with ';' — the original
# unquoted "$<...:-O3 --use_fast_math>" split on the space into two broken
# arguments. NOTE: --use_fast_math trades accuracy for speed; kept as the
# author's intent.
target_compile_options(_C PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:-O3;--use_fast_math>"
)

# =============================================================================
# Install to vllm_fl package directory
# =============================================================================

install(TARGETS _C LIBRARY DESTINATION vllm_fl COMPONENT _C)
29 changes: 29 additions & 0 deletions csrc/cuda/torch_bindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright contributors to the vLLM-FL project
//
// CUDA torch bindings for vLLM-FL operators
//
// This translation unit only wires things together: it registers the
// Python module-init hook and declares/binds the operator schemas. The
// kernels themselves live in the sibling .cu files.

#include <torch/torch.h>
#include <torch/library.h>

#include "registration.h"

namespace vllm_fl {

// Forward declarations of CUDA implementations
// (defined in weak_ref_tensor.cu; the signature must match the definition
// exactly or ops.impl below binds an undefined overload).
torch::Tensor weak_ref_tensor_cuda(torch::Tensor& tensor);

} // namespace vllm_fl

// Register extension for Python import
// (REGISTER_EXTENSION expands to the PyInit__C entry point CPython looks
// up when importing the shared library).
REGISTER_EXTENSION(TORCH_EXTENSION_NAME)

// Define operators using the extension name
// (TORCH_LIBRARY_EXPAND lets TORCH_EXTENSION_NAME — a macro, not a literal
// token — be used as the library name).
TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
// def() declares the operator schema; impl() binds the CUDA kernel under
// the kCUDA dispatch key.
ops.def("weak_ref_tensor(Tensor input) -> Tensor");
ops.impl("weak_ref_tensor", c10::kCUDA, &vllm_fl::weak_ref_tensor_cuda);

// Add more operators here:
// ops.def("another_op(Tensor input) -> Tensor");
// ops.impl("another_op", c10::kCUDA, &vllm_fl::another_op_cuda);
}
30 changes: 30 additions & 0 deletions csrc/cuda/weak_ref_tensor.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) 2026 BAAI. All rights reserved.
// CUDA weak_ref_tensor implementation

#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

namespace vllm_fl {

// Build a non-owning alias of a CUDA tensor: the result views the same
// device memory with the same sizes, strides, and options, but holds no
// reference to the original storage. The caller is responsible for keeping
// `tensor` alive while the alias is in use.
//
// Throws std::runtime_error when the input is not a CUDA tensor.
torch::Tensor weak_ref_tensor_cuda(torch::Tensor& tensor) {
  if (!tensor.is_cuda()) {
    throw std::runtime_error("Tensor must be on CUDA device");
  }

  // from_blob with no deleter aliases the existing allocation instead of
  // copying it, yielding a tensor that does not own the buffer.
  return torch::from_blob(tensor.data_ptr(),
                          tensor.sizes().vec(),
                          tensor.strides().vec(),
                          tensor.options());
}

}  // namespace vllm_fl
27 changes: 27 additions & 0 deletions csrc/registration.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Shared helper macros for building loadable torch extension modules.
// The extension name arrives as the TORCH_EXTENSION_NAME macro (set by the
// build system), so every macro here must force an extra expansion step
// before pasting/stringifying.
#pragma once

#include <Python.h>

// Two-level token paste: CONCAT(PyInit_, NAME) expands NAME first, so
// NAME may itself be a macro such as TORCH_EXTENSION_NAME.
#define _CONCAT(A, B) A##B
#define CONCAT(A, B) _CONCAT(A, B)

// Two-level stringify, for the same reason: expand NAME before quoting it.
#define _STRINGIFY(A) #A
#define STRINGIFY(A) _STRINGIFY(A)

// A version of the TORCH_LIBRARY macro that expands the NAME, i.e. so NAME
// could be a macro instead of a literal token.
#define TORCH_LIBRARY_EXPAND(NAME, MODULE) TORCH_LIBRARY(NAME, MODULE)

// A version of the TORCH_LIBRARY_IMPL macro that expands the NAME, i.e. so NAME
// could be a macro instead of a literal token.
#define TORCH_LIBRARY_IMPL_EXPAND(NAME, DEVICE, MODULE) \
TORCH_LIBRARY_IMPL(NAME, DEVICE, MODULE)

// REGISTER_EXTENSION allows the shared library to be loaded and initialized
// via python's import statement.
// It defines PyInit_<NAME>, the entry point CPython looks up on import,
// returning a minimal module object with no methods of its own (the torch
// operators are registered separately via TORCH_LIBRARY_EXPAND).
#define REGISTER_EXTENSION(NAME) \
PyMODINIT_FUNC CONCAT(PyInit_, NAME)() { \
static struct PyModuleDef module = {PyModuleDef_HEAD_INIT, \
STRINGIFY(NAME), nullptr, 0, nullptr}; \
return PyModule_Create(&module); \
}
Loading
Loading