Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions .github/workflows/_ascend_npu_torchtitan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
name: '_ascend_npu_torchtitan'

# Reusable workflow: it is only triggered through `workflow_call`, and the
# caller supplies the runner, the container image and the artifact names.
on:
  workflow_call:
    inputs:
      runner:
        description: 'The runner selected to run on'
        type: string
        required: true
      image:
        description: 'The docker image which will be loaded'
        type: string
        required: true
      # Optional: when empty, the torch download/install steps are skipped.
      torch-artifact:
        description: 'The distribution artifact name of torch'
        type: string
        required: false
      torch-npu-artifact:
        description: 'The distribution artifact name of torch_npu'
        type: string
        required: true

# Bash shells do not use ~/.profile or ~/.bashrc unless started as a login
# shell, so "bash -el {0}" is declared once as the default shell of every
# `run` step. It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

jobs:
  # Single job: prepares the container, installs torch / torch_npu from the
  # build artifacts, then runs the torchtitan integration and unit tests.
  setup_environment:
    name: run torchtitan tests
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      # NOTE(review): `env` is assumed to belong to `container`; confirm it
      # was not meant to be a job-level `env`.
      env:
        HF_ENDPOINT: https://hf-mirror.com
    outputs:
      # The step id must match the `id:` of the step writing to
      # $GITHUB_OUTPUT, otherwise the output silently evaluates to ''.
      torch_version: ${{ steps.list-torch-version.outputs.torch-version }}
      # The check_npu step only publishes `npu_count`, so that is what this
      # output exposes.
      npu_info: ${{ steps.check_npu.outputs.npu_count }}
    steps:
      - name: Show NPU info
        run: |
          npu-smi info

      # Point apt and pip at the TUNA mirrors before installing anything.
      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build curl \
            libgl1 libglib2.0-0 libsndfile1

      # Mark the workspace as safe and route github.com clones via the proxy.
      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout
        uses: actions/checkout@v4

      # Checks out pytorch/torchtitan into ./torchtitan (used as the
      # working directory of the test steps below).
      - name: Checkout benchmark
        uses: actions/checkout@v4
        with:
          repository: pytorch/torchtitan
          path: torchtitan

      # The two torch steps only run when the caller passed a torch artifact.
      - name: Download torch artifact
        if: ${{ inputs.torch-artifact }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-artifact }}

      - name: Install torch
        if: ${{ inputs.torch-artifact }}
        run: |
          pip install ${{ inputs.torch-artifact }}

      # Without a torch artifact, install from the Ascend/pytorch
      # requirements file instead.
      - name: Install torch_npu dependencies
        if: ${{ !inputs.torch-artifact }}
        run: |
          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt

      # Publishes the installed torch version as the `torch-version` output.
      - name: List torch version
        id: list-torch-version
        shell: bash
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"

      - name: Download torch_npu artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-npu-artifact }}
          path: ascend_npu

      - name: Install torch_npu
        working-directory: ascend_npu
        run: |
          pip install ${{ inputs.torch-npu-artifact }}

      - name: Install project dependencies
        run: |
          pip install pytest pytest-cov tyro tabulate

      # Publishes `npu_count`, which the integration test passes to --ngpu.
      - name: Show environment info
        id: check_npu
        run: |
          npu_is_available=$(python -c "import torch; print(torch.npu.is_available())")
          npu_count=$(python -c "import torch; print(torch.npu.device_count())")
          echo "npu_count=${npu_count}" >> "$GITHUB_OUTPUT"
          echo "NPU is available: ${npu_is_available}"
          echo "NPU count: ${npu_count}"
          pip list | grep -E 'torch|numpy'

      # Best-effort step: a failure is shown on the step but does not fail
      # the job, so the unit tests below still run.
      - name: Run torchtitan integration_test
        working-directory: torchtitan
        continue-on-error: true
        run: |
          mkdir -p artifacts-to-be-uploaded
          # The output directory is passed as the positional argument
          # (presumably `output_dir`; verify against run_tests.py); it is not
          # a value for --test_name.
          python -m tests.integration_tests.run_tests \
            artifacts-to-be-uploaded \
            --ngpu ${{ steps.check_npu.outputs.npu_count }}

      - name: Run torchtitan unittest
        working-directory: torchtitan
        run: |
          pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
16 changes: 16 additions & 0 deletions .github/workflows/ascend_npu_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,19 @@ jobs:
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
secrets:
hf-token: ${{ secrets.HF_TOKEN }}

torchtitan:
  # Calls the reusable torchtitan workflow with the runner/image chosen by
  # `prepare` and the artifacts produced by `build-torch` and `build`.
  name: Run torchtitan tests
  needs: [prepare, build-torch, build]
  # Runs unless the run was cancelled or triggered by repository_dispatch,
  # and only when everything succeeded or `build-torch` was skipped while
  # `build` succeeded.
  if: >-
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  uses: ./.github/workflows/_ascend_npu_torchtitan.yml
  with:
    runner: ${{ needs.prepare.outputs.runner }}
    image: ${{ needs.prepare.outputs.image }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
Loading