Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dev = [
{include-group = 'docs'},
{include-group = 'frameworks'},
{include-group = 'lint'},
{include-group = 'profiling'},
{include-group = 'scripts'},
{include-group = 'test'},
{include-group = 'typing'}
Expand All @@ -42,6 +43,10 @@ lint = [
'tach>=0.23.0',
'validate-pyproject-schema-store[all]>=2025.06.13'
]
profiling = [
'nvtx>=0.2.14',
'viztracer>=1.1.1'
]
scripts = ["pyyaml>=6.0.1", "typer>=0.12.3", "packaging"]
test = [
'coverage[toml]>=7.6.1',
Expand Down
104 changes: 104 additions & 0 deletions src/gt4py/next/instrumentation/gpu_profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# GT4Py - GridTools Framework
#
# Copyright (c) 2014-2024, ETH Zurich
# All rights reserved.
#
# Please, refer to the LICENSE file in the root directory.
# SPDX-License-Identifier: BSD-3-Clause


from __future__ import annotations

import contextlib
import warnings
from collections.abc import Callable
from typing import Any, ClassVar

from gt4py._core import definitions as core_definitions, types as core_types
from gt4py.next import common, typing as gtx_typing
from gt4py.next.instrumentation import hooks


if core_definitions.CUPY_DEVICE_TYPE is not None:
    # A CuPy device type is configured: re-export CuPy's range marker.
    import cupyx.profiler as cupy_profiler

    time_range = cupy_profiler.time_range

else:

    class time_range(contextlib.AbstractContextManager):
        """Fallback range marker used when no CuPy device type is configured.

        The constructor accepts ``message``, ``color_id``, ``argb_color`` and
        ``sync`` and ignores all of them; creating an instance only emits a
        ``UserWarning``. Entering and leaving the context does nothing.
        """

        def __init__(
            self,
            message: str | None = None,
            color_id: int | None = None,
            argb_color: core_types.int32 | None = None,
            sync: bool = False,
        ) -> None:
            warnings.warn(
                "GT4Py profiling is only supported when using a GPU.",
                UserWarning,
                stacklevel=2,
            )

        def __exit__(self, exc_type, exc_val, exc_tb) -> None:
            # ``AbstractContextManager.__exit__`` is abstract: without this
            # override the class cannot be instantiated at all. Returning
            # ``None`` lets any exception from the ``with`` body propagate.
            return None


@contextlib.contextmanager
def profile_calls():
    """Context manager that enables call profiling for the duration of its body.

    Calls ``start_profiling_calls()`` on entry and ``stop_profiling_calls()``
    on exit. The stop call runs even when the body raises, so the profiler
    hooks are not left registered after a failure.
    """
    start_profiling_calls()
    try:
        yield
    finally:
        stop_profiling_calls()


def start_profiling_calls() -> None:
    """Register both profiler classes on their call-context hooks at index 0."""
    registrations = (
        (hooks.program_call_context, ProgramCallProfiler),
        (hooks.compiled_program_call_context, CompiledProgramCallProfiler),
    )
    for call_context, profiler_cls in registrations:
        call_context.register(profiler_cls, index=0)


def stop_profiling_calls() -> None:
    """Remove both profiler classes from their call-context hooks."""
    registrations = (
        (hooks.program_call_context, ProgramCallProfiler),
        (hooks.compiled_program_call_context, CompiledProgramCallProfiler),
    )
    for call_context, profiler_cls in registrations:
        call_context.remove(profiler_cls)


class ProgramProfiler(contextlib.AbstractContextManager):
    """Base context manager that wraps its body in a ``time_range``.

    The range is labelled with ``self.name`` and coloured with the class-level
    ``COLOR_ID``. Neither is set here: subclasses are expected to assign
    ``name`` in ``__init__`` and to define ``COLOR_ID``.
    """

    name: str
    time_range: cupy_profiler.time_range

    COLOR_ID: ClassVar[int]

    __slots__ = ("name", "time_range")

    def __enter__(self) -> None:
        """Print a start banner and open the underlying ``time_range``."""
        print(f"\n\n\n\nProfiling {self.name}...")
        # Keep the manager object itself instead of whatever its ``__enter__``
        # returns, so ``__exit__`` is always called on the real manager.
        self.time_range = time_range(self.name, color_id=self.COLOR_ID)
        self.time_range.__enter__()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the underlying ``time_range`` and print an end banner.

        The exception triple is forwarded to the range; its return value is
        discarded, so exceptions from the body are never suppressed here.
        """
        self.time_range.__exit__(exc_type, exc_val, exc_tb)
        print(f"Finished profiling {self.name}.\n\n\n\n")


class ProgramCallProfiler(ProgramProfiler):
    """Profiler whose range label is taken from ``program.__name__``."""

    COLOR_ID: ClassVar[int] = 1

    def __init__(
        self,
        program: gtx_typing.Program,
        args: tuple[Any, ...],
        offset_provider: common.OffsetProvider,
        enable_jit: bool,
        kwargs: dict[str, Any],
    ) -> None:
        # Only ``program`` is read; the other arguments are accepted and ignored.
        label = program.__name__
        self.name = label


class CompiledProgramCallProfiler(ProgramProfiler):
    """Profiler whose range label is built from the two entries of ``root``."""

    COLOR_ID: ClassVar[int] = 2

    def __init__(
        self,
        compiled_program: Callable,
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        offset_provider: common.OffsetProvider,
        root: tuple[str, str],
        key: gtx_typing.CompiledProgramsKey,
    ) -> None:
        # Only ``root`` is read; the label has the form "<root[0]><<root[1]>>",
        # i.e. the first entry followed by the second one in angle brackets.
        head = root[0]
        tail = root[1]
        self.name = f"{head}<{tail}>"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# GT4Py - GridTools Framework
#
# Copyright (c) 2014-2024, ETH Zurich
# All rights reserved.
#
# Please, refer to the LICENSE file in the root directory.
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

from gt4py.next.instrumentation import gpu_profiler

from ...multi_feature_tests.ffront_tests.test_ffront_fvm_nabla import pnabla



# Enter and immediately leave the profiling context. The context manager
# defined in ``gpu_profiler`` is ``profile_calls``; there is no ``profile``.
with gpu_profiler.profile_calls():
    pass

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import gt4py.next as gtx
from gt4py.next import common, Dims, gtfn_cpu, typing as gtx_typing
from gt4py.next.instrumentation import hooks
from gt4py.next.instrumentation import gpu_profiler, hooks

try:
from gt4py.next.program_processors.runners import dace as dace_backends
Expand Down Expand Up @@ -153,7 +153,9 @@ def test_program_call_hooks(backend: gtx_typing.Backend):
hooks.program_call_context.register(custom_program_callback)
hooks.embedded_program_call_context.register(custom_embedded_program_callback)
hooks.compiled_program_call_context.register(custom_compiled_program_callback)
test_program(True, a_field, b_field, out=out_field)
import gt4py.next.instrumentation.gpu_profiler
with gpu_profiler.profile_calls():
test_program(True, a_field, b_field, out=out_field)

# Check that the callbacks were called
assert len(callback_results) == 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,35 @@ def test_ffront_nabla(exec_alloc_descriptor):
assert_close(3.5455427772565435e-003, np.max(pnabla_MXX.asnumpy()))
assert_close(-3.3540113705465301e-003, np.min(pnabla_MYY.asnumpy()))
assert_close(3.3540113705465301e-003, np.max(pnabla_MYY.asnumpy()))


@pytest.mark.requires_atlas
def test_ffront_nabla_profiler(exec_alloc_descriptor):
    """Run ``pnabla`` once with GT4Py call profiling enabled.

    Setup, allocation and compilation are wrapped in a separate
    "pnabla-preparation" range. The test makes no assertions on the results;
    it only checks that the profiled run completes.
    """
    from gt4py.next.instrumentation import gpu_profiler

    # Skip the test, not error out, where CuPy's profiler is unavailable.
    cupy_profiler = pytest.importorskip("cupyx.profiler")

    # NOTE(review): the nesting below is reconstructed (preparation range
    # around setup/compile, program call outside it) - confirm against intent.
    with gpu_profiler.profile_calls():
        with cupy_profiler.time_range("pnabla-preparation", color_id=3):
            setup = nabla_setup(allocator=exec_alloc_descriptor.allocator)

            pnabla_MXX = gtx.zeros(
                {Vertex: setup.nodes_size}, allocator=exec_alloc_descriptor.allocator
            )
            pnabla_MYY = gtx.zeros(
                {Vertex: setup.nodes_size}, allocator=exec_alloc_descriptor.allocator
            )

            offset_provider = {
                "E2V": setup.edges2node_connectivity,
                "V2E": setup.nodes2edge_connectivity,
            }

            pnabla_prog = pnabla.with_backend(exec_alloc_descriptor)
            pnabla_prog.compile(offset_provider=offset_provider)

        pnabla_prog(
            setup.input_field,
            setup.S_fields,
            setup.sign_field,
            setup.vol_field,
            out=(pnabla_MXX, pnabla_MYY),
            offset_provider=offset_provider,
        )


Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
# Please, refer to the LICENSE file in the root directory.
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import json
import pathlib
import unittest.mock
Expand Down
Loading
Loading