From 7a5b1abdb6d7f6d6d41c257d87a4d059fde39fca Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 21:42:00 +0200 Subject: [PATCH 1/7] Fix GPR count on gfx11, gfx12, gfx1250 --- projects/rocprof-trace-decoder/source/gfx10/rdna_sqtt.cpp | 1 + projects/rocprof-trace-decoder/source/quick_scan_export.cpp | 1 + projects/rocprof-trace-decoder/source/trace_parser.hpp | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/rocprof-trace-decoder/source/gfx10/rdna_sqtt.cpp b/projects/rocprof-trace-decoder/source/gfx10/rdna_sqtt.cpp index c70ca4d2f48..efe0128afb1 100644 --- a/projects/rocprof-trace-decoder/source/gfx10/rdna_sqtt.cpp +++ b/projects/rocprof-trace-decoder/source/gfx10/rdna_sqtt.cpp @@ -155,6 +155,7 @@ void RDNASQTParser::sqtt_simd_analysis(CppReturnInfo& info, TokenGenerator& _gen case RdnaType::HEADER: { tt_version = header_type{.raw = token.contents}.version; + csregister.tt_version = tt_version; if (tt_version >= 4) double_buffer = (token.contents >> 43) & 1; diff --git a/projects/rocprof-trace-decoder/source/quick_scan_export.cpp b/projects/rocprof-trace-decoder/source/quick_scan_export.cpp index 38b03fabb0d..87bb3c534ce 100644 --- a/projects/rocprof-trace-decoder/source/quick_scan_export.cpp +++ b/projects/rocprof-trace-decoder/source/quick_scan_export.cpp @@ -425,6 +425,7 @@ ROCPROF_TRACE_DECODER_API rocprofiler_thread_trace_decoder_status_t rocprof_trac { uint64_t header_word = load_header_word(data); gfxip = extract_gfxip(header_word); + if (gfxip > 9) local.tt_version = mi400::header_type{.raw = header_word}.version; auto decoder = HandleData::get_write_handle(handle); if (!decoder.valid()) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT; diff --git a/projects/rocprof-trace-decoder/source/trace_parser.hpp b/projects/rocprof-trace-decoder/source/trace_parser.hpp index 93bf131c0cd..179cb2ce0a9 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.hpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.hpp @@ -345,6 +345,7 @@ class CSRegisterHandler bool bIsROCMFormat = false; int userdata_state{}; + int tt_version{0}; CowPtr> active_codeobjs{}; CachedTable table{}; @@ -527,7 +528,7 @@ class CSRegisterHandler static constexpr uint64_t BITMASK = (uint64_t{1} << 48) - 1; - rocprofiler_thread_trace_decoder_dispatch_t PopulateDispatch(int64_t time, int me, int pipe, int tt_version = 0) +rocprofiler_thread_trace_decoder_dispatch_t PopulateDispatch(int64_t time, int me, int pipe) { rocprofiler_thread_trace_decoder_dispatch_t event{}; event.size = sizeof(rocprofiler_thread_trace_decoder_dispatch_t); From 60ac965a808b877d02a87c59b967b0c5681f3718 Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 21:58:19 +0200 Subject: [PATCH 2/7] Fix LDS --- .../rocprof-trace-decoder/source/gfx9/gfx9wave.h | 5 ++++- .../rocprof-trace-decoder/source/trace_parser.cpp | 15 +++++++++++---- .../rocprof-trace-decoder/source/trace_parser.hpp | 13 +++++-------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/projects/rocprof-trace-decoder/source/gfx9/gfx9wave.h b/projects/rocprof-trace-decoder/source/gfx9/gfx9wave.h index cb9b0736a5c..3ca95964df0 100644 --- a/projects/rocprof-trace-decoder/source/gfx9/gfx9wave.h +++ b/projects/rocprof-trace-decoder/source/gfx9/gfx9wave.h @@ -79,7 +79,10 @@ class CSRegisterHandlerGFX9 : public CSRegisterHandler class MISQTTParser : public SQTTParser { public: - MISQTTParser(int tg_cu, bool _double_buffer) : target_cu(tg_cu), double_buffer(_double_buffer){}; + MISQTTParser(int tg_cu, bool _double_buffer, bool is_mi350) : target_cu(tg_cu), double_buffer(_double_buffer) + { + csregister.tt_version = is_mi350 ? 1 : 0; + }; ~MISQTTParser() override{}; void sqtt_simd_analysis(CppReturnInfo& info, class TokenGenerator& generator, class Stitcher& stitch) override; diff --git a/projects/rocprof-trace-decoder/source/trace_parser.cpp b/projects/rocprof-trace-decoder/source/trace_parser.cpp index 618466d5a8f..0e93925762b 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.cpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.cpp @@ -51,13 +51,14 @@ std::unique_ptr AnalyseBinary_GFX9_internal( uint64_t buffersize, int target_cu, class Stitcher& stitch, - bool double_buffer + bool double_buffer, + bool is_mi350 ) { stitch.setgfxip(9); auto generator = gfx9::MITokenGenerator(tokendata, buffersize, 0, 0); - auto parser = std::make_unique(target_cu, double_buffer); + auto parser = std::make_unique(target_cu, double_buffer, is_mi350); parser->sqtt_simd_analysis(info, generator, stitch); return parser; @@ -165,7 +166,13 @@ std::unique_ptr AnalyseBinary_internal( BUFFER_SIZE -= sizeof(rocprof_trace_decoder_gfx9_header_t); return AnalyseBinary_GFX9_internal( - info, buffer, BUFFER_SIZE, gfx9_header.DCU, stitch, gfx9_header.double_buffer + info, + buffer, + BUFFER_SIZE, + gfx9_header.DCU, + stitch, + gfx9_header.double_buffer, + gfx9_header.gfx9_version2 >= 6 ); } else if (gfx9_header.legacy_version != 0) @@ -182,7 +189,7 @@ std::unique_ptr AnalyseBinary_internal( return AnalyseBinary_GFX10_internal(info, buffer, BUFFER_SIZE, stitch); } } - else { return AnalyseBinary_GFX9_internal(info, buffer, BUFFER_SIZE, gfx9_target_cu, stitch, false); } + else { return AnalyseBinary_GFX9_internal(info, buffer, BUFFER_SIZE, gfx9_target_cu, stitch, false, false); } return nullptr; } diff --git a/projects/rocprof-trace-decoder/source/trace_parser.hpp b/projects/rocprof-trace-decoder/source/trace_parser.hpp index 179cb2ce0a9..a7083a1ee98 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.hpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.hpp @@ -528,7 +528,7 @@ class CSRegisterHandler static constexpr uint64_t BITMASK = (uint64_t{1} << 48) - 1; -rocprofiler_thread_trace_decoder_dispatch_t PopulateDispatch(int64_t time, int me, int pipe) + rocprofiler_thread_trace_decoder_dispatch_t PopulateDispatch(int64_t time, int me, int pipe) { rocprofiler_thread_trace_decoder_dispatch_t event{}; event.size = sizeof(rocprofiler_thread_trace_decoder_dispatch_t); @@ -544,19 +544,16 @@ rocprofiler_thread_trace_decoder_dispatch_t PopulateDispatch(int64_t time, int m event.thread_dim_x = num_thread_x; event.thread_dim_y = num_thread_y; event.thread_dim_z = num_thread_z; - event.lds_size = ((rsrc2 >> 15) & 0x1FF) * 512; + event.lds_size = ((rsrc2 >> 15) & 0x1FF) * 128; event.sgprs = 128; event.vgprs = (rsrc1 & 0x3F) * 8 + 8; event.user_sgprs = (rsrc2 >> 1) & 0x1F; if (tt_version == 0) event.sgprs = ((rsrc1 >> 7) & 0x7) * 16 + 16; - - if (tt_version >= 5) - { - event.vgprs *= 2; - event.lds_size *= 2; - } + if (tt_version == 1) event.lds_size *= 10; + if (tt_version >= 2) event.lds_size *= 2; + if (tt_version >= 5) event.vgprs *= 2; event.flags = ROCPROFILER_THREAD_TRACE_DECODER_DISPATCH_FLAGS_NONE; if ((rsrc1 >> 10) & 1) event.flags |= ROCPROFILER_THREAD_TRACE_DECODER_DISPATCH_FLAGS_SCALAR_CACHE_INVALIDATE; From 5cde721e639e80a72b319d4f443707e65a26a828 Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 22:12:47 +0200 Subject: [PATCH 3/7] Typo --- projects/rocprof-trace-decoder/source/trace_parser.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rocprof-trace-decoder/source/trace_parser.hpp b/projects/rocprof-trace-decoder/source/trace_parser.hpp index a7083a1ee98..62e7daf782c 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.hpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.hpp @@ -550,7 +550,7 @@ class CSRegisterHandler event.vgprs = (rsrc1 & 0x3F) * 8 + 8; event.user_sgprs = (rsrc2 >> 1) & 0x1F; - if (tt_version == 0) event.sgprs = ((rsrc1 >> 7) & 0x7) * 16 + 16; + if (tt_version <= 1) event.sgprs = ((rsrc1 >> 7) & 0x7) * 16 + 16; if (tt_version == 1) event.lds_size *= 10; if (tt_version >= 2) event.lds_size *= 2; if (tt_version >= 5) event.vgprs *= 2; From 08f250917fa3c6ddc073fd6d88b2c343c3ae897e Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 22:26:43 +0200 Subject: [PATCH 4/7] Fix again --- .../rocprof-trace-decoder/source/trace_parser.cpp | 1 + .../rocprof-trace-decoder/source/trace_parser.hpp | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/projects/rocprof-trace-decoder/source/trace_parser.cpp b/projects/rocprof-trace-decoder/source/trace_parser.cpp index 0e93925762b..dc7e780d08b 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.cpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.cpp @@ -55,6 +55,7 @@ std::unique_ptr AnalyseBinary_GFX9_internal( bool is_mi350 ) { + is_mi350 = true; stitch.setgfxip(9); auto generator = gfx9::MITokenGenerator(tokendata, buffersize, 0, 0); diff --git a/projects/rocprof-trace-decoder/source/trace_parser.hpp b/projects/rocprof-trace-decoder/source/trace_parser.hpp index 62e7daf782c..2a48334f391 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.hpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.hpp @@ -544,16 +544,19 @@ class CSRegisterHandler event.thread_dim_x = num_thread_x; event.thread_dim_y = num_thread_y; event.thread_dim_z = num_thread_z; - event.lds_size = ((rsrc2 >> 15) & 0x1FF) * 128; + event.lds_size = ((rsrc2 >> 15) & 0x1FF) * 512; event.sgprs = 128; event.vgprs = (rsrc1 & 0x3F) * 8 + 8; event.user_sgprs = (rsrc2 >> 1) & 0x1F; if (tt_version <= 1) event.sgprs = ((rsrc1 >> 7) & 0x7) * 16 + 16; - if (tt_version == 1) event.lds_size *= 10; - if (tt_version >= 2) event.lds_size *= 2; - if (tt_version >= 5) event.vgprs *= 2; + if (tt_version == 1) event.lds_size = event.lds_size * 10 / 4; + if (tt_version >= 5) + { + event.lds_size *= 2; + event.vgprs *= 2; + } event.flags = ROCPROFILER_THREAD_TRACE_DECODER_DISPATCH_FLAGS_NONE; if ((rsrc1 >> 10) & 1) event.flags |= ROCPROFILER_THREAD_TRACE_DECODER_DISPATCH_FLAGS_SCALAR_CACHE_INVALIDATE; From 92575756176ddc4d54a753e4c9be0852e959e6ca Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 22:27:34 +0200 Subject: [PATCH 5/7] Format --- projects/rocprof-trace-decoder/source/trace_parser.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/rocprof-trace-decoder/source/trace_parser.hpp b/projects/rocprof-trace-decoder/source/trace_parser.hpp index 2a48334f391..e893d2169cc 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.hpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.hpp @@ -554,8 +554,8 @@ class CSRegisterHandler if (tt_version == 1) event.lds_size = event.lds_size * 10 / 4; if (tt_version >= 5) { - event.lds_size *= 2; event.vgprs *= 2; + event.lds_size *= 2; } event.flags = ROCPROFILER_THREAD_TRACE_DECODER_DISPATCH_FLAGS_NONE; From 31c6333f3a2a5059b7a38dc67b00915b711c83b1 Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 22:33:38 +0200 Subject: [PATCH 6/7] Fix debug left --- projects/rocprof-trace-decoder/source/trace_parser.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/rocprof-trace-decoder/source/trace_parser.cpp b/projects/rocprof-trace-decoder/source/trace_parser.cpp index dc7e780d08b..0e93925762b 100644 --- a/projects/rocprof-trace-decoder/source/trace_parser.cpp +++ b/projects/rocprof-trace-decoder/source/trace_parser.cpp @@ -55,7 +55,6 @@ std::unique_ptr AnalyseBinary_GFX9_internal( bool is_mi350 ) { - is_mi350 = true; stitch.setgfxip(9); auto generator = gfx9::MITokenGenerator(tokendata, buffersize, 0, 0); From c52f4959e2fc2fb8a2d66e5184dfdd646421ea4f Mon Sep 17 00:00:00 2001 From: Giovanni Baraldi Date: Wed, 24 Jun 2026 23:49:19 +0200 Subject: [PATCH 7/7] Fix unit test --- .../rocprof-trace-decoder/test/unit/trace_parser_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/projects/rocprof-trace-decoder/test/unit/trace_parser_test.cpp b/projects/rocprof-trace-decoder/test/unit/trace_parser_test.cpp index 22f3b9b705b..e4720250859 100644 --- a/projects/rocprof-trace-decoder/test/unit/trace_parser_test.cpp +++ b/projects/rocprof-trace-decoder/test/unit/trace_parser_test.cpp @@ -33,7 +33,8 @@ std::unique_ptr AnalyseBinary_GFX9_internal( uint64_t buffersize, int target_cu, class Stitcher& stitch, - bool double_buffer + bool double_buffer, + bool is_mi350 ); // Note: ToPcV2 is defined in segment_test.cpp @@ -362,5 +363,5 @@ TEST(AnalyseBinaryTest, Gfx9PositiveTargetCu) Stitcher stitch(mock, noop_cb, nullptr); uint8_t buf[64] = {0}; CppReturnInfo info; - EXPECT_NE(AnalyseBinary_GFX9_internal(info, buf, sizeof(buf), 0, stitch, false), nullptr); + EXPECT_NE(AnalyseBinary_GFX9_internal(info, buf, sizeof(buf), 0, stitch, false, false), nullptr); }