Tpush#115
Conversation
There was a problem hiding this comment.
Code Review
This pull request updates the TPush implementation to support additional tile locations and optimizes c2v transfers using linear storage. It also adds new test cases for these scenarios. However, the review identifies several high-severity issues: a major regression in test coverage caused by commenting out nearly all existing tests, and memory leaks in the new test implementations where cleanup logic was disabled. Additionally, the feedback suggests removing dead code and confusing trailing comments in the core headers to improve maintainability.
| # hashfind | ||
| # mgather | ||
| # mscatter | ||
| # setgetval | ||
| # tabs | ||
| # tadd | ||
| # taddc | ||
| # tadds | ||
| # taddsc | ||
| # tassign_alias | ||
| # tand | ||
| # tands | ||
| # targreduceop | ||
| # taxpy | ||
| # tbroadcast | ||
| # tci | ||
| # tcmp | ||
| # tcmps | ||
| # tcolexpand | ||
| # tcolexpandop | ||
| # tcolmax | ||
| # tcolmin | ||
| # tcolprod | ||
| # tcolreduceidx | ||
| # tcolsum | ||
| # tconcat | ||
| # tcvt | ||
| # tdequant | ||
| # tdiv | ||
| # tdivs | ||
| # texp | ||
| # texpands | ||
| # textract | ||
| # tfillpad | ||
| # tflashattn | ||
| # tfmod | ||
| # tfmods | ||
| # tgather | ||
| # tgatherb | ||
| # tget | ||
| # tget_async | ||
| # tget_scale_addr | ||
| # tgetscaleaddr | ||
| # thistogram | ||
| # timg2col | ||
| # tinsert | ||
| # tload | ||
| # tloadconv | ||
| # tlog | ||
| # tlrelu | ||
| # tmatmul | ||
| # tmatmul_layout | ||
| # #tmatmul_mx | ||
| # tmax | ||
| # tmaxs | ||
| # tmin | ||
| # tmins | ||
| # tmov | ||
| # tmrgsort | ||
| # tmul | ||
| # tmuls | ||
| # tneg | ||
| # tnot | ||
| # tnotify | ||
| # tor | ||
| # tors | ||
| # tpartadd | ||
| # tpartmul | ||
| # tpartmax | ||
| # tpartmin | ||
| # tprefetch | ||
| # tprelu | ||
| # tpushpop | ||
| # tput | ||
| # tput_async | ||
| # tquant | ||
| # trandom | ||
| # trecip | ||
| # treduce | ||
| # trelu | ||
| # trem | ||
| # trems | ||
| # treshape | ||
| # trowexpand | ||
| # trowexpandop | ||
| # trowmax | ||
| # trowmin | ||
| # trowreduceidx | ||
| # trowsum | ||
| # trsqrt | ||
| # tscatter | ||
| # tsel | ||
| # tsels | ||
| # tshl | ||
| # tshls | ||
| # tshr | ||
| # tshrs | ||
| # tsort32 | ||
| # tsqrt | ||
| # tstore | ||
| # tsub | ||
| # tsubview | ||
| # tsubc | ||
| # tsubs | ||
| # tsubsc | ||
| # ttest | ||
| # ttrans | ||
| # ttri | ||
| # twait | ||
| # txor | ||
| # txors | ||
| # tpushpop_cv_nosplit | ||
| # tpushpop_cv | ||
| # tpushpop_vc_nosplit |
| // pto::cpu_sim::register_hooks(nullptr, nullptr); | ||
| // aclrtFree(pipe_mem); | ||
| // static_cast<MainPipe::SharedState*>(g_shared_storage_ptr)->~SharedState(); | ||
| // free(g_shared_storage_ptr); |
There was a problem hiding this comment.
| // pto::cpu_sim::register_hooks(nullptr, nullptr); | ||
| // aclrtFree(pipe_mem); | ||
| // static_cast<MainPipe::SharedState*>(g_shared_storage_ptr)->~SharedState(); | ||
| // free(g_shared_storage_ptr); |
There was a problem hiding this comment.
| // using SlotTile = Tile<TileType::Vec, T, consRows, consCols, BLayout::RowMajor, consRows, consCols>; | ||
| // SlotTile slotTile; | ||
| // TASSIGN(slotTile, static_cast<uint64_t>(pipe.fifo.C2V_CONSUMER_BUF + entryBase)); | ||
| // cpu_pipe::CopyTileWindow(slotTile, tile, 0, 0); |
| popTileFromGMFiFo<TileCons, Split>(fifo, tile); | ||
| return true; | ||
| } else if constexpr (TPipe::is_c2v) { | ||
| } else if constexpr (TPipe::is_c2v && TileCons::Loc == TileType::Vec) { // && TileCons::Loc != TileType::Vec |
| } | ||
| return false; | ||
| } else if constexpr (TPipe::is_v2c) { | ||
| } else if constexpr (TPipe::is_v2c && TileCons::Loc != TileType::Vec) { // && TileCons::Loc == TileType::Vec |
|
Triage review (2026-05-08): I do not think this branch is mergeable as-is. Blockers I found:
Please reduce this to a focused patch: keep the CPU-SIM fix and the minimal new regression tests, do not disable unrelated test cases, and rebase onto current |
No description provided.