From 6614e4a536b4424a079091bc8097fe8d3f2f4d75 Mon Sep 17 00:00:00 2001 From: hugh Date: Sun, 17 May 2026 17:44:20 +0000 Subject: [PATCH 01/10] [wip] migrate rigid_solver kernel_step_1/step_2 to @qd.data_oriented(stable_members=True) Checkpoint of the migration attempt: moves kernel_step_1, kernel_step_2, and update_geom_aabbs into RigidSolver as @qd.kernel methods, decorates the class with @qd.data_oriented(stable_members=True), exposes _contact_island_state and _collider_state as direct attributes, and updates the substep() / detection() call sites. This was originally failing against quadrants hp/data-oriented-ndarray-fix with: Missing argument __qd_links_state__qd_cinr_inertial. Unexpected argument links_state. The fix lives on quadrants branch hp/data-oriented-qd-func-dataclass (Option A: extend caller-side dataclass-arg expansion to handle data_oriented self.X attribute access). --- genesis/engine/solvers/rigid/rigid_solver.py | 196 ++++++++++++++----- 1 file changed, 149 insertions(+), 47 deletions(-) diff --git a/genesis/engine/solvers/rigid/rigid_solver.py b/genesis/engine/solvers/rigid/rigid_solver.py index 42f2cf7d39..428b7b265b 100644 --- a/genesis/engine/solvers/rigid/rigid_solver.py +++ b/genesis/engine/solvers/rigid/rigid_solver.py @@ -220,6 +220,7 @@ def _sanitize_sol_params( return sol_params +@qd.data_oriented(stable_members=True) class RigidSolver(KinematicSolver): # override typing _entities: list[RigidEntity] = gs.List() @@ -960,6 +961,150 @@ def _init_constraint_solver(self): self.constraint_solver = ConstraintSolverIsland(self) else: self.constraint_solver = ConstraintSolver(self) + self._contact_island_state = self.constraint_solver.contact_island.contact_island_state + self._collider_state = self.collider._collider_state + + @qd.kernel(fastcache=True) + def step_1( + self, + is_forward_pos_updated: qd.template(), + is_forward_vel_updated: qd.template(), + is_backward: qd.template(), + ): + if qd.static(not is_forward_pos_updated): + func_update_cartesian_space( + links_state=self.links_state, + links_info=self.links_info, + joints_state=self.joints_state, + joints_info=self.joints_info, + dofs_state=self.dofs_state, + dofs_info=self.dofs_info, + geoms_state=self.geoms_state, + geoms_info=self.geoms_info, + entities_info=self.entities_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + force_update_fixed_geoms=False, + is_backward=is_backward, + ) + + if qd.static(not is_forward_vel_updated): + func_forward_velocity( + entities_info=self.entities_info, + links_info=self.links_info, + links_state=self.links_state, + joints_info=self.joints_info, + dofs_state=self.dofs_state, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + is_backward=is_backward, + ) + + func_forward_dynamics( + links_state=self.links_state, + links_info=self.links_info, + dofs_state=self.dofs_state, + dofs_info=self.dofs_info, + joints_info=self.joints_info, + entities_state=self.entities_state, + entities_info=self.entities_info, + geoms_state=self.geoms_state, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + contact_island_state=self._contact_island_state, + is_backward=is_backward, + ) + + @qd.kernel(fastcache=True) + def step_2( + self, + is_backward: qd.template(), + ): + func_update_acc( + update_cacc=True, + dofs_state=self.dofs_state, + links_info=self.links_info, + links_state=self.links_state, + entities_info=self.entities_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + is_backward=is_backward, + ) + + if qd.static(self._static_rigid_sim_config.integrator != gs.integrator.approximate_implicitfast): + func_implicit_damping( + dofs_state=self.dofs_state, + dofs_info=self.dofs_info, + entities_info=self.entities_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + is_backward=is_backward, + ) + + func_integrate( + dofs_state=self.dofs_state, + links_info=self.links_info, + joints_info=self.joints_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + is_backward=is_backward, + ) + + if qd.static(self._static_rigid_sim_config.use_hibernation): + func_hibernate__for_all_awake_islands_either_hiberanate_or_update_aabb_sort_buffer( + dofs_state=self.dofs_state, + entities_state=self.entities_state, + entities_info=self.entities_info, + links_state=self.links_state, + geoms_state=self.geoms_state, + collider_state=self._collider_state, + unused__rigid_global_info=self._rigid_global_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + contact_island_state=self._contact_island_state, + errno=self._errno, + ) + func_aggregate_awake_entities( + entities_state=self.entities_state, + entities_info=self.entities_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + ) + + if qd.static(not is_backward): + func_copy_next_to_curr( + dofs_state=self.dofs_state, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + errno=self._errno, + ) + + if qd.static(not self._static_rigid_sim_config.enable_mujoco_compatibility): + func_update_cartesian_space( + links_state=self.links_state, + links_info=self.links_info, + joints_state=self.joints_state, + joints_info=self.joints_info, + dofs_state=self.dofs_state, + dofs_info=self.dofs_info, + geoms_state=self.geoms_state, + geoms_info=self.geoms_info, + entities_info=self.entities_info, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + force_update_fixed_geoms=False, + is_backward=is_backward, + ) + func_forward_velocity( + entities_info=self.entities_info, + links_info=self.links_info, + links_state=self.links_state, + joints_info=self.joints_info, + dofs_state=self.dofs_state, + rigid_global_info=self._rigid_global_info, + static_rigid_sim_config=self._static_rigid_sim_config, + is_backward=is_backward, + ) def substep(self, f): # from genesis.utils.tools import create_timer @@ -974,20 +1119,7 @@ def substep(self, f): static_rigid_sim_config=self._static_rigid_sim_config, ) - kernel_step_1( - self.links_state, - self.links_info, - self.joints_state, - self.joints_info, - self.dofs_state, - self.dofs_info, - self.geoms_state, - self.geoms_info, - self.entities_state, - self.entities_info, - self._rigid_global_info, - self._static_rigid_sim_config, - self.constraint_solver.contact_island.contact_island_state, + self.step_1( self._is_forward_pos_updated, self._is_forward_vel_updated, self._is_backward, @@ -1002,23 +1134,8 @@ def substep(self, f): ) else: self._func_constraint_force() - kernel_step_2( - self.dofs_state, - self.dofs_info, - self.links_info, - self.links_state, - self.joints_info, - self.joints_state, - self.entities_state, - self.entities_info, - self.geoms_info, - self.geoms_state, - self.collider._collider_state, - self._rigid_global_info, - self._static_rigid_sim_config, - self.constraint_solver.contact_island.contact_island_state, + self.step_2( self._is_backward, - self._errno, ) self._is_forward_pos_updated = not self._enable_mujoco_compatibility self._is_forward_vel_updated = not self._enable_mujoco_compatibility @@ -1454,23 +1571,8 @@ def substep_post_coupling(self, f): static_rigid_sim_config=self._static_rigid_sim_config, is_backward=self._is_backward, ) - kernel_step_2( - dofs_state=self.dofs_state, - dofs_info=self.dofs_info, - links_info=self.links_info, - links_state=self.links_state, - joints_info=self.joints_info, - joints_state=self.joints_state, - entities_state=self.entities_state, - entities_info=self.entities_info, - geoms_info=self.geoms_info, - geoms_state=self.geoms_state, - collider_state=self.collider._collider_state, - rigid_global_info=self._rigid_global_info, - static_rigid_sim_config=self._static_rigid_sim_config, - contact_island_state=self.constraint_solver.contact_island.contact_island_state, - is_backward=self._is_backward, - errno=self._errno, + self.step_2( + self._is_backward, ) elif isinstance(self.sim.coupler, IPCCoupler): # If any rigid entity is coupled to IPC, perform rigid simulation in post-coupling phase. From 1c2d018f470db38a4c01e9177310e68a605b3ee4 Mon Sep 17 00:00:00 2001 From: hugh Date: Sun, 17 May 2026 21:04:50 +0000 Subject: [PATCH 02/10] [Cleanup] Delete dead top-level kernel_step_1/kernel_step_2 The forward path was migrated to @qd.kernel methods on the data_oriented RigidSolver (self.step_1 / self.step_2). The only remaining caller of the top-level kernel_step_2 was the backward-grad call site; replace it with self.step_2.grad(is_backward=True) (the BoundQuadrantsCallable.grad descriptor uses the kernels adjoint correctly) and delete both old top-level kernel definitions. --- genesis/engine/solvers/rigid/rigid_solver.py | 193 +------------------ 1 file changed, 1 insertion(+), 192 deletions(-) diff --git a/genesis/engine/solvers/rigid/rigid_solver.py b/genesis/engine/solvers/rigid/rigid_solver.py index 428b7b265b..68c8dee250 100644 --- a/genesis/engine/solvers/rigid/rigid_solver.py +++ b/genesis/engine/solvers/rigid/rigid_solver.py @@ -1469,24 +1469,7 @@ def substep_pre_coupling_grad(self, f): if not is_grad_valid: gs.raise_exception(f"Nan grad in qpos or dofs_vel found at step {self._sim.cur_step_global}") - kernel_step_2.grad( - dofs_state=self.dofs_state, - dofs_info=self.dofs_info, - links_info=self.links_info, - links_state=self.links_state, - joints_info=self.joints_info, - joints_state=self.joints_state, - entities_state=self.entities_state, - entities_info=self.entities_info, - geoms_info=self.geoms_info, - geoms_state=self.geoms_state, - collider_state=self.collider._collider_state, - rigid_global_info=self._rigid_global_info, - static_rigid_sim_config=self._static_rigid_sim_config, - contact_island_state=self.constraint_solver.contact_island.contact_island_state, - is_backward=True, - errno=self._errno, - ) + self.step_2.grad(is_backward=True) # We cannot use [kernel_forward_dynamics.grad] because we read [dofs_state.acc] and overwrite it in the kernel, # which is prohibited (https://docs.taichi-lang.org/docs/differentiable_programming#global-data-access-rules). @@ -2884,177 +2867,3 @@ def equalities(self): return self._equalities return gs.List(equality for entity in self._entities for equality in entity.equalities) - -@qd.kernel(fastcache=True) -def kernel_step_1( - links_state: array_class.LinksState, - links_info: array_class.LinksInfo, - joints_state: array_class.JointsState, - joints_info: array_class.JointsInfo, - dofs_state: array_class.DofsState, - dofs_info: array_class.DofsInfo, - geoms_state: array_class.GeomsState, - geoms_info: array_class.GeomsInfo, - entities_state: array_class.EntitiesState, - entities_info: array_class.EntitiesInfo, - rigid_global_info: array_class.RigidGlobalInfo, - static_rigid_sim_config: qd.template(), - contact_island_state: array_class.ContactIslandState, - is_forward_pos_updated: qd.template(), - is_forward_vel_updated: qd.template(), - is_backward: qd.template(), -): - if qd.static(not is_forward_pos_updated): - func_update_cartesian_space( - links_state=links_state, - links_info=links_info, - joints_state=joints_state, - joints_info=joints_info, - dofs_state=dofs_state, - dofs_info=dofs_info, - geoms_state=geoms_state, - geoms_info=geoms_info, - entities_info=entities_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - force_update_fixed_geoms=False, - is_backward=is_backward, - ) - - if qd.static(not is_forward_vel_updated): - func_forward_velocity( - entities_info=entities_info, - links_info=links_info, - links_state=links_state, - joints_info=joints_info, - dofs_state=dofs_state, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - is_backward=is_backward, - ) - - func_forward_dynamics( - links_state=links_state, - links_info=links_info, - dofs_state=dofs_state, - dofs_info=dofs_info, - joints_info=joints_info, - entities_state=entities_state, - entities_info=entities_info, - geoms_state=geoms_state, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - contact_island_state=contact_island_state, - is_backward=is_backward, - ) - - -@qd.kernel(fastcache=True) -def kernel_step_2( - dofs_state: array_class.DofsState, - dofs_info: array_class.DofsInfo, - links_info: array_class.LinksInfo, - links_state: array_class.LinksState, - joints_info: array_class.JointsInfo, - joints_state: array_class.JointsState, - entities_state: array_class.EntitiesState, - entities_info: array_class.EntitiesInfo, - geoms_info: array_class.GeomsInfo, - geoms_state: array_class.GeomsState, - collider_state: array_class.ColliderState, - rigid_global_info: array_class.RigidGlobalInfo, - static_rigid_sim_config: qd.template(), - contact_island_state: array_class.ContactIslandState, - is_backward: qd.template(), - errno: qd.Tensor, -): - # Position, Velocity and Acceleration data must be consistent when computing links acceleration, otherwise it - # would not corresponds to anyting physical. There is no other way than doing this right before integration, - # because the acceleration at the end of the step is unknown for now as it may change discontinuous between - # before and after integration under the effect of external forces and constraints. This means that - # acceleration data will be shifted one timestep in the past, but there isn't really any way around. - func_update_acc( - update_cacc=True, - dofs_state=dofs_state, - links_info=links_info, - links_state=links_state, - entities_info=entities_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - is_backward=is_backward, - ) - - if qd.static(static_rigid_sim_config.integrator != gs.integrator.approximate_implicitfast): - func_implicit_damping( - dofs_state=dofs_state, - dofs_info=dofs_info, - entities_info=entities_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - is_backward=is_backward, - ) - - func_integrate( - dofs_state=dofs_state, - links_info=links_info, - joints_info=joints_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - is_backward=is_backward, - ) - - if qd.static(static_rigid_sim_config.use_hibernation): - func_hibernate__for_all_awake_islands_either_hiberanate_or_update_aabb_sort_buffer( - dofs_state=dofs_state, - entities_state=entities_state, - entities_info=entities_info, - links_state=links_state, - geoms_state=geoms_state, - collider_state=collider_state, - unused__rigid_global_info=rigid_global_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - contact_island_state=contact_island_state, - errno=errno, - ) - func_aggregate_awake_entities( - entities_state=entities_state, - entities_info=entities_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - ) - - if qd.static(not is_backward): - func_copy_next_to_curr( - dofs_state=dofs_state, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - errno=errno, - ) - - if qd.static(not static_rigid_sim_config.enable_mujoco_compatibility): - func_update_cartesian_space( - links_state=links_state, - links_info=links_info, - joints_state=joints_state, - joints_info=joints_info, - dofs_state=dofs_state, - dofs_info=dofs_info, - geoms_state=geoms_state, - geoms_info=geoms_info, - entities_info=entities_info, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - force_update_fixed_geoms=False, - is_backward=is_backward, - ) - func_forward_velocity( - entities_info=entities_info, - links_info=links_info, - links_state=links_state, - joints_info=joints_info, - dofs_state=dofs_state, - rigid_global_info=rigid_global_info, - static_rigid_sim_config=static_rigid_sim_config, - is_backward=is_backward, - ) From 2169828ebfa5679ac00c2915af85910cd70f7323 Mon Sep 17 00:00:00 2001 From: hugh Date: Sun, 17 May 2026 21:11:46 +0000 Subject: [PATCH 03/10] [Cleanup] tests: update kernel_step_1 refs to migrated RigidSolver.step_1 - test_grad.py::test_diff_solver now calls rigid_solver.step_1(...) (method form) instead of the deleted top-level kernel_step_1. - test_quadrants.py: access RigidSolver.step_1._primal (via the class-level QuadrantsCallable descriptor) for the cache observation checks. The attribute chain works because QuadrantsCallable directly exposes _primal. Smoke-tested both forward and backward paths (scene.step() + rigid_solver.step_2.grad). --- tests/test_grad.py | 16 +--------------- tests/test_quadrants.py | 14 +++++++------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/tests/test_grad.py b/tests/test_grad.py index d6276fd6e8..72a1560836 100644 --- a/tests/test_grad.py +++ b/tests/test_grad.py @@ -217,7 +217,6 @@ def compute_dL_error(dL_dx, x_type): @pytest.mark.parametrize("backend", [gs.cpu, gs.gpu]) def test_diff_solver(monkeypatch): from genesis.engine.solvers.rigid.constraint.solver import func_solve_init, func_solve_body - from genesis.engine.solvers.rigid.rigid_solver import kernel_step_1 RTOL = 1e-4 @@ -270,20 +269,7 @@ def constraint_solver_resolve(): # Step once to compute constraint solver's inputs: [mass], [jac], [aref], [efc_D], [force]. We do not call the # entire scene.step() because it will overwrite the necessary information that we need to compute the gradients. - kernel_step_1( - links_state=rigid_solver.links_state, - links_info=rigid_solver.links_info, - joints_state=rigid_solver.joints_state, - joints_info=rigid_solver.joints_info, - dofs_state=rigid_solver.dofs_state, - dofs_info=rigid_solver.dofs_info, - geoms_state=rigid_solver.geoms_state, - geoms_info=rigid_solver.geoms_info, - entities_state=rigid_solver.entities_state, - entities_info=rigid_solver.entities_info, - rigid_global_info=rigid_solver._rigid_global_info, - static_rigid_sim_config=rigid_solver._static_rigid_sim_config, - contact_island_state=constraint_solver.contact_island.contact_island_state, + rigid_solver.step_1( is_forward_pos_updated=True, is_forward_vel_updated=True, is_backward=False, diff --git a/tests/test_quadrants.py b/tests/test_quadrants.py index a953904c62..e45bc927a9 100644 --- a/tests/test_quadrants.py +++ b/tests/test_quadrants.py @@ -208,11 +208,11 @@ def gs_num_envs_child(args: list[str]): scene.rigid_solver.collider.detection() qd.sync() - from genesis.engine.solvers.rigid.rigid_solver import kernel_step_1 + from genesis.engine.solvers.rigid.rigid_solver import RigidSolver - assert kernel_step_1._primal.fe_ll_cache_observations.cache_hit == args.expected_fe_ll_cache_hit - assert kernel_step_1._primal.src_ll_cache_observations.cache_key_generated == args.expected_use_src_ll_cache - assert kernel_step_1._primal.src_ll_cache_observations.cache_loaded == args.expected_src_ll_cache_hit + assert RigidSolver.step_1._primal.fe_ll_cache_observations.cache_hit == args.expected_fe_ll_cache_hit + assert RigidSolver.step_1._primal.src_ll_cache_observations.cache_key_generated == args.expected_use_src_ll_cache + assert RigidSolver.step_1._primal.src_ll_cache_observations.cache_loaded == args.expected_src_ll_cache_hit sys.exit(RET_SUCCESS) @@ -308,10 +308,10 @@ def change_scene(args: list[str]): z = qpos.reshape((*qpos.shape[:-1], args.n_objs, 7))[..., 2] assert_allclose(z, 0.2, atol=1e-3) - from genesis.engine.solvers.rigid.rigid_solver import kernel_step_1 + from genesis.engine.solvers.rigid.rigid_solver import RigidSolver - assert kernel_step_1._primal.src_ll_cache_observations.cache_validated == args.expected_src_ll_cache_hit - assert kernel_step_1._primal.src_ll_cache_observations.cache_loaded == args.expected_src_ll_cache_hit + assert RigidSolver.step_1._primal.src_ll_cache_observations.cache_validated == args.expected_src_ll_cache_hit + assert RigidSolver.step_1._primal.src_ll_cache_observations.cache_loaded == args.expected_src_ll_cache_hit sys.exit(RET_SUCCESS) From c653c24f18f987d435c6897ce223e3915d61b71d Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Fri, 15 May 2026 10:48:00 -0700 Subject: [PATCH 04/10] rigid/constraint/solver: uptake quadrants subgroup _tiled rename ``qd.simt.subgroup`` recently moved to the suffix convention where the sized form is ``_tiled(v, log2_size)`` and the no-suffix form ``(v)`` operates over the full subgroup (quadrants commit d07644e4, "rename to _tiled suffix convention"). ``solver._kernel_mass_factor_solve_chol_subgroup_16`` was still calling the old ``reduce_all_add(dot, 4)`` two-arg form, which no longer exists on the new API (the 1-arg form ignores tile width and reduces over the full subgroup, which would give wrong results for the 16-thread parallel dot the kernel implements). Switch the two call sites to ``reduce_all_add_tiled(dot, 4)`` to preserve the intended 2**4 = 16 lane tile. --- genesis/engine/solvers/rigid/constraint/solver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/genesis/engine/solvers/rigid/constraint/solver.py b/genesis/engine/solvers/rigid/constraint/solver.py index 2c511316ca..43552b1e8c 100644 --- a/genesis/engine/solvers/rigid/constraint/solver.py +++ b/genesis/engine/solvers/rigid/constraint/solver.py @@ -1864,7 +1864,7 @@ def func_cholesky_and_solve_fused_tiled( while j < i_d: dot = dot + L_sh[i_d, j] * v_sh[j] j = j + 16 - dot = qd.simt.subgroup.reduce_all_add(dot, 4) + dot = qd.simt.subgroup.reduce_all_add_tiled(dot, 4) if tid == 0: v_sh[i_d] = (v_sh[i_d] - dot) / L_sh[i_d, i_d] qd.simt.block.sync() @@ -1877,7 +1877,7 @@ def func_cholesky_and_solve_fused_tiled( while j < n_dofs: dot = dot + L_sh[j, i_d] * v_sh[j] j = j + 16 - dot = qd.simt.subgroup.reduce_all_add(dot, 4) + dot = qd.simt.subgroup.reduce_all_add_tiled(dot, 4) if tid == 0: v_sh[i_d] = (v_sh[i_d] - dot) / L_sh[i_d, i_d] qd.simt.block.sync() From ef22c7d9f07ed43ef9ac3cb7591a7d3ab0283996 Mon Sep 17 00:00:00 2001 From: Alexis Duburcq Date: Sat, 16 May 2026 21:45:23 +0200 Subject: [PATCH 05/10] rigid/constraint: uptake _tiled rename in linesearch coop + update_constraint_cost coop --- genesis/engine/solvers/rigid/constraint/solver.py | 8 ++++---- .../engine/solvers/rigid/constraint/solver_breakdown.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/genesis/engine/solvers/rigid/constraint/solver.py b/genesis/engine/solvers/rigid/constraint/solver.py index 43552b1e8c..041fb5a5eb 100644 --- a/genesis/engine/solvers/rigid/constraint/solver.py +++ b/genesis/engine/solvers/rigid/constraint/solver.py @@ -2488,7 +2488,7 @@ def _func_linesearch_eval_constraints_at_n_alphas_coop( """Cooperative (32-lane subgroup) variant of ``_func_linesearch_eval_constraints_at_n_alphas_serial``. All 32 lanes call this with their own ``tid``; the constraint loop is strided by 32, then each - accumulator is reduced across the warp via ``subgroup.reduce_all_add(_, 5)`` so every lane ends + accumulator is reduced across the warp via ``subgroup.reduce_all_add_tiled(_, 5)`` so every lane ends up with identical return values. Returns the same 3 ``qd.Vector(3)``s ``(t0, t1, t2)`` as the serial inner. """ ne = constraint_state.n_constraints_equality[i_b] @@ -2559,9 +2559,9 @@ def _func_linesearch_eval_constraints_at_n_alphas_coop( # Warp-tree reduction: every lane's 9 partial sums collapse into the per-env totals; after this # all 32 lanes hold identical scalars. The `5` is log2(32) tree levels. for k in qd.static(range(n_alphas)): - t_0[k] = qd.simt.subgroup.reduce_all_add(t_0[k], 5) - t_1[k] = qd.simt.subgroup.reduce_all_add(t_1[k], 5) - t_2[k] = qd.simt.subgroup.reduce_all_add(t_2[k], 5) + t_0[k] = qd.simt.subgroup.reduce_all_add_tiled(t_0[k], 5) + t_1[k] = qd.simt.subgroup.reduce_all_add_tiled(t_1[k], 5) + t_2[k] = qd.simt.subgroup.reduce_all_add_tiled(t_2[k], 5) t0 = qd.Vector([t_0[0], t_1[0], t_2[0]]) t1 = qd.Vector([t_0[1], t_1[1], t_2[1]]) diff --git a/genesis/engine/solvers/rigid/constraint/solver_breakdown.py b/genesis/engine/solvers/rigid/constraint/solver_breakdown.py index 0ca71c5760..cd329ae0a6 100644 --- a/genesis/engine/solvers/rigid/constraint/solver_breakdown.py +++ b/genesis/engine/solvers/rigid/constraint/solver_breakdown.py @@ -598,7 +598,7 @@ def _func_update_constraint_cost_coop( static_rigid_sim_config: qd.template(), ): """Warp-per-env cooperative variant of ``_func_update_constraint_cost``: 32 lanes stride through dofs and - constraints, with the final per-env scalar produced by ``subgroup.reduce_all_add``. Per-lane reads of + constraints, with the final per-env scalar produced by ``subgroup.reduce_all_add_tiled``. Per-lane reads of Jaref/efc_D/active are coalesced under the [_B, len_constraints_] physical layout (i.e. when those layout-flippable constraint-state tensors were allocated with ``layout=(1, 0)``).""" _B = constraint_state.grad.shape[1] @@ -648,8 +648,8 @@ def _func_update_constraint_cost_coop( cost_i += linear_neg * f * (-0.5 * rf - Jaref_c) + linear_pos * f * (-0.5 * rf + Jaref_c) i_c = i_c + _K - cost_i = qd.simt.subgroup.reduce_all_add(cost_i, 5) - gauss_i = qd.simt.subgroup.reduce_all_add(gauss_i, 5) + cost_i = qd.simt.subgroup.reduce_all_add_tiled(cost_i, 5) + gauss_i = qd.simt.subgroup.reduce_all_add_tiled(gauss_i, 5) if tid == 0: constraint_state.gauss[i_b] = gauss_i From b7515d86a1821fc18056a513431fb6c84e9a4089 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 18 May 2026 13:46:51 -0700 Subject: [PATCH 06/10] [Fix] rigid solver: qd.static reads via static_rigid_sim_config, not module-level globals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two qd.static expressions captured module-level mutable globals (`gs.backend`, `gs.qd_float`) whose values can vary across process runs but aren't part of the kernel's fastcache key — risking cross-process cache collisions where a kernel compiled for one backend / precision gets loaded for another. Fix by routing both through declared `static_rigid_sim_config` fields: * `constraint/solver.py::add_frictionloss_constraints`: `gs.backend != gs.metal` -> `static_rigid_sim_config.backend != gs.metal` * `constraint/solver.py::func_cholesky_solve_tiled`: `gs.qd_float == qd.f32` -> `static_rigid_sim_config.is_qd_float_f32` * `array_class.py::RigidSimStaticConfig`: new `is_qd_float_f32: bool` field, populated by `RigidSolver._build_static_config` and `KinematicSolver._build_static_config`. All `qd.static(...)` expressions in fastcache-enabled kernels must derive from declared kernel parameters (now documented in the fastcache user guide). Co-authored-by: Cursor --- genesis/engine/solvers/kinematic_solver.py | 2 ++ genesis/engine/solvers/rigid/constraint/solver.py | 8 ++++++-- genesis/engine/solvers/rigid/rigid_solver.py | 1 + genesis/utils/array_class.py | 4 ++++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/genesis/engine/solvers/kinematic_solver.py b/genesis/engine/solvers/kinematic_solver.py index 8c91fbf280..223af20c52 100644 --- a/genesis/engine/solvers/kinematic_solver.py +++ b/genesis/engine/solvers/kinematic_solver.py @@ -1,6 +1,7 @@ from typing import TYPE_CHECKING, Literal import numpy as np +import quadrants as qd import torch import genesis as gs @@ -220,6 +221,7 @@ def _build_static_config(self): # Static config with all physics disabled self._static_rigid_sim_config = array_class.RigidSimStaticConfig( backend=gs.backend, + is_qd_float_f32=(gs.qd_float == qd.f32), para_level=self.sim._para_level, requires_grad=False, use_hibernation=False, diff --git a/genesis/engine/solvers/rigid/constraint/solver.py b/genesis/engine/solvers/rigid/constraint/solver.py index c56fb55a9c..a28f0e4cc3 100644 --- a/genesis/engine/solvers/rigid/constraint/solver.py +++ b/genesis/engine/solvers/rigid/constraint/solver.py @@ -1294,7 +1294,9 @@ def add_frictionloss_constraints( # if `serialize=True`... qd.loop_config( name="add_frictionloss_constraints", - serialize=qd.static(static_rigid_sim_config.para_level < gs.PARA_LEVEL.ALL and gs.backend != gs.metal), + serialize=qd.static( + static_rigid_sim_config.para_level < gs.PARA_LEVEL.ALL and static_rigid_sim_config.backend != gs.metal + ), ) for i_b in range(_B): constraint_state.n_constraints_frictionloss[i_b] = 0 @@ -2115,7 +2117,9 @@ def func_cholesky_solve_tiled( # Performance is optimal for BLOCK_DIM = 64 BLOCK_DIM = qd.static(64) MAX_DOFS = qd.static(static_rigid_sim_config.tiled_n_dofs) - ENABLE_WARP_REDUCTION = qd.static(static_rigid_sim_config.backend == gs.cuda and gs.qd_float == qd.f32) + ENABLE_WARP_REDUCTION = qd.static( + static_rigid_sim_config.backend == gs.cuda and static_rigid_sim_config.is_qd_float_f32 + ) WARP_SIZE = qd.static(32) NUM_WARPS = qd.static(BLOCK_DIM // WARP_SIZE) diff --git a/genesis/engine/solvers/rigid/rigid_solver.py b/genesis/engine/solvers/rigid/rigid_solver.py index a8db91ba36..6047c5db6a 100644 --- a/genesis/engine/solvers/rigid/rigid_solver.py +++ b/genesis/engine/solvers/rigid/rigid_solver.py @@ -426,6 +426,7 @@ def _should_transpose_constraint_layout(self) -> bool: def _build_static_config(self): static_rigid_sim_config = dict( backend=gs.backend, + is_qd_float_f32=(gs.qd_float == qd.f32), para_level=self.sim._para_level, requires_grad=self.sim.options.requires_grad, use_hibernation=self._use_hibernation, diff --git a/genesis/utils/array_class.py b/genesis/utils/array_class.py index f5ce99bcb5..c51b43a44d 100644 --- a/genesis/utils/array_class.py +++ b/genesis/utils/array_class.py @@ -2051,6 +2051,10 @@ def get_rigid_adjoint_cache(solver): @qd.data_oriented class RigidSimStaticConfig(metaclass=AutoInitMeta): backend: int + # Whether ``gs.qd_float == qd.f32`` (i.e. single-precision floats). Declared on the static config + # so kernels can read it via ``qd.static(static_rigid_sim_config.is_qd_float_f32)`` instead of + # the module-level ``gs.qd_float``, keeping the fastcache key sensitive to precision. + is_qd_float_f32: bool para_level: int enable_collision: bool use_hibernation: bool From 1a545e0bdfb5cc2f950d70e7affe87d56e43fc86 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Tue, 19 May 2026 03:30:16 -0400 Subject: [PATCH 07/10] 0.8.1b1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2d30e22377..aeede3f8ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" requires-python = ">=3.10,<3.14" dependencies = [ "psutil", - "quadrants==0.8.0", + "quadrants==0.8.1b1", "pydantic>=2.11.0", "numpy>=1.26.4", "frozendict", From 2336015274074f3f7833b16a73991601ffce2ab7 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Tue, 19 May 2026 03:31:20 -0400 Subject: [PATCH 08/10] precommit --- genesis/engine/solvers/rigid/rigid_solver.py | 1 - 1 file changed, 1 deletion(-) diff --git a/genesis/engine/solvers/rigid/rigid_solver.py b/genesis/engine/solvers/rigid/rigid_solver.py index 6047c5db6a..5dc54ca185 100644 --- a/genesis/engine/solvers/rigid/rigid_solver.py +++ b/genesis/engine/solvers/rigid/rigid_solver.py @@ -2843,4 +2843,3 @@ def equalities(self): if self.is_built: return self._equalities return gs.List(equality for entity in self._entities for equality in entity.equalities) - From 4b87a373d0c1de8b44d73096350ef0db1917229e Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Tue, 19 May 2026 06:42:05 -0400 Subject: [PATCH 09/10] 2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index aeede3f8ac..0fbd8a6528 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" requires-python = ">=3.10,<3.14" dependencies = [ "psutil", - "quadrants==0.8.1b1", + "quadrants==0.8.1b2", "pydantic>=2.11.0", "numpy>=1.26.4", "frozendict", From 53a3e78625ab16fc2329e807ac3b7510c1b83287 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Tue, 19 May 2026 16:26:05 -0400 Subject: [PATCH 10/10] 1.0.1b2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0fbd8a6528..43ad821683 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" requires-python = ">=3.10,<3.14" dependencies = [ "psutil", - "quadrants==0.8.1b2", + "quadrants==1.0.1b2", "pydantic>=2.11.0", "numpy>=1.26.4", "frozendict",