@@ -90,26 +90,24 @@ define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
90
90
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8
91
91
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9
92
92
; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11
93
- ; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v0 , v[0:1], off offset:10
93
+ ; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v13 , v[0:1], off offset:10
94
94
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0xa
95
95
; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0
96
- ; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v1 , v3, 8, v2
96
+ ; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v0 , v3, 8, v2
97
97
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x8
98
- ; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v3 , 16, v4 :: v_dual_lshlrev_b32 v2 , 24, v5
98
+ ; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v2 , 16, v4 :: v_dual_lshlrev_b32 v1 , 24, v5
99
99
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x6
100
- ; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v4 , v7, 8, v6
100
+ ; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v3 , v7, 8, v6
101
101
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4
102
- ; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v6, 16, v8 :: v_dual_lshlrev_b32 v5, 24, v9
102
+ ; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v5, 16, v8 :: v_dual_lshlrev_b32 v4, 24, v9
103
+ ; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v0
103
104
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2
104
- ; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10
105
- ; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x1
106
- ; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12
105
+ ; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v10
107
106
; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
108
- ; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0
109
- ; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1
110
- ; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4
111
- ; GFX1250-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3)
112
- ; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7
107
+ ; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v7, 24, v12 :: v_dual_lshlrev_b32 v8, 16, v13
108
+ ; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v3
109
+ ; GFX1250-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_2)
110
+ ; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v6
113
111
; GFX1250-NOUNALIGNED-NEXT: s_set_pc_i64 s[30:31]
114
112
;
115
113
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
@@ -942,7 +940,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
942
940
;
943
941
; GFX1250-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
944
942
; GFX1250-NOUNALIGNED: ; %bb.0:
945
- ; GFX1250-NOUNALIGNED-NEXT: s_clause 0xa
943
+ ; GFX1250-NOUNALIGNED-NEXT: s_clause 0xb
946
944
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s2, s[0:1], 0x1
947
945
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s3, s[0:1], 0x3
948
946
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s4, s[0:1], 0x2
@@ -954,27 +952,26 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
954
952
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s10, s[0:1], 0x0
955
953
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s11, s[0:1], 0x4
956
954
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s12, s[0:1], 0xa
957
- ; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0
958
- ; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s1, s[0:1], 0x8
955
+ ; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s13, s[0:1], 0x8
959
956
; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
960
957
; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s0, s2, 8
961
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2 , s3, 24
962
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s3 , s4, 16
963
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s4 , s5, 8
964
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s3
965
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s5 , s6, 24
966
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s6 , s7, 16
967
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s7 , s8, 8
958
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s1 , s3, 24
959
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2 , s4, 16
960
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s3 , s5, 8
961
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s1, s2
962
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s4 , s6, 24
963
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s5 , s7, 16
964
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s6 , s8, 8
968
965
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s0, s10
969
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s8, s9, 24
970
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s2, s0
971
- ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s2, s12, 16
972
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s3, s4, s11
973
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s4, s5, s6
966
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s7, s9, 24
967
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s0, s1, s0
968
+ ; GFX1250-NOUNALIGNED-NEXT: s_lshl_b32 s1, s12, 16
969
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s3, s11
970
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s3, s4, s5
971
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s4, s6, s13
974
972
; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s5, s7, s1
975
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s8, s2
976
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s4, s3
977
- ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s5
973
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s1, s3, s2
974
+ ; GFX1250-NOUNALIGNED-NEXT: s_or_b32 s2, s5, s4
978
975
; GFX1250-NOUNALIGNED-NEXT: ; return to shader part epilog
979
976
;
980
977
; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
@@ -1351,11 +1348,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg
1351
1348
}
1352
1349
1353
1350
define amdgpu_ps <3 x i32 > @s_load_constant_v3i32_align4 (ptr addrspace (4 ) inreg %ptr ) {
1354
- ; GFX12-LABEL: s_load_constant_v3i32_align4:
1355
- ; GFX12: ; %bb.0:
1356
- ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1357
- ; GFX12-NEXT: s_wait_kmcnt 0x0
1358
- ; GFX12-NEXT: ; return to shader part epilog
1351
+ ; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align4:
1352
+ ; GFX12-UNALIGNED: ; %bb.0:
1353
+ ; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1354
+ ; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
1355
+ ; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
1356
+ ;
1357
+ ; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align4:
1358
+ ; GFX12-NOUNALIGNED: ; %bb.0:
1359
+ ; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1360
+ ; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
1361
+ ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
1362
+ ;
1363
+ ; GFX1250-LABEL: s_load_constant_v3i32_align4:
1364
+ ; GFX1250: ; %bb.0:
1365
+ ; GFX1250-NEXT: s_mov_b32 s4, s0
1366
+ ; GFX1250-NEXT: s_mov_b32 s5, s1
1367
+ ; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
1368
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
1369
+ ; GFX1250-NEXT: ; return to shader part epilog
1359
1370
;
1360
1371
; GFX9-LABEL: s_load_constant_v3i32_align4:
1361
1372
; GFX9: ; %bb.0:
@@ -1388,11 +1399,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg
1388
1399
}
1389
1400
1390
1401
define amdgpu_ps i96 @s_load_constant_i96_align8 (ptr addrspace (4 ) inreg %ptr ) {
1391
- ; GFX12-LABEL: s_load_constant_i96_align8:
1392
- ; GFX12: ; %bb.0:
1393
- ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1394
- ; GFX12-NEXT: s_wait_kmcnt 0x0
1395
- ; GFX12-NEXT: ; return to shader part epilog
1402
+ ; GFX12-UNALIGNED-LABEL: s_load_constant_i96_align8:
1403
+ ; GFX12-UNALIGNED: ; %bb.0:
1404
+ ; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1405
+ ; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
1406
+ ; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
1407
+ ;
1408
+ ; GFX12-NOUNALIGNED-LABEL: s_load_constant_i96_align8:
1409
+ ; GFX12-NOUNALIGNED: ; %bb.0:
1410
+ ; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1411
+ ; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
1412
+ ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
1413
+ ;
1414
+ ; GFX1250-LABEL: s_load_constant_i96_align8:
1415
+ ; GFX1250: ; %bb.0:
1416
+ ; GFX1250-NEXT: s_mov_b32 s4, s0
1417
+ ; GFX1250-NEXT: s_mov_b32 s5, s1
1418
+ ; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
1419
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
1420
+ ; GFX1250-NEXT: ; return to shader part epilog
1396
1421
;
1397
1422
; GFX9-LABEL: s_load_constant_i96_align8:
1398
1423
; GFX9: ; %bb.0:
@@ -1425,11 +1450,25 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
1425
1450
}
1426
1451
1427
1452
define amdgpu_ps <3 x i32 > @s_load_constant_v3i32_align8 (ptr addrspace (4 ) inreg %ptr ) {
1428
- ; GFX12-LABEL: s_load_constant_v3i32_align8:
1429
- ; GFX12: ; %bb.0:
1430
- ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1431
- ; GFX12-NEXT: s_wait_kmcnt 0x0
1432
- ; GFX12-NEXT: ; return to shader part epilog
1453
+ ; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align8:
1454
+ ; GFX12-UNALIGNED: ; %bb.0:
1455
+ ; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1456
+ ; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
1457
+ ; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
1458
+ ;
1459
+ ; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align8:
1460
+ ; GFX12-NOUNALIGNED: ; %bb.0:
1461
+ ; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1462
+ ; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
1463
+ ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
1464
+ ;
1465
+ ; GFX1250-LABEL: s_load_constant_v3i32_align8:
1466
+ ; GFX1250: ; %bb.0:
1467
+ ; GFX1250-NEXT: s_mov_b32 s4, s0
1468
+ ; GFX1250-NEXT: s_mov_b32 s5, s1
1469
+ ; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
1470
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
1471
+ ; GFX1250-NEXT: ; return to shader part epilog
1433
1472
;
1434
1473
; GFX9-LABEL: s_load_constant_v3i32_align8:
1435
1474
; GFX9: ; %bb.0:
@@ -1462,11 +1501,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg
1462
1501
}
1463
1502
1464
1503
define amdgpu_ps <3 x i32 > @s_load_constant_v6i16_align8 (ptr addrspace (4 ) inreg %ptr ) {
1465
- ; GFX12-LABEL: s_load_constant_v6i16_align8:
1466
- ; GFX12: ; %bb.0:
1467
- ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1468
- ; GFX12-NEXT: s_wait_kmcnt 0x0
1469
- ; GFX12-NEXT: ; return to shader part epilog
1504
+ ; GFX12-UNALIGNED-LABEL: s_load_constant_v6i16_align8:
1505
+ ; GFX12-UNALIGNED: ; %bb.0:
1506
+ ; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1507
+ ; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
1508
+ ; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
1509
+ ;
1510
+ ; GFX12-NOUNALIGNED-LABEL: s_load_constant_v6i16_align8:
1511
+ ; GFX12-NOUNALIGNED: ; %bb.0:
1512
+ ; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1513
+ ; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
1514
+ ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
1515
+ ;
1516
+ ; GFX1250-LABEL: s_load_constant_v6i16_align8:
1517
+ ; GFX1250: ; %bb.0:
1518
+ ; GFX1250-NEXT: s_mov_b32 s4, s0
1519
+ ; GFX1250-NEXT: s_mov_b32 s5, s1
1520
+ ; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
1521
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
1522
+ ; GFX1250-NEXT: ; return to shader part epilog
1470
1523
;
1471
1524
; GFX9-LABEL: s_load_constant_v6i16_align8:
1472
1525
; GFX9: ; %bb.0:
@@ -1500,24 +1553,64 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg
1500
1553
}
1501
1554
1502
1555
define amdgpu_ps <12 x i8 > @s_load_constant_v12i8_align8 (ptr addrspace (4 ) inreg %ptr ) {
1503
- ; GFX12-LABEL: s_load_constant_v12i8_align8:
1504
- ; GFX12: ; %bb.0:
1505
- ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1506
- ; GFX12-NEXT: s_wait_kmcnt 0x0
1507
- ; GFX12-NEXT: s_lshr_b32 s13, s0, 8
1508
- ; GFX12-NEXT: s_lshr_b32 s12, s0, 16
1509
- ; GFX12-NEXT: s_lshr_b32 s3, s0, 24
1510
- ; GFX12-NEXT: s_lshr_b32 s5, s1, 8
1511
- ; GFX12-NEXT: s_lshr_b32 s6, s1, 16
1512
- ; GFX12-NEXT: s_lshr_b32 s7, s1, 24
1513
- ; GFX12-NEXT: s_lshr_b32 s9, s2, 8
1514
- ; GFX12-NEXT: s_lshr_b32 s10, s2, 16
1515
- ; GFX12-NEXT: s_lshr_b32 s11, s2, 24
1516
- ; GFX12-NEXT: s_mov_b32 s4, s1
1517
- ; GFX12-NEXT: s_mov_b32 s8, s2
1518
- ; GFX12-NEXT: s_mov_b32 s1, s13
1519
- ; GFX12-NEXT: s_mov_b32 s2, s12
1520
- ; GFX12-NEXT: ; return to shader part epilog
1556
+ ; GFX12-UNALIGNED-LABEL: s_load_constant_v12i8_align8:
1557
+ ; GFX12-UNALIGNED: ; %bb.0:
1558
+ ; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1559
+ ; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
1560
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s13, s0, 8
1561
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s12, s0, 16
1562
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s3, s0, 24
1563
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s5, s1, 8
1564
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s6, s1, 16
1565
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s7, s1, 24
1566
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s9, s2, 8
1567
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s10, s2, 16
1568
+ ; GFX12-UNALIGNED-NEXT: s_lshr_b32 s11, s2, 24
1569
+ ; GFX12-UNALIGNED-NEXT: s_mov_b32 s4, s1
1570
+ ; GFX12-UNALIGNED-NEXT: s_mov_b32 s8, s2
1571
+ ; GFX12-UNALIGNED-NEXT: s_mov_b32 s1, s13
1572
+ ; GFX12-UNALIGNED-NEXT: s_mov_b32 s2, s12
1573
+ ; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
1574
+ ;
1575
+ ; GFX12-NOUNALIGNED-LABEL: s_load_constant_v12i8_align8:
1576
+ ; GFX12-NOUNALIGNED: ; %bb.0:
1577
+ ; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1578
+ ; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
1579
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s13, s0, 8
1580
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s12, s0, 16
1581
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s3, s0, 24
1582
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s5, s1, 8
1583
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s6, s1, 16
1584
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s7, s1, 24
1585
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s9, s2, 8
1586
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s10, s2, 16
1587
+ ; GFX12-NOUNALIGNED-NEXT: s_lshr_b32 s11, s2, 24
1588
+ ; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s4, s1
1589
+ ; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s8, s2
1590
+ ; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s1, s13
1591
+ ; GFX12-NOUNALIGNED-NEXT: s_mov_b32 s2, s12
1592
+ ; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
1593
+ ;
1594
+ ; GFX1250-LABEL: s_load_constant_v12i8_align8:
1595
+ ; GFX1250: ; %bb.0:
1596
+ ; GFX1250-NEXT: s_mov_b32 s4, s0
1597
+ ; GFX1250-NEXT: s_mov_b32 s5, s1
1598
+ ; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
1599
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
1600
+ ; GFX1250-NEXT: s_lshr_b32 s13, s0, 8
1601
+ ; GFX1250-NEXT: s_lshr_b32 s12, s0, 16
1602
+ ; GFX1250-NEXT: s_lshr_b32 s3, s0, 24
1603
+ ; GFX1250-NEXT: s_lshr_b32 s5, s1, 8
1604
+ ; GFX1250-NEXT: s_lshr_b32 s6, s1, 16
1605
+ ; GFX1250-NEXT: s_lshr_b32 s7, s1, 24
1606
+ ; GFX1250-NEXT: s_lshr_b32 s9, s2, 8
1607
+ ; GFX1250-NEXT: s_lshr_b32 s10, s2, 16
1608
+ ; GFX1250-NEXT: s_lshr_b32 s11, s2, 24
1609
+ ; GFX1250-NEXT: s_mov_b32 s4, s1
1610
+ ; GFX1250-NEXT: s_mov_b32 s8, s2
1611
+ ; GFX1250-NEXT: s_mov_b32 s1, s13
1612
+ ; GFX1250-NEXT: s_mov_b32 s2, s12
1613
+ ; GFX1250-NEXT: ; return to shader part epilog
1521
1614
;
1522
1615
; GFX9-LABEL: s_load_constant_v12i8_align8:
1523
1616
; GFX9: ; %bb.0:
0 commit comments