Skip to content

Commit

Permalink
instcountci: x87 fst/fld optimization for different addrmodes
Browse files Browse the repository at this point in the history
  • Loading branch information
pmatos committed Jan 14, 2025
1 parent 58a034b commit 8191c49
Show file tree
Hide file tree
Showing 11 changed files with 29,747 additions and 2,621 deletions.
95 changes: 32 additions & 63 deletions unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
},
"Psychonauts matrix swizzle": {
"x86InstructionCount": 103,
"ExpectedInstructionCount": 144,
"ExpectedInstructionCount": 113,
"Comment": [
"Hottest block in Windows Psychonauts",
"Doing a 4x4 32-bit float matrix swizzle",
Expand Down Expand Up @@ -254,131 +254,100 @@
"stur w7, [x9, #-68]",
"ldur w4, [x9, #-68]",
"ldr s2, [x4]",
"sub w20, w9, #0x40 (64)",
"str s2, [x20]",
"stur s2, [x9, #-64]",
"ldur w7, [x9, #-68]",
"ldr s2, [x7, #16]",
"sub w20, w9, #0x3c (60)",
"str s2, [x20]",
"stur s2, [x9, #-60]",
"ldur w5, [x9, #-68]",
"ldr s2, [x5, #32]",
"sub w20, w9, #0x38 (56)",
"str s2, [x20]",
"stur s2, [x9, #-56]",
"ldur w4, [x9, #-68]",
"ldr s2, [x4, #48]",
"sub w20, w9, #0x34 (52)",
"str s2, [x20]",
"stur s2, [x9, #-52]",
"ldur w7, [x9, #-68]",
"ldr s2, [x7, #4]",
"sub w20, w9, #0x30 (48)",
"str s2, [x20]",
"stur s2, [x9, #-48]",
"ldur w5, [x9, #-68]",
"ldr s2, [x5, #20]",
"sub w20, w9, #0x2c (44)",
"str s2, [x20]",
"stur s2, [x9, #-44]",
"ldur w4, [x9, #-68]",
"ldr s2, [x4, #36]",
"sub w20, w9, #0x28 (40)",
"str s2, [x20]",
"stur s2, [x9, #-40]",
"ldur w7, [x9, #-68]",
"ldr s2, [x7, #52]",
"sub w20, w9, #0x24 (36)",
"str s2, [x20]",
"stur s2, [x9, #-36]",
"ldur w5, [x9, #-68]",
"ldr s2, [x5, #8]",
"sub w20, w9, #0x20 (32)",
"str s2, [x20]",
"stur s2, [x9, #-32]",
"ldur w4, [x9, #-68]",
"ldr s2, [x4, #24]",
"sub w20, w9, #0x1c (28)",
"str s2, [x20]",
"stur s2, [x9, #-28]",
"ldur w7, [x9, #-68]",
"ldr s2, [x7, #40]",
"sub w20, w9, #0x18 (24)",
"str s2, [x20]",
"stur s2, [x9, #-24]",
"ldur w5, [x9, #-68]",
"ldr s2, [x5, #56]",
"sub w20, w9, #0x14 (20)",
"str s2, [x20]",
"stur s2, [x9, #-20]",
"ldur w4, [x9, #-68]",
"ldr s2, [x4, #12]",
"sub w20, w9, #0x10 (16)",
"str s2, [x20]",
"stur s2, [x9, #-16]",
"ldur w7, [x9, #-68]",
"ldr s2, [x7, #28]",
"sub w20, w9, #0xc (12)",
"str s2, [x20]",
"stur s2, [x9, #-12]",
"ldur w5, [x9, #-68]",
"ldr s2, [x5, #44]",
"sub w20, w9, #0x8 (8)",
"str s2, [x20]",
"stur s2, [x9, #-8]",
"ldur w4, [x9, #-68]",
"ldr s2, [x4, #60]",
"sub w20, w9, #0x4 (4)",
"str s2, [x20]",
"stur s2, [x9, #-4]",
"ldr w7, [x9, #8]",
"ldur s2, [x9, #-64]",
"str s2, [x7]",
"ldr w5, [x9, #8]",
"ldur s2, [x9, #-60]",
"add w20, w5, #0x4 (4)",
"str s2, [x20]",
"str s2, [x5, #4]",
"ldr w4, [x9, #8]",
"ldur s2, [x9, #-56]",
"add w20, w4, #0x8 (8)",
"str s2, [x20]",
"str s2, [x4, #8]",
"ldr w7, [x9, #8]",
"ldur s2, [x9, #-52]",
"add w20, w7, #0xc (12)",
"str s2, [x20]",
"str s2, [x7, #12]",
"ldr w5, [x9, #8]",
"ldur s2, [x9, #-48]",
"add w20, w5, #0x10 (16)",
"str s2, [x20]",
"str s2, [x5, #16]",
"ldr w4, [x9, #8]",
"ldur s2, [x9, #-44]",
"add w20, w4, #0x14 (20)",
"str s2, [x20]",
"str s2, [x4, #20]",
"ldr w7, [x9, #8]",
"ldur s2, [x9, #-40]",
"add w20, w7, #0x18 (24)",
"str s2, [x20]",
"str s2, [x7, #24]",
"ldr w5, [x9, #8]",
"ldur s2, [x9, #-36]",
"add w20, w5, #0x1c (28)",
"str s2, [x20]",
"str s2, [x5, #28]",
"ldr w4, [x9, #8]",
"ldur s2, [x9, #-32]",
"add w20, w4, #0x20 (32)",
"str s2, [x20]",
"str s2, [x4, #32]",
"ldr w7, [x9, #8]",
"ldur s2, [x9, #-28]",
"add w20, w7, #0x24 (36)",
"str s2, [x20]",
"str s2, [x7, #36]",
"ldr w5, [x9, #8]",
"ldur s2, [x9, #-24]",
"add w20, w5, #0x28 (40)",
"str s2, [x20]",
"str s2, [x5, #40]",
"ldr w4, [x9, #8]",
"ldur s2, [x9, #-20]",
"add w20, w4, #0x2c (44)",
"str s2, [x20]",
"str s2, [x4, #44]",
"ldr w7, [x9, #8]",
"ldur s2, [x9, #-16]",
"add w20, w7, #0x30 (48)",
"str s2, [x20]",
"str s2, [x7, #48]",
"ldr w5, [x9, #8]",
"ldur s2, [x9, #-12]",
"add w20, w5, #0x34 (52)",
"str s2, [x20]",
"str s2, [x5, #52]",
"ldr w4, [x9, #8]",
"ldur s2, [x9, #-8]",
"add w20, w4, #0x38 (56)",
"str s2, [x20]",
"str s2, [x4, #56]",
"ldr w7, [x9, #8]",
"ldur s2, [x9, #-4]",
"add w20, w7, #0x3c (60)",
"str s2, [x20]",
"str s2, [x7, #60]",
"ldr w4, [x9, #8]",
"mov x8, x9",
"ldr w9, [x8], #4",
Expand Down
Loading

0 comments on commit 8191c49

Please sign in to comment.