Skip to content

Commit 7582308

Browse files
committed
[AArch64][GISel] Scalarize i128 vector shifts.
Like most other i128 operations, this adds scalarization for i128 vector shifts, which in turn allows a few other operations to legalize too.
1 parent 3a27458 commit 7582308

File tree

4 files changed

+419
-72
lines changed

4 files changed

+419
-72
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
179179
.clampNumElements(0, v2s32, v4s32)
180180
.clampNumElements(0, v2s64, v2s64)
181181
.moreElementsToNextPow2(0)
182-
.minScalarSameAs(1, 0);
182+
.minScalarSameAs(1, 0)
183+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
183184

184185
getActionDefinitionsBuilder(G_PTR_ADD)
185186
.legalFor({{p0, s64}, {v2p0, v2s64}})

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,40 @@ entry:
280280
}
281281
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
282282

283+
define <2 x i128> @abs_v4i128(<2 x i128> %a){
284+
; CHECK-SD-LABEL: abs_v4i128:
285+
; CHECK-SD: // %bb.0: // %entry
286+
; CHECK-SD-NEXT: asr x8, x1, #63
287+
; CHECK-SD-NEXT: asr x9, x3, #63
288+
; CHECK-SD-NEXT: eor x10, x0, x8
289+
; CHECK-SD-NEXT: eor x11, x1, x8
290+
; CHECK-SD-NEXT: subs x0, x10, x8
291+
; CHECK-SD-NEXT: eor x10, x2, x9
292+
; CHECK-SD-NEXT: sbc x1, x11, x8
293+
; CHECK-SD-NEXT: eor x8, x3, x9
294+
; CHECK-SD-NEXT: subs x2, x10, x9
295+
; CHECK-SD-NEXT: sbc x3, x8, x9
296+
; CHECK-SD-NEXT: ret
297+
;
298+
; CHECK-GI-LABEL: abs_v4i128:
299+
; CHECK-GI: // %bb.0: // %entry
300+
; CHECK-GI-NEXT: asr x8, x1, #63
301+
; CHECK-GI-NEXT: asr x9, x3, #63
302+
; CHECK-GI-NEXT: adds x10, x0, x8
303+
; CHECK-GI-NEXT: adc x11, x1, x8
304+
; CHECK-GI-NEXT: adds x12, x2, x9
305+
; CHECK-GI-NEXT: eor x0, x10, x8
306+
; CHECK-GI-NEXT: adc x13, x3, x9
307+
; CHECK-GI-NEXT: eor x1, x11, x8
308+
; CHECK-GI-NEXT: eor x2, x12, x9
309+
; CHECK-GI-NEXT: eor x3, x13, x9
310+
; CHECK-GI-NEXT: ret
311+
entry:
312+
%res = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %a, i1 0)
313+
ret <2 x i128> %res
314+
}
315+
declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)
316+
283317
; ===== Vectors with Non-Pow 2 Widths =====
284318

285319
define <3 x i8> @abs_v3i8(<3 x i8> %a){

llvm/test/CodeGen/AArch64/fcmp.ll

Lines changed: 209 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
22
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
33
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4-
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5-
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
6-
7-
; CHECK-GI: warning: Instruction selection used fallback path for v2f128_fp128
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f128_fp128
4+
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5+
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
96

107
define fp128 @f128_fp128(fp128 %a, fp128 %b, fp128 %d, fp128 %e) {
118
; CHECK-SD-LABEL: f128_fp128:
@@ -429,78 +426,220 @@ entry:
429426
}
430427

431428
define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d, <2 x fp128> %e) {
432-
; CHECK-LABEL: v2f128_fp128:
433-
; CHECK: // %bb.0: // %entry
434-
; CHECK-NEXT: sub sp, sp, #112
435-
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
436-
; CHECK-NEXT: .cfi_def_cfa_offset 112
437-
; CHECK-NEXT: .cfi_offset w30, -16
438-
; CHECK-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill
439-
; CHECK-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
440-
; CHECK-NEXT: mov v1.16b, v2.16b
441-
; CHECK-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill
442-
; CHECK-NEXT: bl __lttf2
443-
; CHECK-NEXT: cmp w0, #0
444-
; CHECK-NEXT: b.ge .LBB12_2
445-
; CHECK-NEXT: // %bb.1: // %entry
446-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
447-
; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
448-
; CHECK-NEXT: .LBB12_2: // %entry
449-
; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
450-
; CHECK-NEXT: bl __lttf2
451-
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
452-
; CHECK-NEXT: cmp w0, #0
453-
; CHECK-NEXT: b.ge .LBB12_4
454-
; CHECK-NEXT: // %bb.3: // %entry
455-
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
456-
; CHECK-NEXT: .LBB12_4: // %entry
457-
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
458-
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
459-
; CHECK-NEXT: add sp, sp, #112
460-
; CHECK-NEXT: ret
429+
; CHECK-SD-LABEL: v2f128_fp128:
430+
; CHECK-SD: // %bb.0: // %entry
431+
; CHECK-SD-NEXT: sub sp, sp, #112
432+
; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
433+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
434+
; CHECK-SD-NEXT: .cfi_offset w30, -16
435+
; CHECK-SD-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill
436+
; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
437+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
438+
; CHECK-SD-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill
439+
; CHECK-SD-NEXT: bl __lttf2
440+
; CHECK-SD-NEXT: cmp w0, #0
441+
; CHECK-SD-NEXT: b.ge .LBB12_2
442+
; CHECK-SD-NEXT: // %bb.1: // %entry
443+
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
444+
; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
445+
; CHECK-SD-NEXT: .LBB12_2: // %entry
446+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
447+
; CHECK-SD-NEXT: bl __lttf2
448+
; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
449+
; CHECK-SD-NEXT: cmp w0, #0
450+
; CHECK-SD-NEXT: b.ge .LBB12_4
451+
; CHECK-SD-NEXT: // %bb.3: // %entry
452+
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
453+
; CHECK-SD-NEXT: .LBB12_4: // %entry
454+
; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
455+
; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
456+
; CHECK-SD-NEXT: add sp, sp, #112
457+
; CHECK-SD-NEXT: ret
458+
;
459+
; CHECK-GI-LABEL: v2f128_fp128:
460+
; CHECK-GI: // %bb.0: // %entry
461+
; CHECK-GI-NEXT: sub sp, sp, #112
462+
; CHECK-GI-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
463+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
464+
; CHECK-GI-NEXT: .cfi_offset w19, -8
465+
; CHECK-GI-NEXT: .cfi_offset w30, -16
466+
; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
467+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
468+
; CHECK-GI-NEXT: stp q4, q5, [sp, #32] // 32-byte Folded Spill
469+
; CHECK-GI-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
470+
; CHECK-GI-NEXT: bl __lttf2
471+
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
472+
; CHECK-GI-NEXT: cmp w0, #0
473+
; CHECK-GI-NEXT: cset w19, lt
474+
; CHECK-GI-NEXT: bl __lttf2
475+
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
476+
; CHECK-GI-NEXT: cmp w0, #0
477+
; CHECK-GI-NEXT: bfi x19, x8, #32, #32
478+
; CHECK-GI-NEXT: cset w8, lt
479+
; CHECK-GI-NEXT: fmov x10, d0
480+
; CHECK-GI-NEXT: mov x11, v0.d[1]
481+
; CHECK-GI-NEXT: bfi x8, x8, #32, #32
482+
; CHECK-GI-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
483+
; CHECK-GI-NEXT: lsl x9, x19, #63
484+
; CHECK-GI-NEXT: lsl x8, x8, #63
485+
; CHECK-GI-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
486+
; CHECK-GI-NEXT: asr x9, x9, #63
487+
; CHECK-GI-NEXT: fmov x12, d0
488+
; CHECK-GI-NEXT: mov x13, v0.d[1]
489+
; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
490+
; CHECK-GI-NEXT: fmov x14, d1
491+
; CHECK-GI-NEXT: asr x8, x8, #63
492+
; CHECK-GI-NEXT: and x10, x10, x9
493+
; CHECK-GI-NEXT: fmov x15, d0
494+
; CHECK-GI-NEXT: mov x16, v1.d[1]
495+
; CHECK-GI-NEXT: mov x17, v0.d[1]
496+
; CHECK-GI-NEXT: and x12, x12, x8
497+
; CHECK-GI-NEXT: bic x14, x14, x9
498+
; CHECK-GI-NEXT: bic x15, x15, x8
499+
; CHECK-GI-NEXT: orr x10, x10, x14
500+
; CHECK-GI-NEXT: orr x12, x12, x15
501+
; CHECK-GI-NEXT: mov v0.d[0], x10
502+
; CHECK-GI-NEXT: and x10, x11, x9
503+
; CHECK-GI-NEXT: mov v1.d[0], x12
504+
; CHECK-GI-NEXT: and x11, x13, x8
505+
; CHECK-GI-NEXT: bic x9, x16, x9
506+
; CHECK-GI-NEXT: bic x8, x17, x8
507+
; CHECK-GI-NEXT: orr x9, x10, x9
508+
; CHECK-GI-NEXT: orr x8, x11, x8
509+
; CHECK-GI-NEXT: mov v0.d[1], x9
510+
; CHECK-GI-NEXT: mov v1.d[1], x8
511+
; CHECK-GI-NEXT: add sp, sp, #112
512+
; CHECK-GI-NEXT: ret
461513
entry:
462514
%c = fcmp olt <2 x fp128> %a, %b
463515
%s = select <2 x i1> %c, <2 x fp128> %d, <2 x fp128> %e
464516
ret <2 x fp128> %s
465517
}
466518

467519
define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, <3 x fp128> %e) {
468-
; CHECK-LABEL: v3f128_fp128:
469-
; CHECK: // %bb.0: // %entry
470-
; CHECK-NEXT: sub sp, sp, #112
471-
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
472-
; CHECK-NEXT: .cfi_def_cfa_offset 112
473-
; CHECK-NEXT: .cfi_offset w30, -16
474-
; CHECK-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill
475-
; CHECK-NEXT: mov v1.16b, v3.16b
476-
; CHECK-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill
477-
; CHECK-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
478-
; CHECK-NEXT: bl __lttf2
479-
; CHECK-NEXT: cmp w0, #0
480-
; CHECK-NEXT: b.lt .LBB13_2
481-
; CHECK-NEXT: // %bb.1:
482-
; CHECK-NEXT: ldr q0, [sp, #128]
483-
; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
484-
; CHECK-NEXT: .LBB13_2: // %entry
485-
; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
486-
; CHECK-NEXT: bl __lttf2
487-
; CHECK-NEXT: cmp w0, #0
488-
; CHECK-NEXT: b.lt .LBB13_4
489-
; CHECK-NEXT: // %bb.3:
490-
; CHECK-NEXT: ldr q0, [sp, #144]
491-
; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
492-
; CHECK-NEXT: .LBB13_4: // %entry
493-
; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
494-
; CHECK-NEXT: bl __lttf2
495-
; CHECK-NEXT: add x8, sp, #160
496-
; CHECK-NEXT: cmp w0, #0
497-
; CHECK-NEXT: add x9, sp, #112
498-
; CHECK-NEXT: csel x8, x9, x8, lt
499-
; CHECK-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload
500-
; CHECK-NEXT: ldr q2, [x8]
501-
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
502-
; CHECK-NEXT: add sp, sp, #112
503-
; CHECK-NEXT: ret
520+
; CHECK-SD-LABEL: v3f128_fp128:
521+
; CHECK-SD: // %bb.0: // %entry
522+
; CHECK-SD-NEXT: sub sp, sp, #112
523+
; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
524+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
525+
; CHECK-SD-NEXT: .cfi_offset w30, -16
526+
; CHECK-SD-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill
527+
; CHECK-SD-NEXT: mov v1.16b, v3.16b
528+
; CHECK-SD-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill
529+
; CHECK-SD-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
530+
; CHECK-SD-NEXT: bl __lttf2
531+
; CHECK-SD-NEXT: cmp w0, #0
532+
; CHECK-SD-NEXT: b.lt .LBB13_2
533+
; CHECK-SD-NEXT: // %bb.1:
534+
; CHECK-SD-NEXT: ldr q0, [sp, #128]
535+
; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
536+
; CHECK-SD-NEXT: .LBB13_2: // %entry
537+
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
538+
; CHECK-SD-NEXT: bl __lttf2
539+
; CHECK-SD-NEXT: cmp w0, #0
540+
; CHECK-SD-NEXT: b.lt .LBB13_4
541+
; CHECK-SD-NEXT: // %bb.3:
542+
; CHECK-SD-NEXT: ldr q0, [sp, #144]
543+
; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
544+
; CHECK-SD-NEXT: .LBB13_4: // %entry
545+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
546+
; CHECK-SD-NEXT: bl __lttf2
547+
; CHECK-SD-NEXT: add x8, sp, #160
548+
; CHECK-SD-NEXT: cmp w0, #0
549+
; CHECK-SD-NEXT: add x9, sp, #112
550+
; CHECK-SD-NEXT: csel x8, x9, x8, lt
551+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload
552+
; CHECK-SD-NEXT: ldr q2, [x8]
553+
; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
554+
; CHECK-SD-NEXT: add sp, sp, #112
555+
; CHECK-SD-NEXT: ret
556+
;
557+
; CHECK-GI-LABEL: v3f128_fp128:
558+
; CHECK-GI: // %bb.0: // %entry
559+
; CHECK-GI-NEXT: sub sp, sp, #192
560+
; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill
561+
; CHECK-GI-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
562+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 192
563+
; CHECK-GI-NEXT: .cfi_offset w19, -8
564+
; CHECK-GI-NEXT: .cfi_offset w20, -16
565+
; CHECK-GI-NEXT: .cfi_offset w30, -32
566+
; CHECK-GI-NEXT: stp q4, q1, [sp] // 32-byte Folded Spill
567+
; CHECK-GI-NEXT: mov v1.16b, v3.16b
568+
; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill
569+
; CHECK-GI-NEXT: ldr q2, [sp, #192]
570+
; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Folded Spill
571+
; CHECK-GI-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
572+
; CHECK-GI-NEXT: ldr q2, [sp, #208]
573+
; CHECK-GI-NEXT: str q2, [sp, #112] // 16-byte Folded Spill
574+
; CHECK-GI-NEXT: ldr q2, [sp, #224]
575+
; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill
576+
; CHECK-GI-NEXT: ldr q2, [sp, #240]
577+
; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
578+
; CHECK-GI-NEXT: bl __lttf2
579+
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
580+
; CHECK-GI-NEXT: cmp w0, #0
581+
; CHECK-GI-NEXT: cset w19, lt
582+
; CHECK-GI-NEXT: bl __lttf2
583+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
584+
; CHECK-GI-NEXT: cmp w0, #0
585+
; CHECK-GI-NEXT: cset w20, lt
586+
; CHECK-GI-NEXT: bl __lttf2
587+
; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
588+
; CHECK-GI-NEXT: bfi x19, x8, #32, #32
589+
; CHECK-GI-NEXT: bfi x20, x8, #32, #32
590+
; CHECK-GI-NEXT: cmp w0, #0
591+
; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
592+
; CHECK-GI-NEXT: fmov x8, d0
593+
; CHECK-GI-NEXT: mov x10, v0.d[1]
594+
; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
595+
; CHECK-GI-NEXT: cset w9, lt
596+
; CHECK-GI-NEXT: lsl x13, x19, #63
597+
; CHECK-GI-NEXT: lsl x14, x20, #63
598+
; CHECK-GI-NEXT: fmov x11, d0
599+
; CHECK-GI-NEXT: mov x12, v0.d[1]
600+
; CHECK-GI-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
601+
; CHECK-GI-NEXT: bfi x9, x8, #32, #32
602+
; CHECK-GI-NEXT: asr x13, x13, #63
603+
; CHECK-GI-NEXT: asr x14, x14, #63
604+
; CHECK-GI-NEXT: fmov x15, d0
605+
; CHECK-GI-NEXT: mov x16, v0.d[1]
606+
; CHECK-GI-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload
607+
; CHECK-GI-NEXT: lsl x9, x9, #63
608+
; CHECK-GI-NEXT: and x8, x8, x13
609+
; CHECK-GI-NEXT: and x11, x11, x14
610+
; CHECK-GI-NEXT: asr x9, x9, #63
611+
; CHECK-GI-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
612+
; CHECK-GI-NEXT: fmov x17, d0
613+
; CHECK-GI-NEXT: mov x18, v0.d[1]
614+
; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
615+
; CHECK-GI-NEXT: fmov x0, d1
616+
; CHECK-GI-NEXT: and x15, x15, x9
617+
; CHECK-GI-NEXT: mov x2, v1.d[1]
618+
; CHECK-GI-NEXT: fmov x1, d0
619+
; CHECK-GI-NEXT: mov x3, v0.d[1]
620+
; CHECK-GI-NEXT: bic x17, x17, x13
621+
; CHECK-GI-NEXT: bic x0, x0, x14
622+
; CHECK-GI-NEXT: orr x8, x8, x17
623+
; CHECK-GI-NEXT: bic x1, x1, x9
624+
; CHECK-GI-NEXT: orr x11, x11, x0
625+
; CHECK-GI-NEXT: mov v0.d[0], x8
626+
; CHECK-GI-NEXT: orr x15, x15, x1
627+
; CHECK-GI-NEXT: mov v1.d[0], x11
628+
; CHECK-GI-NEXT: and x8, x10, x13
629+
; CHECK-GI-NEXT: mov v2.d[0], x15
630+
; CHECK-GI-NEXT: and x10, x12, x14
631+
; CHECK-GI-NEXT: and x11, x16, x9
632+
; CHECK-GI-NEXT: bic x12, x18, x13
633+
; CHECK-GI-NEXT: bic x13, x2, x14
634+
; CHECK-GI-NEXT: bic x9, x3, x9
635+
; CHECK-GI-NEXT: orr x8, x8, x12
636+
; CHECK-GI-NEXT: orr x10, x10, x13
637+
; CHECK-GI-NEXT: orr x9, x11, x9
638+
; CHECK-GI-NEXT: mov v0.d[1], x8
639+
; CHECK-GI-NEXT: mov v1.d[1], x10
640+
; CHECK-GI-NEXT: mov v2.d[1], x9
641+
; CHECK-GI-NEXT: add sp, sp, #192
642+
; CHECK-GI-NEXT: ret
504643
entry:
505644
%c = fcmp olt <3 x fp128> %a, %b
506645
%s = select <3 x i1> %c, <3 x fp128> %d, <3 x fp128> %e

0 commit comments

Comments
 (0)