Skip to content

Commit 4170108

Browse files
committed
[DirectX] legalize frem so that modulo behavior is the same as fmod
1 parent 4007de0 commit 4170108

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

llvm/lib/Target/DirectX/DXILLegalizePass.cpp

+27
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "llvm/IR/IRBuilder.h"
1313
#include "llvm/IR/InstIterator.h"
1414
#include "llvm/IR/Instruction.h"
15+
#include "llvm/IR/IntrinsicsDirectX.h"
1516
#include "llvm/Pass.h"
1617
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
1718
#include <functional>
@@ -20,6 +21,31 @@
2021

2122
using namespace llvm;
2223

24+
/// Legalize `frem` by expanding it into a sequence that gives fmod-style
/// modulo behavior:
///
///   Quotient = Dividend / Divisor
///   Fraction = dx.frac(fabs(Quotient))
///   Result   = (Quotient >= 0.0 ? Fraction : -Fraction) * Divisor
///
/// The new instructions are inserted immediately before \p I. Every use of the
/// original `frem` is redirected to the final multiply, and the `frem` itself
/// is appended to \p ToRemove rather than erased here. The third parameter is
/// unused by this legalization.
static void replaceFrem(Instruction &I,
                        SmallVectorImpl<Instruction *> &ToRemove,
                        DenseMap<Value *, Value *> &) {
  // Only `frem` is rewritten; any other instruction is left untouched.
  if (I.getOpcode() != Instruction::FRem)
    return;

  Value *Dividend = I.getOperand(0);
  Value *Divisor = I.getOperand(1);

  IRBuilder<> B(&I);
  Value *Quotient = B.CreateFDiv(Dividend, Divisor);
  // The sign of the quotient selects the sign of the fractional part below.
  Value *IsNonNegative = B.CreateFCmpOGE(
      Quotient, ConstantFP::get(Dividend->getType(), 0.0), "cmp.i");
  Value *Magnitude =
      B.CreateIntrinsic(Quotient->getType(), Intrinsic::fabs, {Quotient});
  Value *Fraction =
      B.CreateIntrinsic(Magnitude->getType(), Intrinsic::dx_frac, {Magnitude});
  Value *NegatedFraction = B.CreateFNeg(Fraction);
  Value *SignedFraction =
      B.CreateSelect(IsNonNegative, Fraction, NegatedFraction);
  // Scaling the signed fraction by the divisor yields the remainder.
  Value *Remainder = B.CreateFMul(SignedFraction, Divisor);

  I.replaceAllUsesWith(Remainder);
  ToRemove.push_back(&I);
}
48+
2349
static void fixI8TruncUseChain(Instruction &I,
2450
SmallVectorImpl<Instruction *> &ToRemove,
2551
DenseMap<Value *, Value *> &ReplacedValues) {
@@ -169,6 +195,7 @@ class DXILLegalizationPipeline {
169195
// Appends the legalization callbacks to LegalizationPipeline in the order
// listed below: fixI8TruncUseChain, downcastI64toI32InsertExtractElements,
// then replaceFrem.
void initializeLegalizationPipeline() {
170196
LegalizationPipeline.push_back(fixI8TruncUseChain);
171197
LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
198+
LegalizationPipeline.push_back(replaceFrem);
172199
}
173200
};
174201

llvm/test/CodeGen/DirectX/frem.ll

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
2+
; RUN: opt -S -dxil-legalize -mtriple=dxil-pc-shadermodel6.3-library %s -o - | FileCheck %s
3+
4+
; Scalar half case: the assertions below expect the frem to be replaced by, in
; order, fdiv, fcmp oge against zero, fabs, dx.frac, fneg, select, and an fmul
; by the second operand.
define noundef half @frem_half(half noundef %a, half noundef %b) {
5+
; CHECK-LABEL: define noundef half @frem_half(
6+
; CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[FDIV:%.*]] = fdiv half [[A]], [[B]]
9+
; CHECK-NEXT: [[FCMP:%.*]] = fcmp oge half [[FDIV]], 0xH0000
10+
; CHECK-NEXT: [[FABS:%.*]] = call half @llvm.fabs.f16(half [[FDIV]])
11+
; CHECK-NEXT: [[FRAC:%.*]] = call half @llvm.dx.frac.f16(half [[FABS]])
12+
; CHECK-NEXT: [[FNEG:%.*]] = fneg half [[FRAC]]
13+
; CHECK-NEXT: [[SELC:%.*]] = select i1 [[FCMP]], half [[FRAC]], half [[FNEG]]
14+
; CHECK-NEXT: [[FMUL:%.*]] = fmul half [[SELC]], [[B]]
15+
; CHECK-NEXT: ret half [[FMUL]]
16+
;
17+
entry:
18+
; The fast-math flags on this frem do not appear on the expected output above.
%fmod.i = frem reassoc nnan ninf nsz arcp afn half %a, %b
19+
ret half %fmod.i
20+
}
21+
22+
; Note: by the time the legalizer sees a frem with a vector type, the frem will have been scalarized.
23+
; This test is for completeness; it is not an expected input for DXIL SMs <= 6.8.
24+
25+
; <2 x half> vector case: the assertions expect the same fdiv, fcmp oge, fabs,
; dx.frac, fneg, select, fmul sequence as the scalar tests, applied to the
; whole vector (zero is matched as zeroinitializer, the select uses <2 x i1>).
define noundef <2 x half> @frem_half2(<2 x half> noundef %a, <2 x half> noundef %b) {
26+
; CHECK-LABEL: define noundef <2 x half> @frem_half2(
27+
; CHECK-SAME: <2 x half> noundef [[A:%.*]], <2 x half> noundef [[B:%.*]]) {
28+
; CHECK-NEXT: [[ENTRY:.*:]]
29+
; CHECK-NEXT: [[FDIV:%.*]] = fdiv <2 x half> [[A]], [[B]]
30+
; CHECK-NEXT: [[FCMP:%.*]] = fcmp oge <2 x half> [[FDIV]], zeroinitializer
31+
; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[FDIV]])
32+
; CHECK-NEXT: [[FRAC:%.*]] = call <2 x half> @llvm.dx.frac.v2f16(<2 x half> [[FABS]])
33+
; CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x half> [[FRAC]]
34+
; CHECK-NEXT: [[SELC:%.*]] = select <2 x i1> [[FCMP]], <2 x half> [[FRAC]], <2 x half> [[FNEG]]
35+
; CHECK-NEXT: [[FMUL:%.*]] = fmul <2 x half> [[SELC]], [[B]]
36+
; CHECK-NEXT: ret <2 x half> [[FMUL]]
37+
;
38+
entry:
39+
%fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x half> %a, %b
40+
ret <2 x half> %fmod.i
41+
}
42+
43+
; Scalar float case: the assertions expect the same fdiv, fcmp oge, fabs,
; dx.frac, fneg, select, fmul sequence as the half test, using the f32
; overloads of the intrinsics.
define noundef float @frem_float(float noundef %a, float noundef %b) {
44+
; CHECK-LABEL: define noundef float @frem_float(
45+
; CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) {
46+
; CHECK-NEXT: [[ENTRY:.*:]]
47+
; CHECK-NEXT: [[FDIV:%.*]] = fdiv float [[A]], [[B]]
48+
; CHECK-NEXT: [[FCMP:%.*]] = fcmp oge float [[FDIV]], 0.000000e+00
49+
; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[FDIV]])
50+
; CHECK-NEXT: [[FRAC:%.*]] = call float @llvm.dx.frac.f32(float [[FABS]])
51+
; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FRAC]]
52+
; CHECK-NEXT: [[SELC:%.*]] = select i1 [[FCMP]], float [[FRAC]], float [[FNEG]]
53+
; CHECK-NEXT: [[FMUL:%.*]] = fmul float [[SELC]], [[B]]
54+
; CHECK-NEXT: ret float [[FMUL]]
55+
;
56+
entry:
57+
%fmod.i = frem reassoc nnan ninf nsz arcp afn float %a, %b
58+
ret float %fmod.i
59+
}

0 commit comments

Comments
 (0)