-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DirectX] legalize frem so that modulo behavior is the same as fmod #133772
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) ChangesIn the DML testing process we noticed that modulo was using the frem instruction. frem was one of the original cases we thought we need globalIsel's legalizer for. llvm/wg-hlsl#86 Since we had originally thought this instruction was not legal in DXIL I put up this PR. That said the fact that DXC is using it would indicate frem is a legal instruction in DXC. If we don't need this PR i'll close it. But figure I'd put it up incase we run into problems in the backend with frem during validation or in execution. Full diff: https://github.com/llvm/llvm-project/pull/133772.diff 2 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 395311e430fbb..474232e6870a3 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <functional>
@@ -20,6 +21,31 @@
using namespace llvm;
+static void replaceFrem(Instruction &I,
+ SmallVectorImpl<Instruction *> &ToRemove,
+ DenseMap<Value *, Value *> &) {
+ auto *BO = dyn_cast<BinaryOperator>(&I);
+ if (BO == nullptr || BO->getOpcode() != Instruction::FRem)
+ return;
+
+ IRBuilder<> Builder(&I);
+ Value *P0 = BO->getOperand(0);
+ Value *P1 = BO->getOperand(1);
+
+ Value *Div1 = Builder.CreateFDiv(P0, P1);
+ Value *Zero = ConstantFP::get(P0->getType(), 0.0);
+ Value *Cmp = Builder.CreateFCmpOGE(Div1, Zero, "cmp.i");
+ Value *AbsVal =
+ Builder.CreateIntrinsic(Div1->getType(), Intrinsic::fabs, {Div1});
+ Value *FracVal =
+ Builder.CreateIntrinsic(AbsVal->getType(), Intrinsic::dx_frac, {AbsVal});
+ Value *NegFrac = Builder.CreateFNeg(FracVal);
+ Value *SelectVal = Builder.CreateSelect(Cmp, FracVal, NegFrac);
+ Value *MulVal = Builder.CreateFMul(SelectVal, P1);
+ BO->replaceAllUsesWith(MulVal);
+ ToRemove.push_back(BO);
+}
+
static void fixI8TruncUseChain(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &ReplacedValues) {
@@ -169,6 +195,7 @@ class DXILLegalizationPipeline {
void initializeLegalizationPipeline() {
LegalizationPipeline.push_back(fixI8TruncUseChain);
LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
+ LegalizationPipeline.push_back(replaceFrem);
}
};
diff --git a/llvm/test/CodeGen/DirectX/frem.ll b/llvm/test/CodeGen/DirectX/frem.ll
new file mode 100644
index 0000000000000..e75cab90bec07
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/frem.ll
@@ -0,0 +1,59 @@
+
+; RUN: opt -S -dxil-legalize -mtriple=dxil-pc-shadermodel6.3-library %s -o - | FileCheck %s
+
+define noundef half @frem_half(half noundef %a, half noundef %b) {
+; CHECK-LABEL: define noundef half @frem_half(
+; CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FDIV:%.*]] = fdiv half [[A]], [[B]]
+; CHECK-NEXT: [[FCMP:%.*]] = fcmp oge half [[FDIV]], 0xH0000
+; CHECK-NEXT: [[FABS:%.*]] = call half @llvm.fabs.f16(half [[FDIV]])
+; CHECK-NEXT: [[FRAC:%.*]] = call half @llvm.dx.frac.f16(half [[FABS]])
+; CHECK-NEXT: [[FNEG:%.*]] = fneg half [[FRAC]]
+; CHECK-NEXT: [[SELC:%.*]] = select i1 [[FCMP]], half [[FRAC]], half [[FNEG]]
+; CHECK-NEXT: [[FMUL:%.*]] = fmul half [[SELC]], [[B]]
+; CHECK-NEXT: ret half [[FMUL]]
+;
+entry:
+ %fmod.i = frem reassoc nnan ninf nsz arcp afn half %a, %b
+ ret half %fmod.i
+}
+
+; Note by the time the legalizer sees frem with vec type frem will be scalarized
+; This test is for completeness not for expected input of DXL SMs <= 6.8.
+
+define noundef <2 x half> @frem_half2(<2 x half> noundef %a, <2 x half> noundef %b) {
+; CHECK-LABEL: define noundef <2 x half> @frem_half2(
+; CHECK-SAME: <2 x half> noundef [[A:%.*]], <2 x half> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FDIV:%.*]] = fdiv <2 x half> [[A]], [[B]]
+; CHECK-NEXT: [[FCMP:%.*]] = fcmp oge <2 x half> [[FDIV]], zeroinitializer
+; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[FDIV]])
+; CHECK-NEXT: [[FRAC:%.*]] = call <2 x half> @llvm.dx.frac.v2f16(<2 x half> [[FABS]])
+; CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x half> [[FRAC]]
+; CHECK-NEXT: [[SELC:%.*]] = select <2 x i1> [[FCMP]], <2 x half> [[FRAC]], <2 x half> [[FNEG]]
+; CHECK-NEXT: [[FMUL:%.*]] = fmul <2 x half> [[SELC]], [[B]]
+; CHECK-NEXT: ret <2 x half> [[FMUL]]
+;
+entry:
+ %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x half> %a, %b
+ ret <2 x half> %fmod.i
+}
+
+define noundef float @frem_float(float noundef %a, float noundef %b) {
+; CHECK-LABEL: define noundef float @frem_float(
+; CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FDIV:%.*]] = fdiv float [[A]], [[B]]
+; CHECK-NEXT: [[FCMP:%.*]] = fcmp oge float [[FDIV]], 0.000000e+00
+; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[FDIV]])
+; CHECK-NEXT: [[FRAC:%.*]] = call float @llvm.dx.frac.f32(float [[FABS]])
+; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FRAC]]
+; CHECK-NEXT: [[SELC:%.*]] = select i1 [[FCMP]], float [[FRAC]], float [[FNEG]]
+; CHECK-NEXT: [[FMUL:%.*]] = fmul float [[SELC]], [[B]]
+; CHECK-NEXT: ret float [[FMUL]]
+;
+entry:
+ %fmod.i = frem reassoc nnan ninf nsz arcp afn float %a, %b
+ ret float %fmod.i
+}
|
In the DML testing process we noticed that modulo was using the frem instruction.
frem was one of the original cases we thought we need globalIsel's legalizer for. llvm/wg-hlsl#86
Since we had originally thought this instruction was not legal in DXIL I put up this PR.
That said the fact that DXC is using it would indicate frem is a legal instruction in DXC. If we don't need this PR i'll close it. But figure I'd put it up incase we run into problems in the backend with frem during validation or in execution.