Skip to content

Commit 88c20fa

Browse files
committed
InstCombine: Add constant folding/simplify for amdgcn.ldexp intrinsic
This really belongs in InstructionSimplify, since it doesn't introduce new instructions. It is placed in InstCombine instead to avoid increasing the number of passes that have to consider target intrinsics. I also noticed that we now seem to interpret strictfp attributes on call sites, so this tries to handle that as well.
1 parent 5a8db27 commit 88c20fa

File tree

3 files changed

+400
-0
lines changed

3 files changed

+400
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
14571457
case Intrinsic::amdgcn_cubetc:
14581458
case Intrinsic::amdgcn_fmul_legacy:
14591459
case Intrinsic::amdgcn_fract:
1460+
case Intrinsic::amdgcn_ldexp:
14601461
case Intrinsic::x86_sse_cvtss2si:
14611462
case Intrinsic::x86_sse_cvtss2si64:
14621463
case Intrinsic::x86_sse_cvttss2si:
@@ -2224,6 +2225,16 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
22242225
return ConstantFP::get(Ty->getContext(),
22252226
APFloat((double)std::pow((double)Op1V,
22262227
(int)Op2C->getZExtValue())));
2228+
2229+
if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
2230+
// FIXME: Should flush denorms depending on FP mode, but that's ignored
2231+
// everywhere else.
2232+
2233+
// scalbn is equivalent to ldexp with float radix 2
2234+
APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
2235+
APFloat::rmNearestTiesToEven);
2236+
return ConstantFP::get(Ty->getContext(), Result);
2237+
}
22272238
}
22282239
return nullptr;
22292240
}

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4067,6 +4067,53 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
40674067

40684068
break;
40694069
}
4070+
case Intrinsic::amdgcn_ldexp: {
4071+
// FIXME: This doesn't introduce new instructions and belongs in
4072+
// InstructionSimplify.
4073+
Type *Ty = II->getType();
4074+
Value *Op0 = II->getArgOperand(0);
4075+
Value *Op1 = II->getArgOperand(1);
4076+
4077+
// Folding undef to qnan is safe regardless of the FP mode.
4078+
if (isa<UndefValue>(Op0)) {
4079+
auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
4080+
return replaceInstUsesWith(*II, QNaN);
4081+
}
4082+
4083+
const APFloat *C = nullptr;
4084+
match(Op0, m_APFloat(C));
4085+
4086+
// FIXME: Should flush denorms depending on FP mode, but that's ignored
4087+
// everywhere else.
4088+
//
4089+
// These cases should be safe, even with strictfp.
4090+
// ldexp(0.0, x) -> 0.0
4091+
// ldexp(-0.0, x) -> -0.0
4092+
// ldexp(inf, x) -> inf
4093+
// ldexp(-inf, x) -> -inf
4094+
if (C && (C->isZero() || C->isInfinity()))
4095+
return replaceInstUsesWith(*II, Op0);
4096+
4097+
// With strictfp, be more careful about possibly needing to flush denormals
4098+
// or not, and snan behavior depends on ieee_mode.
4099+
if (II->isStrictFP())
4100+
break;
4101+
4102+
if (C && C->isNaN()) {
4103+
// FIXME: We just need to make the nan quiet here, but that's unavailable
4104+
// on APFloat, only IEEEfloat
4105+
auto *Quieted = ConstantFP::get(
4106+
Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
4107+
return replaceInstUsesWith(*II, Quieted);
4108+
}
4109+
4110+
// ldexp(x, 0) -> x
4111+
// ldexp(x, undef) -> x
4112+
if (isa<UndefValue>(Op1) || match(Op1, m_ZeroInt()))
4113+
return replaceInstUsesWith(*II, Op0);
4114+
4115+
break;
4116+
}
40704117
case Intrinsic::hexagon_V6_vandvrt:
40714118
case Intrinsic::hexagon_V6_vandvrt_128B: {
40724119
// Simplify Q -> V -> Q conversion.

0 commit comments

Comments
 (0)