InstCombine: Add constant folding/simplify for amdgcn.ldexp intrinsic

arsenm · arsenm · commit 88c20fa3d2a2 · 2020-05-22T08:21:38.000-04:00
This fold really belongs in InstructionSimplify since it doesn't
introduce new instructions. It is placed in InstCombine instead to avoid
increasing the number of passes that consider target intrinsics.

I also noticed that we now seem to interpret strictfp attributes on
call sites, so this tries to handle that.
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1457,6 +1457,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::amdgcn_cubetc:
   case Intrinsic::amdgcn_fmul_legacy:
   case Intrinsic::amdgcn_fract:
+  case Intrinsic::amdgcn_ldexp:
   case Intrinsic::x86_sse_cvtss2si:
   case Intrinsic::x86_sse_cvtss2si64:
   case Intrinsic::x86_sse_cvttss2si:
@@ -2224,6 +2225,16 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
         return ConstantFP::get(Ty->getContext(),
                                APFloat((double)std::pow((double)Op1V,
                                                  (int)Op2C->getZExtValue())));
+
+      if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
+        // FIXME: Should flush denorms depending on FP mode, but that's ignored
+        // everywhere else.
+
+        // scalbn is equivalent to ldexp with float radix 2
+        APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
+                                APFloat::rmNearestTiesToEven);
+        return ConstantFP::get(Ty->getContext(), Result);
+      }
     }
     return nullptr;
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4067,6 +4067,53 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 
     break;
   }
+  case Intrinsic::amdgcn_ldexp: {
+    // FIXME: This doesn't introduce new instructions and belongs in
+    // InstructionSimplify.
+    Type *Ty = II->getType();
+    Value *Op0 = II->getArgOperand(0);
+    Value *Op1 = II->getArgOperand(1);
+
+    // Folding undef to qnan is safe regardless of the FP mode.
+    if (isa<UndefValue>(Op0)) {
+      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
+      return replaceInstUsesWith(*II, QNaN);
+    }
+
+    const APFloat *C = nullptr;
+    match(Op0, m_APFloat(C));
+
+    // FIXME: Should flush denorms depending on FP mode, but that's ignored
+    // everywhere else.
+    //
+    // These cases should be safe, even with strictfp.
+    // ldexp(0.0, x) -> 0.0
+    // ldexp(-0.0, x) -> -0.0
+    // ldexp(inf, x) -> inf
+    // ldexp(-inf, x) -> -inf
+    if (C && (C->isZero() || C->isInfinity()))
+      return replaceInstUsesWith(*II, Op0);
+
+    // With strictfp, be more careful about possibly needing to flush denormals
+    // or not, and snan behavior depends on ieee_mode.
+    if (II->isStrictFP())
+      break;
+
+    if (C && C->isNaN()) {
+      // FIXME: We just need to make the NaN quiet here, but that's unavailable
+      // on APFloat, only IEEEFloat.
+      auto *Quieted = ConstantFP::get(
+        Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
+      return replaceInstUsesWith(*II, Quieted);
+    }
+
+    // ldexp(x, 0) -> x
+    // ldexp(x, undef) -> x
+    if (isa<UndefValue>(Op1) || match(Op1, m_ZeroInt()))
+      return replaceInstUsesWith(*II, Op0);
+
+    break;
+  }
   case Intrinsic::hexagon_V6_vandvrt:
   case Intrinsic::hexagon_V6_vandvrt_128B: {
     // Simplify Q -> V -> Q conversion.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll