Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 0415cdb

Browse files
committed Mar 6, 2025
fptrunc in true16
1 parent b5e70d0 commit 0415cdb

File tree

5 files changed

+738
-355
lines changed

5 files changed

+738
-355
lines changed
 

‎llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3578,15 +3578,22 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) con
35783578
return SDValue();
35793579
}
35803580

3581-
assert(N0.getSimpleValueType() == MVT::f64);
3581+
return LowerF64ToF16(N0, Op.getValueType(), DL, DAG);
3582+
}
3583+
3584+
SDValue AMDGPUTargetLowering::LowerF64ToF16(SDValue Src, EVT ResTy,
3585+
const SDLoc &DL,
3586+
SelectionDAG &DAG) const {
3587+
assert(Src.getSimpleValueType() == MVT::f64);
35823588

35833589
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
3590+
// TODO: We can generate better code for True16.
35843591
const unsigned ExpMask = 0x7ff;
35853592
const unsigned ExpBiasf64 = 1023;
35863593
const unsigned ExpBiasf16 = 15;
35873594
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
35883595
SDValue One = DAG.getConstant(1, DL, MVT::i32);
3589-
SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
3596+
SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Src);
35903597
SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
35913598
DAG.getConstant(32, DL, MVT::i64));
35923599
UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
@@ -3661,7 +3668,7 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) con
36613668
DAG.getConstant(0x8000, DL, MVT::i32));
36623669

36633670
V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
3664-
return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
3671+
return DAG.getZExtOrTrunc(V, DL, ResTy);
36653672
}
36663673

36673674
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,

‎llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,9 @@ class AMDGPUTargetLowering : public TargetLowering {
9797
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
9898
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
9999

100+
SDValue LowerF64ToF16(SDValue Src, EVT ResTy, const SDLoc &DL,
101+
SelectionDAG &DAG) const;
102+
100103
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
101104

102105
protected:

‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6825,6 +6825,17 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
68256825

68266826
SDLoc DL(Op);
68276827

6828+
if (Subtarget->useRealTrue16Insts()) {
6829+
if (getTargetMachine().Options.UnsafeFPMath) {
6830+
SDValue Flags = Op.getOperand(1);
6831+
SDValue Src32 = DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Src, Flags);
6832+
return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Src32, Flags);
6833+
}
6834+
6835+
SDValue FpToFp16 = LowerF64ToF16(Src, MVT::i16, DL, DAG);
6836+
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, FpToFp16);
6837+
}
6838+
68286839
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
68296840
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
68306841
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
There was a problem loading the remainder of the diff.

0 commit comments

Comments
 (0)
Failed to load comments.