Skip to content

Commit 090fa3e

Browse files
committed
[AMDGPU][GlobalISel] Allow forming s16 U/SBFX pre-regbankselect
Make s16 G_UBFX/G_SBFX legal in the legalizer and widen them to s32 in RegBankSelect. This allows the set of BFX formation combines to work on s16 types.
1 parent c4187f7 commit 090fa3e

File tree

7 files changed

+503
-644
lines changed

7 files changed

+503
-644
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -2069,10 +2069,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
20692069
.minScalar(0, S32)
20702070
.lower();
20712071

2072+
// Only {S32, S32} or {S64, S32} should ever reach codegen.
2073+
// We allow S/UBFX for S16 so the combiner can form them before
2074+
// RegBankSelect, and RegBankSelect will then legalize them correctly.
20722075
getActionDefinitionsBuilder({G_SBFX, G_UBFX})
2073-
.legalFor({{S32, S32}, {S64, S32}})
2074-
.clampScalar(1, S32, S32)
2075-
.clampScalar(0, S32, S64)
2076+
.legalFor({{S16, S16}, {S32, S32}, {S64, S32}})
2077+
.clampScalar(1, S16, S32)
2078+
.clampScalar(0, S16, S64)
20762079
.widenScalarToNextPow2(0)
20772080
.scalarize(0);
20782081

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

+30-3
Original file line numberDiff line numberDiff line change
@@ -1485,7 +1485,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14851485
Register DstReg = MI.getOperand(0).getReg();
14861486
LLT Ty = MRI.getType(DstReg);
14871487

1488+
const LLT S64 = LLT::scalar(64);
14881489
const LLT S32 = LLT::scalar(32);
1490+
const LLT S16 = LLT::scalar(16);
14891491

14901492
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
14911493
Register SrcReg = MI.getOperand(FirstOpnd).getReg();
@@ -1495,6 +1497,18 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14951497
const RegisterBank *DstBank =
14961498
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
14971499
if (DstBank == &AMDGPU::VGPRRegBank) {
1500+
if (Ty == S16) {
1501+
ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
1502+
B.setInsertPt(B.getMBB(), MI);
1503+
LegalizerHelper Helper(B.getMF(), ApplyBank, B);
1504+
1505+
Helper.widenScalarDst(MI, S32);
1506+
Helper.widenScalarSrc(MI, S32, 1, AMDGPU::G_ANYEXT);
1507+
Helper.widenScalarSrc(MI, S32, 2, AMDGPU::G_ZEXT);
1508+
Helper.widenScalarSrc(MI, S32, 3, AMDGPU::G_ZEXT);
1509+
return true;
1510+
}
1511+
14981512
if (Ty == S32)
14991513
return true;
15001514

@@ -1554,6 +1568,11 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15541568

15551569
ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
15561570

1571+
if (Ty == S16) {
1572+
OffsetReg = B.buildAnyExtOrTrunc(S32, OffsetReg).getReg(0);
1573+
WidthReg = B.buildAnyExtOrTrunc(S32, WidthReg).getReg(0);
1574+
}
1575+
15571576
// Ensure the high bits are clear to insert the offset.
15581577
auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
15591578
auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
@@ -1568,13 +1587,21 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15681587

15691588
// TODO: It might be worth using a pseudo here to avoid scc clobber and
15701589
// register class constraints.
1571-
unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1572-
(Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1590+
unsigned Opc = (Ty != S64) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
1591+
: (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
15731592

1574-
auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1593+
Register BFEDst = DstReg;
1594+
if (Ty == S16) {
1595+
BFEDst = MRI.createGenericVirtualRegister(S32);
1596+
MRI.setRegBank(BFEDst, AMDGPU::SGPRRegBank);
1597+
}
1598+
auto MIB = B.buildInstr(Opc, {BFEDst}, {SrcReg, MergedInputs});
15751599
if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
15761600
llvm_unreachable("failed to constrain BFE");
15771601

1602+
if (BFEDst != DstReg)
1603+
B.buildZExtOrTrunc(DstReg, BFEDst);
1604+
15781605
MI.eraseFromParent();
15791606
return true;
15801607
}

0 commit comments

Comments
 (0)