Skip to content

Commit 597912a

Browse files
committed
[AMDGPU][GlobalISel] Allow forming s16 U/SBFX pre-regbankselect
Make s16 G_U/SBFX legal and widen them in RegBankSelect. This allows the set of BFX formation combines to work on s16 types.
1 parent 0d0eed4 commit 597912a

File tree

7 files changed

+503
-644
lines changed

7 files changed

+503
-644
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -2073,10 +2073,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
20732073
.minScalar(0, S32)
20742074
.lower();
20752075

2076+
// Only {S32, S32} or {S64, S32} should ever reach codegen.
2077+
// We allow S/UBFX for S16 so the combiner can form them before
2078+
// RegBankSelect, and RegBankSelect will then legalize them correctly.
20762079
getActionDefinitionsBuilder({G_SBFX, G_UBFX})
2077-
.legalFor({{S32, S32}, {S64, S32}})
2078-
.clampScalar(1, S32, S32)
2079-
.clampScalar(0, S32, S64)
2080+
.legalFor({{S16, S16}, {S32, S32}, {S64, S32}})
2081+
.clampScalar(1, S16, S32)
2082+
.clampScalar(0, S16, S64)
20802083
.widenScalarToNextPow2(0)
20812084
.scalarize(0);
20822085

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

+30-3
Original file line numberDiff line numberDiff line change
@@ -1484,7 +1484,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14841484
Register DstReg = MI.getOperand(0).getReg();
14851485
LLT Ty = MRI.getType(DstReg);
14861486

1487+
const LLT S64 = LLT::scalar(64);
14871488
const LLT S32 = LLT::scalar(32);
1489+
const LLT S16 = LLT::scalar(16);
14881490

14891491
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
14901492
Register SrcReg = MI.getOperand(FirstOpnd).getReg();
@@ -1494,6 +1496,18 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14941496
const RegisterBank *DstBank =
14951497
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
14961498
if (DstBank == &AMDGPU::VGPRRegBank) {
1499+
if (Ty == S16) {
1500+
ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
1501+
B.setInsertPt(B.getMBB(), MI);
1502+
LegalizerHelper Helper(B.getMF(), ApplyBank, B);
1503+
1504+
Helper.widenScalarDst(MI, S32);
1505+
Helper.widenScalarSrc(MI, S32, 1, AMDGPU::G_ANYEXT);
1506+
Helper.widenScalarSrc(MI, S32, 2, AMDGPU::G_ZEXT);
1507+
Helper.widenScalarSrc(MI, S32, 3, AMDGPU::G_ZEXT);
1508+
return true;
1509+
}
1510+
14971511
if (Ty == S32)
14981512
return true;
14991513

@@ -1553,6 +1567,11 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15531567

15541568
ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
15551569

1570+
if (Ty == S16) {
1571+
OffsetReg = B.buildAnyExtOrTrunc(S32, OffsetReg).getReg(0);
1572+
WidthReg = B.buildAnyExtOrTrunc(S32, WidthReg).getReg(0);
1573+
}
1574+
15561575
// Ensure the high bits are clear to insert the offset.
15571576
auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
15581577
auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
@@ -1567,13 +1586,21 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15671586

15681587
// TODO: It might be worth using a pseudo here to avoid scc clobber and
15691588
// register class constraints.
1570-
unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1571-
(Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1589+
unsigned Opc = (Ty != S64) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
1590+
: (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
15721591

1573-
auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1592+
Register BFEDst = DstReg;
1593+
if (Ty == S16) {
1594+
BFEDst = MRI.createGenericVirtualRegister(S32);
1595+
MRI.setRegBank(BFEDst, AMDGPU::SGPRRegBank);
1596+
}
1597+
auto MIB = B.buildInstr(Opc, {BFEDst}, {SrcReg, MergedInputs});
15741598
if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
15751599
llvm_unreachable("failed to constrain BFE");
15761600

1601+
if (BFEDst != DstReg)
1602+
B.buildZExtOrTrunc(DstReg, BFEDst);
1603+
15771604
MI.eraseFromParent();
15781605
return true;
15791606
}

0 commit comments

Comments
 (0)