@@ -1484,7 +1484,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1484
1484
Register DstReg = MI.getOperand (0 ).getReg ();
1485
1485
LLT Ty = MRI.getType (DstReg);
1486
1486
1487
+ const LLT S64 = LLT::scalar (64 );
1487
1488
const LLT S32 = LLT::scalar (32 );
1489
+ const LLT S16 = LLT::scalar (16 );
1488
1490
1489
1491
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1 ;
1490
1492
Register SrcReg = MI.getOperand (FirstOpnd).getReg ();
@@ -1494,6 +1496,18 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1494
1496
const RegisterBank *DstBank =
1495
1497
OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1496
1498
if (DstBank == &AMDGPU::VGPRRegBank) {
1499
+ if (Ty == S16) {
1500
+ ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::VGPRRegBank);
1501
+ B.setInsertPt (B.getMBB (), MI);
1502
+ LegalizerHelper Helper (B.getMF (), ApplyBank, B);
1503
+
1504
+ Helper.widenScalarDst (MI, S32);
1505
+ Helper.widenScalarSrc (MI, S32, 1 , AMDGPU::G_ANYEXT);
1506
+ Helper.widenScalarSrc (MI, S32, 2 , AMDGPU::G_ZEXT);
1507
+ Helper.widenScalarSrc (MI, S32, 3 , AMDGPU::G_ZEXT);
1508
+ return true ;
1509
+ }
1510
+
1497
1511
if (Ty == S32)
1498
1512
return true ;
1499
1513
@@ -1553,6 +1567,11 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1553
1567
1554
1568
ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::SGPRRegBank);
1555
1569
1570
+ if (Ty == S16) {
1571
+ OffsetReg = B.buildAnyExtOrTrunc (S32, OffsetReg).getReg (0 );
1572
+ WidthReg = B.buildAnyExtOrTrunc (S32, WidthReg).getReg (0 );
1573
+ }
1574
+
1556
1575
// Ensure the high bits are clear to insert the offset.
1557
1576
auto OffsetMask = B.buildConstant (S32, maskTrailingOnes<unsigned >(6 ));
1558
1577
auto ClampOffset = B.buildAnd (S32, OffsetReg, OffsetMask);
@@ -1567,13 +1586,21 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1567
1586
1568
1587
// TODO: It might be worth using a pseudo here to avoid scc clobber and
1569
1588
// register class constraints.
1570
- unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1571
- (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1589
+ unsigned Opc = ( Ty != S64) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
1590
+ : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1572
1591
1573
- auto MIB = B.buildInstr (Opc, {DstReg}, {SrcReg, MergedInputs});
1592
+ Register BFEDst = DstReg;
1593
+ if (Ty == S16) {
1594
+ BFEDst = MRI.createGenericVirtualRegister (S32);
1595
+ MRI.setRegBank (BFEDst, AMDGPU::SGPRRegBank);
1596
+ }
1597
+ auto MIB = B.buildInstr (Opc, {BFEDst}, {SrcReg, MergedInputs});
1574
1598
if (!constrainSelectedInstRegOperands (*MIB, *TII, *TRI, *this ))
1575
1599
llvm_unreachable (" failed to constrain BFE" );
1576
1600
1601
+ if (BFEDst != DstReg)
1602
+ B.buildZExtOrTrunc (DstReg, BFEDst);
1603
+
1577
1604
MI.eraseFromParent ();
1578
1605
return true ;
1579
1606
}
0 commit comments