@@ -1485,7 +1485,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1485
1485
Register DstReg = MI.getOperand (0 ).getReg ();
1486
1486
LLT Ty = MRI.getType (DstReg);
1487
1487
1488
+ const LLT S64 = LLT::scalar (64 );
1488
1489
const LLT S32 = LLT::scalar (32 );
1490
+ const LLT S16 = LLT::scalar (16 );
1489
1491
1490
1492
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1 ;
1491
1493
Register SrcReg = MI.getOperand (FirstOpnd).getReg ();
@@ -1495,6 +1497,18 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1495
1497
const RegisterBank *DstBank =
1496
1498
OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1497
1499
if (DstBank == &AMDGPU::VGPRRegBank) {
1500
+ if (Ty == S16) {
1501
+ ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::VGPRRegBank);
1502
+ B.setInsertPt (B.getMBB (), MI);
1503
+ LegalizerHelper Helper (B.getMF (), ApplyBank, B);
1504
+
1505
+ Helper.widenScalarDst (MI, S32);
1506
+ Helper.widenScalarSrc (MI, S32, 1 , AMDGPU::G_ANYEXT);
1507
+ Helper.widenScalarSrc (MI, S32, 2 , AMDGPU::G_ZEXT);
1508
+ Helper.widenScalarSrc (MI, S32, 3 , AMDGPU::G_ZEXT);
1509
+ return true ;
1510
+ }
1511
+
1498
1512
if (Ty == S32)
1499
1513
return true ;
1500
1514
@@ -1554,6 +1568,11 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1554
1568
1555
1569
ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::SGPRRegBank);
1556
1570
1571
+ if (Ty == S16) {
1572
+ OffsetReg = B.buildAnyExtOrTrunc (S32, OffsetReg).getReg (0 );
1573
+ WidthReg = B.buildAnyExtOrTrunc (S32, WidthReg).getReg (0 );
1574
+ }
1575
+
1557
1576
// Ensure the high bits are clear to insert the offset.
1558
1577
auto OffsetMask = B.buildConstant (S32, maskTrailingOnes<unsigned >(6 ));
1559
1578
auto ClampOffset = B.buildAnd (S32, OffsetReg, OffsetMask);
@@ -1568,13 +1587,21 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1568
1587
1569
1588
// TODO: It might be worth using a pseudo here to avoid scc clobber and
1570
1589
// register class constraints.
1571
- unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1572
- (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1590
+ unsigned Opc = ( Ty != S64) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
1591
+ : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1573
1592
1574
- auto MIB = B.buildInstr (Opc, {DstReg}, {SrcReg, MergedInputs});
1593
+ Register BFEDst = DstReg;
1594
+ if (Ty == S16) {
1595
+ BFEDst = MRI.createGenericVirtualRegister (S32);
1596
+ MRI.setRegBank (BFEDst, AMDGPU::SGPRRegBank);
1597
+ }
1598
+ auto MIB = B.buildInstr (Opc, {BFEDst}, {SrcReg, MergedInputs});
1575
1599
if (!constrainSelectedInstRegOperands (*MIB, *TII, *TRI, *this ))
1576
1600
llvm_unreachable (" failed to constrain BFE" );
1577
1601
1602
+ if (BFEDst != DstReg)
1603
+ B.buildZExtOrTrunc (DstReg, BFEDst);
1604
+
1578
1605
MI.eraseFromParent ();
1579
1606
return true ;
1580
1607
}
0 commit comments