Add support for single reductions in ComplexDeinterleavingPass #112875

NickGuy-Arm · 2024-10-18T10:06:28Z

The Complex Deinterleaving pass assumes that all values emitted will result in complex numbers. This patch removes that assumption and adds support for emitting just the real or just the imaginary component, rather than both.

llvmbot · 2024-10-18T10:07:03Z

@llvm/pr-subscribers-backend-aarch64

Author: Nicholas Guy (NickGuy-Arm)

Changes

The Complex Deinterleaving pass assumes that all values emitted will result in complex numbers. This patch removes that assumption and adds support for emitting just the real or just the imaginary component, rather than both.

Patch is 25.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112875.diff

4 Files Affected:

(modified) llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h (+1)
(modified) llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp (+107-14)
(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+10-9)
(added) llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll (+170)

diff --git a/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h b/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h
index 84a2673fecb5bf..a3fa2197727701 100644
--- a/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h
+++ b/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h
@@ -43,6 +43,7 @@ enum class ComplexDeinterleavingOperation {
   ReductionPHI,
   ReductionOperation,
   ReductionSelect,
+  ReductionSingle
 };
 
 enum class ComplexDeinterleavingRotation {
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 8573b016d1e5bb..08287a4d5ed022 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -145,6 +145,7 @@ struct ComplexDeinterleavingCompositeNode {
   friend class ComplexDeinterleavingGraph;
   using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>;
   using RawNodePtr = ComplexDeinterleavingCompositeNode *;
+  bool OperandsValid = true;
 
 public:
   ComplexDeinterleavingOperation Operation;
@@ -161,7 +162,11 @@ struct ComplexDeinterleavingCompositeNode {
   SmallVector<RawNodePtr> Operands;
   Value *ReplacementNode = nullptr;
 
-  void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
+  void addOperand(NodePtr Node) {
+    if (!Node || !Node.get())
+      OperandsValid = false;
+    Operands.push_back(Node.get());
+  }
 
   void dump() { dump(dbgs()); }
   void dump(raw_ostream &OS) {
@@ -195,6 +200,10 @@ struct ComplexDeinterleavingCompositeNode {
       PrintNodeRef(Op);
     }
   }
+
+  bool AreOperandsValid() {
+    return OperandsValid;
+  }
 };
 
 class ComplexDeinterleavingGraph {
@@ -294,7 +303,7 @@ class ComplexDeinterleavingGraph {
 
   NodePtr submitCompositeNode(NodePtr Node) {
     CompositeNodes.push_back(Node);
-    if (Node->Real && Node->Imag)
+    if (Node->Real)
       CachedResult[{Node->Real, Node->Imag}] = Node;
     return Node;
   }
@@ -328,8 +337,10 @@ class ComplexDeinterleavingGraph {
   ///      i: ai - br
   NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
   NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag);
+  NodePtr identifyPartialReduction(Value *R, Value *I);
 
   NodePtr identifyNode(Value *R, Value *I);
+  NodePtr identifyNode(Value *R, Value *I, bool &FromCache);
 
   /// Determine if a sum of complex numbers can be formed from \p RealAddends
   /// and \p ImagAddens. If \p Accumulator is not null, add the result to it.
@@ -397,6 +408,7 @@ class ComplexDeinterleavingGraph {
   /// * Deinterleave the final value outside of the loop and repurpose original
   /// reduction users
   void processReductionOperation(Value *OperationReplacement, RawNodePtr Node);
+  void processReductionSingle(Value *OperationReplacement, RawNodePtr Node);
 
 public:
   void dump() { dump(dbgs()); }
@@ -893,16 +905,26 @@ ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
 
 ComplexDeinterleavingGraph::NodePtr
 ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
-  LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
-  assert(R->getType() == I->getType() &&
-         "Real and imaginary parts should not have different types");
+  bool _;
+  return identifyNode(R, I, _);
+}
 
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I, bool &FromCache) {
   auto It = CachedResult.find({R, I});
   if (It != CachedResult.end()) {
     LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
+    FromCache = true;
     return It->second;
   }
 
+  if(NodePtr CN = identifyPartialReduction(R, I))
+    return CN;
+
+  bool IsReduction = RealPHI == R && (!ImagPHI || ImagPHI == I);
+  if(!IsReduction && R->getType() != I->getType())
+    return nullptr;
+
   if (NodePtr CN = identifySplat(R, I))
     return CN;
 
@@ -1428,12 +1450,18 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
   if (It != RootToNode.end()) {
     auto RootNode = It->second;
     assert(RootNode->Operation ==
-           ComplexDeinterleavingOperation::ReductionOperation);
+           ComplexDeinterleavingOperation::ReductionOperation || RootNode->Operation == ComplexDeinterleavingOperation::ReductionSingle);
     // Find out which part, Real or Imag, comes later, and only if we come to
     // the latest part, add it to OrderedRoots.
     auto *R = cast<Instruction>(RootNode->Real);
-    auto *I = cast<Instruction>(RootNode->Imag);
-    auto *ReplacementAnchor = R->comesBefore(I) ? I : R;
+    auto *I = RootNode->Imag ? cast<Instruction>(RootNode->Imag) : nullptr;
+
+    Instruction *ReplacementAnchor;
+    if(I) 
+      ReplacementAnchor = R->comesBefore(I) ? I : R;
+    else 
+      ReplacementAnchor = R;
+    
     if (ReplacementAnchor != RootI)
       return false;
     OrderedRoots.push_back(RootI);
@@ -1521,11 +1549,11 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
   for (size_t i = 0; i < OperationInstruction.size(); ++i) {
     if (Processed[i])
       continue;
+    auto *Real = OperationInstruction[i];
     for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {
       if (Processed[j])
         continue;
-
-      auto *Real = OperationInstruction[i];
+      
       auto *Imag = OperationInstruction[j];
       if (Real->getType() != Imag->getType())
         continue;
@@ -1557,6 +1585,25 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
         break;
       }
     }
+
+    // We want to check that we have 2 operands, but the function attributes
+    // being counted as operands bloats this value.
+    if(Real->getNumOperands() < 2)
+      continue;
+
+    RealPHI = ReductionInfo[Real].first;
+    ImagPHI = nullptr;
+    PHIsFound = false;
+    auto Node = identifyNode(Real->getOperand(0), Real->getOperand(1));
+    if(Node && PHIsFound) {
+      LLVM_DEBUG(dbgs() << "Identified single reduction starting from instruction: "
+                          << *Real << "/" << *ReductionInfo[Real].second << "\n");
+      Processed[i] = true;
+      auto RootNode = prepareCompositeNode(ComplexDeinterleavingOperation::ReductionSingle, Real, nullptr);
+      RootNode->addOperand(Node);
+      RootToNode[Real] = RootNode;
+      submitCompositeNode(RootNode);
+    }
   }
 
   RealPHI = nullptr;
@@ -1564,6 +1611,12 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
 }
 
 bool ComplexDeinterleavingGraph::checkNodes() {
+
+  for (NodePtr N : CompositeNodes) {
+    if (!N->AreOperandsValid())
+      return false;
+  }
+
   // Collect all instructions from roots to leaves
   SmallPtrSet<Instruction *, 16> AllInstructions;
   SmallVector<Instruction *, 8> Worklist;
@@ -1832,7 +1885,7 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
 ComplexDeinterleavingGraph::NodePtr
 ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
                                             Instruction *Imag) {
-  if (Real != RealPHI || Imag != ImagPHI)
+  if (Real != RealPHI || (ImagPHI && Imag != ImagPHI))
     return nullptr;
 
   PHIsFound = true;
@@ -1970,13 +2023,18 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
   case ComplexDeinterleavingOperation::ReductionPHI: {
     // If Operation is ReductionPHI, a new empty PHINode is created.
     // It is filled later when the ReductionOperation is processed.
+    auto *OldPHI = cast<PHINode>(Node->Real);
     auto *VTy = cast<VectorType>(Node->Real->getType());
     auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
     auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHIIt());
-    OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
+    OldToNewPHI[OldPHI] = NewPHI;
     ReplacementNode = NewPHI;
     break;
   }
+  case ComplexDeinterleavingOperation::ReductionSingle:
+    ReplacementNode = replaceNode(Builder, Node->Operands[0]);
+    processReductionSingle(ReplacementNode, Node);
+    break;
   case ComplexDeinterleavingOperation::ReductionOperation:
     ReplacementNode = replaceNode(Builder, Node->Operands[0]);
     processReductionOperation(ReplacementNode, Node);
@@ -2001,6 +2059,37 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
   return ReplacementNode;
 }
 
+void ComplexDeinterleavingGraph::processReductionSingle(Value *OperationReplacement, RawNodePtr Node) {
+  auto *Real = cast<Instruction>(Node->Real);
+  auto *OldPHI = ReductionInfo[Real].first;
+  auto *NewPHI = OldToNewPHI[OldPHI];
+  auto *VTy = cast<VectorType>(Real->getType());
+  auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+  Value *Init = OldPHI->getIncomingValueForBlock(Incoming);
+
+  IRBuilder<> Builder(Incoming->getTerminator());
+
+  Value *NewInit = nullptr;
+  if(auto *C = dyn_cast<Constant>(Init)) {
+    if(C->isZeroValue())
+      NewInit = Constant::getNullValue(NewVTy);
+  }
+
+  if (!NewInit)
+    NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
+                                          {Init, Constant::getNullValue(VTy)});
+
+  NewPHI->addIncoming(NewInit, Incoming);
+  NewPHI->addIncoming(OperationReplacement, BackEdge);
+
+  auto *FinalReduction = ReductionInfo[Real].second;
+  Builder.SetInsertPoint(&*FinalReduction->getParent()->getFirstInsertionPt());
+  // TODO Ensure that the `AddReduce` here matches the original, found in `FinalReduction`
+  auto *AddReduce = Builder.CreateAddReduce(OperationReplacement);
+  FinalReduction->replaceAllUsesWith(AddReduce);
+}
+
 void ComplexDeinterleavingGraph::processReductionOperation(
     Value *OperationReplacement, RawNodePtr Node) {
   auto *Real = cast<Instruction>(Node->Real);
@@ -2060,8 +2149,12 @@ void ComplexDeinterleavingGraph::replaceNodes() {
       auto *RootImag = cast<Instruction>(RootNode->Imag);
       ReductionInfo[RootReal].first->removeIncomingValue(BackEdge);
       ReductionInfo[RootImag].first->removeIncomingValue(BackEdge);
-      DeadInstrRoots.push_back(cast<Instruction>(RootReal));
-      DeadInstrRoots.push_back(cast<Instruction>(RootImag));
+      DeadInstrRoots.push_back(RootReal);
+      DeadInstrRoots.push_back(RootImag);
+    } else if(RootNode->Operation == ComplexDeinterleavingOperation::ReductionSingle) {
+      auto *RootInst = cast<Instruction>(RootNode->Real);
+      ReductionInfo[RootInst].first->removeIncomingValue(BackEdge);
+      DeadInstrRoots.push_back(ReductionInfo[RootInst].second);
     } else {
       assert(R && "Unable to find replacement for RootInstruction");
       DeadInstrRoots.push_back(RootInstruction);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5e5afdb7fa0a6c..8068bb67408814 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29171,6 +29171,8 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
     ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
     Value *Accumulator) const {
   VectorType *Ty = cast<VectorType>(InputA->getType());
+  if (Accumulator == nullptr)
+    Accumulator = Constant::getNullValue(Ty);
   bool IsScalable = Ty->isScalableTy();
   bool IsInt = Ty->getElementType()->isIntegerTy();
 
@@ -29182,6 +29184,7 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
 
   if (TyWidth > 128) {
     int Stride = Ty->getElementCount().getKnownMinValue() / 2;
+    int AccStride = cast<VectorType>(Accumulator->getType())->getElementCount().getKnownMinValue() / 2;
     auto *HalfTy = VectorType::getHalfElementsVectorType(Ty);
     auto *LowerSplitA = B.CreateExtractVector(HalfTy, InputA, B.getInt64(0));
     auto *LowerSplitB = B.CreateExtractVector(HalfTy, InputB, B.getInt64(0));
@@ -29191,25 +29194,23 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
         B.CreateExtractVector(HalfTy, InputB, B.getInt64(Stride));
     Value *LowerSplitAcc = nullptr;
     Value *UpperSplitAcc = nullptr;
-    if (Accumulator) {
-      LowerSplitAcc = B.CreateExtractVector(HalfTy, Accumulator, B.getInt64(0));
+    Type *FullTy = Ty;
+      FullTy = Accumulator->getType();
+      auto *HalfAccTy = VectorType::getHalfElementsVectorType(cast<VectorType>(Accumulator->getType()));
+      LowerSplitAcc = B.CreateExtractVector(HalfAccTy, Accumulator, B.getInt64(0));
       UpperSplitAcc =
-          B.CreateExtractVector(HalfTy, Accumulator, B.getInt64(Stride));
-    }
+          B.CreateExtractVector(HalfAccTy, Accumulator, B.getInt64(AccStride));
     auto *LowerSplitInt = createComplexDeinterleavingIR(
         B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
     auto *UpperSplitInt = createComplexDeinterleavingIR(
         B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
 
-    auto *Result = B.CreateInsertVector(Ty, PoisonValue::get(Ty), LowerSplitInt,
+    auto *Result = B.CreateInsertVector(FullTy, PoisonValue::get(FullTy), LowerSplitInt,
                                         B.getInt64(0));
-    return B.CreateInsertVector(Ty, Result, UpperSplitInt, B.getInt64(Stride));
+    return B.CreateInsertVector(FullTy, Result, UpperSplitInt, B.getInt64(AccStride));
   }
 
   if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
-    if (Accumulator == nullptr)
-      Accumulator = Constant::getNullValue(Ty);
-
     if (IsScalable) {
       if (IsInt)
         return B.CreateIntrinsic(
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll
new file mode 100644
index 00000000000000..6277f9a3842bbe
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve2 -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-unknown-elf"
+
+define i32 @cdotp(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) {
+; CHECK-LABEL: define i32 @cdotp(
+; CHECK-SAME: ptr nocapture noundef readonly [[A:%.*]], ptr nocapture noundef readonly [[B:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
+; CHECK-NEXT:    br i1 [[CMP28_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[DIV27:%.*]] = lshr i32 [[N]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 16
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX_I:%.*]] = shl nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[A_PTR:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX_I]]
+; CHECK-NEXT:    [[A_LOAD:%.*]] = load <vscale x 32 x i8>, ptr [[A_PTR]], align 32
+; CHECK-NEXT:    [[B_PTR:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX_I]]
+; CHECK-NEXT:    [[B_LOAD:%.*]] = load <vscale x 32 x i8>, ptr [[B_PTR]], align 32
+; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[A_LOAD]], i64 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[B_LOAD]], i64 0)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[A_LOAD]], i64 16)
+; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[B_LOAD]], i64 16)
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[TMP11]], i64 0)
+; CHECK-NEXT:    [[TMP13:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[TMP11]], i64 4)
+; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i32 0)
+; CHECK-NEXT:    [[TMP21:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> [[TMP13]], <vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i32 0)
+; CHECK-NEXT:    [[TMP22:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP10]], i64 0)
+; CHECK-NEXT:    [[TMP20]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP21]], i64 4)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP20]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT]]:
+; CHECK-NEXT:    [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], %[[FOR_BODY]] ], [ [[TMP23]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    [[RES_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUB_LCSSA]], %[[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[RES_0_LCSSA]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[RES_030:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUB]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP15]] to i32
+; CHECK-NEXT:    [[TMP16:%.*]] = or disjoint i64 [[TMP14]], 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+; CHECK-NEXT:    [[CONV5:%.*]] = sext i8 [[TMP17]] to i32
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1
+; CHECK-NEXT:    [[CONV10:%.*]] = sext i8 [[TMP18]] to i32
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX14]], align 1
+; CHECK-NEXT:    [[CONV15:%.*]] = sext i8 [[TMP19]] to i32
+; CHECK-NEXT:    [[MUL16:%.*]] = mul nsw i32 [[CONV10]], [[CONV]]
+; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i32 [[MUL16]], [[RES_030]]
+; CHECK-NEXT:    [[MUL18:%.*]] = mul nsw i32 [[CONV15]], [[CONV5]]
+; CHECK-NEXT:    [[SUB]] = sub i32 [[ADD17]], [[MUL18]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+...
[truncated]

github-actions · 2024-10-18T10:10:11Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Also removed prematurely-added test

SamTebbs33 · 2024-10-29T15:44:29Z

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

-  LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
-  assert(R->getType() == I->getType() &&
-         "Real and imaginary parts should not have different types");
+  bool _;


What's this here for?

For one of the sanity-checks elsewhere (https://github.com/llvm/llvm-project/pull/112875/files/b2410688146531936db5f58ed2f0ebf78bf8387a#diff-ebbbd6cbc055d2185b50e106f58ee13188b47cd5fa49f21fb66a9ea82d54b086R993), we check that we're pulling the already-matched node from the cache after unwrapping any casts.
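The bool here acts as an optional out-parameter to identifyNode: the three-argument overload takes `bool &FromCache`, and a temporary cannot bind to a non-const reference, so the two-argument overload needs a named throwaway variable to pass in.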

It's either this approach, or we update every call to identifyNode to have an extra bool variable assigned nearby. (Or we remove the sanity check altogether)

Edit: That said, I could just use a pointer and use a conventional default argument.

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

Remove debug comments

SamTebbs33 · 2024-11-07T10:14:07Z

llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve2 -o - | FileCheck %s


We need some more tests, such as:

No sve, no sve2

More rotations

I don't really understand the caching system, but some tests that test that functionality are needed

More types

And negative tests for other code paths.

NickGuy-Arm · 2024-11-21T12:10:24Z

Ping

llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

SamTebbs33 · 2024-11-25T10:13:47Z

llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

@@ -1,41 +1,100 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve2 -o - | FileCheck %s
+; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve2 -o - | FileCheck %s --check-prefix=CHECK-SVE2


It looks like we're still missing some negative tests. Also, can the caching system be tested at all?

I've added a negative test, but the caching is an implementation detail and is not exposed in any testable way. It's also not new in this PR.

SamTebbs33 · 2024-11-26T16:02:13Z

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

+  if (!I->hasOneUser())
+    return nullptr;


Why can there only be one user? We should have a negative test for it so someone removing the check in the future doesn't think all is well!

The hasOneUser check was to simplify things later, but it's not strictly necessary so I've removed it and fixed the succeeding code to not assume one user.

SamTebbs33 · 2024-11-26T16:03:22Z

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I, bool *FromCache) {


Can FromCache be a reference instead?

I've just removed this parameter altogether. It was only used in one place, and the value of the FromCache parameter was dubious at best.

SamTebbs33 · 2024-11-26T16:09:47Z

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

+  VectorType *RealTy = dyn_cast<VectorType>(R->getType());
+  if (!RealTy)
+    return nullptr;
+  VectorType *ImagTy = dyn_cast<VectorType>(I->getType());
+  if (!ImagTy)
+    return nullptr;
+
+  if (RealTy->isScalableTy() != ImagTy->isScalableTy())
+    return nullptr;
+  if (RealTy->getElementType() != ImagTy->getElementType())
+    return nullptr;


We should have some tests for these.

Added some tests

SamTebbs33

LGTM with one request.

SamTebbs33 · 2024-12-16T17:13:21Z

llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

+  ret i16 %0
+}
+
+define i32 @cdotp_i8_rot0_fixed_length(<32 x i8> %a, <32 x i8> %b) {


This name makes it seem like a positive test so I'd suggest it be changed to not_cdotp_fixed_length.

fhahn · 2024-12-18T14:09:18Z

It looks like this may be causing some buildbot failures: https://lab.llvm.org/buildbot/#/builders/41/builds/4171

Could you please take a look, and revert if it isn't a trivial fix?

FAILED: lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o 
/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/lib/Demangle -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/lib/Demangle -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/include -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/include -mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -std=c++17  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o -MF lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o.d -o lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o -c /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/lib/Demangle/MicrosoftDemangle.cpp
clang++: ../llvm/llvm/lib/IR/Value.cpp:508: void llvm::Value::doRAUW(Value *, ReplaceMetadataUses): Assertion `New->getType() == getType() && "replaceAllUses of value with new value of different type!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/lib/Demangle -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/lib/Demangle -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage2/include -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/include -mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -std=c++17 -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o -MF lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o.d -o lib/Demangle/CMakeFiles/LLVMDemangle.dir/MicrosoftDemangle.cpp.o -c /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/lib/Demangle/MicrosoftDemangle.cpp
1.	<eof> parser at end of file
2.	Code generation
3.	Running pass 'Function Pass Manager' on module '/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/llvm/llvm/lib/Demangle/MicrosoftDemangle.cpp'.
4.	Running pass 'Complex Deinterleaving Pass' on function '@_ZL17guessCharByteSizePKhjm'
 #0 0x0000aaaaba931f20 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8141f20)
 #1 0x0000aaaaba92fe6c llvm::sys::RunSignalHandlers() (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x813fe6c)
 #2 0x0000aaaaba896e80 CrashRecoverySignalHandler(int) CrashRecoveryContext.cpp:0:0
 #3 0x0000ffff9e32c9d0 (linux-vdso.so.1+0x9d0)
 #4 0x0000ffff9de8f200 __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x0000ffff9de4a67c gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x0000ffff9de37130 abort ./stdlib/abort.c:81:7
 #7 0x0000ffff9de43fd0 __assert_fail_base ./assert/assert.c:89:7
 #8 0x0000ffff9de44040 __assert_perror_fail ./assert/assert-perr.c:31:1
 #9 0x0000aaaaba388dc8 llvm::Value::doRAUW(llvm::Value*, llvm::Value::ReplaceMetadataUses) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x7b98dc8)
#10 0x0000aaaab9c994b4 (anonymous namespace)::ComplexDeinterleavingGraph::replaceNode(llvm::IRBuilderBase&, (anonymous namespace)::ComplexDeinterleavingCompositeNode*) ComplexDeinterleavingPass.cpp:0:0
#11 0x0000aaaab9c8cefc (anonymous namespace)::ComplexDeinterleaving::evaluateBasicBlock(llvm::BasicBlock*) ComplexDeinterleavingPass.cpp:0:0
#12 0x0000aaaab9c89910 (anonymous namespace)::ComplexDeinterleaving::runOnFunction(llvm::Function&) ComplexDeinterleavingPass.cpp:0:0
#13 0x0000aaaab9c89ddc (anonymous namespace)::ComplexDeinterleavingLegacyPass::runOnFunction(llvm::Function&) ComplexDeinterleavingPass.cpp:0:0
#14 0x0000aaaaba31bfd0 llvm::FPPassManager::runOnFunction(llvm::Function&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x7b2bfd0)
#15 0x0000aaaaba323e7c llvm::FPPassManager::runOnModule(llvm::Module&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x7b33e7c)
#16 0x0000aaaaba31c950 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x7b2c950)
#17 0x0000aaaabb10eb3c clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x891eb3c)
#18 0x0000aaaabb13463c clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x894463c)
#19 0x0000aaaabccf960c clang::ParseAST(clang::Sema&, bool, bool) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0xa50960c)
#20 0x0000aaaabb52f25c clang::FrontendAction::Execute() (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8d3f25c)
#21 0x0000aaaabb4b64b4 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8cc64b4)
#22 0x0000aaaabb61c204 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8e2c204)
#23 0x0000aaaab8207aac cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x5a17aac)
#24 0x0000aaaab82045f4 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) driver.cpp:0:0
#25 0x0000aaaabb361460 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const::$_0>(long) Job.cpp:0:0
#26 0x0000aaaaba896b84 llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x80a6b84)
#27 0x0000aaaabb3608b4 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8b708b4)
#28 0x0000aaaabb326af0 clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8b36af0)
#29 0x0000aaaabb326ce4 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8b36ce4)
#30 0x0000aaaabb33dd08 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x8b4dd08)
#31 0x0000aaaab82039e8 clang_main(int, char**, llvm::ToolContext const&) (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x5a139e8)
#32 0x0000aaaab8211e28 main (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x5a21e28)
#33 0x0000ffff9de373fc __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#34 0x0000ffff9de374cc call_init ./csu/../csu/libc-start.c:128:20
#35 0x0000ffff9de374cc __libc_start_main ./csu/../csu/libc-start.c:379:5
#36 0x0000aaaab82021f0 _start (/home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin/clang+++0x5a121f0)
clang++: error: clang frontend command failed with exit code 134 (use -v to see invocation)
clang version 20.0.0git (https://github.com/llvm/llvm-project.git e7303fe80a0bea124422219356c1c9e845110a77)
Target: aarch64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla-2stage/stage1.install/bin

antmox · 2024-12-18T15:01:18Z

Also causing the same failures here:
https://lab.llvm.org/buildbot/#/builders/198/builds/334
https://lab.llvm.org/buildbot/#/builders/143/builds/4276
https://lab.llvm.org/buildbot/#/builders/17/builds/4725

antmox · 2024-12-18T15:04:13Z

And
https://lab.llvm.org/buildbot/#/builders/41/builds/4172
https://lab.llvm.org/buildbot/#/builders/4/builds/4281
https://lab.llvm.org/buildbot/#/builders/199/builds/263

#112875)" This reverts commit b3eede5. This has been breaking most AArch64 stage2 builds for 4+ hours, reverting to get the bots back to green. https://lab.llvm.org/buildbot/#/builders/41/builds/4172 https://lab.llvm.org/buildbot/#/builders/4/builds/4281 https://lab.llvm.org/buildbot/#/builders/199/builds/263 https://lab.llvm.org/buildbot/#/builders/198/builds/334 https://lab.llvm.org/buildbot/#/builders/143/builds/4276 https://lab.llvm.org/buildbot/#/builders/17/builds/4725

fhahn · 2024-12-18T15:08:22Z

Reverted for now in 76714be as this has been breaking most stage2 builds on AArch64 for 4+ hours

NickGuy-Arm · 2024-12-18T15:14:33Z

> Reverted for now in 76714be as this has been breaking most stage2 builds on AArch64 for 4+ hours

Thanks, I'm now testing a fix with a local stage2 build. Not sure why I didn't receive the buildbot emails though, so I didn't realise it was failing things at first.

…ss (llvm#112875)" This reverts commit 76714be.

NickGuy-Arm · 2024-12-18T15:30:54Z

Opened a new PR at #120441 to reland this patch with a fix for the build failures.

…rt for single reductions in ComplexDeinterleavingPass (#112875)" (#120441) This reverts commit 76714be, fixing the build failure that caused the revert. The failure stemmed from the complex deinterleaving pass identifying a series of add operations as a "complex to single reduction", so when it tried to transform this erroneously identified pattern, it faulted. The fix applied is to ensure that complex numbers (or patterns that match them) are used throughout, by checking if there is a deinterleave node amidst the graph.

…rt for single reductions in ComplexDeinterleavingPass (llvm#112875)" (llvm#120441) This reverts commit 76714be, fixing the build failure that caused the revert. The failure stemmed from the complex deinterleaving pass identifying a series of add operations as a "complex to single reduction", so when it tried to transform this erroneously identified pattern, it faulted. The fix applied is to ensure that complex numbers (or patterns that match them) are used throughout, by checking if there is a deinterleave node amidst the graph.

fhahn · 2025-05-11T20:29:26Z

llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp

+    auto *Real = OperationInstruction[i];
+    // We want to check that we have 2 operands, but the function attributes
+    // being counted as operands bloats this value.
+    if (Real->getNumOperands() < 2)


Looks like we are missing a check to ensure we only have integer types here. With FP types, we currently crash due to creating llvm.reduce.add reductions for floating-point types. It also doesn't check for the right FP flags, as far as I can tell.

Put up #139469 to bail out

llvmbot added the backend:AArch64 label Oct 18, 2024

NickGuy-Arm requested review from SamTebbs33, igogo-x86, huntergr-arm, sdesmalen-arm and kmclaughlin-arm October 18, 2024 10:22

NickGuy-Arm added 4 commits October 21, 2024 12:53

Add support for single reductions in ComplexDeinterleavingPass

e899c47

Apply clang-format

fb22e22

Remove erroneously added function and call

42fba28

Fix case where it fails to identify unrolled reductions

918312c

Also removed prematurely-added test

NickGuy-Arm force-pushed the complex-deinterleaving/single-reductions branch from aac0ea6 to 918312c Compare October 22, 2024 13:39

NickGuy-Arm added 3 commits October 22, 2024 14:45

Address formatting errors

e492fa4

Add support for complex dot product operations

838dff4

Apply clang-format

b241068

SamTebbs33 reviewed Oct 29, 2024

View reviewed changes

NickGuy-Arm added 2 commits October 29, 2024 16:08

Change identifyNode parameter to use bool* instead of bool&

1bf2f2e

Remove debug comments

Simplify complex-deinterleaving-cdot.ll

b19c99f

NickGuy-Arm requested a review from SamTebbs33 November 6, 2024 10:23

SamTebbs33 reviewed Nov 7, 2024

View reviewed changes

Add additional test cases for cdot

fc68abe

SamTebbs33 reviewed Nov 25, 2024

View reviewed changes

NickGuy-Arm and others added 2 commits November 25, 2024 10:54

Add negative test case for cdot

97eff4d

Merge branch 'llvm:main' into complex-deinterleaving/single-reductions

073fa96

SamTebbs33 reviewed Nov 26, 2024

View reviewed changes

NickGuy-Arm added 2 commits December 10, 2024 15:06

Address comments, and refactor where certain checks are performed

646ffe3

Fix formatting

b3550a5

NickGuy-Arm force-pushed the complex-deinterleaving/single-reductions branch from ccce001 to b3550a5 Compare December 10, 2024 15:51

SamTebbs33 approved these changes Dec 16, 2024

View reviewed changes

Update test name

0329be6

NickGuy-Arm merged commit b3eede5 into llvm:main Dec 18, 2024
8 checks passed

NickGuy-Arm added a commit to NickGuy-Arm/llvm-project that referenced this pull request Dec 18, 2024

Reapply "Add support for single reductions in ComplexDeinterleavingPa…

ac40f76

…ss (llvm#112875)" This reverts commit 76714be.

fhahn reviewed May 11, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add support for single reductions in ComplexDeinterleavingPass #112875

Add support for single reductions in ComplexDeinterleavingPass #112875

NickGuy-Arm commented Oct 18, 2024

llvmbot commented Oct 18, 2024

github-actions bot commented Oct 18, 2024 •

edited

Loading

SamTebbs33 Oct 29, 2024

NickGuy-Arm Oct 29, 2024 •

edited

Loading

SamTebbs33 Nov 7, 2024

NickGuy-Arm commented Nov 21, 2024

SamTebbs33 Nov 25, 2024

NickGuy-Arm Nov 25, 2024

SamTebbs33 Nov 26, 2024

NickGuy-Arm Dec 10, 2024

SamTebbs33 Nov 26, 2024

NickGuy-Arm Dec 10, 2024

SamTebbs33 Nov 26, 2024

NickGuy-Arm Dec 10, 2024

SamTebbs33 left a comment

SamTebbs33 Dec 16, 2024

fhahn commented Dec 18, 2024

antmox commented Dec 18, 2024

antmox commented Dec 18, 2024

fhahn commented Dec 18, 2024

NickGuy-Arm commented Dec 18, 2024

NickGuy-Arm commented Dec 18, 2024

fhahn May 11, 2025

		@@ -0,0 +1,58 @@
		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
		; RUN: opt -S --passes=complex-deinterleaving %s --mattr=+sve2 -o - | FileCheck %s

Add support for single reductions in ComplexDeinterleavingPass #112875

Add support for single reductions in ComplexDeinterleavingPass #112875

Conversation

NickGuy-Arm commented Oct 18, 2024

llvmbot commented Oct 18, 2024

github-actions bot commented Oct 18, 2024 • edited Loading

Choose a reason for hiding this comment

NickGuy-Arm Oct 29, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

NickGuy-Arm commented Nov 21, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

SamTebbs33 left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

fhahn commented Dec 18, 2024

antmox commented Dec 18, 2024

antmox commented Dec 18, 2024

fhahn commented Dec 18, 2024

NickGuy-Arm commented Dec 18, 2024

NickGuy-Arm commented Dec 18, 2024

Choose a reason for hiding this comment

github-actions bot commented Oct 18, 2024 •

edited

Loading

NickGuy-Arm Oct 29, 2024 •

edited

Loading