Skip to content

[CodeGen][NPM] Port InitUndef to NPM #138495

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: users/optimisan/pb/optimized-ra-pipeline
Choose a base branch
from

Conversation

optimisan
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 5, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-backend-aarch64

Author: Akshat Oke (optimisan)

Changes

Patch is 24.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138495.diff

12 Files Affected:

  • (added) llvm/include/llvm/CodeGen/InitUndef.h (+24)
  • (modified) llvm/include/llvm/InitializePasses.h (+1-1)
  • (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1)
  • (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1)
  • (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1)
  • (modified) llvm/lib/CodeGen/InitUndef.cpp (+35-15)
  • (modified) llvm/lib/Passes/PassBuilder.cpp (+1)
  • (modified) llvm/test/CodeGen/AArch64/init-undef.mir (+3)
  • (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+2-2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir (+2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir (+1)
  • (modified) llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir (+1)
diff --git a/llvm/include/llvm/CodeGen/InitUndef.h b/llvm/include/llvm/CodeGen/InitUndef.h
new file mode 100644
index 0000000000000..be1cf4bfc9872
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InitUndef.h
@@ -0,0 +1,24 @@
+//===- llvm/CodeGen/InitUndef.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INITUNDEF_H
+#define LLVM_CODEGEN_INITUNDEF_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class InitUndefPass : public PassInfoMixin<InitUndefPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_INITUNDEF_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index bff0526d4177a..07dc86c6fccf2 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -311,7 +311,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
 void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &);
 void initializeTypeBasedAAWrapperPassPass(PassRegistry &);
 void initializeTypePromotionLegacyPass(PassRegistry &);
-void initializeInitUndefPass(PassRegistry &);
+void initializeInitUndefLegacyPass(PassRegistry &);
 void initializeUniformityInfoWrapperPassPass(PassRegistry &);
 void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &);
 void initializeUnpackMachineBundlesPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 982bb16e71eab..351ef63af05c0 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -43,6 +43,7 @@
 #include "llvm/CodeGen/GlobalMerge.h"
 #include "llvm/CodeGen/GlobalMergeFunctions.h"
 #include "llvm/CodeGen/IndirectBrExpand.h"
+#include "llvm/CodeGen/InitUndef.h"
 #include "llvm/CodeGen/InterleavedAccess.h"
 #include "llvm/CodeGen/InterleavedLoadCombine.h"
 #include "llvm/CodeGen/JMCInstrumenter.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index c69573ee3ed97..436b26852ce90 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
 MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass())
+MACHINE_FUNCTION_PASS("init-undef", InitUndefPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
@@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass)
 DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
 DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass)
 DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass)
-DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass)
 DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass)
 DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass)
 DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5250534d8a4e4..aa3591cb6be58 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -54,7 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeIfConverterPass(Registry);
   initializeImplicitNullChecksPass(Registry);
   initializeIndirectBrExpandLegacyPassPass(Registry);
-  initializeInitUndefPass(Registry);
+  initializeInitUndefLegacyPass(Registry);
   initializeInterleavedLoadCombinePass(Registry);
   initializeInterleavedAccessPass(Registry);
   initializeJMCInstrumenterPass(Registry);
diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp
index 6c0e9f9e930b9..500a73be7c0f5 100644
--- a/llvm/lib/CodeGen/InitUndef.cpp
+++ b/llvm/lib/CodeGen/InitUndef.cpp
@@ -38,6 +38,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/InitUndef.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/DetectDeadLanes.h"
@@ -59,20 +60,12 @@ using namespace llvm;
 
 namespace {
 
-class InitUndef : public MachineFunctionPass {
-  const TargetInstrInfo *TII;
-  MachineRegisterInfo *MRI;
-  const TargetSubtargetInfo *ST;
-  const TargetRegisterInfo *TRI;
-
-  // Newly added vregs, assumed to be fully rewritten
-  SmallSet<Register, 8> NewRegs;
-  SmallVector<MachineInstr *, 8> DeadInsts;
-
+class InitUndefLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  InitUndef() : MachineFunctionPass(ID) {}
+  InitUndefLegacy() : MachineFunctionPass(ID) {}
+
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -81,6 +74,20 @@ class InitUndef : public MachineFunctionPass {
   }
 
   StringRef getPassName() const override { return INIT_UNDEF_NAME; }
+};
+
+class InitUndef {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+  const TargetSubtargetInfo *ST;
+  const TargetRegisterInfo *TRI;
+
+  // Newly added vregs, assumed to be fully rewritten
+  SmallSet<Register, 8> NewRegs;
+  SmallVector<MachineInstr *, 8> DeadInsts;
+
+public:
+  bool run(MachineFunction &MF);
 
 private:
   bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -93,9 +100,9 @@ class InitUndef : public MachineFunctionPass {
 
 } // end anonymous namespace
 
-char InitUndef::ID = 0;
-INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false)
-char &llvm::InitUndefID = InitUndef::ID;
+char InitUndefLegacy::ID = 0;
+INITIALIZE_PASS(InitUndefLegacy, DEBUG_TYPE, INIT_UNDEF_NAME, false, false)
+char &llvm::InitUndefID = InitUndefLegacy::ID;
 
 static bool isEarlyClobberMI(MachineInstr &MI) {
   return llvm::any_of(MI.all_defs(), [](const MachineOperand &DefMO) {
@@ -246,7 +253,20 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
   return Changed;
 }
 
-bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
+bool InitUndefLegacy::runOnMachineFunction(MachineFunction &MF) {
+  return InitUndef().run(MF);
+}
+
+PreservedAnalyses InitUndefPass::run(MachineFunction &MF,
+                                     MachineFunctionAnalysisManager &MFAM) {
+  if (!InitUndef().run(MF))
+    return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+bool InitUndef::run(MachineFunction &MF) {
   ST = &MF.getSubtarget();
 
   // The pass is only needed if early-clobber defs and undef ops cannot be
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 7740f622ede7c..5f7ce13ad8a3e 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -103,6 +103,7 @@
 #include "llvm/CodeGen/GlobalMergeFunctions.h"
 #include "llvm/CodeGen/HardwareLoops.h"
 #include "llvm/CodeGen/IndirectBrExpand.h"
+#include "llvm/CodeGen/InitUndef.h"
 #include "llvm/CodeGen/InterleavedAccess.h"
 #include "llvm/CodeGen/InterleavedLoadCombine.h"
 #include "llvm/CodeGen/JMCInstrumenter.h"
diff --git a/llvm/test/CodeGen/AArch64/init-undef.mir b/llvm/test/CodeGen/AArch64/init-undef.mir
index c9d23006d3523..cee41c053251d 100644
--- a/llvm/test/CodeGen/AArch64/init-undef.mir
+++ b/llvm/test/CodeGen/AArch64/init-undef.mir
@@ -2,6 +2,9 @@
 # RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -run-pass=init-undef -o - %s | FileCheck %s
 # RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -run-pass=init-undef -o - %s | FileCheck %s
 
+# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -passes=init-undef -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -passes=init-undef -o - %s | FileCheck %s
+
 ---
 name:            test_stxp_undef
 body:             |
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 91c15565762de..e00b7ff83e322 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -9,9 +9,9 @@
 
 ; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
 
-; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
 
-; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expa...
[truncated]

@optimisan optimisan force-pushed the users/optimisan/pb/optimized-ra-pipeline branch from 5f81915 to dc9a316 Compare May 12, 2025 09:11
@optimisan optimisan force-pushed the users/optimisan/port/init-undef branch from 8b0a590 to 4c9a533 Compare May 12, 2025 09:11
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants