Skip to content

8343597: C2 SuperWord: RelaxedMath for faster float reductions #21895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 32 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c12bad8
JDK-8343597
eme64 Nov 5, 2024
dd34d40
first part of intrinsification
eme64 Nov 5, 2024
234b81a
intrinsify with regular F/D add/mul
eme64 Nov 13, 2024
defb777
add RelaxedMathAddF - but does not yet vectorize...
eme64 Nov 13, 2024
f9e42af
vectorizes, but still with strict reduction
eme64 Nov 13, 2024
f066bcf
no_strict_order generated, moved out of loop, but turned into strict …
eme64 Nov 14, 2024
deaadf8
also non-strict when moved out of loop
eme64 Nov 14, 2024
5c8783b
Merge branch 'master' into JDK-8343597-RelaxedMath-reductions
eme64 Jan 14, 2025
86340bc
refactor part 1
eme64 Jan 14, 2025
94ca7c9
add RelaxedMathOptimizationMode everywhere
eme64 Jan 14, 2025
c47c4de
refactor
eme64 Jan 14, 2025
1a91607
Working again!
eme64 Jan 14, 2025
680ddc3
hash, cmp, dump_spec, etc
eme64 Jan 15, 2025
40c0b8a
restore newline
eme64 Jan 15, 2025
d132ed2
fix guards
eme64 Jan 20, 2025
cce0736
test stub
eme64 Jan 20, 2025
4d6e163
Merge branch 'master' into JDK-8343597-RelaxedMath-reductions
eme64 Jan 20, 2025
b66908f
basic test stub standing and compiling
eme64 Jan 20, 2025
93d05e1
test stub: IR rule and randomness
eme64 Jan 20, 2025
e83ced0
float add reduction: strict, default and reordered
eme64 Jan 20, 2025
3f3dcd7
inline mode fixed
eme64 Jan 20, 2025
7c170f0
rm mode load from field - does not constant fold during parsing
eme64 Jan 20, 2025
6a5254c
benchmark
eme64 Jan 20, 2025
c3c0d09
fix benchmark
eme64 Jan 20, 2025
5e51876
update benchmark
eme64 Jan 20, 2025
639c0a9
fix benchmark again
eme64 Jan 20, 2025
ee40d16
fix build of benchmark
eme64 Jan 21, 2025
4ce71b2
more benchmarks
eme64 Jan 21, 2025
a8d1399
fix double arguments
eme64 Jan 21, 2025
6947c14
simple reduction tests
eme64 Jan 21, 2025
f92393d
rename tests
eme64 Jan 21, 2025
e4b87d1
fix last commit
eme64 Jan 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions make/test/BuildMicrobenchmark.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
--add-exports java.base/jdk.internal.classfile.impl=ALL-UNNAMED \
--add-exports java.base/jdk.internal.event=ALL-UNNAMED \
--add-exports java.base/jdk.internal.foreign=ALL-UNNAMED \
--add-exports java.base/jdk.internal.math=ALL-UNNAMED \
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
--add-exports java.base/jdk.internal.util=ALL-UNNAMED \
--add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_minF_strict:
case vmIntrinsics::_maxD_strict:
case vmIntrinsics::_minD_strict:
case vmIntrinsics::_RelaxedMath_float_add:
case vmIntrinsics::_RelaxedMath_float_mul:
case vmIntrinsics::_RelaxedMath_double_add:
case vmIntrinsics::_RelaxedMath_double_mul:
if (!InlineMathNatives) return true;
break;
case vmIntrinsics::_fmaD:
Expand Down
10 changes: 10 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ class methodHandle;
do_name(copySign_name, "copySign") \
do_name(signum_name,"signum") \
do_name(expand_name,"expand") \
do_name(add_name,"add") \
do_name(mul_name,"mul") \
\
do_intrinsic(_dabs, java_lang_Math, abs_name, double_double_signature, F_S) \
do_intrinsic(_fabs, java_lang_Math, abs_name, float_float_signature, F_S) \
Expand Down Expand Up @@ -232,6 +234,14 @@ class methodHandle;
do_name( floatToFloat16_name, "floatToFloat16") \
do_signature(float_f16_signature, "(F)S") \
\
do_class(jdk_internal_math_RelaxedMath, "jdk/internal/math/RelaxedMath") \
do_signature(RelaxedMath_float_signature, "(FFI)F") \
do_signature(RelaxedMath_double_signature, "(DDI)D") \
do_intrinsic(_RelaxedMath_float_add, jdk_internal_math_RelaxedMath, add_name, RelaxedMath_float_signature, F_S) \
do_intrinsic(_RelaxedMath_float_mul, jdk_internal_math_RelaxedMath, mul_name, RelaxedMath_float_signature, F_S) \
do_intrinsic(_RelaxedMath_double_add, jdk_internal_math_RelaxedMath, add_name, RelaxedMath_double_signature, F_S) \
do_intrinsic(_RelaxedMath_double_mul, jdk_internal_math_RelaxedMath, mul_name, RelaxedMath_double_signature, F_S) \
\
do_intrinsic(_compareUnsigned_i, java_lang_Integer, compareUnsigned_name, int2_int_signature, F_S) \
do_intrinsic(_compareUnsigned_l, java_lang_Long, compareUnsigned_name, long2_int_signature, F_S) \
do_name( compareUnsigned_name, "compareUnsigned") \
Expand Down
30 changes: 30 additions & 0 deletions src/hotspot/share/opto/addnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,21 @@ const Type *AddLNode::add_ring( const Type *t0, const Type *t1 ) const {
return TypeLong::make( lo, hi, MAX2(r0->_widen,r1->_widen) );
}

// GVN identity support for AddFNode: the relaxed-math optimization mode is
// part of this node's identity, so hash() and cmp() must both account for it.
// Otherwise two AddF nodes that differ only in mode could be value-numbered
// into one node, losing the relaxed/strict distinction.
uint AddFNode::hash() const {
  const uint base_hash = AddNode::hash();
  return base_hash + _optimization_mode.mode();
}

bool AddFNode::cmp(const Node& n) const {
  // Base comparison first (preserves short-circuit behavior), then
  // require the relaxed-math modes to agree as well.
  if (!AddNode::cmp(n)) {
    return false;
  }
  return relaxed_math_optimization_mode().cmp(n.relaxed_math_optimization_mode());
}

#ifndef PRODUCT
// Debug printing: append the relaxed-math mode to the standard node dump.
void AddFNode::dump_spec(outputStream* st) const {
  AddNode::dump_spec(st);
  _optimization_mode.dump_on(st);
}
#endif

//=============================================================================
//------------------------------add_of_identity--------------------------------
Expand Down Expand Up @@ -556,6 +571,21 @@ Node *AddFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
return commute(phase, this) ? this : nullptr;
}

// GVN identity support for AddDNode: fold the relaxed-math optimization mode
// into hash() and cmp() so nodes that differ only in mode are never merged
// by value numbering.
uint AddDNode::hash() const {
  const uint base_hash = AddNode::hash();
  return base_hash + _optimization_mode.mode();
}

bool AddDNode::cmp(const Node& n) const {
  // Base comparison first (preserves short-circuit behavior), then
  // require the relaxed-math modes to agree as well.
  if (!AddNode::cmp(n)) {
    return false;
  }
  return relaxed_math_optimization_mode().cmp(n.relaxed_math_optimization_mode());
}

#ifndef PRODUCT
// Debug printing: append the relaxed-math mode to the standard node dump.
void AddDNode::dump_spec(outputStream* st) const {
  AddNode::dump_spec(st);
  _optimization_mode.dump_on(st);
}
#endif

//=============================================================================
//------------------------------add_of_identity--------------------------------
Expand Down
21 changes: 18 additions & 3 deletions src/hotspot/share/opto/addnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@ typedef const Pair<Node*, jint> ConstAddOperands;
// all inherited from this class. The various identity values are supplied
// by virtual functions.
class AddNode : public Node {
virtual uint hash() const;
public:
AddNode( Node *in1, Node *in2 ) : Node(nullptr,in1,in2) {
init_class_id(Class_Add);
}

virtual uint hash() const;

// Handle algebraic identities here. If we have an identity, return the Node
// we are equivalent to. We look for "add of zero" as an identity.
virtual Node* Identity(PhaseGVN* phase);
Expand Down Expand Up @@ -125,7 +126,11 @@ class AddLNode : public AddNode {
// Add 2 floats
class AddFNode : public AddNode {
public:
AddFNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
const RelaxedMathOptimizationMode _optimization_mode;
AddFNode(Node* in1, Node* in2, RelaxedMathOptimizationMode optimization_mode) :
AddNode(in1, in2), _optimization_mode(optimization_mode) {}
virtual uint hash() const;
virtual bool cmp(const Node& n) const;
virtual int Opcode() const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
Expand All @@ -136,13 +141,20 @@ class AddFNode : public AddNode {
int min_opcode() const { return Op_MinF; }
virtual Node* Identity(PhaseGVN* phase) { return this; }
virtual uint ideal_reg() const { return Op_RegF; }
virtual uint size_of() const { return sizeof(*this); }
virtual const RelaxedMathOptimizationMode& relaxed_math_optimization_mode() const { return _optimization_mode; }
NOT_PRODUCT( virtual void dump_spec(outputStream* st) const; )
};

//------------------------------AddDNode---------------------------------------
// Add 2 doubles
class AddDNode : public AddNode {
public:
AddDNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
const RelaxedMathOptimizationMode _optimization_mode;
AddDNode(Node* in1, Node* in2, RelaxedMathOptimizationMode optimization_mode) :
AddNode(in1, in2), _optimization_mode(optimization_mode) {}
virtual uint hash() const;
virtual bool cmp(const Node& n) const;
virtual int Opcode() const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
Expand All @@ -153,6 +165,9 @@ class AddDNode : public AddNode {
int min_opcode() const { return Op_MinD; }
virtual Node* Identity(PhaseGVN* phase) { return this; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual uint size_of() const { return sizeof(*this); }
virtual const RelaxedMathOptimizationMode& relaxed_math_optimization_mode() const { return _optimization_mode; }
NOT_PRODUCT( virtual void dump_spec(outputStream* st) const; )
};

//------------------------------AddPNode---------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,10 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_doubleToRawLongBits:
case vmIntrinsics::_doubleToLongBits:
case vmIntrinsics::_longBitsToDouble:
case vmIntrinsics::_RelaxedMath_float_add:
case vmIntrinsics::_RelaxedMath_float_mul:
case vmIntrinsics::_RelaxedMath_double_add:
case vmIntrinsics::_RelaxedMath_double_mul:
case vmIntrinsics::_Reference_get:
case vmIntrinsics::_Reference_refersTo0:
case vmIntrinsics::_PhantomReference_refersTo0:
Expand Down
6 changes: 4 additions & 2 deletions src/hotspot/share/opto/divnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,8 @@ Node *DivFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
assert( frexp((double)reciprocal, &exp) == 0.5, "reciprocal should be power of 2" );

// return multiplication by the reciprocal
return (new MulFNode(in(1), phase->makecon(TypeF::make(reciprocal))));
RelaxedMathOptimizationMode mode = RelaxedMathOptimizationMode::make_default();
return (new MulFNode(in(1), phase->makecon(TypeF::make(reciprocal)), mode));
}

//=============================================================================
Expand Down Expand Up @@ -878,7 +879,8 @@ Node *DivDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
assert( frexp(reciprocal, &exp) == 0.5, "reciprocal should be power of 2" );

// return multiplication by the reciprocal
return (new MulDNode(in(1), phase->makecon(TypeD::make(reciprocal))));
RelaxedMathOptimizationMode mode = RelaxedMathOptimizationMode::make_default();
return (new MulDNode(in(1), phase->makecon(TypeF::make(reciprocal)), mode));
}

//=============================================================================
Expand Down
69 changes: 68 additions & 1 deletion src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,11 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_doubleIsFinite:
case vmIntrinsics::_doubleIsInfinite: return inline_fp_range_check(intrinsic_id());

case vmIntrinsics::_RelaxedMath_float_add:
case vmIntrinsics::_RelaxedMath_float_mul:
case vmIntrinsics::_RelaxedMath_double_add:
case vmIntrinsics::_RelaxedMath_double_mul: return inline_relaxed_math(intrinsic_id());

case vmIntrinsics::_numberOfLeadingZeros_i:
case vmIntrinsics::_numberOfLeadingZeros_l:
case vmIntrinsics::_numberOfTrailingZeros_i:
Expand Down Expand Up @@ -1815,7 +1820,8 @@ bool LibraryCallKit::inline_math_pow() {
if (d->getd() == 2.0) {
// Special case: pow(x, 2.0) => x * x
Node* base = round_double_node(argument(0));
set_result(_gvn.transform(new MulDNode(base, base)));
RelaxedMathOptimizationMode mode = RelaxedMathOptimizationMode::make_default();
set_result(_gvn.transform(new MulDNode(base, base, mode)));
return true;
} else if (d->getd() == 0.5 && Matcher::match_rule_supported(Op_SqrtD)) {
// Special case: pow(x, 0.5) => sqrt(x)
Expand Down Expand Up @@ -5017,6 +5023,67 @@ bool LibraryCallKit::inline_fp_range_check(vmIntrinsics::ID id) {
return true;
}

// Intrinsify jdk.internal.math.RelaxedMath.add/mul(float|double, ..., int mode):
// replace the call with a plain AddF/MulF/AddD/MulD node carrying a
// RelaxedMathOptimizationMode, so later phases (e.g. SuperWord reductions)
// can reassociate where the mode allows it. Returns true on success.
bool LibraryCallKit::inline_relaxed_math(vmIntrinsics::ID id) {
  Node* n1;     // first operand
  Node* n2;     // second operand
  Node* mode_n; // the int "mode" argument

  // Fetch the incoming arguments. Slot layout differs between the float
  // and double variants because doubles occupy two stack slots each.
  switch (id) {
  case vmIntrinsics::_RelaxedMath_float_add:
  case vmIntrinsics::_RelaxedMath_float_mul:
    n1 = argument(0);
    n2 = argument(1);
    mode_n = argument(2);
    break;
  case vmIntrinsics::_RelaxedMath_double_add:
  case vmIntrinsics::_RelaxedMath_double_mul:
    // Double arguments take 2 stack slots each (see pop_pair).
    // NOTE(review): unlike inline_math_pow, the double operands are not
    // passed through round_double_node() here — confirm that is intended.
    n1 = argument(0);
    n2 = argument(2);
    mode_n = argument(4);
    break;
  default:
    // fatal_unexpected_iid does not return; n1/n2/mode_n stay unset only
    // on this dead path.
    fatal_unexpected_iid(id);
    break;
  }

  // Extract the RelaxedMathOptimizationMode (Default = 0).
  // Note: it must be a constant already now, so no final field loads
  // allowed. We could make this smarter but it might require us to
  // add additional nodes, so that we can capture the mode to constant
  // fold later during IGVN.
  const TypeInt* mode_t = gvn().type(mode_n)->is_int();
  jint mode_con = 0; // non-constant or out-of-range modes fall back to Default
  if (mode_t->is_con()) {
    jint con = mode_t->get_con();
    // Only accept known mode values; 3 is assumed to be the largest valid
    // mode constant — TODO confirm against RelaxedMathOptimizationMode.
    if (0 <= con && con <= 3) {
      mode_con = con;
    }
  }
  RelaxedMathOptimizationMode mode(mode_con);

  // Build the arithmetic node that replaces the intrinsic call, tagged
  // with the extracted mode.
  Node* result = nullptr;
  switch (id) {
  case vmIntrinsics::_RelaxedMath_float_add:
    result = new AddFNode(n1, n2, mode);
    break;
  case vmIntrinsics::_RelaxedMath_float_mul:
    result = new MulFNode(n1, n2, mode);
    break;
  case vmIntrinsics::_RelaxedMath_double_add:
    result = new AddDNode(n1, n2, mode);
    break;
  case vmIntrinsics::_RelaxedMath_double_mul:
    result = new MulDNode(n1, n2, mode);
    break;
  default:
    fatal_unexpected_iid(id);
    break;
  }
  // GVN-transform the new node and make it the call's result.
  set_result(_gvn.transform(result));
  return true;
}

//----------------------inline_unsafe_copyMemory-------------------------
// public native void Unsafe.copyMemory0(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);

Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ class LibraryCallKit : public GraphKit {
bool inline_onspinwait();
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_fp_range_check(vmIntrinsics::ID id);
bool inline_relaxed_math(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_bitshuffle_methods(vmIntrinsics::ID id);
bool inline_compare_unsigned(vmIntrinsics::ID id);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/loopopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4668,7 +4668,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) {

// Create post-loop reduction.
Node* last_accumulator = phi->in(2);
Node* post_loop_reduction = ReductionNode::make(sopc, nullptr, init, last_accumulator, bt);
Node* post_loop_reduction = ReductionNode::make(sopc, nullptr, init, last_accumulator, bt, false);

// Take over uses of last_accumulator that are not in the loop.
for (DUIterator i = last_accumulator->outs(); last_accumulator->has_out(i); i++) {
Expand Down
36 changes: 34 additions & 2 deletions src/hotspot/share/opto/mulnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,22 @@ Node *MulLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
return res; // Return final result
}

// GVN identity support for MulFNode: the relaxed-math optimization mode is
// part of this node's identity, so it participates in hash() and cmp().
// This keeps nodes with different modes from being value-numbered together.
uint MulFNode::hash() const {
  const uint base_hash = MulNode::hash();
  return base_hash + _optimization_mode.mode();
}

bool MulFNode::cmp(const Node& n) const {
  // Base comparison first (preserves short-circuit behavior), then
  // require the relaxed-math modes to agree as well.
  if (!MulNode::cmp(n)) {
    return false;
  }
  return relaxed_math_optimization_mode().cmp(n.relaxed_math_optimization_mode());
}

#ifndef PRODUCT
// Debug printing: append the relaxed-math mode to the standard node dump.
void MulFNode::dump_spec(outputStream* st) const {
  MulNode::dump_spec(st);
  _optimization_mode.dump_on(st);
}
#endif

//=============================================================================
//------------------------------mul_ring---------------------------------------
// Compute the product type of two double ranges into this node.
Expand All @@ -534,12 +550,28 @@ Node* MulFNode::Ideal(PhaseGVN* phase, bool can_reshape) {
// x * 2 -> x + x
if (t2 != nullptr && t2->getf() == 2) {
Node* base = in(1);
return new AddFNode(base, base);
return new AddFNode(base, base, _optimization_mode);
}

return MulNode::Ideal(phase, can_reshape);
}

// GVN identity support for MulDNode: fold the relaxed-math optimization mode
// into hash() and cmp() so nodes that differ only in mode are never merged
// by value numbering.
uint MulDNode::hash() const {
  const uint base_hash = MulNode::hash();
  return base_hash + _optimization_mode.mode();
}

bool MulDNode::cmp(const Node& n) const {
  // Base comparison first (preserves short-circuit behavior), then
  // require the relaxed-math modes to agree as well.
  if (!MulNode::cmp(n)) {
    return false;
  }
  return relaxed_math_optimization_mode().cmp(n.relaxed_math_optimization_mode());
}

#ifndef PRODUCT
// Debug printing: append the relaxed-math mode to the standard node dump.
void MulDNode::dump_spec(outputStream* st) const {
  MulNode::dump_spec(st);
  _optimization_mode.dump_on(st);
}
#endif

//=============================================================================
//------------------------------mul_ring---------------------------------------
// Compute the product type of two double ranges into this node.
Expand All @@ -557,7 +589,7 @@ Node* MulDNode::Ideal(PhaseGVN* phase, bool can_reshape) {
// x * 2 -> x + x
if (t2 != nullptr && t2->getd() == 2) {
Node* base = in(1);
return new AddDNode(base, base);
return new AddDNode(base, base, _optimization_mode);
}

return MulNode::Ideal(phase, can_reshape);
Expand Down
Loading
Loading