Skip to content

Commit 63b73b2

Browse files
authored
Merge pull request mfem#1047 from mfem/x86
Intrinsic for the high-performance templated operator [x86]
2 parents 261fd7a + 6cb5a2a commit 63b73b2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+2690
-438
lines changed

.travis.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111

1212
language: cpp
1313

14-
sudo: false
15-
1614
stages:
1715
- checks
1816
- tests
@@ -370,8 +368,10 @@ script:
370368
# Compiler
371369
- if [ $MPI == "YES" ]; then
372370
export MYCXX=mpic++;
371+
export MAKE_CXX_FLAG=MPICXX=$MYCXX;
373372
else
374373
export MYCXX="$CXX";
374+
export MAKE_CXX_FLAG=CXX=$MYCXX;
375375
fi
376376

377377
# Print the compiler version
@@ -384,12 +384,9 @@ script:
384384
if [ "$CODECOV" == "YES" ]; then
385385
CPPFLAGS="--coverage -g";
386386
fi;
387-
if [ "$CXX" == "clang++" ]; then
388-
export MFEM_PERF_SW=clang;
389-
fi
390387

391388
# Configure the library
392-
- make config MFEM_USE_MPI=$MPI MFEM_DEBUG=$DEBUG MFEM_CXX="$MYCXX"
389+
- make config MFEM_USE_MPI=$MPI MFEM_DEBUG=$DEBUG $MAKE_CXX_FLAG
393390
MFEM_MPI_NP=$NPROCS CPPFLAGS="$CPPFLAGS"
394391
# Show the configuration
395392
- make info

CHANGELOG

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,17 @@ Meshing improvements
2323
Hessian for r-adaptivity using discrete fields, and allows use of skewness
2424
and orientation based metrics.
2525

26-
Improved GPU capabilities
27-
-------------------------
26+
Performance improvements
27+
------------------------
28+
- Added support for explicit vectorization in the high-performance templated
29+
code, which can now take advantage of specific intrinsics classes on the
30+
following architectures:
31+
- x86 (SSE/AVX/AVX2/AVX512),
32+
- Power8 & Power9 (VSX),
33+
- BG/Q (QPX).
34+
These are now enabled by default, and can be disabled with MFEM_USE_SIMD=NO.
35+
See the new file linalg/simd.hpp and the new directory linalg/simd.
36+
2837
- Added support for Chebyshev accelerated polynomial smoother on GPU.
2938

3039
Discretization improvements
@@ -57,10 +66,17 @@ Linear and nonlinear solvers
5766
- Added initial support for h- and p-multigrid solvers and preconditioners for
5867
matrix-based and matrix-free discretizations with basic GPU capability.
5968

69+
- Added a new IterativeSolverMonitor class that allows monitoring the residual
70+
and solution during the solving process of an IterativeSolver after every
71+
iteration.
72+
6073
- Block arrays of parallel matrices can now be merged into a single parallel
6174
matrix with the function HypreParMatrixFromBlocks. This could be useful for
6275
solving block systems with parallel direct solvers such as STRUMPACK.
6376

77+
- In SLISolver, changed the residual inner product from (Br,r) to (Br,Br) so the
78+
solver can work with a non-SPD preconditioner B.
79+
6480
New and updated examples and miniapps
6581
-------------------------------------
6682
- Added a new example, Example 25/25p, to demonstrate the use of a Perfectly
@@ -98,9 +114,6 @@ Improved testing
98114

99115
Miscellaneous
100116
-------------
101-
- In SLISolver, changed the residual inner product from (Br,r) to (Br,Br) so the
102-
solver can work with non-SPD preconditioner B.
103-
104117
- Added support for ADIOS2 for parallel I/O with ParaView visualization. The
105118
classes adios2stream and ADIOS2DataCollection are introduced in mfem as the
106119
interfaces to generate ADIOS2 Binary Pack (BP4) directory datasets for the
@@ -110,9 +123,8 @@ Miscellaneous
110123
- The integration order used in the ComputeLpError and ComputeElementLpError
111124
methods of class GridFunction has been increased.
112125

113-
- Added a new IterativeSolverMonitor class that allows to monitor the residual
114-
and solution during the solving process of an IterativeSolver after every
115-
iteration.
126+
- Various other simplifications, extensions, and bugfixes in the code.
127+
116128

117129
Version 4.1, released on March 10, 2020
118130
=======================================

INSTALL

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,12 @@ MFEM_USE_SIDRE = YES/NO
396396
blueprint specification. When enabled, this option requires installation of
397397
HDF5 (see also MFEM_USE_NETCDF), Conduit and LLNL's axom project.
398398

399+
MFEM_USE_SIMD = YES/NO
400+
Enables the high-performance templated classes to use architecture-dependent
401+
SIMD intrinsics instead of the generic implementation of class AutoSIMD in
402+
linalg/simd/auto.hpp. This option should be combined with suitable
403+
compiler options, such as -march=native, to enable optimal vectorization.
404+
399405
MFEM_USE_CONDUIT = YES/NO
400406
Enables support for converting MFEM Mesh and Grid Function objects to and
401407
from Conduit Mesh Blueprint Descriptions (https://github.com/LLNL/conduit/)

config/cmake/MFEMConfig.cmake.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ set(MFEM_USE_OCCA @MFEM_USE_OCCA@)
4747
set(MFEM_USE_RAJA @MFEM_USE_RAJA@)
4848
set(MFEM_USE_CEED @MFEM_USE_CEED@)
4949
set(MFEM_USE_UMPIRE @MFEM_USE_UMPIRE@)
50+
set(MFEM_USE_SIMD @MFEM_USE_SIMD@)
5051
set(MFEM_USE_ADIOS2 @MFEM_USE_ADIOS2@)
5152

5253
set(MFEM_CXX_COMPILER "@CMAKE_CXX_COMPILER@")

config/cmake/config.hpp.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@
107107
// Enable MFEM functionality based on the Sidre library
108108
#cmakedefine MFEM_USE_SIDRE
109109

110+
// Enable the use of SIMD in the high performance templated classes
111+
#cmakedefine MFEM_USE_SIMD
112+
110113
// Enable MFEM functionality based on Conduit
111114
#cmakedefine MFEM_USE_CONDUIT
112115

config/cmake/modules/MfemCmakeUtilities.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ function(mfem_export_mk_files)
733733
MFEM_USE_SUPERLU MFEM_USE_STRUMPACK MFEM_USE_GNUTLS
734734
MFEM_USE_GSLIB MFEM_USE_NETCDF MFEM_USE_PETSC MFEM_USE_MPFR MFEM_USE_SIDRE
735735
MFEM_USE_CONDUIT MFEM_USE_PUMI MFEM_USE_CUDA MFEM_USE_OCCA MFEM_USE_RAJA
736-
MFEM_USE_UMPIRE)
736+
MFEM_USE_UMPIRE MFEM_USE_SIMD MFEM_USE_ADIOS2)
737737
foreach(var ${CONFIG_MK_BOOL_VARS})
738738
if (${var})
739739
set(${var} YES)
@@ -743,6 +743,7 @@ function(mfem_export_mk_files)
743743
endforeach()
744744
# TODO: Add support for MFEM_USE_CUDA=YES
745745
set(MFEM_CXX ${CMAKE_CXX_COMPILER})
746+
set(MFEM_HOST_CXX ${MFEM_CXX})
746747
set(MFEM_CPPFLAGS "")
747748
string(STRIP "${CMAKE_CXX_FLAGS_${BUILD_TYPE}} ${CMAKE_CXX_FLAGS}"
748749
MFEM_CXXFLAGS)

config/config.hpp.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@
106106
// Enable Sidre support
107107
// #define MFEM_USE_SIDRE
108108

109+
// Enable the use of SIMD in the high performance templated classes
110+
// #define MFEM_USE_SIMD
111+
109112
// Enable Conduit support
110113
// #define MFEM_USE_CONDUIT
111114

config/config.mk.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,12 @@ MFEM_USE_RAJA = @MFEM_USE_RAJA@
4949
MFEM_USE_OCCA = @MFEM_USE_OCCA@
5050
MFEM_USE_CEED = @MFEM_USE_CEED@
5151
MFEM_USE_UMPIRE = @MFEM_USE_UMPIRE@
52+
MFEM_USE_SIMD = @MFEM_USE_SIMD@
5253
MFEM_USE_ADIOS2 = @MFEM_USE_ADIOS2@
5354

5455
# Compiler, compile options, and link options
5556
MFEM_CXX = @MFEM_CXX@
57+
MFEM_HOST_CXX = @MFEM_HOST_CXX@
5658
MFEM_CPPFLAGS = @MFEM_CPPFLAGS@
5759
MFEM_CXXFLAGS = @MFEM_CXXFLAGS@
5860
MFEM_TPLFLAGS = @MFEM_TPLFLAGS@

config/defaults.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ option(MFEM_USE_OCCA "Enable OCCA" OFF)
4949
option(MFEM_USE_RAJA "Enable RAJA" OFF)
5050
option(MFEM_USE_CEED "Enable CEED" OFF)
5151
option(MFEM_USE_UMPIRE "Enable Umpire" OFF)
52+
option(MFEM_USE_SIMD "Enable use of SIMD intrinsics" ON)
5253
option(MFEM_USE_ADIOS2 "Enable ADIOS2" OFF)
5354

5455
set(MFEM_MPI_NP 4 CACHE STRING "Number of processes used for MPI tests")

config/defaults.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ MFEM_USE_RAJA = NO
137137
MFEM_USE_OCCA = NO
138138
MFEM_USE_CEED = NO
139139
MFEM_USE_UMPIRE = NO
140+
MFEM_USE_SIMD = YES
140141
MFEM_USE_ADIOS2 = NO
141142

142143
# Compile and link options for zlib.

0 commit comments

Comments
 (0)