Skip to content

Commit 57cdd2b

Browse files
committed
Remove old snapc-amp directory, replace with new snap-hcc directory containing
an hcc port of the University of Bristol Snap-opencl-mpi code. Support hcc-lc compiler, use arrays or array view, support discrete GPU.
1 parent bd56de0 commit 57cdd2b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+6696
-10583
lines changed

snap-hcc/Doxyfile

Lines changed: 2362 additions & 0 deletions
Large diffs are not rendered by default.

snap-hcc/LICENSE.txt

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
Copyright (c) 2016 Advanced Micro Devices, Inc.
2+
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without modification,
6+
are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
11+
2. Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation
13+
and/or other materials provided with the distribution.
14+
15+
3. Neither the name of the copyright holder nor the names of its contributors
16+
may be used to endorse or promote products derived from this software without
17+
specific prior written permission.
18+
19+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
31+
Copyright (c) 2016, Tom Deakin, University of Bristol.
32+
All rights reserved.
33+
34+
Redistribution and use in source and binary forms, with or without
35+
modification, are permitted provided that the following conditions are met:
36+
37+
1. Redistributions of source code must retain the above copyright notice, this
38+
list of conditions and the following disclaimer.
39+
40+
2. Redistributions in binary form must reproduce the above copyright notice,
41+
this list of conditions and the following disclaimer in the documentation
42+
and/or other materials provided with the distribution.
43+
44+
3. Neither the name of the copyright holder nor the names of its contributors
45+
may be used to endorse or promote products derived from this software without
46+
specific prior written permission.
47+
48+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
49+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
50+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
51+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
52+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
54+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
55+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
56+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
57+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

snap-hcc/src/Makefile

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# This Makefile is for building GPU versions of SNAP.
2+
# Use MODEL=LC (default) for discrete GPU,
3+
# MODEL=HSAIL for APU;
4+
# otherwise MODEL will be set to OLD
5+
# and we'll build for a pre-ROCM dGPU.
6+
7+
# Pick the HCC toolchain from MODEL. MODEL defaults to LC when the caller
# does not set it; any value other than LC or HSAIL is rewritten to OLD.
MODEL ?= LC

ifeq ($(MODEL),LC)
  # Discrete GPU, ROCm install: hcc-lc toolchain, USE_HCC_LC enabled later.
  $(info building for discrete GPU with ROCM)
  CPPAMP_BUILD = /opt/rocm/hcc-lc
  HCC_LC = true
else ifeq ($(MODEL),HSAIL)
  # APU, ROCm install: hcc-hsail toolchain, USE_HCC_LC left undefined.
  $(info building for APU with ROCM)
  CPPAMP_BUILD = /opt/rocm/hcc-hsail
  HCC_LC = false
else
  # Fallback: pre-ROCm discrete GPU toolchain under /opt/hcc.
  $(info building for pre-ROCM discrete GPU)
  # override so the normalised value wins even over a command-line MODEL=...
  override MODEL=OLD
  CPPAMP_BUILD = /opt/hcc
  HCC_LC = true
endif
27+
28+
# HCC compiler driver from the toolchain selected through CPPAMP_BUILD.
CLANG = $(CPPAMP_BUILD)/bin/hcc

# Compile flags reported by hcc-config.
# Simply-expanded (:=) on purpose: with a recursive (=) assignment the
# $(shell ...) call is re-run every time the variable is expanded, i.e. once
# per compiled object. CPPAMP_BUILD must already be set at this point.
CLANGCFLAGS := $(shell $(CPPAMP_BUILD)/bin/hcc-config --cxxflags)
ifeq ($(HCC_LC),true)
  # Tell the sources that the hcc-lc style toolchain is in use.
  CLANGCFLAGS += -DUSE_HCC_LC
endif

# Link flags reported by hcc-config (evaluated once, see above).
CLANGLFLAGS := $(shell $(CPPAMP_BUILD)/bin/hcc-config --ldflags)

# Full compile command-line flags: local headers, debug info, optimisation,
# the hcc-config flags, stop at the first error, and the OpenMPI headers.
CLANGFLAGS = -I. -g -O3 -march=native \
             $(CLANGCFLAGS) \
             -ferror-limit=1 \
             -I/usr/lib/openmpi/include

# Full link flags: hcc-config link flags plus debug info and libm.
CLANGLNKFLAGS = $(CLANGLFLAGS) -g -lm

# OpenMPI library location and library.
MPILIBS = -L/usr/lib/openmpi/lib -lmpi
45+
46+
47+
# Translation units that make up the snap executable; each name below maps
# to <name>.cc in this directory.
SRCS = $(addsuffix .cc, \
         snap_main input allocate comms problem source sweep \
         buffers scalar_flux convergence population profiler)
59+
60+
# One object file under objs/ for every entry in SRCS.
OBJS = $(SRCS:%.cc=objs/%.o)

# Default goal: build the snap executable. The objs/ directory is created on
# demand through the order-only prerequisite on the object rule below.
all: snap

# all and clean are commands, not files.
.PHONY: all clean

# Output directory for object files. -p makes this succeed when the
# directory already exists.
objs:
	mkdir -p $@

# Link the executable. The objects are listed before the library flags:
# linkers resolve libraries left to right, so a library named before the
# objects that need it can be skipped (static archives, or --as-needed).
snap: $(OBJS)
	$(CLANG) $(LDFLAGS) $^ $(CLANGLNKFLAGS) $(MPILIBS) -o $@

# Compile one source file into objs/. "| objs" is an order-only
# prerequisite: the directory must exist before compiling (also under
# make -j or "make snap"), but its timestamp never forces a rebuild.
$(OBJS): objs/%.o: %.cc | objs
	$(CLANG) $(CLANGFLAGS) -c $< -o $@

# Extra header dependencies of snap_main.cc. Kept after the rule above so
# that $< in its recipe is still the .cc file.
objs/snap_main.o: hcc_arrays.h hcc_planes.h

# Remove the executable and the object files.
clean:
	rm -f snap objs/*.o

snap-hcc/src/README.AMD

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
This SNAP code is ported to AMD's HCC programming environment to support
2+
execution on accelerator devices, specifically GPUs.
3+
4+
This HCC port started with the OpenCL C code contributed by Tom Deakin of
5+
the University of Bristol. That code is found at:
6+
https://github.com/UoB-HPC/SNAP_MPI_OpenCL
7+
8+
Minor modifications were made to the code before the HCC porting effort.
9+
These changes include adding an inner loop counter and print statement to more
10+
closely match the original SNAP Fortran code, resolving an issue with
11+
correctness of the first iteration, and arranging some parameter defaults to
12+
more closely match the original Fortran. All of these changes were primarily
13+
for validation purposes so that the OpenCL output would match the Fortran
14+
output for a nearly matching input file. After these changes, it is possible
15+
to directly compare the OpenCL output to the Fortran output.
16+
17+
Next the code was ported to the AMD HCC environment.
18+
19+
These ports parallelize and offload key parts of the SNAP program,
20+
including the dim3 sweep inner loop, scalar flux and moment calculations,
21+
and inner and outer source computations.
22+
23+
The compiler used is an experimental clang compiler. This compiler accepts
24+
HCC (c++amp) and emits code suitable for a GPU target device, in particular AMD
25+
APUs and recent AMD discrete GPUs. The compiler revision in use at the time of
26+
writing is from package hcc_hsail 0.10.16253-6ceea64-ec648b0.
27+
28+
The primary GPU work arrangement is to run the xyz space coordinates at the
29+
team level, and groups*angles at the thread level.
30+
31+
Reductions are fully working on a workgroup basis. The reductions are all
32+
computed across the number of angles. Since the code implements these
33+
reductions only across a workgroup, the NANG parameter is limited to the size
34+
of a workgroup, which is 1024.
35+
36+
37+
The Makefile builds on our AMD system just by typing "make" and builds the
38+
target exe snap.
39+
To run, we use the command:
40+
snap snap_input
41+
42+
Sample output:
43+
SNAP: SN (Discrete Ordinates) Application Proxy
44+
MPI+HCC port
45+
Run on Wed Aug 17 09:36:10 2016
46+
47+
48+
********************************************************
49+
Input Parameters
50+
********************************************************
51+
Geometry
52+
Problem size: 0.100 x 0.100 x 0.100
53+
Cells: 8 x 8 x 8
54+
Cell size: 0.013 x 0.013 x 0.013
55+
56+
Discrete Ordinates
57+
Angles per octant: 64
58+
Moments: 2
59+
"Computational" moments: 4
60+
61+
Energy groups
62+
Number of groups: 30
63+
64+
Timesteps
65+
Timesteps: 10
66+
Simulation time: 0.100
67+
Time delta: 0.010
68+
69+
Iterations
70+
Max outers per timestep: 10
71+
Max inners per outer: 5
72+
Stopping criteria
73+
Inner convergence: 1.00E-04
74+
Outer convergence: 1.00E-02
75+
76+
MPI decomposition
77+
Rank layout: 1 x 1 x 1
78+
Chunk size: 8
79+
80+
device : AMD HSA Agent Kaveri0
81+
tile static memory: 65536
82+
required memory: 131MB
83+
********************************************************
84+
Iteration Monitor
85+
********************************************************
86+
Timestep 0
87+
Outer Difference Inners
88+
0 5.0678e-02 3
89+
1 4.1302e-02 3
90+
2 7.4568e-04 2
91+
92+
Timestep= 0 No. Outers= 3 No. Inners= 196
93+
94+
Population: 0.00
95+
96+
Timestep 1
97+
Outer Difference Inners
98+
0 7.2102e-02 3
99+
1 2.9102e-02 2
100+
2 4.3693e-04 2
101+
102+
Timestep= 1 No. Outers= 3 No. Inners= 195
103+
104+
Population: 0.00
105+
106+
Timestep 2
107+
Outer Difference Inners
108+
0 6.8614e-02 3
109+
1 1.9153e-02 2
110+
2 2.3149e-04 2
111+
112+
Timestep= 2 No. Outers= 3 No. Inners= 195
113+
114+
Population: 0.00
115+
116+
Timestep 3
117+
Outer Difference Inners
118+
0 6.7457e-02 3
119+
1 1.6605e-02 2
120+
2 1.8773e-04 2
121+
122+
Timestep= 3 No. Outers= 3 No. Inners= 195
123+
124+
Population: 0.00
125+
126+
Timestep 4
127+
Outer Difference Inners
128+
0 6.6755e-02 3
129+
1 1.6078e-02 2
130+
2 1.9246e-04 2
131+
132+
Timestep= 4 No. Outers= 3 No. Inners= 195
133+
134+
Population: 0.00
135+
136+
Timestep 5
137+
Outer Difference Inners
138+
0 6.4709e-02 3
139+
1 1.5481e-02 2
140+
2 1.8692e-04 2
141+
142+
Timestep= 5 No. Outers= 3 No. Inners= 195
143+
144+
Population: 0.00
145+
146+
Timestep 6
147+
Outer Difference Inners
148+
0 6.4372e-02 3
149+
1 1.6188e-02 2
150+
2 1.9009e-04 2
151+
152+
Timestep= 6 No. Outers= 3 No. Inners= 195
153+
154+
Population: 0.00
155+
156+
Timestep 7
157+
Outer Difference Inners
158+
0 6.6715e-02 3
159+
1 1.5658e-02 2
160+
2 1.8843e-04 2
161+
162+
Timestep= 7 No. Outers= 3 No. Inners= 195
163+
164+
Population: 0.00
165+
166+
Timestep 8
167+
Outer Difference Inners
168+
0 6.7384e-02 3
169+
1 1.5943e-02 2
170+
2 1.8962e-04 2
171+
172+
Timestep= 8 No. Outers= 3 No. Inners= 195
173+
174+
Population: 0.00
175+
176+
Timestep 9
177+
Outer Difference Inners
178+
0 6.7405e-02 3
179+
1 1.5835e-02 2
180+
2 1.8873e-04 2
181+
182+
Timestep= 9 No. Outers= 3 No. Inners= 194
183+
184+
Population: 0.00
185+
186+
187+
********************************************************
188+
Timing Report
189+
********************************************************
190+
Setup 0.020s
191+
Outer source 0.000s
192+
Outer parameters 0.000s
193+
Inner source 0.000s
194+
Sweeps 5.208s
195+
MPI Send time 0.000s
196+
MPI Recv time 0.076s
197+
PCIe transfer time 0.000s
198+
Compute time 5.132s
199+
Scalar flux reductions 0.000s
200+
Convergence checking 0.013s
201+
Other 2.223s
202+
Total simulation 7.444s
203+
204+
Grind time 13.332ns
205+
********************************************************
206+
207+
snap_input:
208+
! Input from namelist
209+
&invar
210+
nthreads=1
211+
nnested=1
212+
npex=1
213+
npey=1
214+
npez=1
215+
ndimen=3
216+
nx=8
217+
lx=0.1
218+
ny=8
219+
ly=0.1
220+
nz=8
221+
lz=0.1
222+
ichunk=8
223+
nmom=2
224+
nang=64
225+
ng=30
226+
mat_opt=0
227+
src_opt=0
228+
timedep=1
229+
it_det=0
230+
tf=0.1
231+
nsteps=10
232+
iitm=5
233+
oitm=10
234+
epsi=1.E-4
235+
fluxp=0
236+
scatp=0
237+
fixup=1
238+
angcpy=2
239+
/
240+

0 commit comments

Comments
 (0)