Skip to content

Commit 737159a

Browse files
authored
Merge pull request #118 from hartwork/fix-for-amd-k8
Fix for AMD K8
2 parents 3b57873 + f0b584d commit 737159a

File tree

7 files changed

+583
-5
lines changed

7 files changed

+583
-5
lines changed

resolve_march_native/engine.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ def _process_flags_explicit_has_more(self, target_set,
8989
march_explicit_flag_set):
9090
PREFIX_NO = '-mno-'
9191
PREFIX_YES = '-m'
92+
PREFIX_MTUNE = '-mtune='
9293

9394
explicit_more_flag_set = march_explicit_flag_set - march_native_flag_set
9495
for flag in explicit_more_flag_set:
@@ -97,7 +98,8 @@ def _process_flags_explicit_has_more(self, target_set,
9798
flag, file=sys.stderr)
9899
continue
99100

100-
if not flag.startswith(PREFIX_NO) and flag.startswith(PREFIX_YES):
101+
if not flag.startswith(PREFIX_NO) and flag.startswith(PREFIX_YES) \
102+
and not flag.startswith(PREFIX_MTUNE):
101103
# march=<explicit> enabled something (too much) that march=native disabled
102104
opposite_flag = PREFIX_NO + flag[len(PREFIX_YES):]
103105
target_set.add(opposite_flag)
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
.file "tmp.5WRm9r3nVq.c"
2+
# GNU C17 (Gentoo 13.2.1_p20230826 p7) version 13.2.1 20230826 (x86_64-pc-linux-gnu)
3+
# compiled by GNU C version 13.2.1 20230826, GMP version 6.3.0, MPFR version 4.2.1, MPC version 1.3.1, isl version none
4+
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
5+
# options passed: -march=k8-sse3
6+
.text
7+
.ident "GCC: (Gentoo 13.2.1_p20230826 p7) 13.2.1 20230826"
8+
.section .note.GNU-stack,"",@progbits
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
.file "tmp.Rh4WnkncPp.c"
2+
# GNU C17 (Gentoo 13.2.1_p20230826 p7) version 13.2.1 20230826 (x86_64-pc-linux-gnu)
3+
# compiled by GNU C version 13.2.1 20230826, GMP version 6.3.0, MPFR version 4.2.1, MPC version 1.3.1, isl version none
4+
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
5+
# options passed: -march=k8-sse3 -mmmx -mno-popcnt -msse -msse2 -msse3 -mno-ssse3 -mno-sse4.1 -mno-sse4.2 -mno-avx -mno-avx2 -mno-sse4a -mno-fma4 -mno-xop -mno-fma -mno-avx512f -mno-bmi -mno-bmi2 -mno-aes -mno-pclmul -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512vbmi -mno-avx512ifma -mno-avx5124vnniw -mno-avx5124fmaps -mno-avx512vpopcntdq -mno-avx512vbmi2 -mno-gfni -mno-vpclmulqdq -mno-avx512vnni -mno-avx512bitalg -mno-avx512bf16 -mno-avx512vp2intersect -m3dnow -mno-adx -mno-abm -mno-cldemote -mno-clflushopt -mno-clwb -mno-clzero -mcx16 -mno-enqcmd -mno-f16c -mno-fsgsbase -mfxsr -mno-hle -msahf -mno-lwp -mno-lzcnt -mno-movbe -mno-movdir64b -mno-movdiri -mno-mwaitx -mno-pconfig -mno-pku -mno-prefetchwt1 -mprfchw -mno-ptwrite -mno-rdpid -mno-rdrnd -mno-rdseed -mno-rtm -mno-serialize -mno-sgx -mno-sha -mno-shstk -mno-tbm -mno-tsxldtrk -mno-vaes -mno-waitpkg -mno-wbnoinvd -mno-xsave -mno-xsavec -mno-xsaveopt -mno-xsaves -mno-amx-tile -mno-amx-int8 -mno-amx-bf16 -mno-uintr -mno-hreset -mno-kl -mno-widekl -mno-avxvnni -mno-avx512fp16 -mno-avxifma -mno-avxvnniint8 -mno-avxneconvert -mno-cmpccxadd -mno-amx-fp16 -mno-prefetchi -mno-raoint -mno-amx-complex --param=l1-cache-size=64 --param=l1-cache-line-size=64 --param=l2-cache-size=512 -mtune=k8
6+
.text
7+
.ident "GCC: (Gentoo 13.2.1_p20230826 p7) 13.2.1 20230826"
8+
.section .note.GNU-stack,"",@progbits
Lines changed: 270 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,270 @@
1+
The following options are target specific:
2+
-m128bit-long-double [enabled]
3+
-m16 [disabled]
4+
-m32 [disabled]
5+
-m3dnow [enabled]
6+
-m3dnowa [enabled]
7+
-m64 [enabled]
8+
-m80387 [enabled]
9+
-m8bit-idiv [disabled]
10+
-m96bit-long-double [disabled]
11+
-mabi= sysv
12+
-mabm [disabled]
13+
-maccumulate-outgoing-args [enabled]
14+
-maddress-mode= long
15+
-madx [disabled]
16+
-maes [disabled]
17+
-malign-data= compat
18+
-malign-double [disabled]
19+
-malign-functions= 0
20+
-malign-jumps= 0
21+
-malign-loops= 0
22+
-malign-stringops [enabled]
23+
-mamx-bf16 [disabled]
24+
-mamx-complex [disabled]
25+
-mamx-fp16 [disabled]
26+
-mamx-int8 [disabled]
27+
-mamx-tile [disabled]
28+
-mandroid [disabled]
29+
-march= k8-sse3
30+
-masm= att
31+
-mavx [disabled]
32+
-mavx2 [disabled]
33+
-mavx256-split-unaligned-load [disabled]
34+
-mavx256-split-unaligned-store [disabled]
35+
-mavx5124fmaps [disabled]
36+
-mavx5124vnniw [disabled]
37+
-mavx512bf16 [disabled]
38+
-mavx512bitalg [disabled]
39+
-mavx512bw [disabled]
40+
-mavx512cd [disabled]
41+
-mavx512dq [disabled]
42+
-mavx512er [disabled]
43+
-mavx512f [disabled]
44+
-mavx512fp16 [disabled]
45+
-mavx512ifma [disabled]
46+
-mavx512pf [disabled]
47+
-mavx512vbmi [disabled]
48+
-mavx512vbmi2 [disabled]
49+
-mavx512vl [disabled]
50+
-mavx512vnni [disabled]
51+
-mavx512vp2intersect [disabled]
52+
-mavx512vpopcntdq [disabled]
53+
-mavxifma [disabled]
54+
-mavxneconvert [disabled]
55+
-mavxvnni [disabled]
56+
-mavxvnniint8 [disabled]
57+
-mbionic [disabled]
58+
-mbmi [disabled]
59+
-mbmi2 [disabled]
60+
-mbranch-cost=<0,5> 3
61+
-mcall-ms2sysv-xlogues [disabled]
62+
-mcet-switch [disabled]
63+
-mcld [disabled]
64+
-mcldemote [disabled]
65+
-mclflushopt [disabled]
66+
-mclwb [disabled]
67+
-mclzero [disabled]
68+
-mcmodel= [default]
69+
-mcmpccxadd [disabled]
70+
-mcpu=
71+
-mcrc32 [disabled]
72+
-mcx16 [disabled]
73+
-mdaz-ftz [disabled]
74+
-mdirect-extern-access [enabled]
75+
-mdispatch-scheduler [disabled]
76+
-mdump-tune-features [disabled]
77+
-menqcmd [disabled]
78+
-mf16c [disabled]
79+
-mfancy-math-387 [enabled]
80+
-mfentry [disabled]
81+
-mfentry-name=
82+
-mfentry-section=
83+
-mfma [disabled]
84+
-mfma4 [disabled]
85+
-mforce-drap [disabled]
86+
-mforce-indirect-call [disabled]
87+
-mfp-ret-in-387 [enabled]
88+
-mfpmath= sse
89+
-mfsgsbase [disabled]
90+
-mfunction-return= keep
91+
-mfused-madd -ffp-contract=fast
92+
-mfxsr [enabled]
93+
-mgather -mtune-ctrl=use_gather
94+
-mgeneral-regs-only [disabled]
95+
-mgfni [disabled]
96+
-mglibc [enabled]
97+
-mhard-float [enabled]
98+
-mharden-sls= none
99+
-mhle [disabled]
100+
-mhreset [disabled]
101+
-miamcu [disabled]
102+
-mieee-fp [enabled]
103+
-mincoming-stack-boundary= 0
104+
-mindirect-branch-cs-prefix [disabled]
105+
-mindirect-branch-register [disabled]
106+
-mindirect-branch= keep
107+
-minline-all-stringops [disabled]
108+
-minline-stringops-dynamically [disabled]
109+
-minstrument-return= none
110+
-mintel-syntax -masm=intel
111+
-mkl [disabled]
112+
-mlam= none
113+
-mlarge-data-threshold=<number> 65536
114+
-mlong-double-128 [disabled]
115+
-mlong-double-64 [disabled]
116+
-mlong-double-80 [enabled]
117+
-mlwp [disabled]
118+
-mlzcnt [disabled]
119+
-mmanual-endbr [disabled]
120+
-mmemcpy-strategy=
121+
-mmemset-strategy=
122+
-mmitigate-rop [disabled]
123+
-mmmx [enabled]
124+
-mmovbe [disabled]
125+
-mmovdir64b [disabled]
126+
-mmovdiri [disabled]
127+
-mmove-max= 128
128+
-mmpx [disabled]
129+
-mms-bitfields [disabled]
130+
-mmusl [disabled]
131+
-mmwait [enabled]
132+
-mmwaitx [disabled]
133+
-mneeded [disabled]
134+
-mno-align-stringops [disabled]
135+
-mno-default [disabled]
136+
-mno-fancy-math-387 [disabled]
137+
-mno-push-args [disabled]
138+
-mno-red-zone [disabled]
139+
-mno-sse4 [enabled]
140+
-mnop-mcount [disabled]
141+
-momit-leaf-frame-pointer [disabled]
142+
-mpc32 [disabled]
143+
-mpc64 [disabled]
144+
-mpc80 [disabled]
145+
-mpclmul [disabled]
146+
-mpcommit [disabled]
147+
-mpconfig [disabled]
148+
-mpku [disabled]
149+
-mpopcnt [disabled]
150+
-mprefer-avx128 -mprefer-vector-width=128
151+
-mprefer-vector-width= none
152+
-mpreferred-stack-boundary= 0
153+
-mprefetchi [disabled]
154+
-mprefetchwt1 [disabled]
155+
-mprfchw [disabled]
156+
-mptwrite [disabled]
157+
-mpush-args [enabled]
158+
-mraoint [disabled]
159+
-mrdpid [disabled]
160+
-mrdrnd [disabled]
161+
-mrdseed [disabled]
162+
-mrecip [disabled]
163+
-mrecip=
164+
-mrecord-mcount [disabled]
165+
-mrecord-return [disabled]
166+
-mred-zone [enabled]
167+
-mregparm= 6
168+
-mrelax-cmpxchg-loop [disabled]
169+
-mrtd [disabled]
170+
-mrtm [disabled]
171+
-msahf [disabled]
172+
-mscatter -mtune-ctrl=use_scatter
173+
-mserialize [disabled]
174+
-msgx [disabled]
175+
-msha [disabled]
176+
-mshstk [disabled]
177+
-mskip-rax-setup [disabled]
178+
-msoft-float [disabled]
179+
-msse [enabled]
180+
-msse2 [enabled]
181+
-msse2avx [disabled]
182+
-msse3 [enabled]
183+
-msse4 [disabled]
184+
-msse4.1 [disabled]
185+
-msse4.2 [disabled]
186+
-msse4a [disabled]
187+
-msse5 -mavx
188+
-msseregparm [disabled]
189+
-mssse3 [disabled]
190+
-mstack-arg-probe [disabled]
191+
-mstack-protector-guard-offset=
192+
-mstack-protector-guard-reg=
193+
-mstack-protector-guard-symbol=
194+
-mstack-protector-guard= tls
195+
-mstackrealign [disabled]
196+
-mstore-max= 128
197+
-mstringop-strategy= [default]
198+
-mstv [enabled]
199+
-mtbm [disabled]
200+
-mtls-dialect= gnu
201+
-mtls-direct-seg-refs [enabled]
202+
-mtsxldtrk [disabled]
203+
-mtune-ctrl=
204+
-mtune= k8-sse3
205+
-muclibc [disabled]
206+
-muintr [disabled]
207+
-munroll-only-small-loops [disabled]
208+
-mvaes [disabled]
209+
-mveclibabi= [default]
210+
-mvect8-ret-in-mem [disabled]
211+
-mvpclmulqdq [disabled]
212+
-mvzeroupper [disabled]
213+
-mwaitpkg [disabled]
214+
-mwbnoinvd [disabled]
215+
-mwidekl [disabled]
216+
-mx32 [disabled]
217+
-mxop [disabled]
218+
-mxsave [disabled]
219+
-mxsavec [disabled]
220+
-mxsaveopt [disabled]
221+
-mxsaves [disabled]
222+
223+
Known assembler dialects (for use with the -masm= option):
224+
att intel
225+
226+
Known ABIs (for use with the -mabi= option):
227+
ms sysv
228+
229+
Known code models (for use with the -mcmodel= option):
230+
32 kernel large medium small
231+
232+
Valid arguments to -mfpmath=:
233+
387 387+sse 387,sse both sse sse+387 sse,387
234+
235+
Known choices for mitigation against straight line speculation with -mharden-sls=:
236+
all indirect-jmp none return
237+
238+
Known indirect branch choices (for use with the -mindirect-branch=/-mfunction-return= options):
239+
keep thunk thunk-extern thunk-inline
240+
241+
Known choices for return instrumentation with -minstrument-return=:
242+
call none nop5
243+
244+
Known data alignment choices (for use with the -malign-data= option):
245+
abi cacheline compat
246+
247+
Known vectorization library ABIs (for use with the -mveclibabi= option):
248+
acml svml
249+
250+
Known address mode (for use with the -maddress-mode= option):
251+
long short
252+
253+
Known preferred register vector length (to use with the -mprefer-vector-width= option):
254+
128 256 512 none
255+
256+
Known stack protector guard (for use with the -mstack-protector-guard= option):
257+
global tls
258+
259+
Valid arguments to -mstringop-strategy=:
260+
byte_loop libcall loop rep_4byte rep_8byte rep_byte unrolled_loop vector_loop
261+
262+
Known TLS dialects (for use with the -mtls-dialect= option):
263+
gnu gnu2
264+
265+
Known valid arguments for -march= option:
266+
i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 pentium3m pentium-m pentium4 pentium4m prescott nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client rocketlake icelake-server cascadelake tigerlake cooperlake sapphirerapids emeraldrapids alderlake raptorlake meteorlake graniterapids graniterapids-d bonnell atom silvermont slm goldmont goldmont-plus tremont gracemont sierraforest grandridge knl knm intel geode k6 k6-2 k6-3 athlon athlon-tbird athlon-4 athlon-xp athlon-mp x86-64 x86-64-v2 x86-64-v3 x86-64-v4 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 lujiazui k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 znver3 znver4 btver1 btver2 generic native
267+
268+
Known valid arguments for -mtune= option:
269+
generic i386 i486 pentium lakemont pentiumpro pentium4 nocona core2 nehalem sandybridge haswell bonnell silvermont goldmont goldmont-plus tremont sierraforest grandridge knl knm skylake skylake-avx512 cannonlake icelake-client icelake-server cascadelake tigerlake cooperlake sapphirerapids alderlake rocketlake graniterapids graniterapids-d intel lujiazui geode k6 athlon k8 amdfam10 bdver1 bdver2 bdver3 bdver4 btver1 btver2 znver1 znver2 znver3 znver4
270+

0 commit comments

Comments
 (0)