1- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,FUNC %s
2- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,FUNC %s
3- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89,FUNC %s
4- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
1+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
2+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
3+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
54
6- declare i32 @llvm.r600.read.tidig .x () readnone
5+ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable ; intrinsic called by the vector tests below to obtain %tid
76
8- ; FUNC -LABEL: {{^}}s_sub_i32:
7+ ; GCN-LABEL: {{^}}s_sub_i32:
98; GCN: s_load_dwordx2
109; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
1110; GCN: s_sub_i32 s{{[0-9]+}}, s[[A]], s[[B]]
@@ -15,7 +14,7 @@ define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
1514 ret void
1615}
1716
18- ; FUNC -LABEL: {{^}}s_sub_imm_i32:
17+ ; GCN-LABEL: {{^}}s_sub_imm_i32:
1918; GCN: s_load_dword [[A:s[0-9]+]]
2019; GCN: s_sub_i32 s{{[0-9]+}}, 0x4d2, [[A]]
2120define amdgpu_kernel void @s_sub_imm_i32 (i32 addrspace (1 )* %out , i32 %a ) {
@@ -24,9 +23,7 @@ define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
2423 ret void
2524}
2625
27- ; FUNC-LABEL: {{^}}test_sub_i32:
28- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
29-
26+ ; GCN-LABEL: {{^}}test_sub_i32:
3027; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
3128; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
3229define amdgpu_kernel void @test_sub_i32 (i32 addrspace (1 )* %out , i32 addrspace (1 )* %in ) {
@@ -38,9 +35,7 @@ define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)
3835 ret void
3936}
4037
41- ; FUNC-LABEL: {{^}}test_sub_imm_i32:
42- ; EG: SUB_INT
43-
38+ ; GCN-LABEL: {{^}}test_sub_imm_i32:
4439; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x7b, v{{[0-9]+}}
4540; GFX9: v_sub_u32_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
4641define amdgpu_kernel void @test_sub_imm_i32 (i32 addrspace (1 )* %out , i32 addrspace (1 )* %in ) {
@@ -50,10 +45,7 @@ define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspac
5045 ret void
5146}
5247
53- ; FUNC-LABEL: {{^}}test_sub_v2i32:
54- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
55- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
56-
48+ ; GCN-LABEL: {{^}}test_sub_v2i32:
5749; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
5850; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
5951
@@ -68,12 +60,7 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32
6860 ret void
6961}
7062
71- ; FUNC-LABEL: {{^}}test_sub_v4i32:
72- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
73- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
74- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
75- ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
76-
63+ ; GCN-LABEL: {{^}}test_sub_v4i32:
7764; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
7865; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
7966; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
@@ -92,11 +79,11 @@ define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32
9279 ret void
9380}
9481
95- ; FUNC -LABEL: {{^}}test_sub_i16:
82+ ; GCN-LABEL: {{^}}test_sub_i16:
9683; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
9784; GFX89: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
9885define amdgpu_kernel void @test_sub_i16 (i16 addrspace (1 )* %out , i16 addrspace (1 )* %in ) {
99- %tid = call i32 @llvm.r600.read.tidig .x ()
86+ %tid = call i32 @llvm.amdgcn.workitem.id .x ()
10087 %gep = getelementptr i16 , i16 addrspace (1 )* %in , i32 %tid
10188 %b_ptr = getelementptr i16 , i16 addrspace (1 )* %gep , i32 1
10289 %a = load volatile i16 , i16 addrspace (1 )* %gep
@@ -106,13 +93,13 @@ define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)
10693 ret void
10794}
10895
109- ; FUNC -LABEL: {{^}}test_sub_v2i16:
96+ ; GCN-LABEL: {{^}}test_sub_v2i16:
11097; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
11198; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}}
11299
113100; GFX9: v_pk_sub_i16
114101define amdgpu_kernel void @test_sub_v2i16 (<2 x i16 > addrspace (1 )* %out , <2 x i16 > addrspace (1 )* %in ) {
115- %tid = call i32 @llvm.r600.read.tidig .x ()
102+ %tid = call i32 @llvm.amdgcn.workitem.id .x ()
116103 %gep = getelementptr <2 x i16 >, <2 x i16 > addrspace (1 )* %in , i32 %tid
117104 %b_ptr = getelementptr <2 x i16 >, <2 x i16 > addrspace (1 )* %gep , i16 1
118105 %a = load <2 x i16 >, <2 x i16 > addrspace (1 )* %gep
@@ -122,7 +109,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
122109 ret void
123110}
124111
125- ; FUNC -LABEL: {{^}}test_sub_v4i16:
112+ ; GCN-LABEL: {{^}}test_sub_v4i16:
126113; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
127114; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}}
128115; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@@ -131,7 +118,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
131118; GFX9: v_pk_sub_i16
132119; GFX9: v_pk_sub_i16
133120define amdgpu_kernel void @test_sub_v4i16 (<4 x i16 > addrspace (1 )* %out , <4 x i16 > addrspace (1 )* %in ) {
134- %tid = call i32 @llvm.r600.read.tidig .x ()
121+ %tid = call i32 @llvm.amdgcn.workitem.id .x ()
135122 %gep = getelementptr <4 x i16 >, <4 x i16 > addrspace (1 )* %in , i32 %tid
136123 %b_ptr = getelementptr <4 x i16 >, <4 x i16 > addrspace (1 )* %gep , i16 1
137124 %a = load <4 x i16 >, <4 x i16 > addrspace (1 ) * %gep
@@ -141,22 +128,16 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16
141128 ret void
142129}
143130
144- ; FUNC -LABEL: {{^}}s_sub_i64:
131+ ; GCN-LABEL: {{^}}s_sub_i64:
145132; GCN: s_sub_u32
146133; GCN: s_subb_u32
147-
148- ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
149- ; EG-DAG: SUB_INT {{[* ]*}}
150- ; EG-DAG: SUBB_UINT
151- ; EG-DAG: SUB_INT
152- ; EG-DAG: SUB_INT {{[* ]*}}
153134define amdgpu_kernel void @s_sub_i64 (i64 addrspace (1 )* noalias %out , i64 %a , i64 %b ) nounwind { ; kernel: 64-bit subtraction of the two i64 arguments, result written to %out
154135 %result = sub i64 %a , %b ; the check lines above this function expect an s_sub_u32 followed by an s_subb_u32
155136 store i64 %result , i64 addrspace (1 )* %out , align 8 ; 8-byte aligned store through the addrspace(1) pointer
156137 ret void
157138}
158139
159- ; FUNC -LABEL: {{^}}v_sub_i64:
140+ ; GCN-LABEL: {{^}}v_sub_i64:
160141; SI: v_sub_i32_e32
161142; SI: v_subb_u32_e32
162143
@@ -165,14 +146,8 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64
165146
166147; GFX9: v_sub_co_u32_e32
167148; GFX9: v_subb_co_u32_e32
168-
169- ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
170- ; EG-DAG: SUB_INT {{[* ]*}}
171- ; EG-DAG: SUBB_UINT
172- ; EG-DAG: SUB_INT
173- ; EG-DAG: SUB_INT {{[* ]*}}
174149define amdgpu_kernel void @v_sub_i64 (i64 addrspace (1 )* noalias %out , i64 addrspace (1 )* noalias %inA , i64 addrspace (1 )* noalias %inB ) nounwind {
175- %tid = call i32 @llvm.r600.read.tidig .x () readnone
150+ %tid = call i32 @llvm.amdgcn.workitem.id .x () readnone
176151 %a_ptr = getelementptr i64 , i64 addrspace (1 )* %inA , i32 %tid
177152 %b_ptr = getelementptr i64 , i64 addrspace (1 )* %inB , i32 %tid
178153 %a = load i64 , i64 addrspace (1 )* %a_ptr
@@ -182,7 +157,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa
182157 ret void
183158}
184159
185- ; FUNC -LABEL: {{^}}v_test_sub_v2i64:
160+ ; GCN-LABEL: {{^}}v_test_sub_v2i64:
186161; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
187162; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
188163; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
@@ -198,7 +173,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa
198173; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
199174; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
200175define amdgpu_kernel void @v_test_sub_v2i64 (<2 x i64 > addrspace (1 )* %out , <2 x i64 > addrspace (1 )* noalias %inA , <2 x i64 > addrspace (1 )* noalias %inB ) {
201- %tid = call i32 @llvm.r600.read.tidig .x () readnone
176+ %tid = call i32 @llvm.amdgcn.workitem.id .x () readnone
202177 %a_ptr = getelementptr <2 x i64 >, <2 x i64 > addrspace (1 )* %inA , i32 %tid
203178 %b_ptr = getelementptr <2 x i64 >, <2 x i64 > addrspace (1 )* %inB , i32 %tid
204179 %a = load <2 x i64 >, <2 x i64 > addrspace (1 )* %a_ptr
@@ -208,7 +183,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
208183 ret void
209184}
210185
211- ; FUNC -LABEL: {{^}}v_test_sub_v4i64:
186+ ; GCN-LABEL: {{^}}v_test_sub_v4i64:
212187; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
213188; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
214189; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
@@ -236,7 +211,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
236211; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
237212; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
238213define amdgpu_kernel void @v_test_sub_v4i64 (<4 x i64 > addrspace (1 )* %out , <4 x i64 > addrspace (1 )* noalias %inA , <4 x i64 > addrspace (1 )* noalias %inB ) {
239- %tid = call i32 @llvm.r600.read.tidig .x () readnone
214+ %tid = call i32 @llvm.amdgcn.workitem.id .x () readnone
240215 %a_ptr = getelementptr <4 x i64 >, <4 x i64 > addrspace (1 )* %inA , i32 %tid
241216 %b_ptr = getelementptr <4 x i64 >, <4 x i64 > addrspace (1 )* %inB , i32 %tid
242217 %a = load <4 x i64 >, <4 x i64 > addrspace (1 )* %a_ptr
@@ -245,3 +220,22 @@ define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
245220 store <4 x i64 > %result , <4 x i64 > addrspace (1 )* %out
246221 ret void
247222}
223+
224+ ; Make sure the VOP3 form of sub is initially selected. Otherwise a pair
225+ ; of copies from/to VCC would be necessary.
226+
227+ ; GCN-LABEL: {{^}}sub_select_vop3:
228+ ; SI: v_subrev_i32_e64 v0, s[0:1], s0, v0
229+ ; VI: v_subrev_u32_e64 v0, s[0:1], s0, v0
230+ ; GFX9: v_subrev_u32_e32 v0, s0, v0
231+
232+ ; GCN: ; def vcc
233+ ; GCN: ds_write_b32
234+ ; GCN: ; use vcc
235+ define amdgpu_ps void @sub_select_vop3 (i32 inreg %s , i32 %v ) { ; amdgpu_ps function: computes %v - %s while an i64 value defined in vcc is still pending a later use
236+ %vcc = call i64 asm sideeffect "; def vcc" , "={vcc}" () ; inline asm whose i64 output is constrained to vcc; emits the "; def vcc" marker
237+ %sub = sub i32 %v , %s ; the subtraction under test, placed between the vcc def and the vcc use
238+ store i32 %sub , i32 addrspace (3 )* undef ; store through an undef addrspace(3) pointer so %sub has a user; the checks expect a ds_write_b32 here
239+ call void asm sideeffect "; use vcc" , "{vcc}" (i64 %vcc ) ; consumes %vcc through a vcc input constraint; emits the "; use vcc" marker
240+ ret void
241+ }
0 commit comments