@@ -6,10 +6,16 @@ groupshared int64_t gs[256];
6
6
// Read-write resources for the signed 64-bit atomic calls in main():
// tb, sb and tex1..tex3 are declared with int64_t elements; rb is a
// byte-address buffer that main() accesses through the rb.Interlocked*64 methods.
RWBuffer <int64_t> tb;
7
7
RWStructuredBuffer <int64_t> sb;
8
8
RWByteAddressBuffer rb;
9
+ RWTexture1D <int64_t> tex1;
10
+ RWTexture2D <int64_t> tex2;
11
+ RWTexture3D <int64_t> tex3;
9
12
10
13
// Unsigned counterparts (uint64_t elements) of the signed resources;
// main() uses them for the unsigned InterlockedMin / InterlockedMax calls.
groupshared uint64_t ugs[256 ];
11
14
RWBuffer <uint64_t> utb;
12
15
RWStructuredBuffer <uint64_t> usb;
16
+ RWTexture1D <uint64_t> utex1;
17
+ RWTexture2D <uint64_t> utex2;
18
+ RWTexture3D <uint64_t> utex3;
13
19
14
20
[numthreads (1 ,1 ,1 )]
15
21
void main ( uint3 gtid : SV_GroupThreadID )
@@ -18,70 +24,171 @@ void main( uint3 gtid : SV_GroupThreadID)
18
24
uint b = gtid.y;
19
25
uint64_t luv = a * b;
20
26
int64_t liv = a + b;
27
+ int64_t liv2 = 0 , liv3 = 0 ;
21
28
uint ix = 0 ;
22
29
23
- // GSCHECK: atomicrmw add i64
30
+ // CHECK: atomicrmw add i64
31
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 0
32
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 0
33
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 0
34
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 0
24
35
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 0
25
36
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 0
26
37
InterlockedAdd ( gs[a], liv );
27
38
InterlockedAdd ( tb[a], liv );
28
39
InterlockedAdd ( sb[a], liv );
29
- rb.InterlockedAdd ( ix++, liv );
40
+ InterlockedAdd ( tex1[a], liv );
41
+ InterlockedAdd ( tex2[gtid.xy], liv );
42
+ InterlockedAdd ( tex3[gtid], liv );
43
+ rb.InterlockedAdd64 ( ix++, liv );
30
44
31
- // GSCHECK: atomicrmw and i64
45
+ // CHECK: atomicrmw and i64
46
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 1
47
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 1
48
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 1
49
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 1
32
50
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 1
33
51
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 1
34
52
InterlockedAnd ( gs[a], liv );
35
53
InterlockedAnd ( tb[a], liv );
36
54
InterlockedAnd ( sb[a], liv );
37
- rb.InterlockedAnd ( ix++, liv );
55
+ InterlockedAnd ( tex1[a], liv );
56
+ InterlockedAnd ( tex2[gtid.xy], liv );
57
+ InterlockedAnd ( tex3[gtid], liv );
58
+ rb.InterlockedAnd64 ( ix++, liv );
38
59
39
- // GSCHECK: atomicrmw or i64
60
+ // CHECK: atomicrmw or i64
61
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 2
62
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 2
63
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 2
64
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 2
40
65
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 2
41
66
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 2
42
67
InterlockedOr ( gs[a], liv );
43
68
InterlockedOr ( tb[a], liv );
44
69
InterlockedOr ( sb[a], liv );
45
- rb.InterlockedOr ( ix++, liv );
70
+ InterlockedOr ( tex1[a], liv );
71
+ InterlockedOr ( tex2[gtid.xy], liv );
72
+ InterlockedOr ( tex3[gtid], liv );
73
+ rb.InterlockedOr64 ( ix++, liv );
46
74
47
- // GSCHECK: atomicrmw xor i64
75
+ // CHECK: atomicrmw xor i64
76
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 3
77
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 3
78
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 3
79
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 3
48
80
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 3
49
81
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 3
50
82
InterlockedXor ( gs[a], liv );
51
83
InterlockedXor ( tb[a], liv );
52
84
InterlockedXor ( sb[a], liv );
53
- rb.InterlockedXor ( ix++, liv );
85
+ InterlockedXor ( tex1[a], liv );
86
+ InterlockedXor ( tex2[gtid.xy], liv );
87
+ InterlockedXor ( tex3[gtid], liv );
88
+ rb.InterlockedXor64 ( ix++, liv );
54
89
55
- // GSCHECK: atomicrmw min i64
90
+ // CHECK: atomicrmw min i64
91
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 4
92
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 4
93
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 4
94
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 4
56
95
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 4
57
96
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 4
58
97
InterlockedMin ( gs[a], liv );
59
98
InterlockedMin ( tb[a], liv );
60
99
InterlockedMin ( sb[a], liv );
61
- rb.InterlockedMin ( ix++, liv );
100
+ InterlockedMin ( tex1[a], liv );
101
+ InterlockedMin ( tex2[gtid.xy], liv );
102
+ InterlockedMin ( tex3[gtid], liv );
103
+ rb.InterlockedMin64 ( ix++, liv );
62
104
63
- // GSCHECK: atomicrmw max i64
105
+ // CHECK: atomicrmw max i64
106
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 5
107
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 5
108
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 5
109
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 5
64
110
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 5
65
111
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 5
66
112
InterlockedMax ( gs[a], liv );
67
113
InterlockedMax ( tb[a], liv );
68
114
InterlockedMax ( sb[a], liv );
69
- rb.InterlockedMax ( ix++, liv );
115
+ InterlockedMax ( tex1[a], liv );
116
+ InterlockedMax ( tex2[gtid.xy], liv );
117
+ InterlockedMax ( tex3[gtid], liv );
118
+ rb.InterlockedMax64 ( ix++, liv );
70
119
71
- // GSCHECK: atomicrmw umin i64
120
+ // CHECK: atomicrmw umin i64
121
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 6
122
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 6
123
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 6
124
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 6
72
125
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 6
73
126
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 6
74
127
InterlockedMin ( ugs[a], luv );
75
128
InterlockedMin ( utb[a], luv );
76
129
InterlockedMin ( usb[a], luv );
77
- rb.InterlockedMin ( ix++, luv );
130
// Unsigned 64-bit min on the uint64_t textures: pass the unsigned operand
// luv, matching the ugs/utb/usb/rb calls in this group, so the unsigned
// variant is selected without relying on an implicit int64_t -> uint64_t conversion.
+ InterlockedMin ( utex1[a], luv );
131
+ InterlockedMin ( utex2[gtid.xy], luv );
132
+ InterlockedMin ( utex3[gtid], luv );
133
+ rb.InterlockedMin64 ( ix++, luv );
78
134
79
- // GSCHECK: atomicrmw umax i64
135
+ // CHECK: atomicrmw umax i64
136
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 7
137
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 7
138
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 7
139
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 7
80
140
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 7
81
141
// CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 7
82
142
InterlockedMax ( ugs[a], luv );
83
143
InterlockedMax ( utb[a], luv );
84
144
InterlockedMax ( usb[a], luv );
85
- rb.InterlockedMax ( ix++, luv );
145
// Unsigned 64-bit max on the uint64_t textures: pass the unsigned operand
// luv, matching the ugs/utb/usb/rb calls in this group, so the unsigned
// variant is selected without relying on an implicit int64_t -> uint64_t conversion.
+ InterlockedMax ( utex1[a], luv );
146
+ InterlockedMax ( utex2[gtid.xy], luv );
147
+ InterlockedMax ( utex3[gtid], luv );
148
+ rb.InterlockedMax64 ( ix++, luv );
149
+
150
+ // CHECK: atomicrmw xchg i64
151
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 8
152
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 8
153
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 8
154
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 8
155
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 8
156
+ // CHECK: call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle %{{[0-9]*}}, i32 8
157
+ InterlockedExchange ( gs[a], liv, liv2 );
158
+ InterlockedExchange ( tb[a], liv2, liv );
159
+ InterlockedExchange ( sb[a], liv, liv2 );
160
+ InterlockedExchange ( tex1[a], liv2, liv );
161
+ InterlockedExchange ( tex2[gtid.xy], liv, liv2 );
162
+ InterlockedExchange ( tex3[gtid], liv2, liv );
163
+ rb.InterlockedExchange64 ( ix++, liv, liv2 );
164
+
165
+ // CHECK: cmpxchg i64
166
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
167
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
168
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
169
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
170
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
171
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
172
+ InterlockedCompareStore ( gs[a], liv, liv2 );
173
+ InterlockedCompareStore ( tb[a], liv2, liv );
174
+ InterlockedCompareStore ( sb[a], liv, liv2 );
175
+ InterlockedCompareStore ( tex1[a], liv2, liv );
176
+ InterlockedCompareStore ( tex2[gtid.xy], liv2, liv );
177
+ InterlockedCompareStore ( tex3[gtid], liv, liv2 );
178
+ rb.InterlockedCompareStore64 ( ix++, liv2, liv );
86
179
180
+ // CHECK: cmpxchg i64
181
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
182
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
183
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
184
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
185
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
186
+ // CHECK: call i64 @dx.op.atomicCompareExchange.i64(i32 79
187
+ InterlockedCompareExchange ( gs[a], liv, liv2, liv3 );
188
+ InterlockedCompareExchange ( tb[a], liv2, liv3, liv );
189
+ InterlockedCompareExchange ( sb[a], liv2, liv, liv3 );
190
+ InterlockedCompareExchange ( tex1[a], liv2, liv3, liv );
191
+ InterlockedCompareExchange ( tex2[gtid.xy], liv2, liv, liv3 );
192
+ InterlockedCompareExchange ( tex3[gtid], liv, liv2, liv3 );
193
+ rb.InterlockedCompareExchange64 ( ix++, liv2, liv3, liv );
87
194
}
0 commit comments