@@ -19,94 +19,178 @@ int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]);
19
19
// Pointer to the double test inputs, obtained from get_interesting_doubles().
double *interesting_doubles = get_interesting_doubles();
20
20
// Element count is taken from `interesting_doubles_` (trailing underscore), not from the
// pointer above. Presumably that is the backing array defined outside this view — confirm;
// applying sizeof to the pointer instead would not give the element count.
int numInterestingDoubles = sizeof (interesting_doubles_)/sizeof (interesting_doubles_[0 ]);
21
21
22
- void test_round () {
23
- Ret_M128d (__m128d, _mm_ceil_pd);
24
- Ret_M128 (__m128, _mm_ceil_ps);
25
- Ret_M128d_M128d (__m128d, _mm_ceil_sd);
26
- Ret_M128_M128 (__m128, _mm_ceil_ss);
27
- Ret_M128d (__m128d, _mm_floor_pd);
28
- Ret_M128 (__m128, _mm_floor_ps);
29
- Ret_M128d_M128d (__m128d, _mm_floor_sd);
30
- Ret_M128_M128 (__m128, _mm_floor_ss);
31
- Ret_M128d_Tint (__m128d, _mm_round_pd);
32
- Ret_M128_Tint (__m128, _mm_round_ps);
33
- Ret_M128d_M128d_Tint (__m128d, _mm_round_sd);
34
- Ret_M128_M128_Tint (__m128, _mm_round_ss);
35
- }
36
-
37
- int main () {
38
- assert (numInterestingFloats % 4 == 0 );
39
- assert (numInterestingInts % 4 == 0 );
40
- assert (numInterestingDoubles % 4 == 0 );
22
// Rounding intrinsics (_mm_ceil_*, _mm_floor_*, _mm_round_*), one wrapper function per intrinsic.
// Each wrapper only invokes a Ret_* harness macro (defined outside this view) with the
// result type and the intrinsic under test.
// NOINLINE is defined elsewhere; presumably it keeps each intrinsic in its own function — confirm.
+ void NOINLINE test_ceil_pd () { Ret_M128d (__m128d, _mm_ceil_pd); }
23
+ void NOINLINE test_ceil_ps () { Ret_M128 (__m128, _mm_ceil_ps); }
24
+ void NOINLINE test_ceil_sd () { Ret_M128d_M128d (__m128d, _mm_ceil_sd); }
25
+ void NOINLINE test_ceil_ss () { Ret_M128_M128 (__m128, _mm_ceil_ss); }
26
+ void NOINLINE test_floor_pd () { Ret_M128d (__m128d, _mm_floor_pd); }
27
+ void NOINLINE test_floor_ps () { Ret_M128 (__m128, _mm_floor_ps); }
28
+ void NOINLINE test_floor_sd () { Ret_M128d_M128d (__m128d, _mm_floor_sd); }
29
+ void NOINLINE test_floor_ss () { Ret_M128_M128 (__m128, _mm_floor_ss); }
30
+ void NOINLINE test_round_pd () { Ret_M128d_Tint (__m128d, _mm_round_pd); }
31
+ void NOINLINE test_round_ps () { Ret_M128_Tint (__m128, _mm_round_ps); }
32
+ void NOINLINE test_round_sd () { Ret_M128d_M128d_Tint (__m128d, _mm_round_sd); }
33
+ void NOINLINE test_round_ss () { Ret_M128_M128_Tint (__m128, _mm_round_ss); }
34
// Blend intrinsics, one wrapper per intrinsic.
// The _mm_blend_* variants go through the *_Tint harness macros; the _mm_blendv_* variants
// go through the three-vector-operand harness macros.
+ void NOINLINE test_blend_epi16 () { Ret_M128i_M128i_Tint (__m128i, _mm_blend_epi16); }
35
+ void NOINLINE test_blend_pd () { Ret_M128d_M128d_Tint (__m128d, _mm_blend_pd); }
36
+ void NOINLINE test_blend_ps () { Ret_M128_M128_Tint (__m128, _mm_blend_ps); }
37
+ void NOINLINE test_blendv_epi8 () { Ret_M128i_M128i_M128i (__m128i, _mm_blendv_epi8); }
38
+ void NOINLINE test_blendv_pd () { Ret_M128d_M128d_M128d (__m128d, _mm_blendv_pd); }
39
+ void NOINLINE test_blendv_ps () { Ret_M128_M128_M128 (__m128, _mm_blendv_ps); }
40
// Integer conversion intrinsics (_mm_cvtepi*_epi* and _mm_cvtepu*_epi*), one wrapper each.
// All of them take the single-operand Ret_M128i harness macro with an __m128i result type.
+ void NOINLINE test_cvtepi16_epi32 () { Ret_M128i (__m128i, _mm_cvtepi16_epi32); }
41
+ void NOINLINE test_cvtepi16_epi64 () { Ret_M128i (__m128i, _mm_cvtepi16_epi64); }
42
+ void NOINLINE test_cvtepi32_epi64 () { Ret_M128i (__m128i, _mm_cvtepi32_epi64); }
43
+ void NOINLINE test_cvtepi8_epi16 () { Ret_M128i (__m128i, _mm_cvtepi8_epi16); }
44
+ void NOINLINE test_cvtepi8_epi32 () { Ret_M128i (__m128i, _mm_cvtepi8_epi32); }
45
+ void NOINLINE test_cvtepi8_epi64 () { Ret_M128i (__m128i, _mm_cvtepi8_epi64); }
46
+ void NOINLINE test_cvtepu16_epi32 () { Ret_M128i (__m128i, _mm_cvtepu16_epi32); }
47
+ void NOINLINE test_cvtepu16_epi64 () { Ret_M128i (__m128i, _mm_cvtepu16_epi64); }
48
+ void NOINLINE test_cvtepu32_epi64 () { Ret_M128i (__m128i, _mm_cvtepu32_epi64); }
49
+ void NOINLINE test_cvtepu8_epi16 () { Ret_M128i (__m128i, _mm_cvtepu8_epi16); }
50
+ void NOINLINE test_cvtepu8_epi32 () { Ret_M128i (__m128i, _mm_cvtepu8_epi32); }
51
+ void NOINLINE test_cvtepu8_epi64 () { Ret_M128i (__m128i, _mm_cvtepu8_epi64); }
52
// Lane extract / insert intrinsics, one wrapper each.
// The first macro argument is the result type handed to the harness (int, int64_t, float or a vector type).
+ void NOINLINE test_extract_epi32 () { Ret_M128i_Tint (int , _mm_extract_epi32); }
53
+ void NOINLINE test_extract_epi64 () { Ret_M128i_Tint (int64_t , _mm_extract_epi64); }
54
+ void NOINLINE test_extract_epi8 () { Ret_M128i_Tint (int , _mm_extract_epi8); }
55
// NOTE(review): _mm_extract_ps is documented as returning int (the raw lane bits), but the
// harness is given `float` as the result type here — confirm this is intentional.
+ void NOINLINE test_extract_ps () { Ret_M128_Tint (float , _mm_extract_ps); }
56
+ void NOINLINE test_insert_epi32 () { Ret_M128i_int_Tint (__m128i, _mm_insert_epi32); }
57
// NOTE(review): _mm_insert_epi64 is driven through the same `int` harness macro as the
// 32-bit insert — presumably only int-range values get inserted; confirm coverage is sufficient.
+ void NOINLINE test_insert_epi64 () { Ret_M128i_int_Tint (__m128i, _mm_insert_epi64); }
58
+ void NOINLINE test_insert_ps () { Ret_M128_M128_Tint (__m128, _mm_insert_ps); }
59
// Integer max/min intrinsics, one wrapper each.
// All use the two-operand Ret_M128i_M128i harness macro with an __m128i result type.
+ void NOINLINE test_max_epi32 () { Ret_M128i_M128i (__m128i, _mm_max_epi32); }
60
+ void NOINLINE test_max_epi8 () { Ret_M128i_M128i (__m128i, _mm_max_epi8); }
61
+ void NOINLINE test_max_epu16 () { Ret_M128i_M128i (__m128i, _mm_max_epu16); }
62
+ void NOINLINE test_max_epu32 () { Ret_M128i_M128i (__m128i, _mm_max_epu32); }
63
+ void NOINLINE test_min_epi32 () { Ret_M128i_M128i (__m128i, _mm_min_epi32); }
64
+ void NOINLINE test_min_epi8 () { Ret_M128i_M128i (__m128i, _mm_min_epi8); }
65
+ void NOINLINE test_min_epu16 () { Ret_M128i_M128i (__m128i, _mm_min_epu16); }
66
+ void NOINLINE test_min_epu32 () { Ret_M128i_M128i (__m128i, _mm_min_epu32); }
67
// Remaining single-call wrappers: compare, minpos, mpsadbw, multiply, pack and streaming load.
// NOTE(review): these names carry a doubled prefix (`test_test_...`, `test_testmul_...`) although
// the intrinsics are _mm_cmpeq_epi64, _mm_minpos_epu16, _mm_mpsadbw_epu8, _mm_mul_epi32,
// _mm_mullo_epi32, _mm_packus_epi32 and _mm_stream_load_si128 — this looks like a copy-paste slip.
// Renaming them requires changing the matching calls in main() in the same edit.
+ void NOINLINE test_test_cmpeq_epi64 () { Ret_M128i_M128i (__m128i, _mm_cmpeq_epi64); }
68
+ void NOINLINE test_test_minpos_epu16 () { Ret_M128i (__m128i, _mm_minpos_epu16); }
69
+ void NOINLINE test_test_mpsadbw_epu8 () { Ret_M128i_M128i_Tint (__m128i, _mm_mpsadbw_epu8); }
70
+ void NOINLINE test_testmul_epi32 () { Ret_M128i_M128i (__m128i, _mm_mul_epi32); }
71
+ void NOINLINE test_test_mullo_epi32 () { Ret_M128i_M128i (__m128i, _mm_mullo_epi32); }
72
+ void NOINLINE test_test_packus_epi32 () { Ret_M128i_M128i (__m128i, _mm_packus_epi32); }
73
// Ret_IntPtr takes extra arguments: the pointer type (__m128i*) and two numeric parameters
// (4, 4) whose meaning is defined by the macro, which is not visible here.
+ void NOINLINE test_test_stream_load_si128 () { Ret_IntPtr (__m128i, _mm_stream_load_si128, __m128i*, 4 , 4 ); }
41
74
42
- test_round ();
43
-
44
- Ret_M128i_M128i_Tint (__m128i, _mm_blend_epi16);
45
- Ret_M128d_M128d_Tint (__m128d, _mm_blend_pd);
46
- Ret_M128_M128_Tint (__m128, _mm_blend_ps);
47
- Ret_M128i_M128i_M128i (__m128i, _mm_blendv_epi8);
48
- Ret_M128d_M128d_M128d (__m128d, _mm_blendv_pd);
49
- Ret_M128_M128_M128 (__m128, _mm_blendv_ps);
50
- Ret_M128i_M128i (__m128i, _mm_cmpeq_epi64);
51
- Ret_M128i (__m128i, _mm_cvtepi16_epi32);
52
- Ret_M128i (__m128i, _mm_cvtepi16_epi64);
53
- Ret_M128i (__m128i, _mm_cvtepi32_epi64);
54
- Ret_M128i (__m128i, _mm_cvtepi8_epi16);
55
- Ret_M128i (__m128i, _mm_cvtepi8_epi32);
56
- Ret_M128i (__m128i, _mm_cvtepi8_epi64);
57
- Ret_M128i (__m128i, _mm_cvtepu16_epi32);
58
- Ret_M128i (__m128i, _mm_cvtepu16_epi64);
59
- Ret_M128i (__m128i, _mm_cvtepu32_epi64);
60
- Ret_M128i (__m128i, _mm_cvtepu8_epi16);
61
- Ret_M128i (__m128i, _mm_cvtepu8_epi32);
62
- Ret_M128i (__m128i, _mm_cvtepu8_epi64);
75
// Runs the harness over _mm_dp_pd with the global testNaNBits flag forced to false,
// then restores the flag to the value it had on entry.
// The flag is presumably cleared for the same reason given on the _mm_dp_ps test
// (NaN bit selection of the emulation does not match) — confirm.
+ void NOINLINE test_dp_pd () {
76
+ bool oldTestNaNBits = testNaNBits;
63
77
testNaNBits = false ;
64
78
Ret_M128d_M128d_Tint (__m128d, _mm_dp_pd);
79
// Restore the caller's setting so later tests are not affected by this override.
+ testNaNBits = oldTestNaNBits;
80
+ }
81
+
82
// Runs the harness over _mm_dp_ps with the global testNaNBits flag forced to false,
// then restores the flag to the value it had on entry (instead of unconditionally
// setting it back to true, as the removed line below did).
+ void NOINLINE test_dp_ps () {
83
+ bool oldTestNaNBits = testNaNBits;
84
+ testNaNBits = false ;
65
85
Ret_M128_M128_Tint (__m128, _mm_dp_ps); // _mm_dp_ps emulation does not match NaN bit selection rules (seems to be unspecified)
66
- testNaNBits = true ;
67
- Ret_M128i_Tint (int , _mm_extract_epi32);
68
- Ret_M128i_Tint (int64_t , _mm_extract_epi64);
69
- Ret_M128i_Tint (int , _mm_extract_epi8);
70
- Ret_M128_Tint (float , _mm_extract_ps);
71
- Ret_M128i_int_Tint (__m128i, _mm_insert_epi32);
72
- Ret_M128i_int_Tint (__m128i, _mm_insert_epi64);
73
- Ret_M128_M128_Tint (__m128, _mm_insert_ps);
74
- Ret_M128i_M128i (__m128i, _mm_max_epi32);
75
- Ret_M128i_M128i (__m128i, _mm_max_epi8);
76
- Ret_M128i_M128i (__m128i, _mm_max_epu16);
77
- Ret_M128i_M128i (__m128i, _mm_max_epu32);
78
- Ret_M128i_M128i (__m128i, _mm_min_epi32);
79
- Ret_M128i_M128i (__m128i, _mm_min_epi8);
80
- Ret_M128i_M128i (__m128i, _mm_min_epu16);
81
- Ret_M128i_M128i (__m128i, _mm_min_epu32);
82
- Ret_M128i (__m128i, _mm_minpos_epu16);
83
- Ret_M128i_M128i_Tint (__m128i, _mm_mpsadbw_epu8);
84
- Ret_M128i_M128i (__m128i, _mm_mul_epi32);
85
- Ret_M128i_M128i (__m128i, _mm_mullo_epi32);
86
- Ret_M128i_M128i (__m128i, _mm_packus_epi32);
87
- Ret_IntPtr (__m128i, _mm_stream_load_si128, __m128i*, 4 , 4 );
86
// Restore the caller's setting so later tests are not affected by this override.
+ testNaNBits = oldTestNaNBits;
87
+ }
88
+
89
// Exercises _mm_test_all_ones: first through the Ret_M128i harness macro (int result),
// then by printing the result for three fixed vectors built with _mm_set1_epi64x:
// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE and 0.
+ void NOINLINE test_test_all_ones () {
88
90
Ret_M128i (int , _mm_test_all_ones);
89
91
printf (" _mm_test_all_ones(0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_ones (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
90
92
printf (" _mm_test_all_ones(0xFFFFFFFFFFFFFFFEull): %d\n " , _mm_test_all_ones (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull )));
91
93
printf (" _mm_test_all_ones(0): %d\n " , _mm_test_all_ones (_mm_set1_epi64x (0 )));
94
+ }
95
+
96
// Exercises _mm_test_all_zeros: first through the Ret_M128i_M128i harness macro (int result),
// then by printing the result for three fixed operand pairs. The first operand is
// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE or 0; the second is always 0xFFFFFFFFFFFFFFFF.
+ void NOINLINE test_test_all_zeros () {
92
97
Ret_M128i_M128i (int , _mm_test_all_zeros);
93
98
printf (" _mm_test_all_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
94
99
printf (" _mm_test_all_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
95
100
printf (" _mm_test_all_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_all_zeros (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
101
+ }
102
+
103
// Exercises _mm_test_mix_ones_zeros: first through the Ret_M128i_M128i harness macro (int result),
// then by printing the result for three fixed operand pairs. The first operand is
// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE or 0; the second is always 0xFFFFFFFFFFFFFFFF.
+ void NOINLINE test_test_mix_ones_zeros () {
96
104
Ret_M128i_M128i (int , _mm_test_mix_ones_zeros);
97
105
printf (" _mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_mix_ones_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
98
106
printf (" _mm_test_mix_ones_zeros(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_mix_ones_zeros (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
99
107
printf (" _mm_test_mix_ones_zeros(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_test_mix_ones_zeros (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
108
+ }
109
+
110
// Exercises _mm_testc_si128: first through the Ret_M128i_M128i harness macro (int result),
// then by printing the result for three fixed operand pairs. The first operand is
// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE or 0; the second is always 0xFFFFFFFFFFFFFFFF.
+ void NOINLINE test_testc () {
100
111
Ret_M128i_M128i (int , _mm_testc_si128);
101
112
printf (" _mm_testc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
102
113
printf (" _mm_testc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
103
114
printf (" _mm_testc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testc_si128 (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
115
+ }
116
+
117
// Exercises _mm_testnzc_si128: first through the Ret_M128i_M128i harness macro (int result),
// then by printing the result for three fixed operand pairs. The first operand is
// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE or 0; the second is always 0xFFFFFFFFFFFFFFFF.
+ void NOINLINE test_testnzc () {
104
118
Ret_M128i_M128i (int , _mm_testnzc_si128);
105
119
printf (" _mm_testnzc_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testnzc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
106
120
printf (" _mm_testnzc_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testnzc_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
107
121
printf (" _mm_testnzc_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testnzc_si128 (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
122
+ }
123
+
124
// Exercises _mm_testz_si128: first through the Ret_M128i_M128i harness macro (int result),
// then by printing the result for three fixed operand pairs. The first operand is
// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE or 0; the second is always 0xFFFFFFFFFFFFFFFF.
+ void NOINLINE test_testz () {
108
125
Ret_M128i_M128i (int , _mm_testz_si128);
109
126
printf (" _mm_testz_si128(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testz_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
110
127
printf (" _mm_testz_si128(0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testz_si128 (_mm_set1_epi64x (0xFFFFFFFFFFFFFFFEull ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
111
128
printf (" _mm_testz_si128(0, 0xFFFFFFFFFFFFFFFFull): %d\n " , _mm_testz_si128 (_mm_set1_epi64x (0 ), _mm_set1_epi64x (0xFFFFFFFFFFFFFFFFull )));
112
129
}
130
+
131
// Test driver: checks the input-table sizes, then calls every per-intrinsic test function
// once, in a fixed order.
// NOTE(review): the call order fixes the order of the printed output; presumably that output
// is compared against a reference file — keep the order stable unless that is updated too.
+ int main () {
132
// Each input table must hold a multiple of 4 entries — presumably because the Ret_* harness
// macros consume inputs in groups of four; confirm against the macro definitions.
+ assert (numInterestingFloats % 4 == 0 );
133
+ assert (numInterestingInts % 4 == 0 );
134
+ assert (numInterestingDoubles % 4 == 0 );
135
+
136
+ test_ceil_pd ();
137
+ test_ceil_ps ();
138
+ test_ceil_sd ();
139
+ test_ceil_ss ();
140
+ test_floor_pd ();
141
+ test_floor_ps ();
142
+ test_floor_sd ();
143
+ test_floor_ss ();
144
+ test_round_pd ();
145
+ test_round_ps ();
146
+ test_round_sd ();
147
+ test_round_ss ();
148
+ test_blend_epi16 ();
149
+ test_blend_pd ();
150
+ test_blend_ps ();
151
+ test_blendv_epi8 ();
152
+ test_blendv_pd ();
153
+ test_blendv_ps ();
154
+ test_cvtepi16_epi32 ();
155
+ test_cvtepi16_epi64 ();
156
+ test_cvtepi32_epi64 ();
157
+ test_cvtepi8_epi16 ();
158
+ test_cvtepi8_epi32 ();
159
+ test_cvtepi8_epi64 ();
160
+ test_cvtepu16_epi32 ();
161
+ test_cvtepu16_epi64 ();
162
+ test_cvtepu32_epi64 ();
163
+ test_cvtepu8_epi16 ();
164
+ test_cvtepu8_epi32 ();
165
+ test_cvtepu8_epi64 ();
166
+ test_extract_epi32 ();
167
+ test_extract_epi64 ();
168
+ test_extract_epi8 ();
169
+ test_extract_ps ();
170
+ test_insert_epi32 ();
171
+ test_insert_epi64 ();
172
+ test_insert_ps ();
173
+ test_max_epi32 ();
174
+ test_max_epi8 ();
175
+ test_max_epu16 ();
176
+ test_max_epu32 ();
177
+ test_min_epi32 ();
178
+ test_min_epi8 ();
179
+ test_min_epu16 ();
180
+ test_min_epu32 ();
181
// The doubled `test_test_` / `test_testmul_` prefixes below mirror the function definitions
// as written; any rename has to change both places together.
+ test_test_cmpeq_epi64 ();
182
+ test_test_minpos_epu16 ();
183
+ test_test_mpsadbw_epu8 ();
184
+ test_testmul_epi32 ();
185
+ test_test_mullo_epi32 ();
186
+ test_test_packus_epi32 ();
187
+ test_test_stream_load_si128 ();
188
+ test_dp_pd ();
189
+ test_dp_ps ();
190
+ test_test_all_ones ();
191
+ test_test_all_zeros ();
192
+ test_test_mix_ones_zeros ();
193
+ test_testc ();
194
+ test_testnzc ();
195
+ test_testz ();
196
+ }
0 commit comments