14
14
EXPORT_API (void ) C_SegmentFindOptimalPath15(_In_reads_(valc) unsigned long* valv, _In_ long valc, _Inout_ long long* pBits, _Inout_ long* pTransitions)
15
15
{
16
16
unsigned long transmap, bitindex = 0 , bestindex = 0 ;
17
- short bestcost;
17
+ int bestcost;
18
18
long long bits = 40 ;
19
19
20
20
// Get the preallocated memory.
@@ -46,6 +46,7 @@ EXPORT_API(void) C_SegmentFindOptimalPath15(_In_reads_(valc) unsigned long* valv
46
46
// Calculate the min cost position in the current iteration.
47
47
bestcost = _mm_extract_epi16 (_mm_minpos_epu16 (_mm_min_epu16 (
48
48
_mm_cvtepi8_epi16 (state0f), _mm_cvtepi8_epi16 (_mm_srli_si128 (state0f, 8 )))), 0 );
49
+
49
50
// Keep the invariant that the min cost position has 0 cost.
50
51
state0f = _mm_or_si128 (mask, _mm_sub_epi8 (state0f, _mm_set1_epi8 ((char )bestcost)));
51
52
// Find the bit index of the best position.
@@ -76,7 +77,7 @@ EXPORT_API(void) C_SegmentFindOptimalPath15(_In_reads_(valc) unsigned long* valv
76
77
EXPORT_API (void ) C_SegmentFindOptimalPath21(_In_reads_(valc) unsigned long* valv, _In_ long valc, _Inout_ long long* pBits, _Inout_ long* pTransitions)
77
78
{
78
79
unsigned long transmap, bitindex = 0 , bestindex = 0 ;
79
- short bestcost;
80
+ int bestcost;
80
81
long long bits = 40 ;
81
82
unsigned long * end = valv + valc;
82
83
@@ -133,6 +134,7 @@ EXPORT_API(void) C_SegmentFindOptimalPath21(_In_reads_(valc) unsigned long* valv
133
134
bestcost = _mm_extract_epi16 (_mm_minpos_epu16 (_mm_min_epu16 (
134
135
_mm_min_epu16 (_mm_cvtepi8_epi16 (statelo), _mm_cvtepi8_epi16 (_mm_srli_si128 (statelo, 8 ))),
135
136
_mm_min_epu16 (_mm_cvtepi8_epi16 (statehi), _mm_cvtepi8_epi16 (_mm_srli_si128 (statehi, 8 ))))), 0 );
137
+
136
138
// Keep the invariant that the min cost position has 0 cost.
137
139
statelo = _mm_or_si128 (masklo, _mm_sub_epi8 (statelo, _mm_set1_epi8 ((char )bestcost)));
138
140
statehi = _mm_or_si128 (maskhi, _mm_sub_epi8 (statehi, _mm_set1_epi8 ((char )bestcost)));
@@ -169,7 +171,7 @@ EXPORT_API(void) C_SegmentFindOptimalPath7(_In_reads_(valc) unsigned long* valv,
169
171
// faster than the 15-bit version of this function, but this does
170
172
// not seem to actually be the case? This should be improved.
171
173
unsigned long transmap, bitindex = 0 , bestindex = 0 ;
172
- short bestcost;
174
+ int bestcost;
173
175
long long bits = 40 ;
174
176
unsigned long * end = valv + valc;
175
177
@@ -205,8 +207,9 @@ EXPORT_API(void) C_SegmentFindOptimalPath7(_In_reads_(valc) unsigned long* valv,
205
207
__m128i min = _mm_minpos_epu16 (state0f);
206
208
bestcost = _mm_extract_epi16 (min, 0 );
207
209
bestindex = _mm_extract_epi16 (min, 1 );
210
+
208
211
// Keep the invariant that the min cost position has 0 cost.
209
- state0f = _mm_or_si128 (mask, _mm_sub_epi8 (state0f, _mm_set1_epi16 (bestcost)));
212
+ state0f = _mm_or_si128 (mask, _mm_sub_epi8 (state0f, _mm_set1_epi16 (( short ) bestcost)));
210
213
// Store the vital statistics.
211
214
// [31,27]: best bit index
212
215
// [26,22]: min bits to encode current value (m)
@@ -260,9 +263,11 @@ EXPORT_API(void) C_SegmentFindOptimalCost15(_In_reads_(valc) unsigned int* valv,
260
263
__m128i mask = _mm_load_si128 (masksu + numbits);
261
264
// Calculate the next base state of state0f.
262
265
state0f = _mm_or_si128 (mask, _mm_min_epu8 (_mm_adds_epu8 (state0f, stay), transition));
266
+
263
267
// Calculate the min cost position in the current iteration.
264
- short bestcost = _mm_extract_epi16 (_mm_minpos_epu16 (_mm_min_epu16 (
268
+ int bestcost = _mm_extract_epi16 (_mm_minpos_epu16 (_mm_min_epu16 (
265
269
_mm_cvtepi8_epi16 (state0f), _mm_cvtepi8_epi16 (_mm_srli_si128 (state0f, 8 )))), 0 );
270
+
266
271
// Keep the invariant that the min cost position has 0 cost.
267
272
state0f = _mm_or_si128 (mask, _mm_sub_epi8 (state0f, _mm_set1_epi8 ((char )bestcost)));
268
273
// Find the index of the best position.
@@ -274,7 +279,7 @@ EXPORT_API(void) C_SegmentFindOptimalCost15(_In_reads_(valc) unsigned int* valv,
274
279
EXPORT_API (void ) C_SegmentFindOptimalCost31(_In_reads_(valc) unsigned long* valv, _In_ long valc, _Inout_ long long* pBits)
275
280
{
276
281
unsigned long bitindex = 0 , bestindex = 0 ;
277
- short bestcost;
282
+ int bestcost;
278
283
long long bits = 40 ;
279
284
unsigned long * end = valv + valc;
280
285
@@ -318,6 +323,7 @@ EXPORT_API(void) C_SegmentFindOptimalCost31(_In_reads_(valc) unsigned long* valv
318
323
bestcost = _mm_extract_epi16 (_mm_minpos_epu16 (_mm_min_epu16 (
319
324
_mm_min_epu16 (_mm_cvtepi8_epi16 (statelo), _mm_cvtepi8_epi16 (_mm_srli_si128 (statelo, 8 ))),
320
325
_mm_min_epu16 (_mm_cvtepi8_epi16 (statehi), _mm_cvtepi8_epi16 (_mm_srli_si128 (statehi, 8 ))))), 0 );
326
+
321
327
// Keep the invariant that the min cost position has 0 cost.
322
328
statelo = _mm_or_si128 (masklo, _mm_sub_epi8 (statelo, _mm_set1_epi8 ((char )bestcost)));
323
329
statehi = _mm_or_si128 (maskhi, _mm_sub_epi8 (statehi, _mm_set1_epi8 ((char )bestcost)));
0 commit comments