Skip to content

Commit 4e7a43d

Browse files
committed
Optimize secp256k1_fe_set_b32/secp256k1_fe_get_b32
The fe_set_b32 and fe_get_b32 implementations are slow and are a bottleneck for pairgen. Replace the default implementation with a faster version.
1 parent 30ce78f commit 4e7a43d

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

secp256k1/src/field_5x52_impl.h

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -288,17 +288,22 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b
288288
return 0;
289289
}
290290

291+
// PAIRGEN:
292+
// The fe_set_b32 and fe_get_b32 functions are a bottleneck for pairgen.
293+
// Replace the default implementations with faster (and simpler) ones:
294+
295+
/* BE64(x): convert a 64-bit word between big-endian byte order and host
 * byte order (the conversion is its own inverse).
 *
 * On little-endian hosts this is a byte swap. On big-endian hosts a word
 * loaded from big-endian bytes is already in host order, so the macro must
 * be the identity there; swapping unconditionally would corrupt the value.
 * Detection relies on the __BYTE_ORDER__ predefined macros of GCC/Clang,
 * the same compilers that provide __builtin_bswap64. If those macros are
 * absent, the little-endian behaviour is kept. */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define BE64(x) (x)
#else
#define BE64(x) __builtin_bswap64((x))
#endif
296+
291297
static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
292-
int i;
293-
r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
294-
for (i=0; i<32; i++) {
295-
int j;
296-
for (j=0; j<2; j++) {
297-
int limb = (8*i+4*j)/52;
298-
int shift = (8*i+4*j)%52;
299-
r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift;
300-
}
301-
}
298+
uint64_t *a64 = (uint64_t *)a;
299+
uint64_t a0 = BE64(a64[0]), a1 = BE64(a64[1]), a2 = BE64(a64[2]),
300+
a3 = BE64(a64[3]);
301+
302+
r->n[0] = a3 & 0x000FFFFFFFFFFFFFULL;
303+
r->n[1] = (a3 >> 52) | ((a2 << 12) & 0x000FFFFFFFFFFFFFULL);
304+
r->n[2] = (a2 >> 40) | ((a1 << 24) & 0x000FFFFFFFFFFFFFULL);
305+
r->n[3] = (a1 >> 28) | ((a0 << 36) & 0x000FFFFFFFFFFFFFULL);
306+
r->n[4] = (a0 >> 16);
302307
if (r->n[4] == 0x0FFFFFFFFFFFFULL && (r->n[3] & r->n[2] & r->n[1]) == 0xFFFFFFFFFFFFFULL && r->n[0] >= 0xFFFFEFFFFFC2FULL) {
303308
return 0;
304309
}
@@ -317,16 +322,14 @@ static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
317322
VERIFY_CHECK(a->normalized);
318323
secp256k1_fe_verify(a);
319324
#endif
320-
for (i=0; i<32; i++) {
321-
int j;
322-
int c = 0;
323-
for (j=0; j<2; j++) {
324-
int limb = (8*i+4*j)/52;
325-
int shift = (8*i+4*j)%52;
326-
c |= ((a->n[limb] >> shift) & 0xF) << (4 * j);
327-
}
328-
r[31-i] = c;
329-
}
325+
uint64_t *r64 = (uint64_t *)r;
326+
uint64_t n4 = a->n[4], n3 = a->n[3], n2 = a->n[2], n1 = a->n[1],
327+
n0 = a->n[0];
328+
329+
r64[0] = BE64((n4 << 16) | (n3 >> 36));
330+
r64[1] = BE64((n3 << 28) | (n2 >> 24));
331+
r64[2] = BE64((n2 << 40) | (n1 >> 12));
332+
r64[3] = BE64((n1 << 52) | n0);
330333
}
331334

332335
SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {

0 commit comments

Comments
 (0)