@@ -288,17 +288,22 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b
288288 return 0 ;
289289}
290290
291+ // PAIRGEN:
292+ // The fe_set_b32 and fe_get_b32 functions are a bottleneck for pairgen.
293+ // Replace the default implementations with faster (and simpler) ones:
294+
295+ #define BE64 (x ) __builtin_bswap64((x)) /* Reverse the byte order of a 64-bit value via the compiler builtin. NOTE(review): presumably meant as a big-endian <-> host conversion, which only holds on little-endian hosts -- confirm target platforms. */
296+
291297static int secp256k1_fe_set_b32 (secp256k1_fe_t * r , const unsigned char * a ) {
292- int i ;
293- r -> n [0 ] = r -> n [1 ] = r -> n [2 ] = r -> n [3 ] = r -> n [4 ] = 0 ;
294- for (i = 0 ; i < 32 ; i ++ ) {
295- int j ;
296- for (j = 0 ; j < 2 ; j ++ ) {
297- int limb = (8 * i + 4 * j )/52 ;
298- int shift = (8 * i + 4 * j )%52 ;
299- r -> n [limb ] |= (uint64_t )((a [31 - i ] >> (4 * j )) & 0xF ) << shift ;
300- }
301- }
298+ uint64_t * a64 = (uint64_t * )a ;
299+ uint64_t a0 = BE64 (a64 [0 ]), a1 = BE64 (a64 [1 ]), a2 = BE64 (a64 [2 ]),
300+ a3 = BE64 (a64 [3 ]);
301+
302+ r -> n [0 ] = a3 & 0x000FFFFFFFFFFFFFULL ;
303+ r -> n [1 ] = (a3 >> 52 ) | ((a2 << 12 ) & 0x000FFFFFFFFFFFFFULL );
304+ r -> n [2 ] = (a2 >> 40 ) | ((a1 << 24 ) & 0x000FFFFFFFFFFFFFULL );
305+ r -> n [3 ] = (a1 >> 28 ) | ((a0 << 36 ) & 0x000FFFFFFFFFFFFFULL );
306+ r -> n [4 ] = (a0 >> 16 );
302307 if (r -> n [4 ] == 0x0FFFFFFFFFFFFULL && (r -> n [3 ] & r -> n [2 ] & r -> n [1 ]) == 0xFFFFFFFFFFFFFULL && r -> n [0 ] >= 0xFFFFEFFFFFC2FULL ) {
303308 return 0 ;
304309 }
@@ -317,16 +322,14 @@ static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
317322 VERIFY_CHECK (a -> normalized );
318323 secp256k1_fe_verify (a );
319324#endif
320- for (i = 0 ; i < 32 ; i ++ ) {
321- int j ;
322- int c = 0 ;
323- for (j = 0 ; j < 2 ; j ++ ) {
324- int limb = (8 * i + 4 * j )/52 ;
325- int shift = (8 * i + 4 * j )%52 ;
326- c |= ((a -> n [limb ] >> shift ) & 0xF ) << (4 * j );
327- }
328- r [31 - i ] = c ;
329- }
325+ uint64_t * r64 = (uint64_t * )r ;
326+ uint64_t n4 = a -> n [4 ], n3 = a -> n [3 ], n2 = a -> n [2 ], n1 = a -> n [1 ],
327+ n0 = a -> n [0 ];
328+
329+ r64 [0 ] = BE64 ((n4 << 16 ) | (n3 >> 36 ));
330+ r64 [1 ] = BE64 ((n3 << 28 ) | (n2 >> 24 ));
331+ r64 [2 ] = BE64 ((n2 << 40 ) | (n1 >> 12 ));
332+ r64 [3 ] = BE64 ((n1 << 52 ) | n0 );
330333}
331334
332335SECP256K1_INLINE static void secp256k1_fe_negate (secp256k1_fe_t * r , const secp256k1_fe_t * a , int m ) {
0 commit comments