Skip to content

Commit 155b10a

Browse files
author
Vladimir Ivanov
committed
8293329: x86: Improve handling of constants in AES/GHASH stubs
Reviewed-by: kvn
1 parent d3f7e3b commit 155b10a

File tree

6 files changed

+276
-351
lines changed

6 files changed

+276
-351
lines changed

src/hotspot/cpu/x86/stubGenerator_x86_32.cpp

Lines changed: 40 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,22 @@
6565
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
6666
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
6767

68+
ATTRIBUTE_ALIGNED(16) uint32_t KEY_SHUFFLE_MASK[] = {
69+
0x00010203UL, 0x04050607UL, 0x08090A0BUL, 0x0C0D0E0FUL,
70+
};
71+
72+
ATTRIBUTE_ALIGNED(16) uint32_t COUNTER_SHUFFLE_MASK[] = {
73+
0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
74+
};
75+
76+
ATTRIBUTE_ALIGNED(16) uint32_t GHASH_BYTE_SWAP_MASK[] = {
77+
0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
78+
};
79+
80+
ATTRIBUTE_ALIGNED(16) uint32_t GHASH_LONG_SWAP_MASK[] = {
81+
0x0B0A0908UL, 0x0F0E0D0CUL, 0x03020100UL, 0x07060504UL,
82+
};
83+
6884
// -------------------------------------------------------------------------------------------------------------------------
6985
// Stub Code definitions
7086

@@ -2180,26 +2196,12 @@ class StubGenerator: public StubCodeGenerator {
21802196
// AES intrinsic stubs
21812197
enum {AESBlockSize = 16};
21822198

2183-
address generate_key_shuffle_mask() {
2184-
__ align(16);
2185-
StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
2186-
address start = __ pc();
2187-
__ emit_data(0x00010203, relocInfo::none, 0 );
2188-
__ emit_data(0x04050607, relocInfo::none, 0 );
2189-
__ emit_data(0x08090a0b, relocInfo::none, 0 );
2190-
__ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
2191-
return start;
2199+
address key_shuffle_mask_addr() {
2200+
return (address)KEY_SHUFFLE_MASK;
21922201
}
21932202

2194-
address generate_counter_shuffle_mask() {
2195-
__ align(16);
2196-
StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
2197-
address start = __ pc();
2198-
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
2199-
__ emit_data(0x08090a0b, relocInfo::none, 0);
2200-
__ emit_data(0x04050607, relocInfo::none, 0);
2201-
__ emit_data(0x00010203, relocInfo::none, 0);
2202-
return start;
2203+
address counter_shuffle_mask_addr() {
2204+
return (address)COUNTER_SHUFFLE_MASK;
22032205
}
22042206

22052207
// Utility routine for loading a 128-bit key word in little endian format
@@ -2209,7 +2211,7 @@ class StubGenerator: public StubCodeGenerator {
22092211
if (xmm_shuf_mask != xnoreg) {
22102212
__ pshufb(xmmdst, xmm_shuf_mask);
22112213
} else {
2212-
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2214+
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()));
22132215
}
22142216
}
22152217

@@ -2290,7 +2292,7 @@ class StubGenerator: public StubCodeGenerator {
22902292
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
22912293
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
22922294

2293-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2295+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
22942296
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
22952297
__ movptr(to, to_param);
22962298

@@ -2389,7 +2391,7 @@ class StubGenerator: public StubCodeGenerator {
23892391
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
23902392
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
23912393

2392-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2394+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
23932395
__ movdqu(xmm_result, Address(from, 0));
23942396
__ movptr(to, to_param);
23952397

@@ -2522,7 +2524,7 @@ class StubGenerator: public StubCodeGenerator {
25222524
__ movptr(len_reg , len_param);
25232525

25242526
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
2525-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2527+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
25262528
// load up xmm regs 2 thru 7 with keys 0-5
25272529
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
25282530
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
@@ -2690,7 +2692,7 @@ class StubGenerator: public StubCodeGenerator {
26902692
__ movptr(rvec , rvec_param);
26912693
__ movptr(len_reg , len_param);
26922694

2693-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2695+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
26942696
__ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
26952697

26962698
__ xorptr(pos, pos);
@@ -2909,11 +2911,11 @@ class StubGenerator: public StubCodeGenerator {
29092911

29102912
// initialize counter with initial counter
29112913
__ movdqu(xmm_curr_counter, Address(counter, 0x00));
2912-
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
2914+
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
29132915
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase
29142916

29152917
// key length could be only {11, 13, 15} * 4 = {44, 52, 60}
2916-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2918+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
29172919
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
29182920
__ cmpl(rax, 52);
29192921
__ jcc(Assembler::equal, L_key192_top);
@@ -2939,8 +2941,8 @@ class StubGenerator: public StubCodeGenerator {
29392941
__ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
29402942
__ jcc(Assembler::less, L_singleBlockLoopTop[k]);
29412943

2942-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2943-
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
2944+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
2945+
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
29442946

29452947
//load, then increase counters
29462948
CTR_DoFour(movdqa, xmm_curr_counter);
@@ -2992,8 +2994,8 @@ class StubGenerator: public StubCodeGenerator {
29922994
__ BIND(L_singleBlockLoopTop[k]);
29932995
__ cmpptr(len_reg, 0);
29942996
__ jcc(Assembler::equal, L_exit);
2995-
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2996-
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
2997+
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
2998+
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
29972999
__ movdqa(xmm_result0, xmm_curr_counter);
29983000
load_key(xmm_key, key, 0x00, xmm_key_shuf_mask);
29993001
__ push(rbx);//rbx is used for increasing counter
@@ -3078,7 +3080,7 @@ class StubGenerator: public StubCodeGenerator {
30783080
}
30793081

30803082
__ BIND(L_exit);
3081-
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
3083+
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
30823084
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
30833085
__ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
30843086
handleSOERegisters(false /*restoring*/);
@@ -3266,28 +3268,13 @@ class StubGenerator: public StubCodeGenerator {
32663268
}
32673269

32683270
// byte swap x86 long
3269-
address generate_ghash_long_swap_mask() {
3270-
__ align(CodeEntryAlignment);
3271-
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
3272-
address start = __ pc();
3273-
__ emit_data(0x0b0a0908, relocInfo::none, 0);
3274-
__ emit_data(0x0f0e0d0c, relocInfo::none, 0);
3275-
__ emit_data(0x03020100, relocInfo::none, 0);
3276-
__ emit_data(0x07060504, relocInfo::none, 0);
3277-
3278-
return start;
3271+
address ghash_long_swap_mask_addr() {
3272+
return (address)GHASH_LONG_SWAP_MASK;
32793273
}
32803274

32813275
// byte swap x86 byte array
3282-
address generate_ghash_byte_swap_mask() {
3283-
__ align(CodeEntryAlignment);
3284-
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
3285-
address start = __ pc();
3286-
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
3287-
__ emit_data(0x08090a0b, relocInfo::none, 0);
3288-
__ emit_data(0x04050607, relocInfo::none, 0);
3289-
__ emit_data(0x00010203, relocInfo::none, 0);
3290-
return start;
3276+
address ghash_byte_swap_mask_addr() {
3277+
return (address)GHASH_BYTE_SWAP_MASK;
32913278
}
32923279

32933280
/* Single and multi-block ghash operations */
@@ -3326,14 +3313,14 @@ class StubGenerator: public StubCodeGenerator {
33263313
__ movptr(blocks, blocks_param);
33273314

33283315
__ movdqu(xmm_temp0, Address(state, 0));
3329-
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
3316+
__ pshufb(xmm_temp0, ExternalAddress(ghash_long_swap_mask_addr()));
33303317

33313318
__ movdqu(xmm_temp1, Address(subkeyH, 0));
3332-
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
3319+
__ pshufb(xmm_temp1, ExternalAddress(ghash_long_swap_mask_addr()));
33333320

33343321
__ BIND(L_ghash_loop);
33353322
__ movdqu(xmm_temp2, Address(data, 0));
3336-
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
3323+
__ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr()));
33373324

33383325
__ pxor(xmm_temp0, xmm_temp2);
33393326

@@ -3419,7 +3406,7 @@ class StubGenerator: public StubCodeGenerator {
34193406

34203407
__ BIND(L_exit);
34213408
// Byte swap 16-byte result
3422-
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
3409+
__ pshufb(xmm_temp6, ExternalAddress(ghash_long_swap_mask_addr()));
34233410
__ movdqu(Address(state, 0), xmm_temp6); // store the result
34243411

34253412
handleSOERegisters(false); // restore registers
@@ -4126,16 +4113,13 @@ class StubGenerator: public StubCodeGenerator {
41264113

41274114
// don't bother generating these AES intrinsic stubs unless global flag is set
41284115
if (UseAESIntrinsics) {
4129-
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
4130-
41314116
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
41324117
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
41334118
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
41344119
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
41354120
}
41364121

41374122
if (UseAESCTRIntrinsics) {
4138-
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
41394123
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
41404124
}
41414125

@@ -4158,8 +4142,6 @@ class StubGenerator: public StubCodeGenerator {
41584142

41594143
// Generate GHASH intrinsics code
41604144
if (UseGHASHIntrinsics) {
4161-
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
4162-
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
41634145
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
41644146
}
41654147

src/hotspot/cpu/x86/stubGenerator_x86_64.hpp

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -337,31 +337,26 @@ class StubGenerator: public StubCodeGenerator {
337337

338338
address generate_key_shuffle_mask();
339339

340-
address generate_counter_shuffle_mask();
341-
342-
// This mask is used for incrementing counter value(linc0, linc4, etc.)
343-
address generate_counter_mask_addr();
344-
345-
address generate_ghash_polynomial512_addr();
346-
347340
void roundDec(XMMRegister xmm_reg);
348341
void roundDeclast(XMMRegister xmm_reg);
349342
void roundEnc(XMMRegister key, int rnum);
350343
void lastroundEnc(XMMRegister key, int rnum);
351344
void roundDec(XMMRegister key, int rnum);
352345
void lastroundDec(XMMRegister key, int rnum);
353346
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
354-
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
347+
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
355348
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
356349
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
357350
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
358351
bool final_reduction, int index, XMMRegister counter_inc_mask);
359352
// Load key and shuffle operation
360-
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg);
353+
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
354+
void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);
361355

362356
// Utility routine for loading a 128-bit key word in little endian format
363357
// can optionally specify that the shuffle mask is already in an xmmregister
364-
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg);
358+
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
359+
void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);
365360

366361
// Utility routine for increase 128bit counter (iv in CTR mode)
367362
void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
@@ -376,17 +371,15 @@ class StubGenerator: public StubCodeGenerator {
376371
void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
377372
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
378373
void gfmul(XMMRegister tmp0, XMMRegister t);
379-
void generateHtbl_one_block(Register htbl);
374+
void generateHtbl_one_block(Register htbl, Register rscratch);
380375
void generateHtbl_eight_blocks(Register htbl);
381376
void avx_ghash(Register state, Register htbl, Register data, Register blocks);
382377

383-
address generate_ghash_polynomial_addr();
384-
385-
address generate_ghash_shufflemask_addr();
386-
387-
address generate_ghash_long_swap_mask(); // byte swap x86 long
388-
389-
address generate_ghash_byte_swap_mask(); // byte swap x86 byte array
378+
// Used by GHASH and AES stubs.
379+
address ghash_polynomial_addr();
380+
address ghash_shufflemask_addr();
381+
address ghash_long_swap_mask_addr(); // byte swap x86 long
382+
address ghash_byte_swap_mask_addr(); // byte swap x86 byte array
390383

391384
// Single and multi-block ghash operations
392385
address generate_ghash_processBlocks();
@@ -395,6 +388,8 @@ class StubGenerator: public StubCodeGenerator {
395388
address generate_avx_ghash_processBlocks();
396389

397390

391+
// BASE64 stubs
392+
398393
address base64_shuffle_addr();
399394
address base64_avx2_shuffle_addr();
400395
address base64_avx2_input_mask_addr();

0 commit comments

Comments
 (0)