Skip to content

RISC-V: implement AES-CTR mode intrinsics #25281

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/hotspot/cpu/riscv/assembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2609,6 +2609,14 @@ enum Nf {
INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000);
INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000);

// "_vs" counterparts of the AES round instructions (same operand fields as
// the "_vv" forms, last encoding field 0b101001 instead of 0b101000).
// generate_counterMode_AESCrypt issues vaesem_vs for every round but the
// last and vaesef_vs for the last one, each with one loaded round key.
INSN(vaesem_vs, 0b1110111, 0b010, 0b00010, 0b101001);
INSN(vaesef_vs, 0b1110111, 0b010, 0b00011, 0b101001);

// NOTE(review): presumably the decryption counterparts of the two above;
// they are not referenced by the CTR stub.
INSN(vaesdm_vs, 0b1110111, 0b010, 0b00000, 0b101001);
INSN(vaesdf_vs, 0b1110111, 0b010, 0b00001, 0b101001);

// Used by the CTR stub with working_vregs[0] before the round loop, and to
// replicate the loaded counter block into a zeroed register group.
INSN(vaesz_vs, 0b1110111, 0b010, 0b00111, 0b101001);

INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros

Expand Down
190 changes: 190 additions & 0 deletions src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,192 @@ class StubGenerator: public StubCodeGenerator {
return start;
}

// CTR AES crypt.
//
// Emits the stub for counter-mode AES built on the vector AES (Zvkned)
// instructions and returns the stub's entry address.
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//   c_rarg3   - counter vector byte array address
//   c_rarg4   - input length
//   c_rarg5   - saved encryptedCounter start
//   c_rarg6   - address of the saved used length
//
// Output:
//   x10       - output length (the input length is returned unchanged)
//
address generate_counterMode_AESCrypt() {
  assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");

  __ align(CodeEntryAlignment);
  StubGenStubId stub_id = StubGenStubId::counterMode_AESCrypt_id;
  StubCodeMark mark(this, stub_id);

  // input parm
  const Register in                  = c_rarg0;
  const Register out                 = c_rarg1;
  const Register key                 = c_rarg2;
  const Register counter             = c_rarg3;
  const Register input_len           = c_rarg4;
  const Register saved_encrypted_ctr = c_rarg5;
  const Register used_ptr            = c_rarg6;

  // Temp register names, for convenience.
  // c_rarg0..c_rarg6 occupy x10..x16, so temporaries must be chosen outside
  // that range: input_len, saved_encrypted_ctr and used_ptr are all still
  // needed at the end of the stub and must not be overwritten.
  const Register vlen      = x28;
  const Register keylen    = x29;
  const Register ctr       = x30;
  const Register used      = x31;
  const Register keysize   = t2;
  const Register len       = x17;
  const Register rscratch1 = t0;
  const Register rscratch2 = t1;

  VectorRegister working_vregs[] = {
    v1, v2,  v3,  v4,  v5,  v6,  v7, v8,
    v9, v10, v11, v12, v13, v14, v15
  };

  const address start = __ pc();
  __ enter();

  Label L_tail, L_exit, L_loop, L_first_loop, L_ctr_group_loop;

  __ mv(len, input_len);
  // NOTE(review): `used` is accessed as a 32-bit value because the Java-side
  // field is an int; confirm against the intrinsic's caller.
  __ lwu(used, Address(used_ptr));
  // keysize must be set before the first branch: every path (including the
  // used == 0 one) compares against it later.
  __ mv(keysize, 16);
  __ beqz(used, L_ctr_group_loop);
  __ blt(used, keysize, L_tail);

  __ bind(L_ctr_group_loop);

  __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  Label L_aes128, L_aes192, L_exit_loadkey, L_savekey;

  // Load 11, 13 or 15 round keys depending on the key array length
  // (< 52, == 52, > 52 ints).
  __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
  __ mv(t1, 52);
  __ blt(keylen, t1, L_aes128);
  __ beq(keylen, t1, L_aes192);

  generate_aes_loadkeys(key, working_vregs, 15);
  __ j(L_exit_loadkey);

  __ bind(L_aes128);
  generate_aes_loadkeys(key, working_vregs, 11);
  __ j(L_exit_loadkey);

  __ bind(L_aes192);
  generate_aes_loadkeys(key, working_vregs, 13);

  __ bind(L_exit_loadkey);

  // init aes_ctr counter input
  // 0b10001000 replicated into every byte of v0 yields a mask that selects
  // element 3 of each group of four 32-bit elements.
  uint64_t maskIndex = 0x00000088ul; // 0b10001000
  __ li(t0, maskIndex);
  // rd != x0 with rs1 == x0 requests VLMAX. The resulting vl is not needed,
  // so it goes to a scratch register rather than ra (x1).
  __ vsetvli(t1, x0, Assembler::e8, Assembler::m1);
  __ vmv_v_x(v0, t0);
  __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
  __ vle32_v(v31, counter);
  // Byte-swap only the masked counter word so it can be incremented with
  // ordinary integer adds.
  __ vrev8_v(v31, v31, Assembler::VectorMask::v0_t);
  // NOTE(review): `len` is passed as the element count for 32-bit elements
  // here and is decremented by vlen (elements) in the loop, whereas the tail
  // path decrements it per byte. Confirm the intended unit of input length.
  __ vsetvli(x0, len, Assembler::e32, Assembler::m4);
  // Replicate the counter block across the v16 register group ...
  __ vmv_v_i(v16, 0);
  __ vaesz_vs(v16, v31);
  // ... and add 0, 1, 2, ... to the masked word of successive blocks.
  __ viota_m(v20, v0, Assembler::VectorMask::v0_t);
  __ vsetvli(vlen, len, Assembler::e32, Assembler::m4);
  __ vadd_vv(v16, v16, v20, Assembler::VectorMask::v0_t);
  __ j(L_first_loop);

  __ bind(L_loop);
  __ vsetvli(vlen, len, Assembler::e32, Assembler::m4);
  // Advance every counter by the number of blocks handled last iteration.
  __ vadd_vx(v16, v16, ctr, Assembler::VectorMask::v0_t);

  __ bind(L_first_loop);
  __ vle32_v(v20, in);
  __ slli(t0, vlen, 2);   // bytes consumed this iteration
  __ srli(ctr, vlen, 2);  // 4-element blocks consumed this iteration
  __ sub(len, len, vlen);
  __ add(in, in, t0);

  // Work on a copy of the counters: swap the masked word back, then run the
  // AES rounds over it.
  __ vmv_v_v(v24, v16);
  __ vrev8_v(v24, v24, Assembler::VectorMask::v0_t);
  __ vaesz_vs(v24, working_vregs[0]);

  Label L_aes128_loop, L_aes192_loop, L_exit_aes_loop;
  // t0 still holds the byte advance, so the constant goes in t1.
  __ mv(t1, 52);
  __ blt(keylen, t1, L_aes128_loop);
  __ beq(keylen, t1, L_aes192_loop);

  // encrypt aes256
  for (int i = 1; i < 14; i++) {
    __ vaesem_vs(v24, working_vregs[i]);
  }
  __ vaesef_vs(v24, working_vregs[14]);
  __ j(L_exit_aes_loop);

  // encrypt aes128
  __ bind(L_aes128_loop);
  for (int i = 1; i < 10; i++) {
    __ vaesem_vs(v24, working_vregs[i]);
  }
  __ vaesef_vs(v24, working_vregs[10]);
  __ j(L_exit_aes_loop);

  // encrypt aes192
  __ bind(L_aes192_loop);
  for (int i = 1; i < 12; i++) {
    __ vaesem_vs(v24, working_vregs[i]);
  }
  __ vaesef_vs(v24, working_vregs[12]);
  __ bind(L_exit_aes_loop);

  // output = encrypted counters XOR input
  __ vxor_vv(v24, v24, v20);
  __ vse32_v(v24, out);
  __ add(out, out, t0);

  __ bnez(len, L_loop);

  // save key and used
  // Select the register of the v16..v19 group that corresponds to the last
  // iteration's vlen, stepping 16 elements at a time.
  // NOTE(review): what is stored below is the counter value in v16, not the
  // encrypted counter in v24, and the advanced counter is not written back to
  // `counter`; confirm this matches what the tail path and caller expect.
  __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
  __ blt(vlen, keysize, L_savekey);
  __ subi(vlen, vlen, 16);
  __ vmv_v_v(v16, v17);
  __ blt(vlen, keysize, L_savekey);
  __ subi(vlen, vlen, 16);
  __ vmv_v_v(v16, v18);
  __ blt(vlen, keysize, L_savekey);
  __ subi(vlen, vlen, 16);
  __ vmv_v_v(v16, v19);

  __ bind(L_savekey);
  __ vse32_v(v16, saved_encrypted_ctr);
  __ mv(used, vlen);
  __ j(L_exit);

  // used key or len lower than 16 Byte
  // Consume the saved block one byte at a time. The accesses must be
  // byte-wide because the pointers advance by one byte per iteration.
  __ bind(L_tail);
  __ beqz(len, L_exit);
  __ add(rscratch2, saved_encrypted_ctr, used);
  __ lbu(rscratch1, Address(rscratch2));
  __ lbu(rscratch2, Address(in));
  __ xorr(rscratch1, rscratch2, rscratch1);
  __ sb(rscratch1, Address(out));
  __ addi(in, in, 1);
  __ addi(out, out, 1);
  __ addi(used, used, 1);
  __ subi(len, len, 1);
  __ blt(used, keysize, L_tail);
  __ j(L_ctr_group_loop);

  __ bind(L_exit);
  __ sw(used, Address(used_ptr));
  __ mv(x10, input_len);
  __ leave();
  __ ret();

  return start;
}

// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
void compare_string_8_x_LU(Register tmpL, Register tmpU,
Register strL, Register strU, Label& DIFF) {
Expand Down Expand Up @@ -6763,6 +6949,10 @@ static const int64_t right_3_bits = right_n_bits(3);
// AES stubs. The CTR stub is nested under UseAESIntrinsics because
// generate_counterMode_AESCrypt asserts that flag.
// NOTE(review): with UseAESCTRIntrinsics set but UseAESIntrinsics clear, no
// CTR stub is generated here - confirm the flags are reconciled elsewhere.
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();

if (UseAESCTRIntrinsics) {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
}
}

if (UsePoly1305Intrinsics) {
Expand Down
5 changes: 0 additions & 5 deletions src/hotspot/cpu/riscv/vm_version_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,11 +452,6 @@ void VM_Version::c2_initialize() {
}
}

if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}

if (FLAG_IS_DEFAULT(AlignVector)) {
FLAG_SET_DEFAULT(AlignVector, AvoidUnalignedAccesses);
}
Expand Down