; mirror of https://github.com/CloverHackyColor/CloverBootloader.git
; synced 2025-02-02 22:51:28 +01:00
; 1847 lines, 51 KiB, NASM
; Assembler set-up shared by every routine in this file.

default rel                             ; bare [symbol] operands are RIP-relative

; The code writes operand sizes as XMMWORD[...]/YMMWORD[...]/ZMMWORD[...].
; Defining them as empty macros makes those spellings collapse to a plain
; [...] memory operand.
%define XMMWORD
%define YMMWORD
%define ZMMWORD

section .text code align=64

; Capability vector defined elsewhere; the dword at offset 4 is tested by
; the entry points below to choose between the SSE and AVX code paths.
EXTERN  OPENSSL_ia32cap_P
|
|
|
|
;-----------------------------------------------------------------------
; aesni_multi_cbc_encrypt
;
; CBC-encrypts several independent streams with AES-NI, four streams
; ("lanes") at a time, interleaving the AES rounds of the four lanes.
;
; ABI:   Microsoft x64. The arguments arrive in rcx, rdx, r8 and are
;        copied to rdi, rsi, rdx for the body of the routine.
; In:    rcx -> array of 40-byte stream descriptors. As used below,
;               each descriptor holds
;                 +0   qword  input pointer
;                 +8   qword  output pointer
;                 +16  dword  number of 16-byte blocks
;                 +24  16 B   IV
;        rdx -> key schedule: 16-byte round keys from offset 0, and a
;               dword at +240 that selects the round count
;               (< 11: 10 rounds, == 11: 12 rounds, > 11: 14 rounds).
;        r8d =  num; this path performs `num` passes, each consuming
;               four descriptors (160 bytes).
; Dispatch: if num >= 2 and bit 28 of the dword at OPENSSL_ia32cap_P+4
;        is set (presumably the AVX feature flag -- confirm), control
;        is transferred to _avx_cbc_enc_shortcut instead.
; Preserved: rbx, rbp, rdi, rsi, r12-r15, xmm6-xmm12. xmm13-xmm15 are
;        spilled by the prologue but never written on this path, so the
;        epilogue does not reload them.
; Frame (rsp after the 64-byte realignment):
;        [rsp+0 ..15]  sink that absorbs the output of finished lanes
;        [rsp+16]      caller's rsp; [rsp+16..31] also serves as the
;                      dummy input block of finished lanes
;        [rsp+24]      remaining pass count
;        [rsp+32..47]  four dword per-lane block counters
; Register roles inside the loop:
;        r8-r11  lane input pointers      r12-r15 lane output pointers
;        rbx     byte offset into every lane (advances by 16 per block)
;        rbp     sink base, biased by -rbx so [rbx+rbp] == rsp+16
;        xmm2-5  lane states              xmm6-9  next input ^ round key 0
;        xmm0/1  alternating round keys   xmm12   round key 0 / zero temp
;        xmm10   lane counters            edx     longest lane, in blocks
;-----------------------------------------------------------------------
global  aesni_multi_cbc_encrypt

ALIGN   32
aesni_multi_cbc_encrypt:
        mov         QWORD[8+rsp],rdi    ;WIN64 prologue
        mov         QWORD[16+rsp],rsi
        mov         rax,rsp
$L$SEH_begin_aesni_multi_cbc_encrypt:
        mov         rdi,rcx             ; rdi = descriptor array
        mov         rsi,rdx             ; rsi = key schedule
        mov         rdx,r8              ; edx = num

        ; Choose the code path: the AVX variant is only considered for num >= 2.
        cmp         edx,2
        jb          NEAR $L$enc_non_avx
        mov         ecx,DWORD[((OPENSSL_ia32cap_P+4))]
        test        ecx,268435456       ; bit 28
        jnz         NEAR _avx_cbc_enc_shortcut
        jmp         NEAR $L$enc_non_avx
ALIGN   16
$L$enc_non_avx:
        mov         rax,rsp             ; rax = caller's rsp, used to address the save area
        push        rbx
        push        rbp
        push        r12
        push        r13
        push        r14
        push        r15
        lea         rsp,[(-168)+rsp]    ; room for xmm6-xmm15
        movaps      XMMWORD[rsp],xmm6
        movaps      XMMWORD[16+rsp],xmm7
        movaps      XMMWORD[32+rsp],xmm8
        movaps      XMMWORD[48+rsp],xmm9
        movaps      XMMWORD[64+rsp],xmm10
        movaps      XMMWORD[80+rsp],xmm11
        movaps      XMMWORD[96+rsp],xmm12
        movaps      XMMWORD[(-104)+rax],xmm13
        movaps      XMMWORD[(-88)+rax],xmm14
        movaps      XMMWORD[(-72)+rax],xmm15

        sub         rsp,48              ; scratch frame described in the header
        and         rsp,-64
        mov         QWORD[16+rsp],rax   ; remember caller's rsp

$L$enc4x_body:
        movdqu      xmm12,XMMWORD[rsi]  ; xmm12 = round key 0
        lea         rsi,[120+rsi]       ; bias key pointer (round keys addressed as N-120)
        lea         rdi,[80+rdi]        ; bias descriptor pointer to the middle of four

        ; ---- one pass = four descriptors --------------------------------
$L$enc4x_loop_grande:
        mov         DWORD[24+rsp],edx   ; park the pass counter
        xor         edx,edx             ; edx will become max(block counts)

        ; lane 0
        mov         ecx,DWORD[(-64)+rdi]
        mov         r8,QWORD[(-80)+rdi]
        cmp         ecx,edx
        mov         r12,QWORD[(-72)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm2,XMMWORD[(-56)+rdi]
        mov         DWORD[32+rsp],ecx
        cmovle      r8,rsp              ; empty lane: read from the stack instead

        ; lane 1
        mov         ecx,DWORD[(-24)+rdi]
        mov         r9,QWORD[(-40)+rdi]
        cmp         ecx,edx
        mov         r13,QWORD[(-32)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm3,XMMWORD[(-16)+rdi]
        mov         DWORD[36+rsp],ecx
        cmovle      r9,rsp

        ; lane 2
        mov         ecx,DWORD[16+rdi]
        mov         r10,QWORD[rdi]
        cmp         ecx,edx
        mov         r14,QWORD[8+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm4,XMMWORD[24+rdi]
        mov         DWORD[40+rsp],ecx
        cmovle      r10,rsp

        ; lane 3
        mov         ecx,DWORD[56+rdi]
        mov         r11,QWORD[40+rdi]
        cmp         ecx,edx
        mov         r15,QWORD[48+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm5,XMMWORD[64+rdi]
        mov         DWORD[44+rsp],ecx
        cmovle      r11,rsp

        test        edx,edx             ; nothing to do in any lane?
        jz          NEAR $L$enc4x_done

        ; state = IV ^ first input block ^ round key 0, for each lane
        movups      xmm1,XMMWORD[(16-120)+rsi]
        pxor        xmm2,xmm12
        movups      xmm0,XMMWORD[(32-120)+rsi]
        pxor        xmm3,xmm12
        mov         eax,DWORD[(240-120)+rsi]    ; round selector
        pxor        xmm4,xmm12
        movdqu      xmm6,XMMWORD[r8]
        pxor        xmm5,xmm12
        movdqu      xmm7,XMMWORD[r9]
        pxor        xmm2,xmm6
        movdqu      xmm8,XMMWORD[r10]
        pxor        xmm3,xmm7
        movdqu      xmm9,XMMWORD[r11]
        pxor        xmm4,xmm8
        pxor        xmm5,xmm9
        movdqa      xmm10,XMMWORD[32+rsp]       ; xmm10 = four lane counters
        xor         rbx,rbx
        jmp         NEAR $L$oop_enc4x

        ; ---- one iteration = one block in every lane -------------------
ALIGN   32
$L$oop_enc4x:
        add         rbx,16
        lea         rbp,[16+rsp]
        mov         ecx,1
        sub         rbp,rbx             ; [rbx+rbp] == rsp+16, [rbx+rbp-16] == rsp

        aesenc      xmm2,xmm1
        prefetcht0  [31+rbx*1+r8]       ; prefetch the input of all four lanes
        prefetcht0  [31+rbx*1+r9]
        aesenc      xmm3,xmm1
        prefetcht0  [31+rbx*1+r10]
        prefetcht0  [31+rbx*1+r11]      ; lane 3 (was a second prefetch of lane 2)
        aesenc      xmm4,xmm1
        aesenc      xmm5,xmm1
        movups      xmm1,XMMWORD[(48-120)+rsi]

        ; Per lane: once the counter is down to 1 the next input comes from
        ; the stack; once it is <= 0 the output goes to the sink as well.
        cmp         ecx,DWORD[32+rsp]
        aesenc      xmm2,xmm0
        aesenc      xmm3,xmm0
        aesenc      xmm4,xmm0
        cmovge      r8,rbp
        cmovg       r12,rbp
        aesenc      xmm5,xmm0
        movups      xmm0,XMMWORD[(-56)+rsi]

        cmp         ecx,DWORD[36+rsp]
        aesenc      xmm2,xmm1
        aesenc      xmm3,xmm1
        aesenc      xmm4,xmm1
        cmovge      r9,rbp
        cmovg       r13,rbp
        aesenc      xmm5,xmm1
        movups      xmm1,XMMWORD[(-40)+rsi]

        cmp         ecx,DWORD[40+rsp]
        aesenc      xmm2,xmm0
        aesenc      xmm3,xmm0
        aesenc      xmm4,xmm0
        cmovge      r10,rbp
        cmovg       r14,rbp
        aesenc      xmm5,xmm0
        movups      xmm0,XMMWORD[(-24)+rsi]

        cmp         ecx,DWORD[44+rsp]
        aesenc      xmm2,xmm1
        aesenc      xmm3,xmm1
        aesenc      xmm4,xmm1
        cmovge      r11,rbp
        cmovg       r15,rbp
        aesenc      xmm5,xmm1
        movups      xmm1,XMMWORD[(-8)+rsi]

        movdqa      xmm11,xmm10
        aesenc      xmm2,xmm0
        prefetcht0  [15+rbx*1+r12]      ; prefetch the output locations
        prefetcht0  [15+rbx*1+r13]
        aesenc      xmm3,xmm0
        prefetcht0  [15+rbx*1+r14]
        prefetcht0  [15+rbx*1+r15]
        aesenc      xmm4,xmm0
        aesenc      xmm5,xmm0
        movups      xmm0,XMMWORD[(128-120)+rsi]
        pxor        xmm12,xmm12         ; borrow xmm12 as zero

        ; counters -= (counter > 0), i.e. decrement only the live lanes
        aesenc      xmm2,xmm1
        pcmpgtd     xmm11,xmm12
        movdqu      xmm12,XMMWORD[(-120)+rsi]   ; xmm12 = round key 0 again
        aesenc      xmm3,xmm1
        paddd       xmm10,xmm11
        movdqa      XMMWORD[32+rsp],xmm10
        aesenc      xmm4,xmm1
        aesenc      xmm5,xmm1
        movups      xmm1,XMMWORD[(144-120)+rsi]

        cmp         eax,11              ; flags survive until the je below

        aesenc      xmm2,xmm0
        aesenc      xmm3,xmm0
        aesenc      xmm4,xmm0
        aesenc      xmm5,xmm0
        movups      xmm0,XMMWORD[(160-120)+rsi]

        jb          NEAR $L$enc4x_tail  ; 10-round key

        aesenc      xmm2,xmm1
        aesenc      xmm3,xmm1
        aesenc      xmm4,xmm1
        aesenc      xmm5,xmm1
        movups      xmm1,XMMWORD[(176-120)+rsi]

        aesenc      xmm2,xmm0
        aesenc      xmm3,xmm0
        aesenc      xmm4,xmm0
        aesenc      xmm5,xmm0
        movups      xmm0,XMMWORD[(192-120)+rsi]

        je          NEAR $L$enc4x_tail  ; 12-round key

        aesenc      xmm2,xmm1
        aesenc      xmm3,xmm1
        aesenc      xmm4,xmm1
        aesenc      xmm5,xmm1
        movups      xmm1,XMMWORD[(208-120)+rsi]

        aesenc      xmm2,xmm0
        aesenc      xmm3,xmm0
        aesenc      xmm4,xmm0
        aesenc      xmm5,xmm0
        movups      xmm0,XMMWORD[(224-120)+rsi]
        jmp         NEAR $L$enc4x_tail

        ; ---- last two rounds, store, and set-up of the next block ------
ALIGN   32
$L$enc4x_tail:
        aesenc      xmm2,xmm1
        aesenc      xmm3,xmm1
        aesenc      xmm4,xmm1
        aesenc      xmm5,xmm1
        movdqu      xmm6,XMMWORD[rbx*1+r8]      ; next input block, lane 0
        movdqu      xmm1,XMMWORD[(16-120)+rsi]  ; reload round key 1

        aesenclast  xmm2,xmm0
        movdqu      xmm7,XMMWORD[rbx*1+r9]
        pxor        xmm6,xmm12
        aesenclast  xmm3,xmm0
        movdqu      xmm8,XMMWORD[rbx*1+r10]
        pxor        xmm7,xmm12
        aesenclast  xmm4,xmm0
        movdqu      xmm9,XMMWORD[rbx*1+r11]
        pxor        xmm8,xmm12
        aesenclast  xmm5,xmm0
        movdqu      xmm0,XMMWORD[(32-120)+rsi]  ; reload round key 2
        pxor        xmm9,xmm12

        ; write ciphertext, then chain it into the next block's state
        movups      XMMWORD[(-16)+rbx*1+r12],xmm2
        pxor        xmm2,xmm6
        movups      XMMWORD[(-16)+rbx*1+r13],xmm3
        pxor        xmm3,xmm7
        movups      XMMWORD[(-16)+rbx*1+r14],xmm4
        pxor        xmm4,xmm8
        movups      XMMWORD[(-16)+rbx*1+r15],xmm5
        pxor        xmm5,xmm9

        dec         edx
        jnz         NEAR $L$oop_enc4x

        mov         rax,QWORD[16+rsp]   ; caller's rsp
        mov         edx,DWORD[24+rsp]   ; pass counter

        lea         rdi,[160+rdi]       ; next four descriptors
        dec         edx
        jnz         NEAR $L$enc4x_loop_grande

$L$enc4x_done:
        ; rax = caller's rsp on both paths into here
        movaps      xmm6,XMMWORD[(-216)+rax]
        movaps      xmm7,XMMWORD[(-200)+rax]
        movaps      xmm8,XMMWORD[(-184)+rax]
        movaps      xmm9,XMMWORD[(-168)+rax]
        movaps      xmm10,XMMWORD[(-152)+rax]
        movaps      xmm11,XMMWORD[(-136)+rax]
        movaps      xmm12,XMMWORD[(-120)+rax]

        mov         r15,QWORD[(-48)+rax]
        mov         r14,QWORD[(-40)+rax]
        mov         r13,QWORD[(-32)+rax]
        mov         r12,QWORD[(-24)+rax]
        mov         rbp,QWORD[(-16)+rax]
        mov         rbx,QWORD[(-8)+rax]
        lea         rsp,[rax]

$L$enc4x_epilogue:
        mov         rdi,QWORD[8+rsp]    ;WIN64 epilogue
        mov         rsi,QWORD[16+rsp]
        DB          0F3h,0C3h           ;repret

$L$SEH_end_aesni_multi_cbc_encrypt:
|
|
|
|
;-----------------------------------------------------------------------
; aesni_multi_cbc_decrypt
;
; CBC-decrypts several independent streams with AES-NI, four streams
; ("lanes") at a time, interleaving the AES rounds of the four lanes.
;
; ABI:   Microsoft x64. The arguments arrive in rcx, rdx, r8 and are
;        copied to rdi, rsi, rdx for the body of the routine.
; In:    rcx -> array of 40-byte stream descriptors. As used below,
;               each descriptor holds
;                 +0   qword  input pointer
;                 +8   qword  output pointer
;                 +16  dword  number of 16-byte blocks
;                 +24  16 B   IV
;        rdx -> key schedule: 16-byte round keys from offset 0, and a
;               dword at +240 that selects the round count
;               (< 11: 10 rounds, == 11: 12 rounds, > 11: 14 rounds).
;        r8d =  num; this path performs `num` passes, each consuming
;               four descriptors (160 bytes).
; Dispatch: if num >= 2 and bit 28 of the dword at OPENSSL_ia32cap_P+4
;        is set (presumably the AVX feature flag -- confirm), control
;        is transferred to _avx_cbc_dec_shortcut instead.
; Preserved: rbx, rbp, rdi, rsi, r12-r15, xmm6-xmm12. xmm13-xmm15 are
;        spilled by the prologue but never written on this path, so the
;        epilogue does not reload them.
; Frame (rsp after the 64-byte realignment):
;        [rsp+0 ..15]  sink that absorbs the output of finished lanes
;        [rsp+16]      caller's rsp; [rsp+16..31] also serves as the
;                      dummy input block of finished lanes
;        [rsp+24]      remaining pass count
;        [rsp+32..47]  four dword per-lane block counters
; Register roles inside the loop:
;        r8-r11  lane input pointers      r12-r15 lane output pointers
;        rbx     byte offset into every lane (advances by 16 per block)
;        rbp     sink base, biased by -rbx so [rbx+rbp] == rsp+16
;        xmm2-5  lane states              xmm6-9  chaining value per lane
;        xmm0/1  alternating round keys   xmm12   round key 0 / zero temp
;        xmm10   lane counters            edx     longest lane, in blocks
;-----------------------------------------------------------------------
global  aesni_multi_cbc_decrypt

ALIGN   32
aesni_multi_cbc_decrypt:
        mov         QWORD[8+rsp],rdi    ;WIN64 prologue
        mov         QWORD[16+rsp],rsi
        mov         rax,rsp
$L$SEH_begin_aesni_multi_cbc_decrypt:
        mov         rdi,rcx             ; rdi = descriptor array
        mov         rsi,rdx             ; rsi = key schedule
        mov         rdx,r8              ; edx = num

        ; Choose the code path: the AVX variant is only considered for num >= 2.
        cmp         edx,2
        jb          NEAR $L$dec_non_avx
        mov         ecx,DWORD[((OPENSSL_ia32cap_P+4))]
        test        ecx,268435456       ; bit 28
        jnz         NEAR _avx_cbc_dec_shortcut
        jmp         NEAR $L$dec_non_avx
ALIGN   16
$L$dec_non_avx:
        mov         rax,rsp             ; rax = caller's rsp, used to address the save area
        push        rbx
        push        rbp
        push        r12
        push        r13
        push        r14
        push        r15
        lea         rsp,[(-168)+rsp]    ; room for xmm6-xmm15
        movaps      XMMWORD[rsp],xmm6
        movaps      XMMWORD[16+rsp],xmm7
        movaps      XMMWORD[32+rsp],xmm8
        movaps      XMMWORD[48+rsp],xmm9
        movaps      XMMWORD[64+rsp],xmm10
        movaps      XMMWORD[80+rsp],xmm11
        movaps      XMMWORD[96+rsp],xmm12
        movaps      XMMWORD[(-104)+rax],xmm13
        movaps      XMMWORD[(-88)+rax],xmm14
        movaps      XMMWORD[(-72)+rax],xmm15

        sub         rsp,48              ; scratch frame described in the header
        and         rsp,-64
        mov         QWORD[16+rsp],rax   ; remember caller's rsp

$L$dec4x_body:
        movdqu      xmm12,XMMWORD[rsi]  ; xmm12 = first key-schedule entry
        lea         rsi,[120+rsi]       ; bias key pointer (round keys addressed as N-120)
        lea         rdi,[80+rdi]        ; bias descriptor pointer to the middle of four

        ; ---- one pass = four descriptors --------------------------------
$L$dec4x_loop_grande:
        mov         DWORD[24+rsp],edx   ; park the pass counter
        xor         edx,edx             ; edx will become max(block counts)

        ; lane 0
        mov         ecx,DWORD[(-64)+rdi]
        mov         r8,QWORD[(-80)+rdi]
        cmp         ecx,edx
        mov         r12,QWORD[(-72)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm6,XMMWORD[(-56)+rdi]     ; IV becomes the first chaining value
        mov         DWORD[32+rsp],ecx
        cmovle      r8,rsp              ; empty lane: read from the stack instead

        ; lane 1
        mov         ecx,DWORD[(-24)+rdi]
        mov         r9,QWORD[(-40)+rdi]
        cmp         ecx,edx
        mov         r13,QWORD[(-32)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm7,XMMWORD[(-16)+rdi]
        mov         DWORD[36+rsp],ecx
        cmovle      r9,rsp

        ; lane 2
        mov         ecx,DWORD[16+rdi]
        mov         r10,QWORD[rdi]
        cmp         ecx,edx
        mov         r14,QWORD[8+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm8,XMMWORD[24+rdi]
        mov         DWORD[40+rsp],ecx
        cmovle      r10,rsp

        ; lane 3
        mov         ecx,DWORD[56+rdi]
        mov         r11,QWORD[40+rdi]
        cmp         ecx,edx
        mov         r15,QWORD[48+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        movdqu      xmm9,XMMWORD[64+rdi]
        mov         DWORD[44+rsp],ecx
        cmovle      r11,rsp

        test        edx,edx             ; nothing to do in any lane?
        jz          NEAR $L$dec4x_done

        ; state = first input block ^ first key-schedule entry, for each lane
        movups      xmm1,XMMWORD[(16-120)+rsi]
        movups      xmm0,XMMWORD[(32-120)+rsi]
        mov         eax,DWORD[(240-120)+rsi]    ; round selector
        movdqu      xmm2,XMMWORD[r8]
        movdqu      xmm3,XMMWORD[r9]
        pxor        xmm2,xmm12
        movdqu      xmm4,XMMWORD[r10]
        pxor        xmm3,xmm12
        movdqu      xmm5,XMMWORD[r11]
        pxor        xmm4,xmm12
        pxor        xmm5,xmm12
        movdqa      xmm10,XMMWORD[32+rsp]       ; xmm10 = four lane counters
        xor         rbx,rbx
        jmp         NEAR $L$oop_dec4x

        ; ---- one iteration = one block in every lane -------------------
ALIGN   32
$L$oop_dec4x:
        add         rbx,16
        lea         rbp,[16+rsp]
        mov         ecx,1
        sub         rbp,rbx             ; [rbx+rbp] == rsp+16, [rbx+rbp-16] == rsp

        aesdec      xmm2,xmm1
        prefetcht0  [31+rbx*1+r8]       ; prefetch the input of all four lanes
        prefetcht0  [31+rbx*1+r9]
        aesdec      xmm3,xmm1
        prefetcht0  [31+rbx*1+r10]
        prefetcht0  [31+rbx*1+r11]
        aesdec      xmm4,xmm1
        aesdec      xmm5,xmm1
        movups      xmm1,XMMWORD[(48-120)+rsi]

        ; Per lane: once the counter is down to 1 the next input comes from
        ; the stack; once it is <= 0 the output goes to the sink as well.
        cmp         ecx,DWORD[32+rsp]
        aesdec      xmm2,xmm0
        aesdec      xmm3,xmm0
        aesdec      xmm4,xmm0
        cmovge      r8,rbp
        cmovg       r12,rbp
        aesdec      xmm5,xmm0
        movups      xmm0,XMMWORD[(-56)+rsi]

        cmp         ecx,DWORD[36+rsp]
        aesdec      xmm2,xmm1
        aesdec      xmm3,xmm1
        aesdec      xmm4,xmm1
        cmovge      r9,rbp
        cmovg       r13,rbp
        aesdec      xmm5,xmm1
        movups      xmm1,XMMWORD[(-40)+rsi]

        cmp         ecx,DWORD[40+rsp]
        aesdec      xmm2,xmm0
        aesdec      xmm3,xmm0
        aesdec      xmm4,xmm0
        cmovge      r10,rbp
        cmovg       r14,rbp
        aesdec      xmm5,xmm0
        movups      xmm0,XMMWORD[(-24)+rsi]

        cmp         ecx,DWORD[44+rsp]
        aesdec      xmm2,xmm1
        aesdec      xmm3,xmm1
        aesdec      xmm4,xmm1
        cmovge      r11,rbp
        cmovg       r15,rbp
        aesdec      xmm5,xmm1
        movups      xmm1,XMMWORD[(-8)+rsi]

        movdqa      xmm11,xmm10
        aesdec      xmm2,xmm0
        prefetcht0  [15+rbx*1+r12]      ; prefetch the output locations
        prefetcht0  [15+rbx*1+r13]
        aesdec      xmm3,xmm0
        prefetcht0  [15+rbx*1+r14]
        prefetcht0  [15+rbx*1+r15]
        aesdec      xmm4,xmm0
        aesdec      xmm5,xmm0
        movups      xmm0,XMMWORD[(128-120)+rsi]
        pxor        xmm12,xmm12         ; borrow xmm12 as zero

        ; counters -= (counter > 0), i.e. decrement only the live lanes
        aesdec      xmm2,xmm1
        pcmpgtd     xmm11,xmm12
        movdqu      xmm12,XMMWORD[(-120)+rsi]   ; xmm12 = first key-schedule entry again
        aesdec      xmm3,xmm1
        paddd       xmm10,xmm11
        movdqa      XMMWORD[32+rsp],xmm10
        aesdec      xmm4,xmm1
        aesdec      xmm5,xmm1
        movups      xmm1,XMMWORD[(144-120)+rsi]

        cmp         eax,11              ; flags survive until the je below

        aesdec      xmm2,xmm0
        aesdec      xmm3,xmm0
        aesdec      xmm4,xmm0
        aesdec      xmm5,xmm0
        movups      xmm0,XMMWORD[(160-120)+rsi]

        jb          NEAR $L$dec4x_tail  ; 10-round key

        aesdec      xmm2,xmm1
        aesdec      xmm3,xmm1
        aesdec      xmm4,xmm1
        aesdec      xmm5,xmm1
        movups      xmm1,XMMWORD[(176-120)+rsi]

        aesdec      xmm2,xmm0
        aesdec      xmm3,xmm0
        aesdec      xmm4,xmm0
        aesdec      xmm5,xmm0
        movups      xmm0,XMMWORD[(192-120)+rsi]

        je          NEAR $L$dec4x_tail  ; 12-round key

        aesdec      xmm2,xmm1
        aesdec      xmm3,xmm1
        aesdec      xmm4,xmm1
        aesdec      xmm5,xmm1
        movups      xmm1,XMMWORD[(208-120)+rsi]

        aesdec      xmm2,xmm0
        aesdec      xmm3,xmm0
        aesdec      xmm4,xmm0
        aesdec      xmm5,xmm0
        movups      xmm0,XMMWORD[(224-120)+rsi]
        jmp         NEAR $L$dec4x_tail

        ; ---- last two rounds, store, and set-up of the next block ------
ALIGN   32
$L$dec4x_tail:
        ; Fold the last round key into the chaining values so that
        ; aesdeclast applies both in a single step.
        aesdec      xmm2,xmm1
        aesdec      xmm3,xmm1
        aesdec      xmm4,xmm1
        pxor        xmm6,xmm0
        pxor        xmm7,xmm0
        aesdec      xmm5,xmm1
        movdqu      xmm1,XMMWORD[(16-120)+rsi]  ; reload round key 1
        pxor        xmm8,xmm0
        pxor        xmm9,xmm0
        movdqu      xmm0,XMMWORD[(32-120)+rsi]  ; reload round key 2

        ; The block just consumed is read back as the next chaining value
        ; before the corresponding output block is stored.
        aesdeclast  xmm2,xmm6
        aesdeclast  xmm3,xmm7
        movdqu      xmm6,XMMWORD[(-16)+rbx*1+r8]
        movdqu      xmm7,XMMWORD[(-16)+rbx*1+r9]
        aesdeclast  xmm4,xmm8
        aesdeclast  xmm5,xmm9
        movdqu      xmm8,XMMWORD[(-16)+rbx*1+r10]
        movdqu      xmm9,XMMWORD[(-16)+rbx*1+r11]

        ; write the result, then load the next input block ^ first key entry
        movups      XMMWORD[(-16)+rbx*1+r12],xmm2
        movdqu      xmm2,XMMWORD[rbx*1+r8]
        movups      XMMWORD[(-16)+rbx*1+r13],xmm3
        movdqu      xmm3,XMMWORD[rbx*1+r9]
        pxor        xmm2,xmm12
        movups      XMMWORD[(-16)+rbx*1+r14],xmm4
        movdqu      xmm4,XMMWORD[rbx*1+r10]
        pxor        xmm3,xmm12
        movups      XMMWORD[(-16)+rbx*1+r15],xmm5
        movdqu      xmm5,XMMWORD[rbx*1+r11]
        pxor        xmm4,xmm12
        pxor        xmm5,xmm12

        dec         edx
        jnz         NEAR $L$oop_dec4x

        mov         rax,QWORD[16+rsp]   ; caller's rsp
        mov         edx,DWORD[24+rsp]   ; pass counter

        lea         rdi,[160+rdi]       ; next four descriptors
        dec         edx
        jnz         NEAR $L$dec4x_loop_grande

$L$dec4x_done:
        ; rax = caller's rsp on both paths into here
        movaps      xmm6,XMMWORD[(-216)+rax]
        movaps      xmm7,XMMWORD[(-200)+rax]
        movaps      xmm8,XMMWORD[(-184)+rax]
        movaps      xmm9,XMMWORD[(-168)+rax]
        movaps      xmm10,XMMWORD[(-152)+rax]
        movaps      xmm11,XMMWORD[(-136)+rax]
        movaps      xmm12,XMMWORD[(-120)+rax]

        mov         r15,QWORD[(-48)+rax]
        mov         r14,QWORD[(-40)+rax]
        mov         r13,QWORD[(-32)+rax]
        mov         r12,QWORD[(-24)+rax]
        mov         rbp,QWORD[(-16)+rax]
        mov         rbx,QWORD[(-8)+rax]
        lea         rsp,[rax]

$L$dec4x_epilogue:
        mov         rdi,QWORD[8+rsp]    ;WIN64 epilogue
        mov         rsi,QWORD[16+rsp]
        DB          0F3h,0C3h           ;repret

$L$SEH_end_aesni_multi_cbc_decrypt:
|
|
|
|
;-----------------------------------------------------------------------
; aesni_multi_cbc_encrypt_avx  (also entered at _avx_cbc_enc_shortcut)
;
; CBC-encrypts eight independent streams ("lanes") in parallel using
; VEX-encoded AES instructions.
;
; ABI:   Microsoft x64. The arguments arrive in rcx, rdx, r8 and are
;        copied to rdi, rsi, rdx. _avx_cbc_enc_shortcut is entered by
;        aesni_multi_cbc_encrypt after it has already done that copy
;        and saved rdi/rsi in the caller's home area.
; In:    rdi -> array of eight 40-byte stream descriptors. As used
;               below, each descriptor holds
;                 +0   qword  input pointer
;                 +8   qword  output pointer
;                 +16  dword  number of 16-byte blocks
;                 +24  16 B   IV
;        rsi -> key schedule: 16-byte round keys from offset 0, and a
;               dword at +240 that selects the round count
;               (< 11: 10 rounds, == 11: 12 rounds, > 11: 14 rounds).
;        edx =  num. NOTE(review): the value is halved and then
;               immediately overwritten, and nothing branches back to
;               $L$enc8x_loop_grande, so exactly one group of eight
;               descriptors is processed regardless of num -- confirm
;               that callers rely on this.
; Preserved: rbx, rbp, rdi, rsi, r12-r15, xmm6-xmm15.
; Frame (rsp after the 128-byte realignment):
;        [rsp+0  ..15 ]  sink that absorbs the output of finished lanes
;        [rsp+16]        caller's rsp; [rsp+16..31] also serves as the
;                        dummy input block of finished lanes
;        [rsp+32 ..63 ]  eight dword per-lane block counters
;        [rsp+64 ..127]  eight qword (output - input) pointer offsets
;        [rsp+128..191]  off-load area: next input ^ round key 0 for
;                        lanes 0-3 (lanes 4-7 stay in xmm10-xmm13)
; Register roles inside the loop:
;        r8-r15  one pointer per lane; it is the input pointer while the
;                rounds run, is switched to the output pointer by adding
;                the stored offset, and switched back in the tail
;        rbx     scratch for the current lane's offset
;        rbp     -> off-load area          ecx  constant 1
;        xmm2-9  lane states               xmm0/1 alternating round keys
;        xmm15   round key 0 / zero temp   xmm14  lane counters
;        edx     longest lane, in blocks
;-----------------------------------------------------------------------
ALIGN   32
aesni_multi_cbc_encrypt_avx:
        mov         QWORD[8+rsp],rdi    ;WIN64 prologue
        mov         QWORD[16+rsp],rsi
        mov         rax,rsp
$L$SEH_begin_aesni_multi_cbc_encrypt_avx:
        mov         rdi,rcx             ; rdi = descriptor array
        mov         rsi,rdx             ; rsi = key schedule
        mov         rdx,r8              ; edx = num

_avx_cbc_enc_shortcut:
        mov         rax,rsp             ; rax = caller's rsp, used to address the save area
        push        rbx
        push        rbp
        push        r12
        push        r13
        push        r14
        push        r15
        lea         rsp,[(-168)+rsp]    ; room for xmm6-xmm15
        movaps      XMMWORD[rsp],xmm6
        movaps      XMMWORD[16+rsp],xmm7
        movaps      XMMWORD[32+rsp],xmm8
        movaps      XMMWORD[48+rsp],xmm9
        movaps      XMMWORD[64+rsp],xmm10
        movaps      XMMWORD[80+rsp],xmm11
        movaps      XMMWORD[(-120)+rax],xmm12
        movaps      XMMWORD[(-104)+rax],xmm13
        movaps      XMMWORD[(-88)+rax],xmm14
        movaps      XMMWORD[(-72)+rax],xmm15

        sub         rsp,192             ; scratch frame described in the header
        and         rsp,-128
        mov         QWORD[16+rsp],rax   ; remember caller's rsp

$L$enc8x_body:
        vzeroupper
        vmovdqu     xmm15,XMMWORD[rsi]  ; xmm15 = round key 0
        lea         rsi,[120+rsi]       ; bias key pointer (round keys addressed as N-120)
        lea         rdi,[160+rdi]       ; bias descriptor pointer to the middle of eight
        shr         edx,1               ; result unused, see NOTE(review) in the header

$L$enc8x_loop_grande:
        xor         edx,edx             ; edx will become max(block counts)

        ; lane 0 (offset kept in rbx, which the loop consumes directly)
        mov         ecx,DWORD[(-144)+rdi]
        mov         r8,QWORD[(-160)+rdi]
        cmp         ecx,edx
        mov         rbx,QWORD[(-152)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm2,XMMWORD[(-136)+rdi]
        mov         DWORD[32+rsp],ecx
        cmovle      r8,rsp              ; empty lane: read from the stack instead
        sub         rbx,r8              ; offset = output - input
        mov         QWORD[64+rsp],rbx

        ; lane 1
        mov         ecx,DWORD[(-104)+rdi]
        mov         r9,QWORD[(-120)+rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[(-112)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm3,XMMWORD[(-96)+rdi]
        mov         DWORD[36+rsp],ecx
        cmovle      r9,rsp
        sub         rbp,r9
        mov         QWORD[72+rsp],rbp

        ; lane 2
        mov         ecx,DWORD[(-64)+rdi]
        mov         r10,QWORD[(-80)+rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[(-72)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm4,XMMWORD[(-56)+rdi]
        mov         DWORD[40+rsp],ecx
        cmovle      r10,rsp
        sub         rbp,r10
        mov         QWORD[80+rsp],rbp

        ; lane 3
        mov         ecx,DWORD[(-24)+rdi]
        mov         r11,QWORD[(-40)+rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[(-32)+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm5,XMMWORD[(-16)+rdi]
        mov         DWORD[44+rsp],ecx
        cmovle      r11,rsp
        sub         rbp,r11
        mov         QWORD[88+rsp],rbp

        ; lane 4
        mov         ecx,DWORD[16+rdi]
        mov         r12,QWORD[rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[8+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm6,XMMWORD[24+rdi]
        mov         DWORD[48+rsp],ecx
        cmovle      r12,rsp
        sub         rbp,r12
        mov         QWORD[96+rsp],rbp

        ; lane 5
        mov         ecx,DWORD[56+rdi]
        mov         r13,QWORD[40+rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[48+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm7,XMMWORD[64+rdi]
        mov         DWORD[52+rsp],ecx
        cmovle      r13,rsp
        sub         rbp,r13
        mov         QWORD[104+rsp],rbp

        ; lane 6
        mov         ecx,DWORD[96+rdi]
        mov         r14,QWORD[80+rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[88+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm8,XMMWORD[104+rdi]
        mov         DWORD[56+rsp],ecx
        cmovle      r14,rsp
        sub         rbp,r14
        mov         QWORD[112+rsp],rbp

        ; lane 7
        mov         ecx,DWORD[136+rdi]
        mov         r15,QWORD[120+rdi]
        cmp         ecx,edx
        mov         rbp,QWORD[128+rdi]
        cmovg       edx,ecx
        test        ecx,ecx
        vmovdqu     xmm9,XMMWORD[144+rdi]
        mov         DWORD[60+rsp],ecx
        cmovle      r15,rsp
        sub         rbp,r15
        mov         QWORD[120+rsp],rbp

        test        edx,edx             ; nothing to do in any lane?
        jz          NEAR $L$enc8x_done

        ; state = IV ^ first input block ^ round key 0, for each lane
        vmovups     xmm1,XMMWORD[(16-120)+rsi]
        vmovups     xmm0,XMMWORD[(32-120)+rsi]
        mov         eax,DWORD[(240-120)+rsi]    ; round selector

        vpxor       xmm10,xmm15,XMMWORD[r8]
        lea         rbp,[128+rsp]               ; rbp -> off-load area
        vpxor       xmm11,xmm15,XMMWORD[r9]
        vpxor       xmm12,xmm15,XMMWORD[r10]
        vpxor       xmm13,xmm15,XMMWORD[r11]
        vpxor       xmm2,xmm2,xmm10
        vpxor       xmm10,xmm15,XMMWORD[r12]
        vpxor       xmm3,xmm3,xmm11
        vpxor       xmm11,xmm15,XMMWORD[r13]
        vpxor       xmm4,xmm4,xmm12
        vpxor       xmm12,xmm15,XMMWORD[r14]
        vpxor       xmm5,xmm5,xmm13
        vpxor       xmm13,xmm15,XMMWORD[r15]
        vpxor       xmm6,xmm6,xmm10
        mov         ecx,1
        vpxor       xmm7,xmm7,xmm11
        vpxor       xmm8,xmm8,xmm12
        vpxor       xmm9,xmm9,xmm13
        jmp         NEAR $L$oop_enc8x

        ; ---- one iteration = one block in every lane -------------------
        ; Each of the first eight rounds also services one lane's
        ; bookkeeping: compare its counter with 1, redirect a finished
        ; lane to the stack, fetch the next input block (xored with round
        ; key 0) and turn the lane pointer into its output pointer + 16.
ALIGN   32
$L$oop_enc8x:
        ; round with xmm1 / bookkeeping for lane 0 (offset already in rbx)
        vaesenc     xmm2,xmm2,xmm1
        cmp         ecx,DWORD[(32+0)+rsp]
        vaesenc     xmm3,xmm3,xmm1
        prefetcht0  [31+r8]
        vaesenc     xmm4,xmm4,xmm1
        vaesenc     xmm5,xmm5,xmm1
        lea         rbx,[rbx*1+r8]
        cmovge      r8,rsp
        vaesenc     xmm6,xmm6,xmm1
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm1
        sub         rbx,r8
        vaesenc     xmm8,xmm8,xmm1
        vpxor       xmm10,xmm15,XMMWORD[16+r8]
        mov         QWORD[(64+0)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[(-72)+rsi]
        lea         r8,[16+rbx*1+r8]
        vmovdqu     XMMWORD[rbp],xmm10

        ; round with xmm0 / bookkeeping for lane 1
        vaesenc     xmm2,xmm2,xmm0
        cmp         ecx,DWORD[(32+4)+rsp]
        mov         rbx,QWORD[(64+8)+rsp]
        vaesenc     xmm3,xmm3,xmm0
        prefetcht0  [31+r9]
        vaesenc     xmm4,xmm4,xmm0
        vaesenc     xmm5,xmm5,xmm0
        lea         rbx,[rbx*1+r9]
        cmovge      r9,rsp
        vaesenc     xmm6,xmm6,xmm0
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm0
        sub         rbx,r9
        vaesenc     xmm8,xmm8,xmm0
        vpxor       xmm11,xmm15,XMMWORD[16+r9]
        mov         QWORD[(64+8)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[(-56)+rsi]
        lea         r9,[16+rbx*1+r9]
        vmovdqu     XMMWORD[16+rbp],xmm11

        ; round with xmm1 / bookkeeping for lane 2
        vaesenc     xmm2,xmm2,xmm1
        cmp         ecx,DWORD[(32+8)+rsp]
        mov         rbx,QWORD[(64+16)+rsp]
        vaesenc     xmm3,xmm3,xmm1
        prefetcht0  [31+r10]
        vaesenc     xmm4,xmm4,xmm1
        prefetcht0  [15+r8]
        vaesenc     xmm5,xmm5,xmm1
        lea         rbx,[rbx*1+r10]
        cmovge      r10,rsp
        vaesenc     xmm6,xmm6,xmm1
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm1
        sub         rbx,r10
        vaesenc     xmm8,xmm8,xmm1
        vpxor       xmm12,xmm15,XMMWORD[16+r10]
        mov         QWORD[(64+16)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[(-40)+rsi]
        lea         r10,[16+rbx*1+r10]
        vmovdqu     XMMWORD[32+rbp],xmm12

        ; round with xmm0 / bookkeeping for lane 3
        vaesenc     xmm2,xmm2,xmm0
        cmp         ecx,DWORD[(32+12)+rsp]
        mov         rbx,QWORD[(64+24)+rsp]
        vaesenc     xmm3,xmm3,xmm0
        prefetcht0  [31+r11]
        vaesenc     xmm4,xmm4,xmm0
        prefetcht0  [15+r9]
        vaesenc     xmm5,xmm5,xmm0
        lea         rbx,[rbx*1+r11]
        cmovge      r11,rsp
        vaesenc     xmm6,xmm6,xmm0
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm0
        sub         rbx,r11
        vaesenc     xmm8,xmm8,xmm0
        vpxor       xmm13,xmm15,XMMWORD[16+r11]
        mov         QWORD[(64+24)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[(-24)+rsi]
        lea         r11,[16+rbx*1+r11]
        vmovdqu     XMMWORD[48+rbp],xmm13

        ; round with xmm1 / bookkeeping for lane 4 (next input stays in xmm10)
        vaesenc     xmm2,xmm2,xmm1
        cmp         ecx,DWORD[(32+16)+rsp]
        mov         rbx,QWORD[(64+32)+rsp]
        vaesenc     xmm3,xmm3,xmm1
        prefetcht0  [31+r12]
        vaesenc     xmm4,xmm4,xmm1
        prefetcht0  [15+r10]
        vaesenc     xmm5,xmm5,xmm1
        lea         rbx,[rbx*1+r12]
        cmovge      r12,rsp
        vaesenc     xmm6,xmm6,xmm1
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm1
        sub         rbx,r12
        vaesenc     xmm8,xmm8,xmm1
        vpxor       xmm10,xmm15,XMMWORD[16+r12]
        mov         QWORD[(64+32)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[(-8)+rsi]
        lea         r12,[16+rbx*1+r12]

        ; round with xmm0 / bookkeeping for lane 5 (next input stays in xmm11)
        vaesenc     xmm2,xmm2,xmm0
        cmp         ecx,DWORD[(32+20)+rsp]
        mov         rbx,QWORD[(64+40)+rsp]
        vaesenc     xmm3,xmm3,xmm0
        prefetcht0  [31+r13]
        vaesenc     xmm4,xmm4,xmm0
        prefetcht0  [15+r11]
        vaesenc     xmm5,xmm5,xmm0
        lea         rbx,[r13*1+rbx]
        cmovge      r13,rsp
        vaesenc     xmm6,xmm6,xmm0
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm0
        sub         rbx,r13
        vaesenc     xmm8,xmm8,xmm0
        vpxor       xmm11,xmm15,XMMWORD[16+r13]
        mov         QWORD[(64+40)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[8+rsi]
        lea         r13,[16+rbx*1+r13]

        ; round with xmm1 / bookkeeping for lane 6 (next input stays in xmm12)
        vaesenc     xmm2,xmm2,xmm1
        cmp         ecx,DWORD[(32+24)+rsp]
        mov         rbx,QWORD[(64+48)+rsp]
        vaesenc     xmm3,xmm3,xmm1
        prefetcht0  [31+r14]
        vaesenc     xmm4,xmm4,xmm1
        prefetcht0  [15+r12]
        vaesenc     xmm5,xmm5,xmm1
        lea         rbx,[rbx*1+r14]
        cmovge      r14,rsp
        vaesenc     xmm6,xmm6,xmm1
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm1
        sub         rbx,r14
        vaesenc     xmm8,xmm8,xmm1
        vpxor       xmm12,xmm15,XMMWORD[16+r14]
        mov         QWORD[(64+48)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[24+rsi]
        lea         r14,[16+rbx*1+r14]

        ; round with xmm0 / bookkeeping for lane 7 (next input stays in xmm13)
        vaesenc     xmm2,xmm2,xmm0
        cmp         ecx,DWORD[(32+28)+rsp]
        mov         rbx,QWORD[(64+56)+rsp]
        vaesenc     xmm3,xmm3,xmm0
        prefetcht0  [31+r15]
        vaesenc     xmm4,xmm4,xmm0
        prefetcht0  [15+r13]
        vaesenc     xmm5,xmm5,xmm0
        lea         rbx,[rbx*1+r15]
        cmovge      r15,rsp
        vaesenc     xmm6,xmm6,xmm0
        cmovg       rbx,rsp
        vaesenc     xmm7,xmm7,xmm0
        sub         rbx,r15
        vaesenc     xmm8,xmm8,xmm0
        vpxor       xmm13,xmm15,XMMWORD[16+r15]
        mov         QWORD[(64+56)+rsp],rbx
        vaesenc     xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[40+rsi]
        lea         r15,[16+rbx*1+r15]

        vmovdqu     xmm14,XMMWORD[32+rsp]       ; counters of lanes 0-3
        prefetcht0  [15+r14]
        prefetcht0  [15+r15]
        cmp         eax,11              ; flags survive until the je below
        jb          NEAR $L$enc8x_tail  ; 10-round key

        vaesenc     xmm2,xmm2,xmm1
        vaesenc     xmm3,xmm3,xmm1
        vaesenc     xmm4,xmm4,xmm1
        vaesenc     xmm5,xmm5,xmm1
        vaesenc     xmm6,xmm6,xmm1
        vaesenc     xmm7,xmm7,xmm1
        vaesenc     xmm8,xmm8,xmm1
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[(176-120)+rsi]

        vaesenc     xmm2,xmm2,xmm0
        vaesenc     xmm3,xmm3,xmm0
        vaesenc     xmm4,xmm4,xmm0
        vaesenc     xmm5,xmm5,xmm0
        vaesenc     xmm6,xmm6,xmm0
        vaesenc     xmm7,xmm7,xmm0
        vaesenc     xmm8,xmm8,xmm0
        vaesenc     xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[(192-120)+rsi]
        je          NEAR $L$enc8x_tail  ; 12-round key

        vaesenc     xmm2,xmm2,xmm1
        vaesenc     xmm3,xmm3,xmm1
        vaesenc     xmm4,xmm4,xmm1
        vaesenc     xmm5,xmm5,xmm1
        vaesenc     xmm6,xmm6,xmm1
        vaesenc     xmm7,xmm7,xmm1
        vaesenc     xmm8,xmm8,xmm1
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[(208-120)+rsi]

        vaesenc     xmm2,xmm2,xmm0
        vaesenc     xmm3,xmm3,xmm0
        vaesenc     xmm4,xmm4,xmm0
        vaesenc     xmm5,xmm5,xmm0
        vaesenc     xmm6,xmm6,xmm0
        vaesenc     xmm7,xmm7,xmm0
        vaesenc     xmm8,xmm8,xmm0
        vaesenc     xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[(224-120)+rsi]

        ; ---- last two rounds, counter update, store, next-block set-up --
$L$enc8x_tail:
        ; counters of lanes 0-3: counter += (counter > 0 ? -1 : 0)
        vaesenc     xmm2,xmm2,xmm1
        vpxor       xmm15,xmm15,xmm15           ; borrow xmm15 as zero
        vaesenc     xmm3,xmm3,xmm1
        vaesenc     xmm4,xmm4,xmm1
        vpcmpgtd    xmm15,xmm14,xmm15
        vaesenc     xmm5,xmm5,xmm1
        vaesenc     xmm6,xmm6,xmm1
        vpaddd      xmm15,xmm15,xmm14
        vmovdqu     xmm14,XMMWORD[48+rsp]       ; counters of lanes 4-7
        vaesenc     xmm7,xmm7,xmm1
        mov         rbx,QWORD[64+rsp]           ; pre-load lane 0's offset
        vaesenc     xmm8,xmm8,xmm1
        vaesenc     xmm9,xmm9,xmm1
        vmovups     xmm1,XMMWORD[(16-120)+rsi]  ; reload round key 1

        ; counters of lanes 4-7, same update
        vaesenclast xmm2,xmm2,xmm0
        vmovdqa     XMMWORD[32+rsp],xmm15
        vpxor       xmm15,xmm15,xmm15
        vaesenclast xmm3,xmm3,xmm0
        vaesenclast xmm4,xmm4,xmm0
        vpcmpgtd    xmm15,xmm14,xmm15
        vaesenclast xmm5,xmm5,xmm0
        vaesenclast xmm6,xmm6,xmm0
        vpaddd      xmm14,xmm14,xmm15
        vmovdqu     xmm15,XMMWORD[(-120)+rsi]   ; xmm15 = round key 0 again
        vaesenclast xmm7,xmm7,xmm0
        vaesenclast xmm8,xmm8,xmm0
        vmovdqa     XMMWORD[48+rsp],xmm14
        vaesenclast xmm9,xmm9,xmm0
        vmovups     xmm0,XMMWORD[(32-120)+rsi]  ; reload round key 2

        ; Per lane: write the ciphertext, turn the pointer back into the
        ; input pointer, and chain the ciphertext into the next state.
        vmovups     XMMWORD[(-16)+r8],xmm2
        sub         r8,rbx
        vpxor       xmm2,xmm2,XMMWORD[rbp]
        vmovups     XMMWORD[(-16)+r9],xmm3
        sub         r9,QWORD[72+rsp]
        vpxor       xmm3,xmm3,XMMWORD[16+rbp]
        vmovups     XMMWORD[(-16)+r10],xmm4
        sub         r10,QWORD[80+rsp]
        vpxor       xmm4,xmm4,XMMWORD[32+rbp]
        vmovups     XMMWORD[(-16)+r11],xmm5
        sub         r11,QWORD[88+rsp]
        vpxor       xmm5,xmm5,XMMWORD[48+rbp]
        vmovups     XMMWORD[(-16)+r12],xmm6
        sub         r12,QWORD[96+rsp]
        vpxor       xmm6,xmm6,xmm10
        vmovups     XMMWORD[(-16)+r13],xmm7
        sub         r13,QWORD[104+rsp]
        vpxor       xmm7,xmm7,xmm11
        vmovups     XMMWORD[(-16)+r14],xmm8
        sub         r14,QWORD[112+rsp]
        vpxor       xmm8,xmm8,xmm12
        vmovups     XMMWORD[(-16)+r15],xmm9
        sub         r15,QWORD[120+rsp]
        vpxor       xmm9,xmm9,xmm13

        dec         edx
        jnz         NEAR $L$oop_enc8x

        mov         rax,QWORD[16+rsp]   ; caller's rsp

$L$enc8x_done:
        ; rax = caller's rsp on both paths into here
        vzeroupper
        movaps      xmm6,XMMWORD[(-216)+rax]
        movaps      xmm7,XMMWORD[(-200)+rax]
        movaps      xmm8,XMMWORD[(-184)+rax]
        movaps      xmm9,XMMWORD[(-168)+rax]
        movaps      xmm10,XMMWORD[(-152)+rax]
        movaps      xmm11,XMMWORD[(-136)+rax]
        movaps      xmm12,XMMWORD[(-120)+rax]
        movaps      xmm13,XMMWORD[(-104)+rax]
        movaps      xmm14,XMMWORD[(-88)+rax]
        movaps      xmm15,XMMWORD[(-72)+rax]
        mov         r15,QWORD[(-48)+rax]
        mov         r14,QWORD[(-40)+rax]
        mov         r13,QWORD[(-32)+rax]
        mov         r12,QWORD[(-24)+rax]
        mov         rbp,QWORD[(-16)+rax]
        mov         rbx,QWORD[(-8)+rax]
        lea         rsp,[rax]

$L$enc8x_epilogue:
        mov         rdi,QWORD[8+rsp]    ;WIN64 epilogue
        mov         rsi,QWORD[16+rsp]
        DB          0F3h,0C3h           ;repret

$L$SEH_end_aesni_multi_cbc_encrypt_avx:
|
|
|
|
|
|
ALIGN 32
|
|
aesni_multi_cbc_decrypt_avx:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aesni_multi_cbc_decrypt_avx:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
|
|
|
|
|
|
_avx_cbc_dec_shortcut:
|
|
mov rax,rsp
|
|
|
|
push rbx
|
|
|
|
push rbp
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
push r15
|
|
|
|
lea rsp,[((-168))+rsp]
|
|
movaps XMMWORD[rsp],xmm6
|
|
movaps XMMWORD[16+rsp],xmm7
|
|
movaps XMMWORD[32+rsp],xmm8
|
|
movaps XMMWORD[48+rsp],xmm9
|
|
movaps XMMWORD[64+rsp],xmm10
|
|
movaps XMMWORD[80+rsp],xmm11
|
|
movaps XMMWORD[(-120)+rax],xmm12
|
|
movaps XMMWORD[(-104)+rax],xmm13
|
|
movaps XMMWORD[(-88)+rax],xmm14
|
|
movaps XMMWORD[(-72)+rax],xmm15
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sub rsp,256
|
|
and rsp,-256
|
|
sub rsp,192
|
|
mov QWORD[16+rsp],rax
|
|
|
|
|
|
$L$dec8x_body:
|
|
vzeroupper
|
|
vmovdqu xmm15,XMMWORD[rsi]
|
|
lea rsi,[120+rsi]
|
|
lea rdi,[160+rdi]
|
|
shr edx,1
|
|
|
|
$L$dec8x_loop_grande:
|
|
|
|
xor edx,edx
|
|
|
|
mov ecx,DWORD[((-144))+rdi]
|
|
|
|
mov r8,QWORD[((-160))+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbx,QWORD[((-152))+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm2,XMMWORD[((-136))+rdi]
|
|
mov DWORD[32+rsp],ecx
|
|
cmovle r8,rsp
|
|
sub rbx,r8
|
|
mov QWORD[64+rsp],rbx
|
|
vmovdqu XMMWORD[192+rsp],xmm2
|
|
|
|
mov ecx,DWORD[((-104))+rdi]
|
|
|
|
mov r9,QWORD[((-120))+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[((-112))+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm3,XMMWORD[((-96))+rdi]
|
|
mov DWORD[36+rsp],ecx
|
|
cmovle r9,rsp
|
|
sub rbp,r9
|
|
mov QWORD[72+rsp],rbp
|
|
vmovdqu XMMWORD[208+rsp],xmm3
|
|
|
|
mov ecx,DWORD[((-64))+rdi]
|
|
|
|
mov r10,QWORD[((-80))+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[((-72))+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm4,XMMWORD[((-56))+rdi]
|
|
mov DWORD[40+rsp],ecx
|
|
cmovle r10,rsp
|
|
sub rbp,r10
|
|
mov QWORD[80+rsp],rbp
|
|
vmovdqu XMMWORD[224+rsp],xmm4
|
|
|
|
mov ecx,DWORD[((-24))+rdi]
|
|
|
|
mov r11,QWORD[((-40))+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[((-32))+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm5,XMMWORD[((-16))+rdi]
|
|
mov DWORD[44+rsp],ecx
|
|
cmovle r11,rsp
|
|
sub rbp,r11
|
|
mov QWORD[88+rsp],rbp
|
|
vmovdqu XMMWORD[240+rsp],xmm5
|
|
|
|
mov ecx,DWORD[16+rdi]
|
|
|
|
mov r12,QWORD[rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[8+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm6,XMMWORD[24+rdi]
|
|
mov DWORD[48+rsp],ecx
|
|
cmovle r12,rsp
|
|
sub rbp,r12
|
|
mov QWORD[96+rsp],rbp
|
|
vmovdqu XMMWORD[256+rsp],xmm6
|
|
|
|
mov ecx,DWORD[56+rdi]
|
|
|
|
mov r13,QWORD[40+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[48+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm7,XMMWORD[64+rdi]
|
|
mov DWORD[52+rsp],ecx
|
|
cmovle r13,rsp
|
|
sub rbp,r13
|
|
mov QWORD[104+rsp],rbp
|
|
vmovdqu XMMWORD[272+rsp],xmm7
|
|
|
|
mov ecx,DWORD[96+rdi]
|
|
|
|
mov r14,QWORD[80+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[88+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm8,XMMWORD[104+rdi]
|
|
mov DWORD[56+rsp],ecx
|
|
cmovle r14,rsp
|
|
sub rbp,r14
|
|
mov QWORD[112+rsp],rbp
|
|
vmovdqu XMMWORD[288+rsp],xmm8
|
|
|
|
mov ecx,DWORD[136+rdi]
|
|
|
|
mov r15,QWORD[120+rdi]
|
|
cmp ecx,edx
|
|
|
|
mov rbp,QWORD[128+rdi]
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
|
|
vmovdqu xmm9,XMMWORD[144+rdi]
|
|
mov DWORD[60+rsp],ecx
|
|
cmovle r15,rsp
|
|
sub rbp,r15
|
|
mov QWORD[120+rsp],rbp
|
|
vmovdqu XMMWORD[304+rsp],xmm9
|
|
test edx,edx
|
|
jz NEAR $L$dec8x_done
|
|
|
|
vmovups xmm1,XMMWORD[((16-120))+rsi]
|
|
vmovups xmm0,XMMWORD[((32-120))+rsi]
|
|
mov eax,DWORD[((240-120))+rsi]
|
|
lea rbp,[((192+128))+rsp]
|
|
|
|
vmovdqu xmm2,XMMWORD[r8]
|
|
vmovdqu xmm3,XMMWORD[r9]
|
|
vmovdqu xmm4,XMMWORD[r10]
|
|
vmovdqu xmm5,XMMWORD[r11]
|
|
vmovdqu xmm6,XMMWORD[r12]
|
|
vmovdqu xmm7,XMMWORD[r13]
|
|
vmovdqu xmm8,XMMWORD[r14]
|
|
vmovdqu xmm9,XMMWORD[r15]
|
|
vmovdqu XMMWORD[rbp],xmm2
|
|
vpxor xmm2,xmm2,xmm15
|
|
vmovdqu XMMWORD[16+rbp],xmm3
|
|
vpxor xmm3,xmm3,xmm15
|
|
vmovdqu XMMWORD[32+rbp],xmm4
|
|
vpxor xmm4,xmm4,xmm15
|
|
vmovdqu XMMWORD[48+rbp],xmm5
|
|
vpxor xmm5,xmm5,xmm15
|
|
vmovdqu XMMWORD[64+rbp],xmm6
|
|
vpxor xmm6,xmm6,xmm15
|
|
vmovdqu XMMWORD[80+rbp],xmm7
|
|
vpxor xmm7,xmm7,xmm15
|
|
vmovdqu XMMWORD[96+rbp],xmm8
|
|
vpxor xmm8,xmm8,xmm15
|
|
vmovdqu XMMWORD[112+rbp],xmm9
|
|
vpxor xmm9,xmm9,xmm15
|
|
xor rbp,0x80
|
|
mov ecx,1
|
|
jmp NEAR $L$oop_dec8x
|
|
|
|
ALIGN 32
|
|
$L$oop_dec8x:
|
|
vaesdec xmm2,xmm2,xmm1
|
|
cmp ecx,DWORD[((32+0))+rsp]
|
|
vaesdec xmm3,xmm3,xmm1
|
|
prefetcht0 [31+r8]
|
|
vaesdec xmm4,xmm4,xmm1
|
|
vaesdec xmm5,xmm5,xmm1
|
|
lea rbx,[rbx*1+r8]
|
|
cmovge r8,rsp
|
|
vaesdec xmm6,xmm6,xmm1
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm1
|
|
sub rbx,r8
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vmovdqu xmm10,XMMWORD[16+r8]
|
|
mov QWORD[((64+0))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[((-72))+rsi]
|
|
lea r8,[16+rbx*1+r8]
|
|
vmovdqu XMMWORD[128+rsp],xmm10
|
|
vaesdec xmm2,xmm2,xmm0
|
|
cmp ecx,DWORD[((32+4))+rsp]
|
|
mov rbx,QWORD[((64+8))+rsp]
|
|
vaesdec xmm3,xmm3,xmm0
|
|
prefetcht0 [31+r9]
|
|
vaesdec xmm4,xmm4,xmm0
|
|
vaesdec xmm5,xmm5,xmm0
|
|
lea rbx,[rbx*1+r9]
|
|
cmovge r9,rsp
|
|
vaesdec xmm6,xmm6,xmm0
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm0
|
|
sub rbx,r9
|
|
vaesdec xmm8,xmm8,xmm0
|
|
vmovdqu xmm11,XMMWORD[16+r9]
|
|
mov QWORD[((64+8))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm0
|
|
vmovups xmm0,XMMWORD[((-56))+rsi]
|
|
lea r9,[16+rbx*1+r9]
|
|
vmovdqu XMMWORD[144+rsp],xmm11
|
|
vaesdec xmm2,xmm2,xmm1
|
|
cmp ecx,DWORD[((32+8))+rsp]
|
|
mov rbx,QWORD[((64+16))+rsp]
|
|
vaesdec xmm3,xmm3,xmm1
|
|
prefetcht0 [31+r10]
|
|
vaesdec xmm4,xmm4,xmm1
|
|
prefetcht0 [15+r8]
|
|
vaesdec xmm5,xmm5,xmm1
|
|
lea rbx,[rbx*1+r10]
|
|
cmovge r10,rsp
|
|
vaesdec xmm6,xmm6,xmm1
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm1
|
|
sub rbx,r10
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vmovdqu xmm12,XMMWORD[16+r10]
|
|
mov QWORD[((64+16))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[((-40))+rsi]
|
|
lea r10,[16+rbx*1+r10]
|
|
vmovdqu XMMWORD[160+rsp],xmm12
|
|
vaesdec xmm2,xmm2,xmm0
|
|
cmp ecx,DWORD[((32+12))+rsp]
|
|
mov rbx,QWORD[((64+24))+rsp]
|
|
vaesdec xmm3,xmm3,xmm0
|
|
prefetcht0 [31+r11]
|
|
vaesdec xmm4,xmm4,xmm0
|
|
prefetcht0 [15+r9]
|
|
vaesdec xmm5,xmm5,xmm0
|
|
lea rbx,[rbx*1+r11]
|
|
cmovge r11,rsp
|
|
vaesdec xmm6,xmm6,xmm0
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm0
|
|
sub rbx,r11
|
|
vaesdec xmm8,xmm8,xmm0
|
|
vmovdqu xmm13,XMMWORD[16+r11]
|
|
mov QWORD[((64+24))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm0
|
|
vmovups xmm0,XMMWORD[((-24))+rsi]
|
|
lea r11,[16+rbx*1+r11]
|
|
vmovdqu XMMWORD[176+rsp],xmm13
|
|
vaesdec xmm2,xmm2,xmm1
|
|
cmp ecx,DWORD[((32+16))+rsp]
|
|
mov rbx,QWORD[((64+32))+rsp]
|
|
vaesdec xmm3,xmm3,xmm1
|
|
prefetcht0 [31+r12]
|
|
vaesdec xmm4,xmm4,xmm1
|
|
prefetcht0 [15+r10]
|
|
vaesdec xmm5,xmm5,xmm1
|
|
lea rbx,[rbx*1+r12]
|
|
cmovge r12,rsp
|
|
vaesdec xmm6,xmm6,xmm1
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm1
|
|
sub rbx,r12
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vmovdqu xmm10,XMMWORD[16+r12]
|
|
mov QWORD[((64+32))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[((-8))+rsi]
|
|
lea r12,[16+rbx*1+r12]
|
|
vaesdec xmm2,xmm2,xmm0
|
|
cmp ecx,DWORD[((32+20))+rsp]
|
|
mov rbx,QWORD[((64+40))+rsp]
|
|
vaesdec xmm3,xmm3,xmm0
|
|
prefetcht0 [31+r13]
|
|
vaesdec xmm4,xmm4,xmm0
|
|
prefetcht0 [15+r11]
|
|
vaesdec xmm5,xmm5,xmm0
|
|
lea rbx,[r13*1+rbx]
|
|
cmovge r13,rsp
|
|
vaesdec xmm6,xmm6,xmm0
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm0
|
|
sub rbx,r13
|
|
vaesdec xmm8,xmm8,xmm0
|
|
vmovdqu xmm11,XMMWORD[16+r13]
|
|
mov QWORD[((64+40))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm0
|
|
vmovups xmm0,XMMWORD[8+rsi]
|
|
lea r13,[16+rbx*1+r13]
|
|
vaesdec xmm2,xmm2,xmm1
|
|
cmp ecx,DWORD[((32+24))+rsp]
|
|
mov rbx,QWORD[((64+48))+rsp]
|
|
vaesdec xmm3,xmm3,xmm1
|
|
prefetcht0 [31+r14]
|
|
vaesdec xmm4,xmm4,xmm1
|
|
prefetcht0 [15+r12]
|
|
vaesdec xmm5,xmm5,xmm1
|
|
lea rbx,[rbx*1+r14]
|
|
cmovge r14,rsp
|
|
vaesdec xmm6,xmm6,xmm1
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm1
|
|
sub rbx,r14
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vmovdqu xmm12,XMMWORD[16+r14]
|
|
mov QWORD[((64+48))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[24+rsi]
|
|
lea r14,[16+rbx*1+r14]
|
|
vaesdec xmm2,xmm2,xmm0
|
|
cmp ecx,DWORD[((32+28))+rsp]
|
|
mov rbx,QWORD[((64+56))+rsp]
|
|
vaesdec xmm3,xmm3,xmm0
|
|
prefetcht0 [31+r15]
|
|
vaesdec xmm4,xmm4,xmm0
|
|
prefetcht0 [15+r13]
|
|
vaesdec xmm5,xmm5,xmm0
|
|
lea rbx,[rbx*1+r15]
|
|
cmovge r15,rsp
|
|
vaesdec xmm6,xmm6,xmm0
|
|
cmovg rbx,rsp
|
|
vaesdec xmm7,xmm7,xmm0
|
|
sub rbx,r15
|
|
vaesdec xmm8,xmm8,xmm0
|
|
vmovdqu xmm13,XMMWORD[16+r15]
|
|
mov QWORD[((64+56))+rsp],rbx
|
|
vaesdec xmm9,xmm9,xmm0
|
|
vmovups xmm0,XMMWORD[40+rsi]
|
|
lea r15,[16+rbx*1+r15]
|
|
vmovdqu xmm14,XMMWORD[32+rsp]
|
|
prefetcht0 [15+r14]
|
|
prefetcht0 [15+r15]
|
|
cmp eax,11
|
|
jb NEAR $L$dec8x_tail
|
|
|
|
vaesdec xmm2,xmm2,xmm1
|
|
vaesdec xmm3,xmm3,xmm1
|
|
vaesdec xmm4,xmm4,xmm1
|
|
vaesdec xmm5,xmm5,xmm1
|
|
vaesdec xmm6,xmm6,xmm1
|
|
vaesdec xmm7,xmm7,xmm1
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[((176-120))+rsi]
|
|
|
|
vaesdec xmm2,xmm2,xmm0
|
|
vaesdec xmm3,xmm3,xmm0
|
|
vaesdec xmm4,xmm4,xmm0
|
|
vaesdec xmm5,xmm5,xmm0
|
|
vaesdec xmm6,xmm6,xmm0
|
|
vaesdec xmm7,xmm7,xmm0
|
|
vaesdec xmm8,xmm8,xmm0
|
|
vaesdec xmm9,xmm9,xmm0
|
|
vmovups xmm0,XMMWORD[((192-120))+rsi]
|
|
je NEAR $L$dec8x_tail
|
|
|
|
vaesdec xmm2,xmm2,xmm1
|
|
vaesdec xmm3,xmm3,xmm1
|
|
vaesdec xmm4,xmm4,xmm1
|
|
vaesdec xmm5,xmm5,xmm1
|
|
vaesdec xmm6,xmm6,xmm1
|
|
vaesdec xmm7,xmm7,xmm1
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[((208-120))+rsi]
|
|
|
|
vaesdec xmm2,xmm2,xmm0
|
|
vaesdec xmm3,xmm3,xmm0
|
|
vaesdec xmm4,xmm4,xmm0
|
|
vaesdec xmm5,xmm5,xmm0
|
|
vaesdec xmm6,xmm6,xmm0
|
|
vaesdec xmm7,xmm7,xmm0
|
|
vaesdec xmm8,xmm8,xmm0
|
|
vaesdec xmm9,xmm9,xmm0
|
|
vmovups xmm0,XMMWORD[((224-120))+rsi]
|
|
|
|
$L$dec8x_tail:
|
|
vaesdec xmm2,xmm2,xmm1
|
|
vpxor xmm15,xmm15,xmm15
|
|
vaesdec xmm3,xmm3,xmm1
|
|
vaesdec xmm4,xmm4,xmm1
|
|
vpcmpgtd xmm15,xmm14,xmm15
|
|
vaesdec xmm5,xmm5,xmm1
|
|
vaesdec xmm6,xmm6,xmm1
|
|
vpaddd xmm15,xmm15,xmm14
|
|
vmovdqu xmm14,XMMWORD[48+rsp]
|
|
vaesdec xmm7,xmm7,xmm1
|
|
mov rbx,QWORD[64+rsp]
|
|
vaesdec xmm8,xmm8,xmm1
|
|
vaesdec xmm9,xmm9,xmm1
|
|
vmovups xmm1,XMMWORD[((16-120))+rsi]
|
|
|
|
vaesdeclast xmm2,xmm2,xmm0
|
|
vmovdqa XMMWORD[32+rsp],xmm15
|
|
vpxor xmm15,xmm15,xmm15
|
|
vaesdeclast xmm3,xmm3,xmm0
|
|
vpxor xmm2,xmm2,XMMWORD[rbp]
|
|
vaesdeclast xmm4,xmm4,xmm0
|
|
vpxor xmm3,xmm3,XMMWORD[16+rbp]
|
|
vpcmpgtd xmm15,xmm14,xmm15
|
|
vaesdeclast xmm5,xmm5,xmm0
|
|
vpxor xmm4,xmm4,XMMWORD[32+rbp]
|
|
vaesdeclast xmm6,xmm6,xmm0
|
|
vpxor xmm5,xmm5,XMMWORD[48+rbp]
|
|
vpaddd xmm14,xmm14,xmm15
|
|
vmovdqu xmm15,XMMWORD[((-120))+rsi]
|
|
vaesdeclast xmm7,xmm7,xmm0
|
|
vpxor xmm6,xmm6,XMMWORD[64+rbp]
|
|
vaesdeclast xmm8,xmm8,xmm0
|
|
vpxor xmm7,xmm7,XMMWORD[80+rbp]
|
|
vmovdqa XMMWORD[48+rsp],xmm14
|
|
vaesdeclast xmm9,xmm9,xmm0
|
|
vpxor xmm8,xmm8,XMMWORD[96+rbp]
|
|
vmovups xmm0,XMMWORD[((32-120))+rsi]
|
|
|
|
vmovups XMMWORD[(-16)+r8],xmm2
|
|
sub r8,rbx
|
|
vmovdqu xmm2,XMMWORD[((128+0))+rsp]
|
|
vpxor xmm9,xmm9,XMMWORD[112+rbp]
|
|
vmovups XMMWORD[(-16)+r9],xmm3
|
|
sub r9,QWORD[72+rsp]
|
|
vmovdqu XMMWORD[rbp],xmm2
|
|
vpxor xmm2,xmm2,xmm15
|
|
vmovdqu xmm3,XMMWORD[((128+16))+rsp]
|
|
vmovups XMMWORD[(-16)+r10],xmm4
|
|
sub r10,QWORD[80+rsp]
|
|
vmovdqu XMMWORD[16+rbp],xmm3
|
|
vpxor xmm3,xmm3,xmm15
|
|
vmovdqu xmm4,XMMWORD[((128+32))+rsp]
|
|
vmovups XMMWORD[(-16)+r11],xmm5
|
|
sub r11,QWORD[88+rsp]
|
|
vmovdqu XMMWORD[32+rbp],xmm4
|
|
vpxor xmm4,xmm4,xmm15
|
|
vmovdqu xmm5,XMMWORD[((128+48))+rsp]
|
|
vmovups XMMWORD[(-16)+r12],xmm6
|
|
sub r12,QWORD[96+rsp]
|
|
vmovdqu XMMWORD[48+rbp],xmm5
|
|
vpxor xmm5,xmm5,xmm15
|
|
vmovdqu XMMWORD[64+rbp],xmm10
|
|
vpxor xmm6,xmm15,xmm10
|
|
vmovups XMMWORD[(-16)+r13],xmm7
|
|
sub r13,QWORD[104+rsp]
|
|
vmovdqu XMMWORD[80+rbp],xmm11
|
|
vpxor xmm7,xmm15,xmm11
|
|
vmovups XMMWORD[(-16)+r14],xmm8
|
|
sub r14,QWORD[112+rsp]
|
|
vmovdqu XMMWORD[96+rbp],xmm12
|
|
vpxor xmm8,xmm15,xmm12
|
|
vmovups XMMWORD[(-16)+r15],xmm9
|
|
sub r15,QWORD[120+rsp]
|
|
vmovdqu XMMWORD[112+rbp],xmm13
|
|
vpxor xmm9,xmm15,xmm13
|
|
|
|
xor rbp,128
|
|
dec edx
|
|
jnz NEAR $L$oop_dec8x
|
|
|
|
mov rax,QWORD[16+rsp]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$L$dec8x_done:
|
|
vzeroupper
|
|
movaps xmm6,XMMWORD[((-216))+rax]
|
|
movaps xmm7,XMMWORD[((-200))+rax]
|
|
movaps xmm8,XMMWORD[((-184))+rax]
|
|
movaps xmm9,XMMWORD[((-168))+rax]
|
|
movaps xmm10,XMMWORD[((-152))+rax]
|
|
movaps xmm11,XMMWORD[((-136))+rax]
|
|
movaps xmm12,XMMWORD[((-120))+rax]
|
|
movaps xmm13,XMMWORD[((-104))+rax]
|
|
movaps xmm14,XMMWORD[((-88))+rax]
|
|
movaps xmm15,XMMWORD[((-72))+rax]
|
|
mov r15,QWORD[((-48))+rax]
|
|
|
|
mov r14,QWORD[((-40))+rax]
|
|
|
|
mov r13,QWORD[((-32))+rax]
|
|
|
|
mov r12,QWORD[((-24))+rax]
|
|
|
|
mov rbp,QWORD[((-16))+rax]
|
|
|
|
mov rbx,QWORD[((-8))+rax]
|
|
|
|
lea rsp,[rax]
|
|
|
|
$L$dec8x_epilogue:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
|
|
$L$SEH_end_aesni_multi_cbc_decrypt_avx:
|
|
EXTERN __imp_RtlVirtualUnwind
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------
; se_handler -- shared Win64 language-specific exception handler for the
; four multi-buffer CBC routines; every .xdata record in this file names
; it as the handler.
;
; In:  r8 = context record, r9 = dispatcher context (3rd/4th Win64 args).
;      NOTE(review): the field names used in the comments below (Rax,
;      Rip, Rsp, ImageBase, HandlerData, ...) come from the Win64
;      CONTEXT / DISPATCHER_CONTEXT layouts, not from this file --
;      confirm the offsets against winnt.h.
; Out: eax = 1.
;
; HandlerData is the DWORD pair emitted after the handler RVA in .xdata:
; [0] = RVA of the function's "body" label, [1] = RVA of its "epilogue"
; label. If the faulting Rip lies in [body, epilogue) the routine's frame
; is fully set up, so the handler recovers the original stack pointer the
; routine stored at [16+rsp] and copies the saved rbx/rbp/r12-r15 and
; xmm6-xmm15 back into the context. Otherwise only rsi/rdi/rsp are fixed
; up. It then chains to RtlVirtualUnwind.
;-----------------------------------------------------------------------
se_handler:
|
|
push rsi ; save every register this handler overwrites
|
|
push rdi
|
|
push rbx
|
|
push rbp
|
|
push r12
|
|
push r13
|
|
push r14
|
|
push r15
|
|
pushfq ; flags too: cld in the raw-encoded copies below changes DF
|
|
sub rsp,64 ; 32-byte shadow space + 4 stack argument slots for the call below
|
|
|
|
mov rax,QWORD[120+r8] ; context->Rax (the routines keep the original rsp here in their prologue)
|
|
mov rbx,QWORD[248+r8] ; context->Rip
|
|
|
|
mov rsi,QWORD[8+r9] ; disp->ImageBase
|
|
mov r11,QWORD[56+r9] ; disp->HandlerData
|
|
|
|
mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the body label
|
|
lea r10,[r10*1+rsi] ; absolute address of the body label
|
|
cmp rbx,r10
|
|
jb NEAR $L$in_prologue ; Rip < body: frame not built yet, rax is still the entry rsp
|
|
|
|
mov rax,QWORD[152+r8] ; context->Rsp
|
|
|
|
mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the epilogue label
|
|
lea r10,[r10*1+rsi] ; absolute address of the epilogue label
|
|
cmp rbx,r10
|
|
jae NEAR $L$in_prologue ; Rip >= epilogue: registers and rsp are already restored
|
|
|
|
mov rax,QWORD[16+rax] ; original stack pointer the routine saved at [16+rsp]
|
|
|
|
mov rbx,QWORD[((-8))+rax] ; reload the pushed registers, same offsets as the routine's epilogue
|
|
mov rbp,QWORD[((-16))+rax]
|
|
mov r12,QWORD[((-24))+rax]
|
|
mov r13,QWORD[((-32))+rax]
|
|
mov r14,QWORD[((-40))+rax]
|
|
mov r15,QWORD[((-48))+rax]
|
|
mov QWORD[144+r8],rbx ; context->Rbx
|
|
mov QWORD[160+r8],rbp ; context->Rbp
|
|
mov QWORD[216+r8],r12 ; context->R12
|
|
mov QWORD[224+r8],r13 ; context->R13
|
|
mov QWORD[232+r8],r14 ; context->R14
|
|
mov QWORD[240+r8],r15 ; context->R15
|
|
|
|
lea rsi,[((-56-160))+rax] ; source: xmm6 save slot (-216 from the original rsp, as in the epilogue)
|
|
lea rdi,[512+r8] ; destination: context->Xmm6
|
|
mov ecx,20 ; 20 qwords = 160 bytes = xmm6..xmm15
|
|
DD 0xa548f3fc ; emits bytes FC F3 48 A5 = cld; rep movsq
|
|
|
|
$L$in_prologue:
|
|
mov rdi,QWORD[8+rax] ; rdi/rsi as stored at [8+rsp]/[16+rsp] by the WIN64 prologue
|
|
mov rsi,QWORD[16+rax]
|
|
mov QWORD[152+r8],rax ; context->Rsp = stack pointer at function entry
|
|
mov QWORD[168+r8],rsi ; context->Rsi
|
|
mov QWORD[176+r8],rdi ; context->Rdi
|
|
|
|
mov rdi,QWORD[40+r9] ; destination: disp->ContextRecord
|
|
mov rsi,r8 ; source: the context patched above
|
|
mov ecx,154 ; 154 qwords = 1232 bytes (presumably sizeof(CONTEXT) -- confirm)
|
|
DD 0xa548f3fc ; cld; rep movsq (same raw encoding as above)
|
|
|
|
mov rsi,r9 ; rsi = dispatcher context; r8/r9 are reused as call arguments
|
|
xor rcx,rcx ; arg1 = 0 (presumably UNW_FLAG_NHANDLER)
|
|
mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
|
|
mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
|
|
mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
|
|
mov r10,QWORD[40+rsi] ; disp->ContextRecord
|
|
lea r11,[56+rsi] ; &disp->HandlerData
|
|
lea r12,[24+rsi] ; &disp->EstablisherFrame
|
|
mov QWORD[32+rsp],r10 ; arg5, first slot above the 32-byte shadow space
|
|
mov QWORD[40+rsp],r11 ; arg6
|
|
mov QWORD[48+rsp],r12 ; arg7
|
|
mov QWORD[56+rsp],rcx ; arg8 = 0 (rcx was zeroed above)
|
|
call QWORD[__imp_RtlVirtualUnwind] ; indirect call through the import pointer
|
|
|
|
mov eax,1 ; return 1 (presumably ExceptionContinueSearch -- confirm)
|
|
add rsp,64 ; drop shadow space and argument slots
|
|
popfq
|
|
pop r15 ; restore in reverse order of the pushes above
|
|
pop r14
|
|
pop r13
|
|
pop r12
|
|
pop rbp
|
|
pop rbx
|
|
pop rdi
|
|
pop rsi
|
|
DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
|
|
|
|
|
|
; Function table: one three-DWORD record per routine, each value an
; image-relative address (wrt ..imagebase) in the order
; begin label, end label, unwind-info label (the record in .xdata).
; NOTE(review): this matches the Win64 RUNTIME_FUNCTION layout; confirm
; against the PE/COFF exception-directory documentation.
section .pdata rdata align=4
|
|
ALIGN 4
|
|
; aesni_multi_cbc_encrypt
DD $L$SEH_begin_aesni_multi_cbc_encrypt wrt ..imagebase
|
|
DD $L$SEH_end_aesni_multi_cbc_encrypt wrt ..imagebase
|
|
DD $L$SEH_info_aesni_multi_cbc_encrypt wrt ..imagebase
|
|
; aesni_multi_cbc_decrypt
DD $L$SEH_begin_aesni_multi_cbc_decrypt wrt ..imagebase
|
|
DD $L$SEH_end_aesni_multi_cbc_decrypt wrt ..imagebase
|
|
DD $L$SEH_info_aesni_multi_cbc_decrypt wrt ..imagebase
|
|
; aesni_multi_cbc_encrypt_avx
DD $L$SEH_begin_aesni_multi_cbc_encrypt_avx wrt ..imagebase
|
|
DD $L$SEH_end_aesni_multi_cbc_encrypt_avx wrt ..imagebase
|
|
DD $L$SEH_info_aesni_multi_cbc_encrypt_avx wrt ..imagebase
|
|
; aesni_multi_cbc_decrypt_avx
DD $L$SEH_begin_aesni_multi_cbc_decrypt_avx wrt ..imagebase
|
|
DD $L$SEH_end_aesni_multi_cbc_decrypt_avx wrt ..imagebase
|
|
DD $L$SEH_info_aesni_multi_cbc_decrypt_avx wrt ..imagebase
|
|
; Unwind-info records referenced from .pdata, one per routine. Each is:
;   DB 9,0,0,0  header bytes (presumably UNWIND_INFO version 1 with
;               UNW_FLAG_EHANDLER and no prologue/unwind codes -- confirm)
;   DD handler  image-relative address of se_handler
;   DD a,b      handler data read by se_handler as HandlerData[0] and
;               HandlerData[1]: image-relative addresses of the routine's
;               body and epilogue labels
section .xdata rdata align=8
|
|
ALIGN 8
|
|
$L$SEH_info_aesni_multi_cbc_encrypt:
|
|
DB 9,0,0,0
|
|
DD se_handler wrt ..imagebase
|
|
DD $L$enc4x_body wrt ..imagebase,$L$enc4x_epilogue wrt ..imagebase ; HandlerData[0], HandlerData[1]
|
|
$L$SEH_info_aesni_multi_cbc_decrypt:
|
|
DB 9,0,0,0
|
|
DD se_handler wrt ..imagebase
|
|
DD $L$dec4x_body wrt ..imagebase,$L$dec4x_epilogue wrt ..imagebase ; HandlerData[0], HandlerData[1]
|
|
$L$SEH_info_aesni_multi_cbc_encrypt_avx:
|
|
DB 9,0,0,0
|
|
DD se_handler wrt ..imagebase
|
|
DD $L$enc8x_body wrt ..imagebase,$L$enc8x_epilogue wrt ..imagebase ; HandlerData[0], HandlerData[1]
|
|
$L$SEH_info_aesni_multi_cbc_decrypt_avx:
|
|
DB 9,0,0,0
|
|
DD se_handler wrt ..imagebase
|
|
DD $L$dec8x_body wrt ..imagebase,$L$dec8x_epilogue wrt ..imagebase ; HandlerData[0], HandlerData[1]
|