; CloverBootloader/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/rsaz-x86_64.nasm

; 2262 lines
; 53 KiB
; NASM

; Make every memory reference without an explicit base RIP-relative.
default rel
; The size keywords below expand to nothing, so an operand written as
; XMMWORD[rsp] assembles as plain [rsp].
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
; Capability vector defined outside this file. The code below tests the
; dword at offset 8 against mask 0x80100 to pick the MULX/ADCX/ADOX paths.
EXTERN OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; rsaz_512_sqr -- repeated 8-qword (512-bit) squaring with word-by-word
; reduction and a final conditional subtraction of the modulus.
; ABI on entry: Microsoft x64. The arguments are shuffled right away:
;   rdi  (arg1, rcx)        output buffer, 8 qwords
;   rsi  (arg2, rdx)        input, 8 qwords; replaced by rdi after a pass
;   rdx  (arg3, r8)         modulus pointer; parked in xmm1, later in rbp
;   rcx  (arg4, r9)         reduction multiplier, kept at [128+rsp]
;   r8d  (arg5, [40+rsp])   pass counter, kept at [128+8+rsp]
; Frame: six pushes plus 128+24 bytes of locals; [rsp .. 127+rsp] holds
; the 16-qword square.
; Path selection: (OPENSSL_ia32cap_P+8 AND 0x80100) == 0x80100 selects
; $L$oop_sqrx (MULX/ADCX/ADOX); otherwise $L$oop_sqr (MUL/ADC) is used.
; NOTE(review): the counter is decremented after the first pass, so a
; count of 0 would wrap around -- callers presumably never pass 0.
;-----------------------------------------------------------------------
global rsaz_512_sqr
ALIGN 32
rsaz_512_sqr:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_sqr:
; Win64 argument registers (rcx, rdx, r8, r9, stack) -> rdi, rsi, rdx, rcx, r8.
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,128+24
$L$sqr_body:
; DB below encodes: movq xmm1,rdx (park the modulus pointer).
DB 102,72,15,110,202
; Preload a[0] and a[1]; both loop variants expect them in rdx and rax.
mov rdx,QWORD[rsi]
mov rax,QWORD[8+rsi]
mov QWORD[128+rsp],rcx
mov r11d,0x80100
and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
cmp r11d,0x80100
je NEAR $L$oop_sqrx
jmp NEAR $L$oop_sqr
ALIGN 32
$L$oop_sqr:
mov DWORD[((128+8))+rsp],r8d
; Pass 0: rbx = a[0], rbp = a[1]; a[0]*a[1..7] is collected in r8..r15.
mov rbx,rdx
mov rbp,rax
mul rdx
mov r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
mul rbx
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
adc r12,0
mul rbx
add r12,rax
mov rax,QWORD[48+rsi]
mov r13,rdx
adc r13,0
mul rbx
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
add r14,rax
mov rax,rbx
adc rdx,0
; Double r8 (carry-out collected in rcx), add the high half of a[0]^2,
; then store result words 0 and 1.
xor rcx,rcx
add r8,r8
mov r15,rdx
adc rcx,0
mul rax
add rdx,r8
adc rcx,0
mov QWORD[rsp],rax
mov QWORD[8+rsp],rdx
; Pass 1: add a[1]*a[2..7] into r10..r15; r8 becomes the new top word.
mov rax,QWORD[16+rsi]
mul rbp
add r10,rax
mov rax,QWORD[24+rsi]
mov rbx,rdx
adc rbx,0
mul rbp
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
add r11,rbx
mov rbx,rdx
adc rbx,0
mul rbp
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r12,rbx
mov rbx,rdx
adc rbx,0
mul rbp
add r13,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r13,rbx
mov rbx,rdx
adc rbx,0
mul rbp
add r14,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r14,rbx
mov rbx,rdx
adc rbx,0
mul rbp
add r15,rax
mov rax,rbp
adc rdx,0
add r15,rbx
adc rdx,0
; Double r9:r10 (carry-out in rbx), add a[1]^2 plus the previous carry
; rcx, then store result words 2 and 3. rbp is reloaded with a[2].
xor rbx,rbx
add r9,r9
mov r8,rdx
adc r10,r10
adc rbx,0
mul rax
add rax,rcx
mov rbp,QWORD[16+rsi]
add r9,rax
mov rax,QWORD[24+rsi]
adc r10,rdx
adc rbx,0
mov QWORD[16+rsp],r9
mov QWORD[24+rsp],r10
; Pass 2: add a[2]*a[3..7] into r12..r15 and r8; r9 becomes the new top word.
mul rbp
add r12,rax
mov rax,QWORD[32+rsi]
mov rcx,rdx
adc rcx,0
mul rbp
add r13,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r13,rcx
mov rcx,rdx
adc rcx,0
mul rbp
add r14,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r14,rcx
mov rcx,rdx
adc rcx,0
mul rbp
add r15,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r15,rcx
mov rcx,rdx
adc rcx,0
mul rbp
add r8,rax
mov rax,rbp
adc rdx,0
add r8,rcx
adc rdx,0
; Double r11:r12 (carry-out in rcx), add a[2]^2 plus carry rbx, then
; store result words 4 and 5. r10 is reloaded with a[3].
xor rcx,rcx
add r11,r11
mov r9,rdx
adc r12,r12
adc rcx,0
mul rax
add rax,rbx
mov r10,QWORD[24+rsi]
add r11,rax
mov rax,QWORD[32+rsi]
adc r12,rdx
adc rcx,0
mov QWORD[32+rsp],r11
mov QWORD[40+rsp],r12
; Pass 3: add a[3]*a[4..7] into r14, r15, r8, r9. a[4], a[5] and a[6]
; are cached in r11, r12 and rbp as they are loaded.
mov r11,rax
mul r10
add r14,rax
mov rax,QWORD[40+rsi]
mov rbx,rdx
adc rbx,0
mov r12,rax
mul r10
add r15,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r15,rbx
mov rbx,rdx
adc rbx,0
mov rbp,rax
mul r10
add r8,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r8,rbx
mov rbx,rdx
adc rbx,0
mul r10
add r9,rax
mov rax,r10
adc rdx,0
add r9,rbx
adc rdx,0
; Double r13:r14 (carry-out in rbx), add a[3]^2 plus carry rcx, then
; store result words 6 and 7. r10 becomes the new top word.
xor rbx,rbx
add r13,r13
mov r10,rdx
adc r14,r14
adc rbx,0
mul rax
add rax,rcx
add r13,rax
mov rax,r12
adc r14,rdx
adc rbx,0
mov QWORD[48+rsp],r13
mov QWORD[56+rsp],r14
; Pass 4: add a[4]*a[5..7] into r8..r10 (a[7] is cached in r14).
mul r11
add r8,rax
mov rax,rbp
mov rcx,rdx
adc rcx,0
mul r11
add r9,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r9,rcx
mov rcx,rdx
adc rcx,0
mov r14,rax
mul r11
add r10,rax
mov rax,r11
adc rdx,0
add r10,rcx
adc rdx,0
; Double r15:r8 (carry-out in rcx), add a[4]^2 plus carry rbx, then
; store result words 8 and 9. r11 becomes the new top word.
xor rcx,rcx
add r15,r15
mov r11,rdx
adc r8,r8
adc rcx,0
mul rax
add rax,rbx
add r15,rax
mov rax,rbp
adc r8,rdx
adc rcx,0
mov QWORD[64+rsp],r15
mov QWORD[72+rsp],r8
; Pass 5: add a[5]*a[6] and a[5]*a[7] into r10 and r11.
mul r12
add r10,rax
mov rax,r14
mov rbx,rdx
adc rbx,0
mul r12
add r11,rax
mov rax,r12
adc rdx,0
add r11,rbx
adc rdx,0
; Double r9:r10 (carry-out in rbx), add a[5]^2 plus carry rcx, then
; store result words 10 and 11. r12 becomes the new top word.
xor rbx,rbx
add r9,r9
mov r12,rdx
adc r10,r10
adc rbx,0
mul rax
add rax,rcx
add r9,rax
mov rax,r14
adc r10,rdx
adc rbx,0
mov QWORD[80+rsp],r9
mov QWORD[88+rsp],r10
; Pass 6: add a[6]*a[7] into r12; r13 becomes the new top word.
mul rbp
add r12,rax
mov rax,rbp
adc rdx,0
; Double r11:r12 (carry-out in rcx), add a[6]^2 plus carry rbx, then
; store result words 12 and 13.
xor rcx,rcx
add r11,r11
mov r13,rdx
adc r12,r12
adc rcx,0
mul rax
add rax,rbx
add r11,rax
mov rax,r14
adc r12,rdx
adc rcx,0
mov QWORD[96+rsp],r11
mov QWORD[104+rsp],r12
; Final step: double r13, add a[7]^2 and the pending carries; rax:rdx
; are result words 14 and 15 (stored a few lines below).
xor rbx,rbx
add r13,r13
adc rbx,0
mul rax
add rax,rcx
add rax,r13
adc rdx,rbx
; Load the low eight words of the square as input to the reduction.
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
; DB below encodes: movq rbp,xmm1 (rbp = modulus pointer).
DB 102,72,15,126,205
mov QWORD[112+rsp],rax
mov QWORD[120+rsp],rdx
call __rsaz_512_reduce
; Add the high eight words of the square. rcx becomes 0 or -1 from the
; final carry and is used as a mask by __rsaz_512_subtract.
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
sbb rcx,rcx
call __rsaz_512_subtract
; The result just written at rdi is the input of the next pass; its
; words 0 and 1 (still in r8, r9) are preloaded into rdx and rax.
mov rdx,r8
mov rax,r9
mov r8d,DWORD[((128+8))+rsp]
mov rsi,rdi
dec r8d
jnz NEAR $L$oop_sqr
jmp NEAR $L$sqr_tail
ALIGN 32
$L$oop_sqrx:
mov DWORD[((128+8))+rsp],r8d
; DB below encodes: movq xmm0,rdi (rdi is used as scratch in this loop).
DB 102,72,15,110,199
; Pass 0: rdx = a[0] is the implicit MULX operand, rax = a[1].
; a[0]*a[1..7] is collected in r8..r15. rbp is zeroed and used as a
; constant 0 for the carry chains.
mulx r9,r8,rax
mov rbx,rax
mulx r10,rcx,QWORD[16+rsi]
xor rbp,rbp
mulx r11,rax,QWORD[24+rsi]
adcx r9,rcx
; DB below encodes: mulx r12,rcx,QWORD[32+rsi]
DB 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
adcx r10,rax
; DB below encodes: mulx r13,rax,QWORD[40+rsi]
DB 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
adcx r11,rcx
mulx r14,rcx,QWORD[48+rsi]
adcx r12,rax
adcx r13,rcx
mulx r15,rax,QWORD[56+rsi]
adcx r14,rax
adcx r15,rbp
; a[0]^2 in rdi:rax; rdx switches to a[1]. r8 is doubled on the OF
; chain while the high half of a[0]^2 is added on the CF chain; rcx
; collects both carry-outs. Result words 0 and 1 are stored.
mulx rdi,rax,rdx
mov rdx,rbx
xor rcx,rcx
adox r8,r8
adcx r8,rdi
adox rcx,rbp
adcx rcx,rbp
mov QWORD[rsp],rax
mov QWORD[8+rsp],r8
; Pass 1: add a[1]*a[2..7] into r10..r15; r8 becomes the new top word.
; DB below encodes: mulx rbx,rax,QWORD[16+rsi]
DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
adox r10,rax
adcx r11,rbx
mulx r8,rdi,QWORD[24+rsi]
adox r11,rdi
; DB below is a lone 0x66 prefix byte prepended to the next instruction
; (presumably padding; it is redundant for adcx).
DB 0x66
adcx r12,r8
mulx rbx,rax,QWORD[32+rsi]
adox r12,rax
adcx r13,rbx
mulx r8,rdi,QWORD[40+rsi]
adox r13,rdi
adcx r14,r8
; DB below encodes: mulx rbx,rax,QWORD[48+rsi]
DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adox r14,rax
adcx r15,rbx
; DB below encodes: mulx r8,rdi,QWORD[56+rsi]
DB 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
adox r15,rdi
adcx r8,rbp
; a[1]^2 in rdi:rax; then rdx = a[2].
mulx rdi,rax,rdx
adox r8,rbp
; DB below encodes: mov rdx,QWORD[16+rsi]
DB 0x48,0x8b,0x96,0x10,0x00,0x00,0x00
; Double r9:r10, add a[1]^2 plus carry rcx; rbx collects the carry-outs.
xor rbx,rbx
adox r9,r9
adcx rax,rcx
adox r10,r10
adcx r9,rax
adox rbx,rbp
adcx r10,rdi
adcx rbx,rbp
mov QWORD[16+rsp],r9
; DB below encodes: mov QWORD[24+rsp],r10
DB 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
; Pass 2: add a[2]*a[3..7] into r12..r15 and r8; r9 becomes the new top word.
mulx r9,rdi,QWORD[24+rsi]
adox r12,rdi
adcx r13,r9
mulx rcx,rax,QWORD[32+rsi]
adox r13,rax
adcx r14,rcx
; DB below encodes: mulx r9,rdi,QWORD[40+rsi]
DB 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
adox r14,rdi
adcx r15,r9
; DB below encodes: mulx rcx,rax,QWORD[48+rsi]
DB 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
adox r15,rax
adcx r8,rcx
mulx r9,rdi,QWORD[56+rsi]
adox r8,rdi
adcx r9,rbp
; a[2]^2 in rdi:rax; rdx = a[3]. Double r11:r12, add the square plus
; carry rbx; rcx collects the carry-outs. Store words 4 and 5.
mulx rdi,rax,rdx
adox r9,rbp
mov rdx,QWORD[24+rsi]
xor rcx,rcx
adox r11,r11
adcx rax,rbx
adox r12,r12
adcx r11,rax
adox rcx,rbp
adcx r12,rdi
adcx rcx,rbp
mov QWORD[32+rsp],r11
mov QWORD[40+rsp],r12
; Pass 3: add a[3]*a[4..7] into r14, r15, r8, r9; r10 becomes the new top word.
mulx rbx,rax,QWORD[32+rsi]
adox r14,rax
adcx r15,rbx
mulx r10,rdi,QWORD[40+rsi]
adox r15,rdi
adcx r8,r10
mulx rbx,rax,QWORD[48+rsi]
adox r8,rax
adcx r9,rbx
mulx r10,rdi,QWORD[56+rsi]
adox r9,rdi
adcx r10,rbp
; a[3]^2 in rdi:rax; rdx = a[4]. Double r13:r14, add the square plus
; carry rcx; rbx collects the carry-outs. Store words 6 and 7.
mulx rdi,rax,rdx
adox r10,rbp
mov rdx,QWORD[32+rsi]
xor rbx,rbx
adox r13,r13
adcx rax,rcx
adox r14,r14
adcx r13,rax
adox rbx,rbp
adcx r14,rdi
adcx rbx,rbp
mov QWORD[48+rsp],r13
mov QWORD[56+rsp],r14
; Pass 4: add a[4]*a[5..7] into r8..r10; r11 becomes the new top word.
mulx r11,rdi,QWORD[40+rsi]
adox r8,rdi
adcx r9,r11
mulx rcx,rax,QWORD[48+rsi]
adox r9,rax
adcx r10,rcx
mulx r11,rdi,QWORD[56+rsi]
adox r10,rdi
adcx r11,rbp
; a[4]^2 in rdi:rax; rdx = a[5]. Double r15:r8, add the square plus
; carry rbx; rcx collects the carry-outs. Store words 8 and 9.
mulx rdi,rax,rdx
mov rdx,QWORD[40+rsi]
adox r11,rbp
xor rcx,rcx
adox r15,r15
adcx rax,rbx
adox r8,r8
adcx r15,rax
adox rcx,rbp
adcx r8,rdi
adcx rcx,rbp
mov QWORD[64+rsp],r15
mov QWORD[72+rsp],r8
; Pass 5: add a[5]*a[6] and a[5]*a[7] into r10 and r11; r12 becomes the
; new top word.
; DB below encodes: mulx rbx,rax,QWORD[48+rsi]
DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adox r10,rax
adcx r11,rbx
; DB below encodes: mulx r12,rdi,QWORD[56+rsi]
DB 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
adox r11,rdi
adcx r12,rbp
; a[5]^2 in rdi:rax; rdx = a[6]. Double r9:r10, add the square plus
; carry rcx; rbx collects the carry-outs. Store words 10 and 11.
mulx rdi,rax,rdx
adox r12,rbp
mov rdx,QWORD[48+rsi]
xor rbx,rbx
adox r9,r9
adcx rax,rcx
adox r10,r10
adcx r9,rax
adcx r10,rdi
adox rbx,rbp
adcx rbx,rbp
mov QWORD[80+rsp],r9
mov QWORD[88+rsp],r10
; Pass 6: add a[6]*a[7] into r12; r13 becomes the new top word.
; DB below encodes: mulx r13,rax,QWORD[56+rsi]
DB 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
adox r12,rax
adox r13,rbp
; a[6]^2 in rdi:rax; rdx = a[7]. Double r11:r12, add the square plus
; carry rbx; rcx collects the carry-outs. Store words 12 and 13.
mulx rdi,rax,rdx
xor rcx,rcx
mov rdx,QWORD[56+rsi]
adox r11,r11
adcx rax,rbx
adox r12,r12
adcx r11,rax
adox rcx,rbp
adcx r12,rdi
adcx rcx,rbp
; DB below encodes: mov QWORD[96+rsp],r11
DB 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
; DB below encodes: mov QWORD[104+rsp],r12
DB 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
; Final step: a[7]^2 in rdx:rax; double r13 and fold in the carries.
; rax and rbx become result words 14 and 15 (stored below).
mulx rdx,rax,rdx
xor rbx,rbx
adox r13,r13
adcx rax,rcx
adox rbx,rbp
adcx rax,r13
adcx rbx,rdx
; DB below encodes: movq rdi,xmm0 (restore the output pointer).
DB 102,72,15,126,199
; DB below encodes: movq rbp,xmm1 (rbp = modulus pointer).
DB 102,72,15,126,205
; rdx = reduction multiplier, as expected by __rsaz_512_reducex.
mov rdx,QWORD[128+rsp]
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
mov QWORD[112+rsp],rax
mov QWORD[120+rsp],rbx
call __rsaz_512_reducex
; Add the high eight words of the square; rcx = 0 or -1 carry mask.
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
sbb rcx,rcx
call __rsaz_512_subtract
; Set up the next pass: input = previous output, rdx/rax = its words 0 and 1.
mov rdx,r8
mov rax,r9
mov r8d,DWORD[((128+8))+rsp]
mov rsi,rdi
dec r8d
jnz NEAR $L$oop_sqrx
$L$sqr_tail:
; rax = address just above the six pushed registers; restore them and
; drop the whole frame.
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$sqr_epilogue:
; Reload rdi/rsi from the slots written by the prologue.
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_sqr:
;-----------------------------------------------------------------------
; rsaz_512_mul -- 8-qword by 8-qword multiplication followed by
; word-by-word reduction and a conditional subtraction of the modulus.
; ABI on entry: Microsoft x64. After the argument shuffle:
;   rdi  (arg1, rcx)        output buffer, 8 qwords (parked in xmm0)
;   rsi  (arg2, rdx)        first operand, 8 qwords
;   rdx  (arg3, r8)         second operand, 8 qwords (moved to rbp)
;   rcx  (arg4, r9)         modulus pointer (parked in xmm1)
;   r8   (arg5, [40+rsp])   reduction multiplier, kept at [128+rsp]
; Frame: six pushes plus 128+24 bytes; [rsp .. 127+rsp] receives the
; 16-qword product from __rsaz_512_mul / __rsaz_512_mulx.
;-----------------------------------------------------------------------
global rsaz_512_mul
ALIGN 32
rsaz_512_mul:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul:
; Win64 argument registers (rcx, rdx, r8, r9, stack) -> rdi, rsi, rdx, rcx, r8.
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,128+24
$L$mul_body:
; DB below encodes: movq xmm0,rdi (output pointer; the helpers clobber rdi).
DB 102,72,15,110,199
; DB below encodes: movq xmm1,rcx (modulus pointer).
DB 102,72,15,110,201
mov QWORD[128+rsp],r8
; Select the MULX/ADCX/ADOX path when both bits of 0x80100 are set.
mov r11d,0x80100
and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
cmp r11d,0x80100
je NEAR $L$mulx
; MUL path: __rsaz_512_mul expects rbx = b[0] and rbp = b.
mov rbx,QWORD[rdx]
mov rbp,rdx
call __rsaz_512_mul
; DB pair below encodes: movq rdi,xmm0 / movq rbp,xmm1.
DB 102,72,15,126,199
DB 102,72,15,126,205
; Low eight product words are the input of the reduction.
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
jmp NEAR $L$mul_tail
ALIGN 32
$L$mulx:
; MULX path: __rsaz_512_mulx expects rbp = b and rdx = b[0]. The mov
; instructions leave the flags from the cmp above untouched.
mov rbp,rdx
mov rdx,QWORD[rdx]
call __rsaz_512_mulx
; DB pair below encodes: movq rdi,xmm0 / movq rbp,xmm1.
DB 102,72,15,126,199
DB 102,72,15,126,205
; rdx = reduction multiplier, as expected by __rsaz_512_reducex.
mov rdx,QWORD[128+rsp]
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reducex
$L$mul_tail:
; Add the high eight product words; rcx = 0 or -1 mask from the final
; carry, used by __rsaz_512_subtract, which also writes the output.
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
sbb rcx,rcx
call __rsaz_512_subtract
; Restore the six pushed registers and drop the frame.
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul:
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- multiply an 8-qword operand by a value that is
; gathered word by word from a 16-entry table, then reduce and
; conditionally subtract the modulus.
; ABI on entry: Microsoft x64. After the argument shuffle:
;   rdi  (arg1, rcx)        output buffer, 8 qwords
;   rsi  (arg2, rdx)        first operand, 8 qwords
;   rdx  (arg3, r8)         table; 128 bytes (16 qwords) per operand word
;   rcx  (arg4, r9)         modulus pointer
;   r8   (arg5, [40+rsp])   reduction multiplier
;   r9d  (arg6, [48+rsp])   table index
; Frame: six pushes plus 328 bytes. xmm6..xmm15 are saved at
; [160+rsp .. 319+rsp]; [rsp .. 127+rsp] receives the 16-qword product;
; [128+rsp], [136+rsp], [144+rsp] hold multiplier, output and modulus.
; Every 128-byte table row is read in full and the wanted qword is
; selected with compare masks, so the addresses touched do not depend on
; the index.
;-----------------------------------------------------------------------
global rsaz_512_mul_gather4
ALIGN 32
rsaz_512_mul_gather4:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul_gather4:
; Win64 argument registers and stack slots -> rdi, rsi, rdx, rcx, r8, r9.
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,328
; Save xmm6..xmm15, which this function overwrites.
movaps XMMWORD[160+rsp],xmm6
movaps XMMWORD[176+rsp],xmm7
movaps XMMWORD[192+rsp],xmm8
movaps XMMWORD[208+rsp],xmm9
movaps XMMWORD[224+rsp],xmm10
movaps XMMWORD[240+rsp],xmm11
movaps XMMWORD[256+rsp],xmm12
movaps XMMWORD[272+rsp],xmm13
movaps XMMWORD[288+rsp],xmm14
movaps XMMWORD[304+rsp],xmm15
$L$mul_gather4_body:
; Build eight selection masks in xmm0..xmm7. xmm8 holds the index in all
; four dwords; the candidates start at {0,0,1,1} and grow by {2,2,2,2}
; per register, ending at {14,14,15,15} in xmm7. pcmpeqd turns each
; candidate into all-ones where it equals the index.
movd xmm8,r9d
movdqa xmm1,XMMWORD[(($L$inc+16))]
movdqa xmm0,XMMWORD[$L$inc]
pshufd xmm8,xmm8,0
movdqa xmm7,xmm1
movdqa xmm2,xmm1
paddd xmm1,xmm0
pcmpeqd xmm0,xmm8
movdqa xmm3,xmm7
paddd xmm2,xmm1
pcmpeqd xmm1,xmm8
movdqa xmm4,xmm7
paddd xmm3,xmm2
pcmpeqd xmm2,xmm8
movdqa xmm5,xmm7
paddd xmm4,xmm3
pcmpeqd xmm3,xmm8
movdqa xmm6,xmm7
paddd xmm5,xmm4
pcmpeqd xmm4,xmm8
paddd xmm6,xmm5
pcmpeqd xmm5,xmm8
paddd xmm7,xmm6
pcmpeqd xmm6,xmm8
pcmpeqd xmm7,xmm8
; Gather word 0 of the table operand: load the whole 128-byte row, mask,
; OR everything together and fold the two qword halves; the selected
; qword ends up in the low half of xmm8. rbp points at the next row.
movdqa xmm8,XMMWORD[rdx]
movdqa xmm9,XMMWORD[16+rdx]
movdqa xmm10,XMMWORD[32+rdx]
movdqa xmm11,XMMWORD[48+rdx]
pand xmm8,xmm0
movdqa xmm12,XMMWORD[64+rdx]
pand xmm9,xmm1
movdqa xmm13,XMMWORD[80+rdx]
pand xmm10,xmm2
movdqa xmm14,XMMWORD[96+rdx]
pand xmm11,xmm3
movdqa xmm15,XMMWORD[112+rdx]
lea rbp,[128+rdx]
pand xmm12,xmm4
pand xmm13,xmm5
pand xmm14,xmm6
pand xmm15,xmm7
por xmm8,xmm10
por xmm9,xmm11
por xmm8,xmm12
por xmm9,xmm13
por xmm8,xmm14
por xmm9,xmm15
por xmm8,xmm9
pshufd xmm9,xmm8,0x4e
por xmm8,xmm9
; Select the MULX/ADCX/ADOX path when both bits of 0x80100 are set.
mov r11d,0x80100
and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
cmp r11d,0x80100
je NEAR $L$mulx_gather
; DB below encodes: movq rbx,xmm8 (rbx = gathered word 0).
DB 102,76,15,126,195
; Stash multiplier, output pointer and modulus pointer in the frame.
mov QWORD[128+rsp],r8
mov QWORD[((128+8))+rsp],rdi
mov QWORD[((128+16))+rsp],rcx
; First row: a[0..7] * rbx. Word 0 goes to [rsp]; the running upper
; words stay in r8..r15.
mov rax,QWORD[rsi]
mov rcx,QWORD[8+rsi]
mul rbx
mov QWORD[rsp],rax
mov rax,rcx
mov r8,rdx
mul rbx
add r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
adc r9,0
mul rbx
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
adc r12,0
mul rbx
add r12,rax
mov rax,QWORD[48+rsi]
mov r13,rdx
adc r13,0
mul rbx
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
add r14,rax
mov rax,QWORD[rsi]
mov r15,rdx
adc r15,0
; rdi walks the product buffer from word 1; seven more rows follow.
lea rdi,[8+rsp]
mov ecx,7
jmp NEAR $L$oop_mul_gather
ALIGN 32
$L$oop_mul_gather:
; Gather the next table word (same masked load as above).
movdqa xmm8,XMMWORD[rbp]
movdqa xmm9,XMMWORD[16+rbp]
movdqa xmm10,XMMWORD[32+rbp]
movdqa xmm11,XMMWORD[48+rbp]
pand xmm8,xmm0
movdqa xmm12,XMMWORD[64+rbp]
pand xmm9,xmm1
movdqa xmm13,XMMWORD[80+rbp]
pand xmm10,xmm2
movdqa xmm14,XMMWORD[96+rbp]
pand xmm11,xmm3
movdqa xmm15,XMMWORD[112+rbp]
lea rbp,[128+rbp]
pand xmm12,xmm4
pand xmm13,xmm5
pand xmm14,xmm6
pand xmm15,xmm7
por xmm8,xmm10
por xmm9,xmm11
por xmm8,xmm12
por xmm9,xmm13
por xmm8,xmm14
por xmm9,xmm15
por xmm8,xmm9
pshufd xmm9,xmm8,0x4e
por xmm8,xmm9
; DB below encodes: movq rbx,xmm8
DB 102,76,15,126,195
; Multiply-accumulate a[0..7] * rbx into r8..r15; the finished low word
; is written to [rdi] and the window moves up by one word.
mul rbx
add r8,rax
mov rax,QWORD[8+rsi]
mov QWORD[rdi],r8
mov r8,rdx
adc r8,0
mul rbx
add r9,rax
mov rax,QWORD[16+rsi]
adc rdx,0
add r8,r9
mov r9,rdx
adc r9,0
mul rbx
add r10,rax
mov rax,QWORD[24+rsi]
adc rdx,0
add r9,r10
mov r10,rdx
adc r10,0
mul rbx
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
add r10,r11
mov r11,rdx
adc r11,0
mul rbx
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r11,r12
mov r12,rdx
adc r12,0
mul rbx
add r13,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r12,r13
mov r13,rdx
adc r13,0
mul rbx
add r14,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r13,r14
mov r14,rdx
adc r14,0
mul rbx
add r15,rax
mov rax,QWORD[rsi]
adc rdx,0
add r14,r15
mov r15,rdx
adc r15,0
lea rdi,[8+rdi]
dec ecx
jnz NEAR $L$oop_mul_gather
; Store the remaining eight (high) product words.
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
; Reload output pointer and modulus pointer, then reduce the low half.
mov rdi,QWORD[((128+8))+rsp]
mov rbp,QWORD[((128+16))+rsp]
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
jmp NEAR $L$mul_gather_tail
ALIGN 32
$L$mulx_gather:
; DB below encodes: movq rdx,xmm8 (rdx = gathered word 0, the implicit
; MULX operand).
DB 102,76,15,126,194
mov QWORD[128+rsp],r8
mov QWORD[((128+8))+rsp],rdi
mov QWORD[((128+16))+rsp],rcx
; First row: a[0..7] * rdx. Word 0 goes to [rsp]; upper words stay in
; r8..r15. edi is zeroed (which also clears CF and OF) and serves as a
; constant 0 for the carry chains.
mulx r8,rbx,QWORD[rsi]
mov QWORD[rsp],rbx
xor edi,edi
mulx r9,rax,QWORD[8+rsi]
mulx r10,rbx,QWORD[16+rsi]
adcx r8,rax
mulx r11,rax,QWORD[24+rsi]
adcx r9,rbx
mulx r12,rbx,QWORD[32+rsi]
adcx r10,rax
mulx r13,rax,QWORD[40+rsi]
adcx r11,rbx
mulx r14,rbx,QWORD[48+rsi]
adcx r12,rax
mulx r15,rax,QWORD[56+rsi]
adcx r13,rbx
adcx r14,rax
; DB below is a lone 0x67 prefix byte prepended to the next instruction
; (presumably padding).
DB 0x67
mov rbx,r8
adcx r15,rdi
; rcx counts from -7 up to 0 and doubles as the store index.
mov rcx,-7
jmp NEAR $L$oop_mulx_gather
ALIGN 32
$L$oop_mulx_gather:
; Gather the next table word (same masked load as above).
movdqa xmm8,XMMWORD[rbp]
movdqa xmm9,XMMWORD[16+rbp]
movdqa xmm10,XMMWORD[32+rbp]
movdqa xmm11,XMMWORD[48+rbp]
pand xmm8,xmm0
movdqa xmm12,XMMWORD[64+rbp]
pand xmm9,xmm1
movdqa xmm13,XMMWORD[80+rbp]
pand xmm10,xmm2
movdqa xmm14,XMMWORD[96+rbp]
pand xmm11,xmm3
movdqa xmm15,XMMWORD[112+rbp]
lea rbp,[128+rbp]
pand xmm12,xmm4
pand xmm13,xmm5
pand xmm14,xmm6
pand xmm15,xmm7
por xmm8,xmm10
por xmm9,xmm11
por xmm8,xmm12
por xmm9,xmm13
por xmm8,xmm14
por xmm9,xmm15
por xmm8,xmm9
pshufd xmm9,xmm8,0x4e
por xmm8,xmm9
; DB below encodes: movq rdx,xmm8
DB 102,76,15,126,194
; Multiply-accumulate a[0..7] * rdx: low halves ride the CF chain
; (adcx), the previous upper words ride the OF chain (adox).
; DB below encodes: mulx r8,rax,QWORD[0+rsi] (32-bit displacement form)
DB 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
adcx rbx,rax
adox r8,r9
mulx r9,rax,QWORD[8+rsi]
adcx r8,rax
adox r9,r10
mulx r10,rax,QWORD[16+rsi]
adcx r9,rax
adox r10,r11
; DB below encodes: mulx r11,rax,QWORD[24+rsi]
DB 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
adcx r10,rax
adox r11,r12
mulx r12,rax,QWORD[32+rsi]
adcx r11,rax
adox r12,r13
mulx r13,rax,QWORD[40+rsi]
adcx r12,rax
adox r13,r14
; DB below encodes: mulx r14,rax,QWORD[48+rsi]
DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcx r13,rax
; DB below is a lone 0x67 prefix byte prepended to the next instruction
; (presumably padding).
DB 0x67
adox r14,r15
mulx r15,rax,QWORD[56+rsi]
; Store the finished word: rcx = -7 addresses [8+rsp], rcx = -1 [56+rsp].
mov QWORD[64+rcx*8+rsp],rbx
adcx r14,rax
adox r15,rdi
mov rbx,r8
adcx r15,rdi
inc rcx
jnz NEAR $L$oop_mulx_gather
; Store the remaining eight (high) product words.
mov QWORD[64+rsp],r8
mov QWORD[((64+8))+rsp],r9
mov QWORD[((64+16))+rsp],r10
mov QWORD[((64+24))+rsp],r11
mov QWORD[((64+32))+rsp],r12
mov QWORD[((64+40))+rsp],r13
mov QWORD[((64+48))+rsp],r14
mov QWORD[((64+56))+rsp],r15
; rdx = reduction multiplier, rdi = output, rbp = modulus.
mov rdx,QWORD[128+rsp]
mov rdi,QWORD[((128+8))+rsp]
mov rbp,QWORD[((128+16))+rsp]
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reducex
$L$mul_gather_tail:
; Add the high eight product words; rcx = 0 or -1 carry mask for
; __rsaz_512_subtract, which also writes the output.
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
sbb rcx,rcx
call __rsaz_512_subtract
; rax = rsp+200, so (160-200)+rax is the xmm6 save slot at [160+rsp].
lea rax,[((128+24+48))+rsp]
movaps xmm6,XMMWORD[((160-200))+rax]
movaps xmm7,XMMWORD[((176-200))+rax]
movaps xmm8,XMMWORD[((192-200))+rax]
movaps xmm9,XMMWORD[((208-200))+rax]
movaps xmm10,XMMWORD[((224-200))+rax]
movaps xmm11,XMMWORD[((240-200))+rax]
movaps xmm12,XMMWORD[((256-200))+rax]
movaps xmm13,XMMWORD[((272-200))+rax]
movaps xmm14,XMMWORD[((288-200))+rax]
movaps xmm15,XMMWORD[((304-200))+rax]
; 200+176 = 328+48: rax now points just above the six pushed registers.
lea rax,[176+rax]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_gather4_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul_gather4:
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply the 8 qwords at arg2 by the 8 qwords
; currently stored in the output buffer (arg1), reduce, conditionally
; subtract the modulus, write the result back to arg1 and additionally
; scatter it into a table with a 128-byte stride.
; ABI on entry: Microsoft x64. After the argument shuffle:
;   rdi  (arg1, rcx)        in/out buffer, 8 qwords (parked in xmm0)
;   rsi  (arg2, rdx)        first operand, 8 qwords
;   rdx  (arg3, r8)         modulus pointer (parked in xmm1)
;   rcx  (arg4, r9)         reduction multiplier, kept at [128+rsp]
;   r8   (arg5, [40+rsp])   table base
;   r9d  (arg6, [48+rsp])   table index; slot address = r8 + 8*r9
; Frame: six pushes plus 128+24 bytes; [rsp .. 127+rsp] receives the
; 16-qword product.
;-----------------------------------------------------------------------
global rsaz_512_mul_scatter4
ALIGN 32
rsaz_512_mul_scatter4:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul_scatter4:
; Win64 argument registers and stack slots -> rdi, rsi, rdx, rcx, r8, r9.
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
; Zero-extend the 32-bit index into the full r9.
mov r9d,r9d
sub rsp,128+24
$L$mul_scatter4_body:
; r8 = address of the first table slot for this index.
lea r8,[r9*8+r8]
; DB triple below encodes: movq xmm0,rdi / movq xmm1,rdx / movq xmm2,r8.
DB 102,72,15,110,199
DB 102,72,15,110,202
DB 102,73,15,110,208
mov QWORD[128+rsp],rcx
; The second multiplicand is read from the output buffer itself.
mov rbp,rdi
; Select the MULX/ADCX/ADOX path when both bits of 0x80100 are set.
mov r11d,0x80100
and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
cmp r11d,0x80100
je NEAR $L$mulx_scatter
; MUL path: __rsaz_512_mul expects rbx = word 0 of the rbp operand.
mov rbx,QWORD[rdi]
call __rsaz_512_mul
; DB pair below encodes: movq rdi,xmm0 / movq rbp,xmm1 (rbp = modulus).
DB 102,72,15,126,199
DB 102,72,15,126,205
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reduce
jmp NEAR $L$mul_scatter_tail
ALIGN 32
$L$mulx_scatter:
; MULX path: __rsaz_512_mulx expects rdx = word 0 of the rbp operand.
; The mov leaves the flags from the cmp above untouched.
mov rdx,QWORD[rdi]
call __rsaz_512_mulx
; DB pair below encodes: movq rdi,xmm0 / movq rbp,xmm1 (rbp = modulus).
DB 102,72,15,126,199
DB 102,72,15,126,205
; rdx = reduction multiplier, as expected by __rsaz_512_reducex.
mov rdx,QWORD[128+rsp]
mov r8,QWORD[rsp]
mov r9,QWORD[8+rsp]
mov r10,QWORD[16+rsp]
mov r11,QWORD[24+rsp]
mov r12,QWORD[32+rsp]
mov r13,QWORD[40+rsp]
mov r14,QWORD[48+rsp]
mov r15,QWORD[56+rsp]
call __rsaz_512_reducex
$L$mul_scatter_tail:
; Add the high eight product words; the carry is turned into a mask in
; rcx below (the movq in between does not touch the flags).
add r8,QWORD[64+rsp]
adc r9,QWORD[72+rsp]
adc r10,QWORD[80+rsp]
adc r11,QWORD[88+rsp]
adc r12,QWORD[96+rsp]
adc r13,QWORD[104+rsp]
adc r14,QWORD[112+rsp]
adc r15,QWORD[120+rsp]
; DB below encodes: movq rsi,xmm2 (rsi = table slot address).
DB 102,72,15,126,214
sbb rcx,rcx
call __rsaz_512_subtract
; __rsaz_512_subtract leaves the final words in r8..r15; scatter them
; into the table, one word every 128 bytes.
mov QWORD[rsi],r8
mov QWORD[128+rsi],r9
mov QWORD[256+rsi],r10
mov QWORD[384+rsi],r11
mov QWORD[512+rsi],r12
mov QWORD[640+rsi],r13
mov QWORD[768+rsi],r14
mov QWORD[896+rsi],r15
; Restore the six pushed registers and drop the frame.
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_scatter4_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul_scatter4:
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- run the word-by-word reduction directly on an
; 8-qword input and store the 8-qword result. Unlike the other entry
; points there is no addition of a high half and no call to
; __rsaz_512_subtract.
; ABI on entry: Microsoft x64. After the argument shuffle:
;   rdi  (arg1, rcx)   output buffer, 8 qwords
;   rsi  (arg2, rdx)   input, 8 qwords
;   rdx  (arg3, r8)    modulus pointer (moved to rbp)
;   rcx  (arg4, r9)    reduction multiplier, kept at [128+rsp]
; Frame: six pushes plus 128+24 bytes.
;-----------------------------------------------------------------------
global rsaz_512_mul_by_one
ALIGN 32
rsaz_512_mul_by_one:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_rsaz_512_mul_by_one:
; Win64 argument registers (rcx, rdx, r8, r9) -> rdi, rsi, rdx, rcx.
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
push rbx
push rbp
push r12
push r13
push r14
push r15
sub rsp,128+24
$L$mul_by_one_body:
; eax = capability dword, tested further down.
mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
mov rbp,rdx
mov QWORD[128+rsp],rcx
; Load the input into r8..r15, the register set the reducers work on.
mov r8,QWORD[rsi]
pxor xmm0,xmm0
mov r9,QWORD[8+rsi]
mov r10,QWORD[16+rsi]
mov r11,QWORD[24+rsi]
mov r12,QWORD[32+rsi]
mov r13,QWORD[40+rsi]
mov r14,QWORD[48+rsi]
mov r15,QWORD[56+rsi]
; Zero [rsp .. 111+rsp].
; NOTE(review): neither reduction helper reads this area in the code
; visible here; confirm whether the clearing is still needed.
movdqa XMMWORD[rsp],xmm0
movdqa XMMWORD[16+rsp],xmm0
movdqa XMMWORD[32+rsp],xmm0
movdqa XMMWORD[48+rsp],xmm0
movdqa XMMWORD[64+rsp],xmm0
movdqa XMMWORD[80+rsp],xmm0
movdqa XMMWORD[96+rsp],xmm0
; Select the MULX/ADCX/ADOX reducer when both bits of 0x80100 are set.
and eax,0x80100
cmp eax,0x80100
je NEAR $L$by_one_callx
call __rsaz_512_reduce
jmp NEAR $L$by_one_tail
ALIGN 32
$L$by_one_callx:
; rdx = reduction multiplier, as expected by __rsaz_512_reducex.
mov rdx,QWORD[128+rsp]
call __rsaz_512_reducex
$L$by_one_tail:
; Store the reduced value.
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
; Restore the six pushed registers and drop the frame.
lea rax,[((128+24+48))+rsp]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax]
$L$mul_by_one_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_mul_by_one:
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper: eight rounds of word-by-word
; reduction using MUL/ADC. Not a public ABI function.
; In:    r8..r15            eight-word value to reduce (r8 = lowest)
;        rbp                modulus, 8 qwords
;        [128+8+rsp]        reduction multiplier; this is the caller's
;                           [128+rsp] seen past the return address
; Out:   r8..r15            reduced value
; Clobb: rax, rbx, rcx, rdx, rsi, flags
; Each round multiplies the modulus by rbx = r8 * multiplier, adds it to
; the running value and shifts the window down by one word.
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_reduce:
; rbx = round factor for the first round.
mov rbx,r8
imul rbx,QWORD[((128+8))+rsp]
mov rax,QWORD[rbp]
mov ecx,8
jmp NEAR $L$reduction_loop
ALIGN 32
$L$reduction_loop:
mul rbx
mov rax,QWORD[8+rbp]
; The low product word is never added explicitly: neg sets CF exactly
; when r8 is non-zero, and that flag is consumed by the adc two lines
; below as the carry out of the lowest word. r8 itself is overwritten.
neg r8
mov r8,rdx
adc r8,0
mul rbx
add r9,rax
mov rax,QWORD[16+rbp]
adc rdx,0
add r8,r9
mov r9,rdx
adc r9,0
mul rbx
add r10,rax
mov rax,QWORD[24+rbp]
adc rdx,0
add r9,r10
mov r10,rdx
adc r10,0
mul rbx
add r11,rax
mov rax,QWORD[32+rbp]
adc rdx,0
add r10,r11
; Start computing the next round factor in rsi (mov keeps the flags).
mov rsi,QWORD[((128+8))+rsp]
adc rdx,0
mov r11,rdx
mul rbx
add r12,rax
mov rax,QWORD[40+rbp]
adc rdx,0
; rsi = new r8 * multiplier. imul rewrites the flags, which is harmless
; here because the preceding adc has already consumed the carry.
imul rsi,r8
add r11,r12
mov r12,rdx
adc r12,0
mul rbx
add r13,rax
mov rax,QWORD[48+rbp]
adc rdx,0
add r12,r13
mov r13,rdx
adc r13,0
mul rbx
add r14,rax
mov rax,QWORD[56+rbp]
adc rdx,0
add r13,r14
mov r14,rdx
adc r14,0
mul rbx
; Switch to the next round factor; rax is reloaded with modulus word 0.
mov rbx,rsi
add r15,rax
mov rax,QWORD[rbp]
adc rdx,0
add r14,r15
mov r15,rdx
adc r15,0
dec ecx
jne NEAR $L$reduction_loop
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __rsaz_512_reducex -- internal helper: eight rounds of word-by-word
; reduction using MULX with the ADCX (CF) and ADOX (OF) carry chains.
; In:    r8..r15            eight-word value to reduce (r8 = lowest)
;        rbp                modulus, 8 qwords
;        rdx                reduction multiplier (loaded by the caller)
;        [128+8+rsp]        the same multiplier; this is the caller's
;                           [128+rsp] seen past the return address
; Out:   r8..r15            reduced value
; Clobb: rax, rbx, rcx, rdx, rsi, flags
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_reducex:
; rdx = round factor for the first round; rsi = 0 (xor also clears CF
; and OF before the first round).
imul rdx,r8
xor rsi,rsi
mov ecx,8
jmp NEAR $L$reduction_loopx
ALIGN 32
$L$reduction_loopx:
mov rbx,r8
mulx r8,rax,QWORD[rbp]
; Lowest word: only the carry-out of this addition is used; rax is
; overwritten by the next mulx.
adcx rax,rbx
adox r8,r9
mulx r9,rax,QWORD[8+rbp]
adcx r8,rax
adox r9,r10
mulx r10,rbx,QWORD[16+rbp]
adcx r9,rbx
adox r10,r11
mulx r11,rbx,QWORD[24+rbp]
adcx r10,rbx
adox r11,r12
; DB below encodes: mulx r12,rbx,QWORD[32+rbp]
DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
; Compute the next round factor in the middle of the round: park the
; current factor in rax, multiply the new r8 by the multiplier (only the
; low half, rbx, is kept), then restore rdx. mulx and mov leave both
; carry chains intact.
mov rax,rdx
mov rdx,r8
adcx r11,rbx
adox r12,r13
mulx rdx,rbx,QWORD[((128+8))+rsp]
mov rdx,rax
mulx r13,rax,QWORD[40+rbp]
adcx r12,rax
adox r13,r14
; DB below encodes: mulx r14,rax,QWORD[48+rbp]
DB 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
adcx r13,rax
adox r14,r15
mulx r15,rax,QWORD[56+rbp]
; rdx = factor for the next round.
mov rdx,rbx
adcx r14,rax
; Fold the final carry of each chain into the top word (rsi is 0).
adox r15,rsi
adcx r15,rsi
dec ecx
jne NEAR $L$reduction_loopx
DB 0F3h,0C3h ;repret
ALIGN 32
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper: store the value in r8..r15 to
; [rdi], then add (negated modulus AND mask) to it and store again.
; In:    r8..r15   eight-word value (r8 = lowest)
;        rdi       output buffer, 8 qwords
;        rbp       modulus, 8 qwords
;        rcx       mask: 0 leaves the value unchanged, all-ones
;                  subtracts the modulus
; Out:   [rdi] and r8..r15 = final value
; Clobb: flags
; The negated modulus is formed as neg(word 0) with NOT of words 1..7,
; which equals the two's complement only while word 0 is non-zero
; (presumably guaranteed by the callers -- TODO confirm).
;-----------------------------------------------------------------------
__rsaz_512_subtract:
        ; 1. Park the unadjusted value in the output buffer.
        mov     QWORD[rdi],r8
        mov     QWORD[rdi+8],r9
        mov     QWORD[rdi+16],r10
        mov     QWORD[rdi+24],r11
        mov     QWORD[rdi+32],r12
        mov     QWORD[rdi+40],r13
        mov     QWORD[rdi+48],r14
        mov     QWORD[rdi+56],r15

        ; 2. Fetch the eight modulus words.
        mov     r8,QWORD[rbp]
        mov     r9,QWORD[rbp+8]
        mov     r10,QWORD[rbp+16]
        mov     r11,QWORD[rbp+24]
        mov     r12,QWORD[rbp+32]
        mov     r13,QWORD[rbp+40]
        mov     r14,QWORD[rbp+48]
        mov     r15,QWORD[rbp+56]

        ; 3. Negate: neg on the lowest word, NOT on the others.
        neg     r8
        not     r9
        not     r10
        not     r11
        not     r12
        not     r13
        not     r14
        not     r15

        ; 4. Keep the negated modulus only when the mask is all-ones.
        and     r8,rcx
        and     r9,rcx
        and     r10,rcx
        and     r11,rcx
        and     r12,rcx
        and     r13,rcx
        and     r14,rcx
        and     r15,rcx

        ; 5. Add the parked value back in, propagating the carry.
        add     r8,QWORD[rdi]
        adc     r9,QWORD[rdi+8]
        adc     r10,QWORD[rdi+16]
        adc     r11,QWORD[rdi+24]
        adc     r12,QWORD[rdi+32]
        adc     r13,QWORD[rdi+40]
        adc     r14,QWORD[rdi+48]
        adc     r15,QWORD[rdi+56]

        ; 6. Write the final value.
        mov     QWORD[rdi],r8
        mov     QWORD[rdi+8],r9
        mov     QWORD[rdi+16],r10
        mov     QWORD[rdi+24],r11
        mov     QWORD[rdi+32],r12
        mov     QWORD[rdi+40],r13
        mov     QWORD[rdi+48],r14
        mov     QWORD[rdi+56],r15
        DB      0F3h,0C3h               ;repret
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper: 8-qword by 8-qword schoolbook
; multiplication using MUL/ADC.
; In:    rsi       first operand a, 8 qwords
;        rbp       second operand b, 8 qwords
;        rbx       b[0] (preloaded by the caller)
; Out:   16-qword product at [8+rsp .. 135+rsp] as seen here, which is
;        the caller's [rsp .. 127+rsp]; r8..r15 also hold the top eight
;        words on return.
; Clobb: rax, rbx, rcx, rdx, rdi, rbp, flags
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_mul:
; rdi walks the product buffer (the +8 skips the return address).
lea rdi,[8+rsp]
; First row: a[0..7] * b[0]. Word 0 is stored; the rest stays in r8..r15.
mov rax,QWORD[rsi]
mul rbx
mov QWORD[rdi],rax
mov rax,QWORD[8+rsi]
mov r8,rdx
mul rbx
add r8,rax
mov rax,QWORD[16+rsi]
mov r9,rdx
adc r9,0
mul rbx
add r9,rax
mov rax,QWORD[24+rsi]
mov r10,rdx
adc r10,0
mul rbx
add r10,rax
mov rax,QWORD[32+rsi]
mov r11,rdx
adc r11,0
mul rbx
add r11,rax
mov rax,QWORD[40+rsi]
mov r12,rdx
adc r12,0
mul rbx
add r12,rax
mov rax,QWORD[48+rsi]
mov r13,rdx
adc r13,0
mul rbx
add r13,rax
mov rax,QWORD[56+rsi]
mov r14,rdx
adc r14,0
mul rbx
add r14,rax
; rax is reloaded with a[0] for the first multiply of the next row.
mov rax,QWORD[rsi]
mov r15,rdx
adc r15,0
; Advance to b[1] and product word 1; seven more rows follow.
lea rbp,[8+rbp]
lea rdi,[8+rdi]
mov ecx,7
jmp NEAR $L$oop_mul
ALIGN 32
$L$oop_mul:
; Row i: add a[0..7] * b[i] into r8..r15. The finished low word is
; written to [rdi] and the window moves up by one word.
mov rbx,QWORD[rbp]
mul rbx
add r8,rax
mov rax,QWORD[8+rsi]
mov QWORD[rdi],r8
mov r8,rdx
adc r8,0
mul rbx
add r9,rax
mov rax,QWORD[16+rsi]
adc rdx,0
add r8,r9
mov r9,rdx
adc r9,0
mul rbx
add r10,rax
mov rax,QWORD[24+rsi]
adc rdx,0
add r9,r10
mov r10,rdx
adc r10,0
mul rbx
add r11,rax
mov rax,QWORD[32+rsi]
adc rdx,0
add r10,r11
mov r11,rdx
adc r11,0
mul rbx
add r12,rax
mov rax,QWORD[40+rsi]
adc rdx,0
add r11,r12
mov r12,rdx
adc r12,0
mul rbx
add r13,rax
mov rax,QWORD[48+rsi]
adc rdx,0
add r12,r13
mov r13,rdx
adc r13,0
mul rbx
add r14,rax
mov rax,QWORD[56+rsi]
adc rdx,0
add r13,r14
mov r14,rdx
; lea does not touch the flags, so the pending carry reaches the adc.
lea rbp,[8+rbp]
adc r14,0
mul rbx
add r15,rax
mov rax,QWORD[rsi]
adc rdx,0
add r14,r15
mov r15,rdx
adc r15,0
lea rdi,[8+rdi]
dec ecx
jnz NEAR $L$oop_mul
; Store the remaining eight (high) product words.
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; __rsaz_512_mulx -- internal helper: 8-qword by 8-qword multiplication
; using MULX with the ADCX (CF) and ADOX (OF) carry chains.
; In:    rsi       first operand a, 8 qwords
;        rbp       second operand b, 8 qwords
;        rdx       b[0] (preloaded by the caller)
;        CF        consumed by the first adc below, so it must be clear.
;                  Both callers in this file arrive from a taken je
;                  after cmp with equal operands, followed only by mov
;                  and call, which leaves CF clear.
; Out:   16-qword product at [8+rsp .. 135+rsp] as seen here, which is
;        the caller's [rsp .. 127+rsp]; r8..r15 also hold the top eight
;        words on return.
; Clobb: rax, rbx, rcx, rdx, rdi, flags
;-----------------------------------------------------------------------
ALIGN 32
__rsaz_512_mulx:
; First row: a[0..7] * b[0]. Word 0 is stored; the rest stays in r8..r15.
mulx r8,rbx,QWORD[rsi]
; rcx counts from -6 up to 0 and doubles as the load/store index.
mov rcx,-6
mulx r9,rax,QWORD[8+rsi]
mov QWORD[8+rsp],rbx
mulx r10,rbx,QWORD[16+rsi]
adc r8,rax
mulx r11,rax,QWORD[24+rsi]
adc r9,rbx
mulx r12,rbx,QWORD[32+rsi]
adc r10,rax
mulx r13,rax,QWORD[40+rsi]
adc r11,rbx
mulx r14,rbx,QWORD[48+rsi]
adc r12,rax
mulx r15,rax,QWORD[56+rsi]
; rdx = b[1] for the first loop iteration.
mov rdx,QWORD[8+rbp]
adc r13,rbx
adc r14,rax
adc r15,0
; rdi = 0 for the carry chains; xor also clears CF and OF.
xor rdi,rdi
jmp NEAR $L$oop_mulx
ALIGN 32
$L$oop_mulx:
; Row i: low halves ride the CF chain (adcx), the previous upper words
; ride the OF chain (adox). rbx receives the finished low word.
mov rbx,r8
mulx r8,rax,QWORD[rsi]
adcx rbx,rax
adox r8,r9
mulx r9,rax,QWORD[8+rsi]
adcx r8,rax
adox r9,r10
mulx r10,rax,QWORD[16+rsi]
adcx r9,rax
adox r10,r11
mulx r11,rax,QWORD[24+rsi]
adcx r10,rax
adox r11,r12
; DB below encodes: mulx r12,rax,QWORD[32+rsi], preceded by a 0x3e
; prefix byte (presumably padding).
DB 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
adcx r11,rax
adox r12,r13
mulx r13,rax,QWORD[40+rsi]
adcx r12,rax
adox r13,r14
mulx r14,rax,QWORD[48+rsi]
adcx r13,rax
adox r14,r15
mulx r15,rax,QWORD[56+rsi]
; Next multiplier word: rcx = -6 loads b[2], rcx = -1 loads b[7].
mov rdx,QWORD[64+rcx*8+rbp]
; Store the finished word: rcx = -6 addresses [16+rsp] here, which is
; product word 1.
mov QWORD[((8+64-8))+rcx*8+rsp],rbx
adcx r14,rax
adox r15,rdi
adcx r15,rdi
inc rcx
jnz NEAR $L$oop_mulx
; Last row (b[7]), unrolled outside the loop because there is no
; further multiplier word to load.
mov rbx,r8
mulx r8,rax,QWORD[rsi]
adcx rbx,rax
adox r8,r9
; DB below encodes: mulx r9,rax,QWORD[8+rsi]
DB 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
adcx r8,rax
adox r9,r10
; DB below encodes: mulx r10,rax,QWORD[16+rsi]
DB 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
adcx r9,rax
adox r10,r11
mulx r11,rax,QWORD[24+rsi]
adcx r10,rax
adox r11,r12
mulx r12,rax,QWORD[32+rsi]
adcx r11,rax
adox r12,r13
mulx r13,rax,QWORD[40+rsi]
adcx r12,rax
adox r13,r14
; DB below encodes: mulx r14,rax,QWORD[48+rsi]
DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcx r13,rax
adox r14,r15
; DB below encodes: mulx r15,rax,QWORD[56+rsi]
DB 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcx r14,rax
adox r15,rdi
adcx r15,rdi
; Store product word 7 (rbx) and the eight high words (r8..r15).
mov QWORD[((8+64-8))+rsp],rbx
mov QWORD[((8+64))+rsp],r8
mov QWORD[((8+64+8))+rsp],r9
mov QWORD[((8+64+16))+rsp],r10
mov QWORD[((8+64+24))+rsp],r11
mov QWORD[((8+64+32))+rsp],r12
mov QWORD[((8+64+40))+rsp],r13
mov QWORD[((8+64+48))+rsp],r14
mov QWORD[((8+64+56))+rsp],r15
DB 0F3h,0C3h ;repret
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- copy eight consecutive qwords into a table, one
; qword every 128 bytes.
; ABI:   Microsoft x64, arguments used in place (leaf, no stack frame)
; In:    rcx = table base
;        rdx = source, 8 qwords
;        r8  = slot index; the first destination is rcx + 8*r8
; Out:   nothing
; Clobb: rax, rcx, rdx, r9, flags
;-----------------------------------------------------------------------
global rsaz_512_scatter4
ALIGN 16
rsaz_512_scatter4:
        mov     r9d,8                   ; eight qwords to copy
        lea     rcx,[rcx+r8*8]          ; rcx = first destination slot
        jmp     NEAR $L$oop_scatter
ALIGN 16
$L$oop_scatter:
        mov     rax,QWORD[rdx]          ; fetch the next source qword
        mov     QWORD[rcx],rax          ; drop it into the table
        lea     rdx,[rdx+8]             ; next source qword
        lea     rcx,[rcx+128]           ; next table row
        dec     r9d
        jnz     NEAR $L$oop_scatter
        DB      0F3h,0C3h               ;repret
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- read eight qwords out of a 16-entry table laid out
; with one 128-byte row per qword, selecting the entry with compare
; masks.
; ABI:   Microsoft x64, arguments used in place
; In:    rcx = destination, 8 qwords
;        rdx = table (movdqa loads: must be 16-byte aligned)
;        r8d = entry index
; Clobb: rcx, rdx, r9, xmm0..xmm5, flags (xmm6..xmm15 are saved/restored)
; Every row is read in full, so the addresses touched do not depend on
; the index.
; The prologue is emitted as raw bytes; the cumulative instruction
; lengths (0x07, 0x0b, 0x10, 0x16, ... 0x46) are the code offsets listed
; in $L$SEH_info_rsaz_512_gather4, so the encodings must not change.
;-----------------------------------------------------------------------
global rsaz_512_gather4
ALIGN 16
rsaz_512_gather4:
$L$SEH_begin_rsaz_512_gather4:
; sub rsp,0xa8
DB 0x48,0x81,0xec,0xa8,0x00,0x00,0x00
; movaps [rsp],xmm6
DB 0x0f,0x29,0x34,0x24
; movaps [rsp+0x10],xmm7
DB 0x0f,0x29,0x7c,0x24,0x10
; movaps [rsp+0x20],xmm8
DB 0x44,0x0f,0x29,0x44,0x24,0x20
; movaps [rsp+0x30],xmm9
DB 0x44,0x0f,0x29,0x4c,0x24,0x30
; movaps [rsp+0x40],xmm10
DB 0x44,0x0f,0x29,0x54,0x24,0x40
; movaps [rsp+0x50],xmm11
DB 0x44,0x0f,0x29,0x5c,0x24,0x50
; movaps [rsp+0x60],xmm12
DB 0x44,0x0f,0x29,0x64,0x24,0x60
; movaps [rsp+0x70],xmm13
DB 0x44,0x0f,0x29,0x6c,0x24,0x70
; movaps [rsp+0x80],xmm14 (32-bit displacement form)
DB 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0
; movaps [rsp+0x90],xmm15 (32-bit displacement form)
DB 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0
; Build eight selection masks in xmm0..xmm7. xmm8 holds the index in all
; four dwords; the candidates start at {0,0,1,1} and grow by {2,2,2,2}
; per register, ending at {14,14,15,15} in xmm7.
movd xmm8,r8d
movdqa xmm1,XMMWORD[(($L$inc+16))]
movdqa xmm0,XMMWORD[$L$inc]
pshufd xmm8,xmm8,0
movdqa xmm7,xmm1
movdqa xmm2,xmm1
paddd xmm1,xmm0
pcmpeqd xmm0,xmm8
movdqa xmm3,xmm7
paddd xmm2,xmm1
pcmpeqd xmm1,xmm8
movdqa xmm4,xmm7
paddd xmm3,xmm2
pcmpeqd xmm2,xmm8
movdqa xmm5,xmm7
paddd xmm4,xmm3
pcmpeqd xmm3,xmm8
movdqa xmm6,xmm7
paddd xmm5,xmm4
pcmpeqd xmm4,xmm8
paddd xmm6,xmm5
pcmpeqd xmm5,xmm8
paddd xmm7,xmm6
pcmpeqd xmm6,xmm8
pcmpeqd xmm7,xmm8
; Eight output qwords.
mov r9d,8
jmp NEAR $L$oop_gather
ALIGN 16
$L$oop_gather:
; Load one 128-byte row, mask each 16-byte piece, OR everything together
; and fold the two qword halves (pshufd 0x4e swaps them); the selected
; qword ends up in the low half of xmm8.
movdqa xmm8,XMMWORD[rdx]
movdqa xmm9,XMMWORD[16+rdx]
movdqa xmm10,XMMWORD[32+rdx]
movdqa xmm11,XMMWORD[48+rdx]
pand xmm8,xmm0
movdqa xmm12,XMMWORD[64+rdx]
pand xmm9,xmm1
movdqa xmm13,XMMWORD[80+rdx]
pand xmm10,xmm2
movdqa xmm14,XMMWORD[96+rdx]
pand xmm11,xmm3
movdqa xmm15,XMMWORD[112+rdx]
lea rdx,[128+rdx]
pand xmm12,xmm4
pand xmm13,xmm5
pand xmm14,xmm6
pand xmm15,xmm7
por xmm8,xmm10
por xmm9,xmm11
por xmm8,xmm12
por xmm9,xmm13
por xmm8,xmm14
por xmm9,xmm15
por xmm8,xmm9
pshufd xmm9,xmm8,0x4e
por xmm8,xmm9
movq QWORD[rcx],xmm8
lea rcx,[8+rcx]
dec r9d
jnz NEAR $L$oop_gather
; Restore xmm6..xmm15 and release the 0xa8-byte frame.
movaps xmm6,XMMWORD[rsp]
movaps xmm7,XMMWORD[16+rsp]
movaps xmm8,XMMWORD[32+rsp]
movaps xmm9,XMMWORD[48+rsp]
movaps xmm10,XMMWORD[64+rsp]
movaps xmm11,XMMWORD[80+rsp]
movaps xmm12,XMMWORD[96+rsp]
movaps xmm13,XMMWORD[112+rsp]
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
add rsp,0xa8
DB 0F3h,0C3h ;repret
$L$SEH_end_rsaz_512_gather4:
; Seed constants for the gather index masks, read by rsaz_512_gather4
; and rsaz_512_mul_gather4: the first row {0,0,1,1} holds the starting
; candidate indices, the second row {2,2,2,2} is the per-register step.
ALIGN 64
$L$inc:
DD 0,0,1,1
DD 2,2,2,2
;-----------------------------------------------------------------------
; se_handler -- exception handler referenced by the .xdata records of
; the five framed entry points in this file.
; In (as used below): r8 = context record, r9 = dispatcher record.
; NOTE(review): the numeric offsets into r8 and r9 are presumably the
; Win64 CONTEXT and DISPATCHER_CONTEXT field offsets (120 = Rax,
; 152 = Rsp, 248 = Rip, 512 = Xmm6; 8 = ImageBase, 56 = HandlerData,
; 40 = ContextRecord) -- verify against winnt.h.
; The handler data consists of the two image-relative labels that follow
; the handler reference in .xdata: the end of the prologue ("body") and
; the start of the epilogue.
; Returns eax = 1.
;-----------------------------------------------------------------------
EXTERN __imp_RtlVirtualUnwind
ALIGN 16
se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
; rax = value taken from context+120, rbx = faulting address (context+248).
mov rax,QWORD[120+r8]
mov rbx,QWORD[248+r8]
; rsi = image base, r11 = handler data (two image-relative labels).
mov rsi,QWORD[8+r9]
mov r11,QWORD[56+r9]
; Fault before the body label: frame not built yet, keep rax as is.
mov r10d,DWORD[r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jb NEAR $L$common_seh_tail
; rax = stack pointer at the time of the fault (context+152).
mov rax,QWORD[152+r8]
; Fault at or after the epilogue label: frame already released.
mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$common_seh_tail
; Inside the body: skip the 128+24 bytes of locals and the six pushes.
lea rax,[((128+24+48))+rax]
; rsaz_512_mul_gather4 is recognised by its epilogue label; its frame is
; 176 bytes larger and also holds xmm6..xmm15.
lea rbx,[$L$mul_gather4_epilogue]
cmp rbx,r10
jne NEAR $L$se_not_in_mul_gather4
lea rax,[176+rax]
; rsi = rax-216 = faulting rsp+160, the xmm6 save slot; copy 20 qwords
; (ten XMM registers) to context+512.
lea rsi,[((-48-168))+rax]
lea rdi,[512+r8]
mov ecx,20
; DD below encodes the bytes FC F3 48 A5: cld / rep movsq.
DD 0xa548f3fc
$L$se_not_in_mul_gather4:
; Recover the six pushed registers from just below rax and write them
; into the context record.
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
mov r13,QWORD[((-32))+rax]
mov r14,QWORD[((-40))+rax]
mov r15,QWORD[((-48))+rax]
mov QWORD[144+r8],rbx
mov QWORD[160+r8],rbp
mov QWORD[216+r8],r12
mov QWORD[224+r8],r13
mov QWORD[232+r8],r14
mov QWORD[240+r8],r15
$L$common_seh_tail:
; rax = stack pointer as it was on function entry; the prologues saved
; rdi and rsi at [8+rsp] and [16+rsp] relative to it.
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax
mov QWORD[168+r8],rsi
mov QWORD[176+r8],rdi
; Copy the updated context (154 qwords) to the record at [40+r9].
mov rdi,QWORD[40+r9]
mov rsi,r8
mov ecx,154
; DD below encodes the bytes FC F3 48 A5: cld / rep movsq.
DD 0xa548f3fc
; Call RtlVirtualUnwind with rcx = 0 and the remaining arguments taken
; from the dispatcher record (register arguments from [rsi], [8+rsi],
; [16+rsi]; stack arguments at [32+rsp] .. [56+rsp]).
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD[8+rsi]
mov r8,QWORD[rsi]
mov r9,QWORD[16+rsi]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10
mov QWORD[40+rsp],r11
mov QWORD[48+rsp],r12
mov QWORD[56+rsp],rcx
call QWORD[__imp_RtlVirtualUnwind]
; Return 1 (presumably ExceptionContinueSearch -- verify).
mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; Function table: one triple of image-relative addresses per function --
; begin label, end label, and the matching unwind record in .xdata.
; rsaz_512_scatter4 and the __rsaz_512_* helpers have no entry here.
section .pdata rdata align=4
ALIGN 4
DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase
DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase
DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase
DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase
DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase
DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase
; Unwind records referenced from .pdata.
; The first five records share one shape: a 4-byte header (9,0,0,0),
; the image-relative address of se_handler, and two image-relative
; labels that se_handler reads as its handler data (end of prologue,
; start of epilogue).
; NOTE(review): header byte 9 is presumably UNWIND_INFO version 1 with
; the exception-handler flag set, with zero prologue size and zero
; unwind codes -- verify against the Win64 unwind data format.
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_rsaz_512_sqr:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_gather4:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_scatter4:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_by_one:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase
; rsaz_512_gather4 uses a table-driven record instead of se_handler.
; Header: prologue size 0x46 bytes, 0x16 code slots. Each following row
; starts with the prologue byte offset at which a step completes; these
; offsets equal the cumulative lengths of the DB-encoded prologue of
; rsaz_512_gather4, listed in reverse order.
; NOTE(review): the second and third bytes of each row are presumably
; the operation/register byte and the save slot in 16-byte units
; (0xf8 = save xmm15 ... 0x68 = save xmm6), and the last row the 0xa8
; stack allocation in 8-byte units (0x15) -- verify against the Win64
; UNWIND_CODE definition.
$L$SEH_info_rsaz_512_gather4:
DB 0x01,0x46,0x16,0x00
DB 0x46,0xf8,0x09,0x00
DB 0x3d,0xe8,0x08,0x00
DB 0x34,0xd8,0x07,0x00
DB 0x2e,0xc8,0x06,0x00
DB 0x28,0xb8,0x05,0x00
DB 0x22,0xa8,0x04,0x00
DB 0x1c,0x98,0x03,0x00
DB 0x16,0x88,0x02,0x00
DB 0x10,0x78,0x01,0x00
DB 0x0b,0x68,0x00,0x00
DB 0x07,0x01,0x15,0x00