; Mirror of https://github.com/CloverHackyColor/CloverBootloader.git
; (synced 2024-12-02 13:03:28 +01:00)
; 346 lines, 9.2 KiB, NASM
; Pick the code section for whichever object format NASM is producing.
%ifidn __OUTPUT_FORMAT__,obj
        ; -f obj: explicitly 32-bit code segment, 64-byte aligned.
        section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
        ; -f win32: defines the absolute symbol @feat.00 = 1 (the leading '$'
        ; makes NASM treat it as an identifier).
        ; NOTE(review): presumably the COFF feature-flags symbol; confirm
        ; against the linker documentation.
$@feat.00 equ 1
        section .text code align=64
%else
        ; Any other output format.
        section .text code
%endif

; The extern below is intentionally left disabled: this file declares
; _OPENSSL_ia32cap_P itself as a common symbol in its .bss segment.
;extern _OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; __mul_1x1_mmx
; 32 x 32 -> 64-bit carry-less (polynomial over GF(2)) multiply, MMX flavour.
; Private register convention (not a C calling convention):
;   In:    eax = a, ebx = b
;   Out:   mm0 = 64-bit product
;   Clobb: ebx, ecx, edx, esi, edi, ebp, mm1-mm5, flags (eax is not written)
;   Stack: 36 bytes reserved; tab[0..7] lives at [esp] .. [esp+28]
; MMX state is left live on return; no emms is issued here.
;
; Method: with a' = a & 0x3fffffff, tab[i] = a' (x) i for i = 0..7 is built
; from a', a'<<1, a'<<2 and XORs.  b is then consumed three bits at a time
; (11 windows, shifts 0,3,...,30) and the selected table entry, shifted into
; place, is XORed into mm0.  Bits 31 and 30 of a, which the table omits, are
; folded in separately as (b << 31) and (b << 30) via mm5 and mm4.
; Integer and MMX instructions are interleaved (presumably for scheduling);
; the original order is kept unchanged.
;-----------------------------------------------------------------------
        align   16
__mul_1x1_mmx:
        sub     esp, 36                         ; scratch for tab[0..7]
        mov     ecx, eax
        lea     edx, [eax*1+eax]                ; edx = a << 1
        and     ecx, 0x3fffffff                 ; ecx = a' = a without bits 31:30
        lea     ebp, [edx*1+edx]                ; ebp = a << 2 (= a' << 2)
        mov     dword [esp], 0                  ; tab[0] = 0
        and     edx, 0x7fffffff                 ; edx = a' << 1
        movd    mm2, eax                        ; mm2 = a (for the top-bit masks)
        movd    mm3, ebx                        ; mm3 = b
        mov     dword [esp+4], ecx              ; tab[1] = a'
        xor     ecx, edx
        pxor    mm5, mm5
        pxor    mm4, mm4
        mov     dword [esp+8], edx              ; tab[2] = a'<<1
        xor     edx, ebp
        mov     dword [esp+12], ecx             ; tab[3] = tab[1]^tab[2]
        pcmpgtd mm5, mm2                        ; low dword all-ones iff bit 31 of a set
        paddd   mm2, mm2                        ; move bit 30 of a up to bit 31
        xor     ecx, edx
        mov     dword [esp+16], ebp             ; tab[4] = a'<<2
        xor     ebp, edx
        pand    mm5, mm3                        ; b if bit 31 of a, else 0
        pcmpgtd mm4, mm2                        ; low dword all-ones iff bit 30 of a set
        mov     dword [esp+20], ecx             ; tab[5] = tab[1]^tab[4]
        xor     ebp, ecx
        psllq   mm5, 31                         ; mm5 = (bit31 ? b : 0) << 31
        pand    mm4, mm3                        ; b if bit 30 of a, else 0
        mov     dword [esp+24], edx             ; tab[6] = tab[2]^tab[4]
        mov     esi, 7
        mov     dword [esp+28], ebp             ; tab[7] = tab[1]^tab[2]^tab[4]
        mov     ebp, esi                        ; ebp = 7, the 3-bit window mask

        ; --- windows 0 and 1 of b -----------------------------------------
        and     esi, ebx
        shr     ebx, 3
        mov     edi, ebp
        psllq   mm4, 30                         ; mm4 = (bit30 ? b : 0) << 30
        and     edi, ebx
        shr     ebx, 3
        movd    mm0, dword [esp+esi*4]          ; acc = tab[w0]
        mov     esi, ebp
        and     esi, ebx
        shr     ebx, 3
        movd    mm2, dword [esp+edi*4]
        mov     edi, ebp
        psllq   mm2, 3
        and     edi, ebx
        shr     ebx, 3
        pxor    mm0, mm2                        ; acc ^= tab[w1] << 3

        ; --- window 2 -----------------------------------------------------
        movd    mm1, dword [esp+esi*4]
        mov     esi, ebp
        psllq   mm1, 6
        and     esi, ebx
        shr     ebx, 3
        pxor    mm0, mm1                        ; acc ^= tab[w2] << 6

        ; --- window 3 -----------------------------------------------------
        movd    mm2, dword [esp+edi*4]
        mov     edi, ebp
        psllq   mm2, 9
        and     edi, ebx
        shr     ebx, 3
        pxor    mm0, mm2                        ; acc ^= tab[w3] << 9

        ; --- window 4 -----------------------------------------------------
        movd    mm1, dword [esp+esi*4]
        mov     esi, ebp
        psllq   mm1, 12
        and     esi, ebx
        shr     ebx, 3
        pxor    mm0, mm1                        ; acc ^= tab[w4] << 12

        ; --- window 5 -----------------------------------------------------
        movd    mm2, dword [esp+edi*4]
        mov     edi, ebp
        psllq   mm2, 15
        and     edi, ebx
        shr     ebx, 3
        pxor    mm0, mm2                        ; acc ^= tab[w5] << 15

        ; --- window 6 -----------------------------------------------------
        movd    mm1, dword [esp+esi*4]
        mov     esi, ebp
        psllq   mm1, 18
        and     esi, ebx
        shr     ebx, 3
        pxor    mm0, mm1                        ; acc ^= tab[w6] << 18

        ; --- window 7 -----------------------------------------------------
        movd    mm2, dword [esp+edi*4]
        mov     edi, ebp
        psllq   mm2, 21
        and     edi, ebx
        shr     ebx, 3
        pxor    mm0, mm2                        ; acc ^= tab[w7] << 21

        ; --- window 8 -----------------------------------------------------
        movd    mm1, dword [esp+esi*4]
        mov     esi, ebp
        psllq   mm1, 24
        and     esi, ebx                        ; esi = last window (top 2 bits of b)
        shr     ebx, 3
        pxor    mm0, mm1                        ; acc ^= tab[w8] << 24

        ; --- windows 9, 10 and the two top-bit corrections ----------------
        movd    mm2, dword [esp+edi*4]
        pxor    mm0, mm4                        ; fold in bit 30 of a
        psllq   mm2, 27
        pxor    mm0, mm2                        ; acc ^= tab[w9] << 27
        movd    mm1, dword [esp+esi*4]
        pxor    mm0, mm5                        ; fold in bit 31 of a
        psllq   mm1, 30
        add     esp, 36                         ; release scratch (table no longer read)
        pxor    mm0, mm1                        ; acc ^= tab[w10] << 30
        ret
;-----------------------------------------------------------------------
; __mul_1x1_ialu
; 32 x 32 -> 64-bit carry-less (polynomial over GF(2)) multiply using only
; integer instructions.
; Private register convention (not a C calling convention):
;   In:    eax = a, ebx = b
;   Out:   edx:eax = 64-bit product (edx = high dword, eax = low dword)
;   Clobb: ebx, ecx, esi, edi, ebp, flags
;   Stack: 36 bytes reserved; tab[0..7] lives at [esp] .. [esp+28]
;
; Method: with a' = a & 0x3fffffff, tab[i] = a' (x) i for i = 0..7.  b is
; consumed three bits at a time (11 windows, shifts 0,3,...,30); each table
; entry t contributes (t << s) to eax and (t >> (32-s)) to edx.  Bits 31 and
; 30 of a, which the table omits, are handled up front by seeding
; edx:eax with (b << 31) and (b << 30) under sign-extended masks.
; The original instruction order is kept unchanged.
;-----------------------------------------------------------------------
        align   16
__mul_1x1_ialu:
        sub     esp, 36                         ; scratch for tab[0..7]
        mov     ecx, eax
        lea     edx, [eax*1+eax]                ; edx = a << 1
        lea     ebp, [eax*4]                    ; ebp = a << 2 (= a' << 2)
        and     ecx, 0x3fffffff                 ; ecx = a' = a without bits 31:30
        lea     edi, [eax*1+eax]                ; edi = a << 1 (bit 30 now in bit 31)
        sar     eax, 31                         ; eax = all-ones iff bit 31 of a set
        mov     dword [esp], 0                  ; tab[0] = 0
        and     edx, 0x7fffffff                 ; edx = a' << 1
        mov     dword [esp+4], ecx              ; tab[1] = a'
        xor     ecx, edx
        mov     dword [esp+8], edx              ; tab[2] = a'<<1
        xor     edx, ebp
        mov     dword [esp+12], ecx             ; tab[3] = tab[1]^tab[2]
        xor     ecx, edx
        mov     dword [esp+16], ebp             ; tab[4] = a'<<2
        xor     ebp, edx
        mov     dword [esp+20], ecx             ; tab[5] = tab[1]^tab[4]
        xor     ebp, ecx
        sar     edi, 31                         ; edi = all-ones iff bit 30 of a set
        and     eax, ebx                        ; eax = bit31 ? b : 0
        mov     dword [esp+24], edx             ; tab[6] = tab[2]^tab[4]
        and     edi, ebx                        ; edi = bit30 ? b : 0
        mov     dword [esp+28], ebp             ; tab[7] = tab[1]^tab[2]^tab[4]

        ; --- seed edx:eax with the two top-bit terms; window 0 -------------
        mov     edx, eax
        shl     eax, 31                         ; low  half of (b << 31)
        mov     ecx, edi
        shr     edx, 1                          ; high half of (b << 31)
        mov     esi, 7
        shl     edi, 30                         ; low  half of (b << 30)
        and     esi, ebx                        ; w0
        shr     ecx, 2                          ; high half of (b << 30)
        xor     eax, edi
        shr     ebx, 3
        mov     edi, 7
        and     edi, ebx                        ; w1
        shr     ebx, 3
        xor     edx, ecx
        xor     eax, dword [esp+esi*4]          ; lo ^= tab[w0]

        ; --- window 1 (shift 3) -------------------------------------------
        mov     esi, 7
        and     esi, ebx                        ; w2
        shr     ebx, 3
        mov     ebp, dword [esp+edi*4]
        mov     edi, 7
        mov     ecx, ebp
        shl     ebp, 3
        and     edi, ebx                        ; w3
        shr     ecx, 29
        xor     eax, ebp
        shr     ebx, 3
        xor     edx, ecx

        ; --- window 2 (shift 6) -------------------------------------------
        mov     ecx, dword [esp+esi*4]
        mov     esi, 7
        mov     ebp, ecx
        shl     ecx, 6
        and     esi, ebx                        ; w4
        shr     ebp, 26
        xor     eax, ecx
        shr     ebx, 3
        xor     edx, ebp

        ; --- window 3 (shift 9) -------------------------------------------
        mov     ebp, dword [esp+edi*4]
        mov     edi, 7
        mov     ecx, ebp
        shl     ebp, 9
        and     edi, ebx                        ; w5
        shr     ecx, 23
        xor     eax, ebp
        shr     ebx, 3
        xor     edx, ecx

        ; --- window 4 (shift 12) ------------------------------------------
        mov     ecx, dword [esp+esi*4]
        mov     esi, 7
        mov     ebp, ecx
        shl     ecx, 12
        and     esi, ebx                        ; w6
        shr     ebp, 20
        xor     eax, ecx
        shr     ebx, 3
        xor     edx, ebp

        ; --- window 5 (shift 15) ------------------------------------------
        mov     ebp, dword [esp+edi*4]
        mov     edi, 7
        mov     ecx, ebp
        shl     ebp, 15
        and     edi, ebx                        ; w7
        shr     ecx, 17
        xor     eax, ebp
        shr     ebx, 3
        xor     edx, ecx

        ; --- window 6 (shift 18) ------------------------------------------
        mov     ecx, dword [esp+esi*4]
        mov     esi, 7
        mov     ebp, ecx
        shl     ecx, 18
        and     esi, ebx                        ; w8
        shr     ebp, 14
        xor     eax, ecx
        shr     ebx, 3
        xor     edx, ebp

        ; --- window 7 (shift 21) ------------------------------------------
        mov     ebp, dword [esp+edi*4]
        mov     edi, 7
        mov     ecx, ebp
        shl     ebp, 21
        and     edi, ebx                        ; w9
        shr     ecx, 11
        xor     eax, ebp
        shr     ebx, 3
        xor     edx, ecx

        ; --- window 8 (shift 24) ------------------------------------------
        mov     ecx, dword [esp+esi*4]
        mov     esi, 7
        mov     ebp, ecx
        shl     ecx, 24
        and     esi, ebx                        ; w10 (top 2 bits of b)
        shr     ebp, 8
        xor     eax, ecx
        shr     ebx, 3
        xor     edx, ebp

        ; --- windows 9 (shift 27) and 10 (shift 30) -----------------------
        mov     ebp, dword [esp+edi*4]
        mov     ecx, ebp
        shl     ebp, 27
        mov     edi, dword [esp+esi*4]
        shr     ecx, 5
        mov     esi, edi
        xor     eax, ebp
        shl     edi, 30
        xor     edx, ecx
        shr     esi, 2
        xor     eax, edi
        xor     edx, esi
        add     esp, 36                         ; release scratch
        ret
;-----------------------------------------------------------------------
; _bn_GF2m_mul_2x2
; 64 x 64 -> 128-bit carry-less (polynomial over GF(2)) multiply.
;
; Stack arguments on entry (as read by the code below):
;   [esp+4]  r  : pointer to four dwords receiving the product, r[0] lowest
;   [esp+8]  a1 : high dword of a       [esp+12] a0 : low dword of a
;   [esp+16] b1 : high dword of b       [esp+20] b0 : low dword of b
; NOTE(review): this matches a 32-bit C stack-argument call of the form
;   void bn_GF2m_mul_2x2(r, a1, a0, b1, b0); confirm types against the C
;   prototype, which is not visible in this file.
;
; Preserves ebx, esi, edi, ebp.  Clobbers eax, ecx, edx, flags and, depending
; on the path taken, xmm0 or mm0-mm7 (the MMX path ends with emms).
;
; Dispatch on the first two dwords of _OPENSSL_ia32cap_P:
;   dword0 bit 23 clear           -> L$000ialu (integer-only path)
;   dword0 bit 24 clear, or
;   dword1 bit  1 clear           -> L$001mmx  (MMX path)
;   otherwise                     -> inline PCLMULQDQ path
; NOTE(review): presumably these are the CPUID.1 EDX/ECX feature words
;   (MMX, FXSR, PCLMULQDQ); confirm against the OPENSSL_ia32cap docs.
;
; The MMX and integer paths use three 32x32 multiplies (Karatsuba style):
;   hi = a1*b1, lo = a0*b0, mid = (a1^a0)*(b1^b0) ^ hi ^ lo
;   result = hi<<64 ^ mid<<32 ^ lo
;-----------------------------------------------------------------------
global  _bn_GF2m_mul_2x2
        align   16
_bn_GF2m_mul_2x2:
L$_bn_GF2m_mul_2x2_begin:
        lea     edx, [_OPENSSL_ia32cap_P]
        mov     eax, dword [edx]                ; capability dword 0
        mov     edx, dword [edx+4]              ; capability dword 1
        test    eax, 0x00800000                 ; bit 23
        jz      NEAR L$000ialu
        test    eax, 0x01000000                 ; bit 24
        jz      NEAR L$001mmx
        test    edx, 2                          ; bit 1
        jz      NEAR L$001mmx

        ; ---- PCLMULQDQ path: no frame, args still at their entry offsets ---
        movups  xmm0, [esp+8]                   ; dwords: a1, a0, b1, b0
        shufps  xmm0, xmm0, 0xb1                ; swap within pairs: a0, a1, b0, b1
        db      0x66,0x0f,0x3a,0x44,0xc0,0x01   ; pclmulqdq xmm0, xmm0, 1
        mov     eax, dword [esp+4]              ; eax = r
        movups  [eax], xmm0                     ; store all 128 bits (unaligned ok)
        ret

        align   16
L$001mmx:
        ; ---- MMX path: four pushes shift the arguments by 16 bytes ---------
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     eax, dword [esp+24]             ; a1
        mov     ebx, dword [esp+32]             ; b1
        call    __mul_1x1_mmx
        movq    mm7, mm0                        ; mm7 = hi = a1*b1
        mov     eax, dword [esp+28]             ; a0
        mov     ebx, dword [esp+36]             ; b0
        call    __mul_1x1_mmx
        movq    mm6, mm0                        ; mm6 = lo = a0*b0
        mov     eax, dword [esp+24]
        mov     ebx, dword [esp+32]
        xor     eax, dword [esp+28]             ; a1 ^ a0
        xor     ebx, dword [esp+36]             ; b1 ^ b0
        call    __mul_1x1_mmx
        pxor    mm0, mm7
        mov     eax, dword [esp+20]             ; eax = r (read before the pops)
        pxor    mm0, mm6                        ; mm0 = mid
        movq    mm2, mm0
        psllq   mm0, 32                         ; low  half of mid << 32
        pop     edi
        psrlq   mm2, 32                         ; high half of mid << 32
        pop     esi
        pxor    mm0, mm6                        ; r[1]:r[0]
        pop     ebx
        pxor    mm2, mm7                        ; r[3]:r[2]
        movq    [eax], mm0
        pop     ebp
        movq    [eax+8], mm2
        emms                                    ; leave MMX state clean for the caller
        ret

        align   16
L$000ialu:
        ; ---- integer path: 4 pushes + 20 bytes of locals = 36-byte shift ---
        ; locals: [esp+8],[esp+12] = hi (low, high); [esp],[esp+4] = lo
        push    ebp
        push    ebx
        push    esi
        push    edi
        sub     esp, 20
        mov     eax, dword [esp+44]             ; a1
        mov     ebx, dword [esp+52]             ; b1
        call    __mul_1x1_ialu
        mov     dword [esp+8], eax              ; hi, low dword
        mov     dword [esp+12], edx             ; hi, high dword
        mov     eax, dword [esp+48]             ; a0
        mov     ebx, dword [esp+56]             ; b0
        call    __mul_1x1_ialu
        mov     dword [esp], eax                ; lo, low dword
        mov     dword [esp+4], edx              ; lo, high dword
        mov     eax, dword [esp+44]
        mov     ebx, dword [esp+52]
        xor     eax, dword [esp+48]             ; a1 ^ a0
        xor     ebx, dword [esp+56]             ; b1 ^ b0
        call    __mul_1x1_ialu                  ; edx:eax = (a1^a0)*(b1^b0)
        mov     ebp, dword [esp+40]             ; ebp = r
        mov     ebx, dword [esp]                ; lo.low
        mov     ecx, dword [esp+4]              ; lo.high
        mov     edi, dword [esp+8]              ; hi.low
        mov     esi, dword [esp+12]             ; hi.high
        xor     eax, edx
        xor     edx, ecx
        xor     eax, ebx
        mov     dword [ebp], ebx                ; r[0] = lo.low
        xor     edx, edi
        mov     dword [ebp+12], esi             ; r[3] = hi.high
        xor     eax, esi
        add     esp, 20
        xor     edx, esi                        ; edx = r[2]
        pop     edi
        xor     eax, edx                        ; eax = r[1]
        pop     esi
        mov     dword [ebp+8], edx              ; r[2]
        pop     ebx
        mov     dword [ebp+4], eax              ; r[1] (ebp is still r; restored next)
        pop     ebp
        ret
; NUL-terminated identification string, emitted after the final ret of the
; preceding routine (data only; never executed).
        db      "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", 0
; 16-byte common symbol for the capability vector.  _bn_GF2m_mul_2x2 reads
; its first two dwords to choose an implementation.  Declared `common` so
; that the linker merges it with any other definition of the same name.
segment .bss
common  _OPENSSL_ia32cap_P 16