; Source URL: https://opencores.org/ocsvn/openrisc/openrisc/trunk
; Subversion repository: openrisc
; Path: /openrisc/trunk/gnu-dev/or1k-gcc/zlib/contrib/masmx86/inffas32.asm - Rev 745
; (repository web viewer links: Compare with Previous | Blame | View Log)
;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
;
;   if you define NO_GZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm

;-----------------------------------------------------------------------
; Syntax : MASM, Intel operand order (op dst, src), 32-bit flat model.
; Export : _inflate_fast
; Call   : two dword arguments on the stack, plain `ret` (caller pops).
;          NOTE(review): presumably the C prototype is
;              void inflate_fast(z_streamp strm, unsigned start)
;          as in inffast.c -- confirm against the zlib headers in use.
; Saves  : edi, esi, ebp, ebx and EFLAGS are pushed on entry and restored
;          on exit.  eax, ecx, edx are clobbered.  The MMX path also
;          clobbers mm0-mm7 and executes `emms` before returning.
;
; The stream structure is only accessed through raw offsets.  The field
; names used in the comments below are assumed from zlib.h -- TODO confirm:
;     [strm+0]  next_in     [strm+4]  avail_in
;     [strm+12] next_out    [strm+16] avail_out
;     [strm+24] msg         [strm+28] state
;
; Stack frame after the prologue (5 pushes + 64 bytes of locals):
;     [esp+0]      lmask  = (1 << state->lenbits)  - 1
;     [esp+4]      dmask  = (1 << state->distbits) - 1
;     [esp+8]      lcode  = state->lencode
;     [esp+12]     dcode  = state->distcode
;     [esp+16]     end    = next_out + avail_out - 257
;     [esp+20]     last   = next_in  + avail_in  - 11
;     [esp+24]     len    = length of the current match (integer path)
;     [esp+28..39] buf    = 12-byte scratch copy of a short input
;     [esp+40]     beg    = next_out - (start - avail_out)
;     [esp+44]     in     = saved input pointer
;     [esp+48]     write  = state->write
;     [esp+52]     wsize  = state->wsize
;     [esp+56]     window = state->window
;     [esp+60]     out    = next_out at entry
;     [esp+84]     return address
;     [esp+88]     first argument  (strm)
;     [esp+92]     second argument (start)
;
; Code table entries are dwords, used by the code below as:
;     bits 0..7   (al)  op   : 0 = literal, bit 16 = base value with
;                              (op & 15) extra bits, bit 64 clear (and
;                              bit 16 clear) = link to a second-level
;                              table, bit 32 = end of block
;     bits 8..15  (ah)  bits : number of input bits the code consumes
;     bits 16..31       val  : literal byte, base value or table offset
;-----------------------------------------------------------------------

; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
zlib1222sup             equ     8

; Values written to state->mode when leaving the decoder.  They differ
; depending on whether the C side was built with gzip support.
IFDEF GUNZIP
  INFLATE_MODE_TYPE     equ     11
  INFLATE_MODE_BAD      equ     26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE   equ     11
    INFLATE_MODE_BAD    equ     26
  ELSE
    INFLATE_MODE_TYPE   equ     3
    INFLATE_MODE_BAD    equ     17
  ENDIF
ENDIF

; 75 "inffast.S"
;FILE "inffast.S"
;;;GLOBAL _inflate_fast
;;;SECTION .text

        .586p
        .mmx

        name    inflate_fast_x86
        .MODEL  FLAT

_DATA                   segment

; MMX dispatch state: 1 = CPU not probed yet (initial value),
; 2 = use the MMX loop, 3 = use the integer loop.
inflate_fast_use_mmx:
        dd      1

_TEXT                   segment
PUBLIC _inflate_fast

ALIGN 4
_inflate_fast:
        jmp     inflate_fast_entry      ; skip the constant data that follows

ALIGN 4
        db      'Fast decoding Code from Chris Anderson'
        db      0

ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code'
        db      0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code'
        db      0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back'
        db      0

; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32.  Used by the MMX path
; to isolate n extra bits.
ALIGN 4
inflate_fast_mask:
        dd      0, 1, 3, 7
        dd      15, 31, 63, 127
        dd      255, 511, 1023, 2047
        dd      4095, 8191, 16383, 32767
        dd      65535, 131071, 262143, 524287
        dd      1048575, 2097151, 4194303, 8388607
        dd      16777215, 33554431, 67108863, 134217727
        dd      268435455, 536870911, 1073741823, 2147483647
        dd      4294967295

; Byte offsets of the inflate_state fields used here.
mode_state      equ     0                       ;/* state->mode */
wsize_state     equ     (32+zlib1222sup)        ;/* state->wsize */
write_state     equ     (36+4+zlib1222sup)      ;/* state->write */
window_state    equ     (40+4+zlib1222sup)      ;/* state->window */
hold_state      equ     (44+4+zlib1222sup)      ;/* state->hold */
bits_state      equ     (48+4+zlib1222sup)      ;/* state->bits */
lencode_state   equ     (64+4+zlib1222sup)      ;/* state->lencode */
distcode_state  equ     (68+4+zlib1222sup)      ;/* state->distcode */
lenbits_state   equ     (72+4+zlib1222sup)      ;/* state->lenbits */
distbits_state  equ     (76+4+zlib1222sup)      ;/* state->distbits */

;;SECTION .text
; 205 "inffast.S"
;GLOBAL inflate_fast_use_mmx
;SECTION .data
; GLOBAL inflate_fast_use_mmx:object
;.size inflate_fast_use_mmx, 4
; 226 "inffast.S"
;SECTION .text

ALIGN 4
inflate_fast_entry:
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd
        sub     esp, 64                 ; room for the locals listed above
        cld                             ; string instructions must run forward

        mov     esi, [esp+88]           ; esi = strm
        mov     edi, [esi+28]           ; edi = strm->state
        mov     edx, [esi+4]
        mov     eax, [esi+0]
        add     edx, eax
        sub     edx, 11                 ; edx = next_in + avail_in - 11
        mov     [esp+44], eax           ; in
        mov     [esp+20], edx           ; last
        mov     ebp, [esp+92]           ; ebp = start
        mov     ecx, [esi+16]
        mov     ebx, [esi+12]
        sub     ebp, ecx
        neg     ebp
        add     ebp, ebx                ; ebp = next_out - (start - avail_out)
        sub     ecx, 257
        add     ecx, ebx                ; ecx = next_out + avail_out - 257
        mov     [esp+60], ebx           ; out
        mov     [esp+40], ebp           ; beg
        mov     [esp+16], ecx           ; end

; 285 "inffast.S"
        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]
        mov     [esp+8], eax            ; lcode
        mov     [esp+12], ecx           ; dcode

        mov     eax, 1
        mov     ecx, [edi+lenbits_state]
        shl     eax, cl
        dec     eax
        mov     [esp+0], eax            ; lmask = (1 << lenbits) - 1

        mov     eax, 1
        mov     ecx, [edi+distbits_state]
        shl     eax, cl
        dec     eax
        mov     [esp+4], eax            ; dmask = (1 << distbits) - 1

        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]
        mov     [esp+52], eax           ; wsize
        mov     [esp+48], ecx           ; write
        mov     [esp+56], edx           ; window

        mov     ebp, [edi+hold_state]   ; ebp = bit accumulator
        mov     ebx, [edi+bits_state]   ; ebx = number of valid bits in it

; 321 "inffast.S"
        mov     esi, [esp+44]           ; esi = in
        mov     ecx, [esp+20]           ; ecx = last
        cmp     ecx, esi
        ja      L_align_long            ; last > in: read the input in place

        ; last <= in, i.e. at most 11 input bytes.  Copy them into the
        ; scratch buffer, zero-pad it to 12 bytes and decode from there.
        add     ecx, 11
        sub     ecx, esi                ; ecx = avail_in
        mov     eax, 12
        sub     eax, ecx                ; eax = number of padding bytes
        lea     edi, [esp+28]
        rep movsb
        mov     ecx, eax
        xor     eax, eax
        rep stosb
        lea     esi, [esp+28]           ; input now comes from buf
        mov     [esp+20], esi           ; last = buf, so `last > in` fails at
                                        ; the next loop test
        jmp     L_is_aligned

        ; Pull single bytes into the accumulator until esi is 4-byte aligned.
L_align_long:
        test    esi, 3
        jz      L_is_aligned
        xor     eax, eax
        mov     al, [esi]
        inc     esi
        mov     ecx, ebx
        add     ebx, 8
        shl     eax, cl
        or      ebp, eax                ; hold |= byte << bits
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+60]           ; edi = out

; 366 "inffast.S"
        ; Dispatch on inflate_fast_use_mmx; probe the CPU on first use.
L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx], 2
        je      L_init_mmx              ; 2: MMX loop
        ja      L_do_loop               ; 3: integer loop

        push    eax                     ; cpuid overwrites eax/ebx/ecx/edx
        push    ebx
        push    ecx
        push    edx

        ; CPUID exists only if EFLAGS bit 21 (ID) can be toggled.
        pushfd
        mov     eax, [esp]
        xor     dword ptr [esp], 0200000h
        popfd
        pushfd
        pop     edx
        xor     edx, eax
        jz      L_dont_use_mmx

        ; Vendor string must be "GenuineIntel" (ebx:edx:ecx of leaf 0).
        xor     eax, eax
        cpuid
        cmp     ebx, 0756e6547h         ; "Genu"
        jne     L_dont_use_mmx
        cmp     ecx, 06c65746eh         ; "ntel"
        jne     L_dont_use_mmx
        cmp     edx, 049656e69h         ; "ineI"
        jne     L_dont_use_mmx

        ; Leaf 1: require family 6 and the MMX feature bit (edx bit 23).
        mov     eax, 1
        cpuid
        shr     eax, 8
        and     eax, 15
        cmp     eax, 6
        jne     L_dont_use_mmx
        test    edx, 0800000h
        jnz     L_use_mmx
        jmp     L_dont_use_mmx

L_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx], 2
        jmp     L_check_mmx_pop

L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx], 3

L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx             ; dispatch again with the result cached

;-----------------------------------------------------------------------
; Integer decode loop.
;   esi = input pointer    edi = output pointer
;   ebp = bit accumulator  bl  = number of valid bits in ebp
;-----------------------------------------------------------------------
; 426 "inffast.S"
ALIGN 4
L_do_loop:
; 437 "inffast.S"
        cmp     bl, 15
        ja      L_get_length_code       ; more than 15 bits already buffered
        xor     eax, eax
        lodsw                           ; fetch 16 more input bits
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax

L_get_length_code:
        mov     edx, [esp+0]            ; lmask
        mov     ecx, [esp+8]            ; lcode
        and     edx, ebp
        mov     eax, [ecx+edx*4]        ; eax = lcode[hold & lmask]

L_dolen:
        mov     cl, ah
        sub     bl, ah
        shr     ebp, cl                 ; drop the bits this code consumed
        test    al, al
        jnz     L_test_for_length_base  ; op != 0: not a literal
        shr     eax, 16
        stosb                           ; literal: emit val

L_while_test:
        cmp     [esp+16], edi
        jbe     L_break_loop            ; stop once out >= end
        cmp     [esp+20], esi
        ja      L_do_loop               ; continue while in < last
        jmp     L_break_loop

L_test_for_length_base:
; 502 "inffast.S"
        mov     edx, eax
        shr     edx, 16                 ; edx = val (base length)
        mov     cl, al
        test    al, 16
        jz      L_test_for_second_level_length
        and     cl, 15                  ; cl = number of extra length bits
        jz      L_save_len
        cmp     bl, cl
        jae     L_add_bits_to_len       ; enough bits buffered
        mov     ch, cl                  ; keep the count across the refill
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax
        mov     cl, ch

L_add_bits_to_len:
        mov     eax, 1
        shl     eax, cl
        dec     eax
        sub     bl, cl
        and     eax, ebp
        shr     ebp, cl
        add     edx, eax                ; len = base + extra bits

L_save_len:
        mov     [esp+24], edx           ; len

L_decode_distance:
; 549 "inffast.S"
        cmp     bl, 15
        ja      L_get_distance_code
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax

L_get_distance_code:
        mov     edx, [esp+4]            ; dmask
        mov     ecx, [esp+12]           ; dcode
        and     edx, ebp
        mov     eax, [ecx+edx*4]        ; eax = dcode[hold & dmask]

L_dodist:
        mov     edx, eax
        shr     edx, 16                 ; edx = val (base distance)
        mov     cl, ah
        sub     bl, ah
        shr     ebp, cl                 ; drop the bits this code consumed

; 584 "inffast.S"
        mov     cl, al
        test    al, 16
        jz      L_test_for_second_level_dist
        and     cl, 15                  ; cl = number of extra distance bits
        jz      L_check_dist_one
        cmp     bl, cl
        jae     L_add_bits_to_dist
        mov     ch, cl                  ; keep the count across the refill
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax
        mov     cl, ch

L_add_bits_to_dist:
        mov     eax, 1
        shl     eax, cl
        dec     eax
        sub     bl, cl
        and     eax, ebp
        shr     ebp, cl
        add     edx, eax                ; dist = base + extra bits
        jmp     L_check_window

L_check_window:
; 625 "inffast.S"
        mov     [esp+44], esi           ; save in; esi becomes the copy source
        mov     eax, edi
        sub     eax, [esp+40]           ; eax = out - beg
        cmp     eax, edx
        jb      L_clip_window           ; dist reaches back before beg
        mov     ecx, [esp+24]           ; ecx = len
        mov     esi, edi
        sub     esi, edx                ; esi = out - dist
        sub     ecx, 3                  ; three bytes are always copied by
                                        ; hand (assumes len >= 3)
        mov     al, [esi]
        mov     [edi], al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi, 3
        mov     [edi+1], al
        mov     [edi+2], dl
        add     edi, 3
        rep movsb
        mov     esi, [esp+44]           ; restore in
        jmp     L_while_test

        ; No extra distance bits.  If dist == 1 and out != beg the match is
        ; a run of the previous output byte; fill it with stosb.
ALIGN 4
L_check_dist_one:
        cmp     edx, 1
        jne     L_check_window
        cmp     [esp+40], edi
        je      L_check_window
        dec     edi
        mov     ecx, [esp+24]           ; ecx = len
        mov     al, [edi]               ; al = previous output byte
        sub     ecx, 3
        mov     [edi+1], al
        mov     [edi+2], al
        mov     [edi+3], al
        add     edi, 4
        rep stosb
        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al, 64
        jnz     L_test_for_end_of_block
        mov     eax, 1
        shl     eax, cl                 ; cl = op here (bits 16 and 64 clear)
        dec     eax
        and     eax, ebp
        add     eax, edx                ; index = val + (hold & ((1 << op) - 1))
        mov     edx, [esp+8]
        mov     eax, [edx+eax*4]        ; eax = lcode[index]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al, 64
        jnz     L_invalid_distance_code
        mov     eax, 1
        shl     eax, cl
        dec     eax
        and     eax, ebp
        add     eax, edx                ; index = val + (hold & ((1 << op) - 1))
        mov     edx, [esp+12]
        mov     eax, [edx+eax*4]        ; eax = dcode[index]
        jmp     L_dodist

        ; The match starts before beg, so its first bytes come from the
        ; sliding window.  On entry eax = out - beg and edx = dist.
ALIGN 4
L_clip_window:
; 721 "inffast.S"
        mov     ecx, eax
        mov     eax, [esp+52]           ; eax = wsize
        neg     ecx
        mov     esi, [esp+56]           ; esi = window
        cmp     eax, edx
        jb      L_invalid_distance_too_far      ; dist > wsize
        add     ecx, edx                ; ecx = dist - (out - beg): bytes that
                                        ; lie in the window
        cmp     dword ptr [esp+48], 0
        jne     L_wrap_around_window    ; write != 0
        sub     eax, ecx
        add     esi, eax                ; esi = window + wsize - ecx

; 749 "inffast.S"
        mov     eax, [esp+24]           ; eax = len
        cmp     eax, ecx
        jbe     L_do_copy1              ; whole match is inside the window
        sub     eax, ecx
        rep movsb                       ; window part
        mov     esi, edi
        sub     esi, edx                ; remainder comes from out - dist
        jmp     L_do_copy1
        ; An unlabelled duplicate of the seven instructions above followed
        ; this jump in the original listing.  It was unreachable and has
        ; been dropped.

L_wrap_around_window:
; 793 "inffast.S"
        mov     eax, [esp+48]           ; eax = write
        cmp     ecx, eax
        jbe     L_contiguous_in_window
        add     esi, [esp+52]
        add     esi, eax
        sub     esi, ecx                ; esi = window + wsize + write - ecx
        sub     ecx, eax                ; ecx = bytes before the wrap point
        mov     eax, [esp+24]           ; eax = len
        cmp     eax, ecx
        jbe     L_do_copy1
        sub     eax, ecx
        rep movsb                       ; part located at the end of the window
        mov     esi, [esp+56]           ; continue at the start of the window
        mov     ecx, [esp+48]           ; ecx = write
        cmp     eax, ecx
        jbe     L_do_copy1
        sub     eax, ecx
        rep movsb                       ; part located at the start of the window
        mov     esi, edi
        sub     esi, edx                ; remainder comes from out - dist
        jmp     L_do_copy1

L_contiguous_in_window:
; 836 "inffast.S"
        add     esi, eax
        sub     esi, ecx                ; esi = window + write - ecx
        mov     eax, [esp+24]           ; eax = len
        cmp     eax, ecx
        jbe     L_do_copy1
        sub     eax, ecx
        rep movsb
        mov     esi, edi
        sub     esi, edx                ; remainder comes from out - dist

L_do_copy1:
; 862 "inffast.S"
        mov     ecx, eax                ; copy what is left of the match
        rep movsb
        mov     esi, [esp+44]           ; restore in
        jmp     L_while_test

;-----------------------------------------------------------------------
; MMX decode loop.
;   esi = input pointer       edi = output pointer
;   mm0 = bit accumulator     ebp = number of valid bits in mm0
;   mm1 = bit count still to be shifted out of mm0
;   mm3/mm4 = lmask (master / working copy)
;   mm2/mm5 = dmask (master / working copy)
;   ebx = lcode while decoding lengths, distance while copying
;   edx = match length
;-----------------------------------------------------------------------
; 878 "inffast.S"
ALIGN 4
L_init_mmx:
        emms
        movd    mm0, ebp                ; mm0 = hold
        mov     ebp, ebx                ; ebp = bits

; 896 "inffast.S"
        movd    mm4, [esp+0]
        movq    mm3, mm4                ; lmask
        movd    mm5, [esp+4]
        movq    mm2, mm5                ; dmask
        pxor    mm1, mm1                ; nothing pending to shift out
        mov     ebx, [esp+8]            ; ebx = lcode
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0, mm1                ; drop the bits used by the last code
        cmp     ebp, 32
        ja      L_get_length_code_mmx   ; more than 32 bits already buffered
        movd    mm6, ebp
        movd    mm7, [esi]              ; fetch 32 more input bits
        add     esi, 4
        psllq   mm7, mm6
        add     ebp, 32
        por     mm0, mm7

L_get_length_code_mmx:
        pand    mm4, mm0
        movd    eax, mm4                ; eax = hold & lmask
        movq    mm4, mm3                ; reload the working mask
        mov     eax, [ebx+eax*4]        ; eax = lcode[hold & lmask]

L_dolen_mmx:
        movzx   ecx, ah                 ; ecx = bits consumed by this code
        movd    mm1, ecx                ; shifted out of mm0 later
        sub     ebp, ecx
        test    al, al
        jnz     L_test_for_length_base_mmx      ; op != 0: not a literal
        shr     eax, 16
        stosb                           ; literal: emit val

L_while_test_mmx:
        cmp     [esp+16], edi
        jbe     L_break_loop            ; stop once out >= end
        cmp     [esp+20], esi
        ja      L_do_loop_mmx           ; continue while in < last
        jmp     L_break_loop

L_test_for_length_base_mmx:
        mov     edx, eax
        shr     edx, 16                 ; edx = val (base length)
        test    al, 16
        jz      L_test_for_second_level_length_mmx
        and     eax, 15                 ; eax = number of extra length bits
        jz      L_decode_distance_mmx
        psrlq   mm0, mm1
        movd    mm1, eax
        movd    ecx, mm0
        sub     ebp, eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx, ecx                ; len = base + extra bits

L_decode_distance_mmx:
        psrlq   mm0, mm1
        cmp     ebp, 32
        ja      L_get_dist_code_mmx
        movd    mm6, ebp
        movd    mm7, [esi]
        add     esi, 4
        psllq   mm7, mm6
        add     ebp, 32
        por     mm0, mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+12]           ; ebx = dcode
        pand    mm5, mm0
        movd    eax, mm5                ; eax = hold & dmask
        movq    mm5, mm2                ; reload the working mask
        mov     eax, [ebx+eax*4]        ; eax = dcode[hold & dmask]

L_dodist_mmx:
        movzx   ecx, ah                 ; ecx = bits consumed by this code
        mov     ebx, eax
        shr     ebx, 16                 ; ebx = val (base distance)
        sub     ebp, ecx
        movd    mm1, ecx
        test    al, 16
        jz      L_test_for_second_level_dist_mmx
        and     eax, 15                 ; eax = number of extra distance bits
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0, mm1
        movd    mm1, eax
        movd    ecx, mm0
        sub     ebp, eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx, ecx                ; dist = base + extra bits

L_check_window_mmx:
        mov     [esp+44], esi           ; save in; esi becomes the copy source
        mov     eax, edi
        sub     eax, [esp+40]           ; eax = out - beg
        cmp     eax, ebx
        jb      L_clip_window_mmx       ; dist reaches back before beg
        mov     ecx, edx                ; ecx = len
        mov     esi, edi
        sub     esi, ebx                ; esi = out - dist
        sub     ecx, 3                  ; three bytes are always copied by
                                        ; hand (assumes len >= 3)
        mov     al, [esi]
        mov     [edi], al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi, 3
        mov     [edi+1], al
        mov     [edi+2], dl
        add     edi, 3
        rep movsb
        mov     esi, [esp+44]           ; restore in
        mov     ebx, [esp+8]            ; ebx = lcode again
        jmp     L_while_test_mmx

        ; dist == 1 and out != beg: run of the previous output byte.
ALIGN 4
L_check_dist_one_mmx:
        cmp     ebx, 1
        jne     L_check_window_mmx
        cmp     [esp+40], edi
        je      L_check_window_mmx
        dec     edi
        mov     ecx, edx                ; ecx = len
        mov     al, [edi]               ; al = previous output byte
        sub     ecx, 3
        mov     [edi+1], al
        mov     [edi+2], al
        mov     [edi+3], al
        add     edi, 4
        rep stosb
        mov     ebx, [esp+8]            ; ebx = lcode again
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al, 64
        jnz     L_test_for_end_of_block
        and     eax, 15
        psrlq   mm0, mm1
        movd    ecx, mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx, edx                ; index = val + low bits of hold
        mov     eax, [ebx+ecx*4]        ; eax = lcode[index]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al, 64
        jnz     L_invalid_distance_code
        and     eax, 15
        psrlq   mm0, mm1
        movd    ecx, mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+12]
        add     ecx, ebx                ; index = val + low bits of hold
        mov     eax, [eax+ecx*4]        ; eax = dcode[index]
        jmp     L_dodist_mmx

        ; Same window handling as L_clip_window, with dist in ebx and the
        ; match length in edx.  On entry eax = out - beg.
ALIGN 4
L_clip_window_mmx:
        mov     ecx, eax
        mov     eax, [esp+52]           ; eax = wsize
        neg     ecx
        mov     esi, [esp+56]           ; esi = window
        cmp     eax, ebx
        jb      L_invalid_distance_too_far      ; dist > wsize
        add     ecx, ebx                ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+48], 0
        jne     L_wrap_around_window_mmx        ; write != 0
        sub     eax, ecx
        add     esi, eax                ; esi = window + wsize - ecx
        cmp     edx, ecx
        jbe     L_do_copy1_mmx          ; whole match is inside the window
        sub     edx, ecx
        rep movsb                       ; window part
        mov     esi, edi
        sub     esi, ebx                ; remainder comes from out - dist
        jmp     L_do_copy1_mmx
        ; An unlabelled duplicate of the seven instructions above followed
        ; this jump in the original listing.  It was unreachable and has
        ; been dropped.

L_wrap_around_window_mmx:
        mov     eax, [esp+48]           ; eax = write
        cmp     ecx, eax
        jbe     L_contiguous_in_window_mmx
        add     esi, [esp+52]
        add     esi, eax
        sub     esi, ecx                ; esi = window + wsize + write - ecx
        sub     ecx, eax                ; ecx = bytes before the wrap point
        cmp     edx, ecx
        jbe     L_do_copy1_mmx
        sub     edx, ecx
        rep movsb                       ; part located at the end of the window
        mov     esi, [esp+56]           ; continue at the start of the window
        mov     ecx, [esp+48]           ; ecx = write
        cmp     edx, ecx
        jbe     L_do_copy1_mmx
        sub     edx, ecx
        rep movsb                       ; part located at the start of the window
        mov     esi, edi
        sub     esi, ebx                ; remainder comes from out - dist
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:
        add     esi, eax
        sub     esi, ecx                ; esi = window + write - ecx
        cmp     edx, ecx
        jbe     L_do_copy1_mmx
        sub     edx, ecx
        rep movsb
        mov     esi, edi
        sub     esi, ebx                ; remainder comes from out - dist

L_do_copy1_mmx:
        mov     ecx, edx                ; copy what is left of the match
        rep movsb
        mov     esi, [esp+44]           ; restore in
        mov     ebx, [esp+8]            ; ebx = lcode again
        jmp     L_while_test_mmx

;-----------------------------------------------------------------------
; Loop exits shared by both paths.  ecx = message pointer (0 for none),
; edx = value to store in state->mode.
;-----------------------------------------------------------------------
; 1174 "inffast.S"
L_invalid_distance_code:
        mov     ecx, invalid_distance_code_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        test    al, 32
        jz      L_invalid_literal_length_code
        mov     ecx, 0                  ; end of block: no message
        mov     edx, INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, invalid_literal_length_code_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        mov     esi, [esp+44]           ; esi held the window pointer; restore in
        mov     ecx, invalid_distance_too_far_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_update_stream_state:
        mov     eax, [esp+88]           ; eax = strm
        test    ecx, ecx
        jz      L_skip_msg
        mov     [eax+24], ecx           ; strm->msg = message
L_skip_msg:
        mov     eax, [eax+28]           ; eax = strm->state
        mov     [eax+mode_state], edx   ; state->mode = edx
        jmp     L_break_loop

        ; Write the decoder state back to strm / state and return.
ALIGN 4
L_break_loop:
; 1243 "inffast.S"
        cmp     dword ptr [inflate_fast_use_mmx], 2
        jne     L_update_next_in
        mov     ebx, ebp                ; MMX path keeps the bit count in ebp

L_update_next_in:
; 1266 "inffast.S"
        mov     eax, [esp+88]           ; eax = strm
        mov     ecx, ebx
        mov     edx, [eax+28]           ; edx = strm->state
        shr     ecx, 3                  ; ecx = whole unused bytes in hold
        sub     esi, ecx                ; hand them back to the input
        shl     ecx, 3
        sub     ebx, ecx                ; ebx = bits & 7
        mov     [eax+12], edi           ; next_out = out
        mov     [edx+bits_state], ebx
        mov     ecx, ebx

        lea     ebx, [esp+28]
        cmp     [esp+20], ebx
        jne     L_buf_not_used

        ; Input was decoded from the scratch buffer: translate esi and last
        ; back into the caller's input buffer.
        sub     esi, ebx                ; esi = bytes consumed from buf
        mov     ebx, [eax+0]
        mov     [esp+20], ebx
        add     esi, ebx                ; esi = next_in + consumed
        mov     ebx, [eax+4]
        sub     ebx, 11
        add     [esp+20], ebx           ; last = next_in + avail_in - 11

L_buf_not_used:
        mov     [eax+0], esi            ; next_in = in

        mov     ebx, 1
        shl     ebx, cl
        dec     ebx                     ; ebx = (1 << bits) - 1

        cmp     dword ptr [inflate_fast_use_mmx], 2
        jne     L_update_hold
        psrlq   mm0, mm1                ; apply the pending shift
        movd    ebp, mm0                ; ebp = hold
        emms                            ; leave MMX state clean

L_update_hold:
        and     ebp, ebx                ; keep only the valid bits
        mov     [edx+hold_state], ebp

        ; avail_in = 11 + (last - in), computed without a negative
        ; intermediate value.
        mov     ebx, [esp+20]
        cmp     ebx, esi
        jbe     L_last_is_smaller
        sub     ebx, esi
        add     ebx, 11
        mov     [eax+4], ebx
        jmp     L_fixup_out

L_last_is_smaller:
        sub     esi, ebx
        neg     esi
        add     esi, 11
        mov     [eax+4], esi

        ; avail_out = 257 + (end - out), same scheme.
L_fixup_out:
        mov     ebx, [esp+16]
        cmp     ebx, edi
        jbe     L_end_is_smaller
        sub     ebx, edi
        add     ebx, 257
        mov     [eax+16], ebx
        jmp     L_done

L_end_is_smaller:
        sub     edi, ebx
        neg     edi
        add     edi, 257
        mov     [eax+16], edi

L_done:
        add     esp, 64
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret

_TEXT   ends
end
