OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [zlib/] [contrib/] [masmx86/] [gvmat32.asm] - Blame information for rev 867

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 745 jeremybenn
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
2
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
3
; File written by Gilles Vollant, by modifying the longest_match
4
;  from Jean-loup Gailly in deflate.c
5
;
6
;         http://www.zlib.net
7
;         http://www.winimage.com/zLibDll
8
;         http://www.muppetlabs.com/~breadbox/software/assembly.html
9
;
10
; For Visual C++ 4.x and higher and ML 6.x and higher
11
;   ml.exe is in directory \MASM611C of Win95 DDK
12
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
13
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
14
;
15
; this file contains two implementations of longest_match
16
;
17
;  longest_match_7fff : written 1996 by Gilles Vollant optimized for
18
;            first Pentium. Assume s->w_mask == 0x7fff
19
;  longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro
20
;
21
;  to use an assembly version of longest_match, you need to define ASMV in the project
22
;  There are two ways to use gvmat32.asm
23
;
24
;  A) Suggested method
25
;    if you want include both longest_match_7fff and longest_match_686
26
;    compile the asm file running
27
;           ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm
28
;    and include gvmat32c.c in your project
29
;    if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff,
30
;        longest_match_7fff will be used
31
;    if you have a more modern CPU (Pentium Pro, II and higher)
32
;        longest_match_686 will be used
33
;    on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used,
34
;        but this is not a situation you'll find often
35
;
36
;  B) Alternative
37
;    if you are not interested in old cpu performance and want the smallest
38
;       binaries possible
39
;
40
;    compile the asm file running
41
;           ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm
42
;    and do not include gvmat32c.c in your project (or also define
43
;              NOOLDPENTIUMCODE)
44
;
45
; note : as far as I know, longest_match_686 is much faster than longest_match_7fff
46
;        on pentium Pro/II/III, faster (but less so) on P4, but it seems
47
;        longest_match_7fff can be faster (very very little) on AMD Athlon64/K8
48
;
49
; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
50
 
51
;uInt longest_match_7fff(s, cur_match)
52
;    deflate_state *s;
53
;    IPos cur_match;                             /* current match */
54
 
55
    ; Stack frame of longest_match_7fff, addressed relative to esp as it is
    ; after the prologue (four register pushes, then "sub esp,NbStackAdd").
    ; NbStack is the distance from that esp up to the second argument.
    NbStack         equ     76
56
    ; the two stack arguments: cur_match (second) and the deflate_state pointer (first)
    cur_match       equ     dword ptr[esp+NbStack-0]
57
    str_s           equ     dword ptr[esp+NbStack-4]
58
; 5 dwords on top, from the highest address down (ret,ebp,edi,esi,ebx),
; i.e. the return address followed by the registers in the order they are pushed
59
    adrret          equ     dword ptr[esp+NbStack-8]
60
    pushebp         equ     dword ptr[esp+NbStack-12]
61
    pushedi         equ     dword ptr[esp+NbStack-16]
62
    pushesi         equ     dword ptr[esp+NbStack-20]
63
    pushebx         equ     dword ptr[esp+NbStack-24]
64
 
65
    ; local variables (the area reserved by "sub esp,NbStackAdd")
    chain_length    equ     dword ptr [esp+NbStack-28]
66
    limit           equ     dword ptr [esp+NbStack-32]
67
    best_len        equ     dword ptr [esp+NbStack-36]
68
    window          equ     dword ptr [esp+NbStack-40]
69
    prev            equ     dword ptr [esp+NbStack-44]
70
    ; scan_start is the only 16-bit local (word ptr)
    scan_start      equ      word ptr [esp+NbStack-48]
71
    wmask           equ     dword ptr [esp+NbStack-52]
72
    match_start_ptr equ     dword ptr [esp+NbStack-56]
73
    nice_match      equ     dword ptr [esp+NbStack-60]
74
    scan            equ     dword ptr [esp+NbStack-64]
75
 
76
    windowlen       equ     dword ptr [esp+NbStack-68]
77
    match_start     equ     dword ptr [esp+NbStack-72]
78
    strend          equ     dword ptr [esp+NbStack-76]
79
    ; size of the local-variable area: NbStack minus the 24 bytes taken by
    ; the two arguments, the return address and ... see the equates above
    ; (everything from NbStack-0 down to NbStack-24 is not a local)
    NbStackAdd      equ     (NbStack-24)
80
 
81
    .386p
82
 
83
    name    gvmatch
84
    .MODEL  FLAT
85
 
86
 
87
 
88
;  all the +zlib1222add offsets are due to the addition of fields
89
;  in zlib in the deflate_state structure since the asm code was first written
90
;  (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
91
;  (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
92
;  (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
93
 
94
    zlib1222add         equ     8
95
 
96
;  Note : these values are valid for a structure packed on 8-byte boundaries.
;  They are byte offsets of deflate_state fields, used as [ebp+dep_xxx] with
;  ebp = the deflate_state pointer. Nothing checks them against the C
;  structure, so they must be kept in sync by hand.
97
    dep_chain_length    equ     74h+zlib1222add
98
    dep_window          equ     30h+zlib1222add
99
    dep_strstart        equ     64h+zlib1222add
100
    dep_prev_length     equ     70h+zlib1222add
101
    dep_nice_match      equ     88h+zlib1222add
102
    dep_w_size          equ     24h+zlib1222add
103
    dep_prev            equ     38h+zlib1222add
104
    dep_w_mask          equ     2ch+zlib1222add
105
    dep_good_match      equ     84h+zlib1222add
106
    dep_match_start     equ     68h+zlib1222add
107
    dep_lookahead       equ     6ch+zlib1222add
108
 
109
 
110
_TEXT                   segment
111
 
112
IFDEF NOUNDERLINE
113
   IFDEF NOOLDPENTIUMCODE
114
            public  longest_match
115
            public  match_init
116
   ELSE
117
            public  longest_match_7fff
118
            public  cpudetect32
119
            public  longest_match_686
120
   ENDIF
121
ELSE
122
   IFDEF NOOLDPENTIUMCODE
123
            public  _longest_match
124
            public  _match_init
125
   ELSE
126
            public  _longest_match_7fff
127
            public  _cpudetect32
128
            public  _longest_match_686
129
   ENDIF
130
ENDIF
131
 
132
    MAX_MATCH           equ     258
133
    MIN_MATCH           equ     3
134
    MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)
135
 
136
 
137
 
138
;-----------------------------------------------------------------------
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
; In:    [esp+4] = s, [esp+8] = cur_match (both on the stack)
; Out:   eax = min(best_len, s->lookahead); s->match_start is written
;        at exit
; Saves: ebp, edi, esi, ebx (pushed here, popped at exit);
;        eax, ecx, edx are used as scratch
; Note:  the prev[] index is masked with the constant 7fffh instead of
;        s->w_mask, so the routine is only valid when s->w_mask == 7fffh
;-----------------------------------------------------------------------
IFNDEF NOOLDPENTIUMCODE
139
IFDEF NOUNDERLINE
140
longest_match_7fff   proc near
141
ELSE
142
_longest_match_7fff  proc near
143
ENDIF
144
 
145
; edx = s; it must be read before the pushes change esp
    mov     edx,[esp+4]
146
 
147
 
148
 
149
    push    ebp
150
    push    edi
151
    push    esi
152
    push    ebx
153
 
154
; reserve the local variables; from here on the esp-relative equates
; (cur_match, str_s, chain_length, ...) are valid
    sub     esp,NbStackAdd
155
 
156
; initialize or check the variables used in match.asm.
; ebp = s while the fields are being read
157
    mov     ebp,edx
158
 
159
; chain_length = s->max_chain_length
160
; if (prev_length>=good_match) chain_length >>= 2
; (ja is an unsigned test: the shift is skipped when good_match > prev_length)
161
    mov     edx,[ebp+dep_chain_length]
162
    mov     ebx,[ebp+dep_prev_length]
163
    cmp     [ebp+dep_good_match],ebx
164
    ja      noshr
165
    shr     edx,2
166
noshr:
167
; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
168
    inc     edx
169
    mov     edi,[ebp+dep_nice_match]
170
    mov     chain_length,edx
171
    mov     eax,[ebp+dep_lookahead]
172
    cmp     eax,edi
173
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
174
    jae     nolookaheadnicematch
175
    mov     edi,eax
176
nolookaheadnicematch:
177
; best_len = s->prev_length
178
    mov     best_len,ebx
179
 
180
; window = s->window
181
    mov     esi,[ebp+dep_window]
182
    mov     ecx,[ebp+dep_strstart]
183
    mov     window,esi
184
 
185
    mov     nice_match,edi
186
; scan = window + strstart
187
    add     esi,ecx
188
    mov     scan,esi
189
; dx = *(ush*)scan  (esi holds scan here, not window)
190
    mov     dx,word ptr [esi]
191
; bx = *(ush*)(scan+best_len-1)  (ebx still holds best_len)
192
    mov     bx,word ptr [esi+ebx-1]
193
    add     esi,MAX_MATCH-1
194
; scan_start = *scan
195
    mov     scan_start,dx
196
; strend = scan + MAX_MATCH-1
197
    mov     strend,esi
198
; bx = scan_end = *(ush*)(scan+best_len-1)
199
 
200
;    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
201
;        s->strstart - (IPos)MAX_DIST(s) : NIL;
202
 
203
    mov     esi,[ebp+dep_w_size]
204
    sub     esi,MIN_LOOKAHEAD
205
; here esi = MAX_DIST(s)
; ecx = strstart - MAX_DIST(s), replaced by 0 unless strstart was strictly above
206
    sub     ecx,esi
207
    ja      nodist
208
    xor     ecx,ecx
209
nodist:
210
    mov     limit,ecx
211
 
212
; prev = s->prev
213
    mov     edx,[ebp+dep_prev]
214
    mov     prev,edx
215
 
216
; load the loop registers. Writing bp destroys the s pointer held in ebp;
; it is reloaded from str_s at exitloop.
217
    mov     edx,dword ptr [ebp+dep_match_start]
218
    mov     bp,scan_start
219
    mov     eax,cur_match
220
    mov     match_start,edx
221
 
222
    mov     edx,window
223
    mov     edi,edx
224
    add     edi,best_len
225
    mov     esi,prev
226
    dec     edi
227
; windowlen = window + best_len -1
228
    mov     windowlen,edi
229
 
230
    jmp     beginloop2
231
    align   4
232
 
233
; here, in the loop
234
;       eax = ax = cur_match
235
;       ecx = limit
236
;        bx = scan_end
237
;        bp = scan_start
238
;       edi = windowlen (window + best_len -1)
239
;       esi = prev
;       edx = window
240
 
241
 
242
; here chain_length <= 16: non-unrolled loop, one candidate per iteration.
; The 16 subtracted for the unrolled loop is added back; a result of zero
; means the chain budget is used up.
243
normalbeg0add16:
244
    add     chain_length,16
245
    jz      exitloop
246
normalbeg0:
; quick reject: compare the word at match+best_len-1 with scan_end
247
    cmp     word ptr[edi+eax],bx
248
    je      normalbeg2noroll
249
rcontlabnoroll:
250
; cur_match = prev[cur_match & wmask]   (wmask is the constant 7fffh here)
251
    and     eax,7fffh
252
    mov     ax,word ptr[esi+eax*2]
253
; if cur_match <= limit (unsigned), go to exitloop
254
    cmp     ecx,eax
255
    jnb     exitloop
256
; if --chain_length != 0, loop to normalbeg0; otherwise go to exitloop
257
    dec     chain_length
258
    jnz     normalbeg0
259
    jmp     exitloop
260
 
261
normalbeg2noroll:
262
; if (scan_start==*(cur_match+window)) goto normalbeg2
; otherwise continue with the next chain entry
263
    cmp     bp,word ptr[edx+eax]
264
    jne     rcontlabnoroll
265
    jmp     normalbeg2
266
 
267
; contloop3: re-entry point after a candidate has been examined
; (edi was used as a scratch pointer, so windowlen is reloaded)
contloop3:
268
    mov     edi,windowlen
269
 
270
; cur_match = prev[cur_match & wmask]   (wmask is the constant 7fffh here)
271
    and     eax,7fffh
272
    mov     ax,word ptr[esi+eax*2]
273
; if cur_match <= limit (unsigned), go to exitloop
274
    cmp     ecx,eax
275
jnbexitloopshort1:
276
    jnb     exitloop
277
; the --chain_length for this step is the "+1" in the subtraction below
278
 
279
 
280
; begin the main loop
281
beginloop2:
; pre-charge one round of the unrolled loop (16) plus the pending decrement (1)
282
    sub     chain_length,16+1
283
; if the result is <= 0 (borrow or zero), don't use the unrolled loop
284
    jna     normalbeg0add16
285
 
286
; do16: unrolled loop, 16 candidates per round. chain_length has already
; been reduced by 16 for the round (see beginloop2 and the end of this block).
; Each normalbeg2dcN target is given its own index N so that it can correct
; chain_length by the part of the round that was not used.
do16:
287
    cmp     word ptr[edi+eax],bx
288
    je      normalbeg2dc0
289
 
290
; maccn lab: advance to the next chain entry (leaving through exitloop when
; cur_match <= limit), then jump to lab if the word at match+best_len-1
; equals scan_end. Falls through otherwise.
maccn   MACRO   lab
291
    and     eax,7fffh
292
    mov     ax,word ptr[esi+eax*2]
293
    cmp     ecx,eax
294
    jnb     exitloop
295
    cmp     word ptr[edi+eax],bx
296
    je      lab
297
    ENDM
298
 
299
rcontloop0:
300
    maccn   normalbeg2dc1
301
 
302
rcontloop1:
303
    maccn   normalbeg2dc2
304
 
305
rcontloop2:
306
    maccn   normalbeg2dc3
307
 
308
rcontloop3:
309
    maccn   normalbeg2dc4
310
 
311
rcontloop4:
312
    maccn   normalbeg2dc5
313
 
314
rcontloop5:
315
    maccn   normalbeg2dc6
316
 
317
rcontloop6:
318
    maccn   normalbeg2dc7
319
 
320
rcontloop7:
321
    maccn   normalbeg2dc8
322
 
323
rcontloop8:
324
    maccn   normalbeg2dc9
325
 
326
rcontloop9:
327
    maccn   normalbeg2dc10
328
 
; from here on the targets are requested as short jumps
329
rcontloop10:
330
    maccn   short normalbeg2dc11
331
 
332
rcontloop11:
333
    maccn   short normalbeg2dc12
334
 
335
rcontloop12:
336
    maccn   short normalbeg2dc13
337
 
338
rcontloop13:
339
    maccn   short normalbeg2dc14
340
 
341
rcontloop14:
342
    maccn   short normalbeg2dc15
343
 
; 16th chain step of the round: only advance; the comparison is done by
; do16 or normalbeg0 on the next pass
344
rcontloop15:
345
    and     eax,7fffh
346
    mov     ax,word ptr[esi+eax*2]
347
    cmp     ecx,eax
348
    jnb     exitloop
349
 
350
; charge the next round; stay in the unrolled loop only while the result is > 0
    sub     chain_length,16
351
    ja      do16
352
    jmp     normalbeg0add16
353
 
354
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
355
 
356
; normbeg rcontlab,valsub: second-stage check for a candidate found at
; position valsub of the unrolled round. If the first word of the match
; differs from scan_start, resume the round at rcontlab. Otherwise give back
; the unused part of the round (16-valsub) to chain_length and go to iseq.
normbeg MACRO   rcontlab,valsub
357
; if we are here, we know that *(match+best_len-1) == scan_end
358
    cmp     bp,word ptr[edx+eax]
359
; if (*match != scan_start) goto rcontlab
360
    jne     rcontlab
361
; calculate the good chain_length, and we'll compare scan and match string
362
    add     chain_length,16-valsub
363
    jmp     iseq
364
    ENDM
365
 
366
 
367
; one landing pad per position of the unrolled round; 11..15 are placed
; first, presumably to keep them within short-jump range of the loop
normalbeg2dc11:
368
    normbeg rcontloop11,11
369
 
370
normalbeg2dc12:
371
    normbeg short rcontloop12,12
372
 
373
normalbeg2dc13:
374
    normbeg short rcontloop13,13
375
 
376
normalbeg2dc14:
377
    normbeg short rcontloop14,14
378
 
379
normalbeg2dc15:
380
    normbeg short rcontloop15,15
381
 
382
normalbeg2dc10:
383
    normbeg rcontloop10,10
384
 
385
normalbeg2dc9:
386
    normbeg rcontloop9,9
387
 
388
normalbeg2dc8:
389
    normbeg rcontloop8,8
390
 
391
normalbeg2dc7:
392
    normbeg rcontloop7,7
393
 
394
normalbeg2dc6:
395
    normbeg rcontloop6,6
396
 
397
normalbeg2dc5:
398
    normbeg rcontloop5,5
399
 
400
normalbeg2dc4:
401
    normbeg rcontloop4,4
402
 
403
normalbeg2dc3:
404
    normbeg rcontloop3,3
405
 
406
normalbeg2dc2:
407
    normbeg rcontloop2,2
408
 
409
normalbeg2dc1:
410
    normbeg rcontloop1,1
411
 
412
normalbeg2dc0:
413
    normbeg rcontloop0,0
414
 
415
 
416
; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
417
 
418
normalbeg2:
419
    mov     edi,window
420
 
421
    cmp     bp,word ptr[edi+eax]
422
    jne     contloop3                   ; if *(ushf*)match != scan_start, continue
423
 
424
iseq:
425
; if we are here, we know that *(match+best_len-1) == scan_end
426
; and (*match == scan_start)
; on entry: edx = window, eax = cur_match
427
 
428
    mov     edi,edx
429
    mov     esi,scan                    ; esi = scan
430
    add     edi,eax                     ; edi = window + cur_match = match
431
 
432
    mov     edx,[esi+3]                 ; compare manually dword at scan+3
433
    xor     edx,[edi+3]                 ; and match+3 (edx = 0 when equal)
434
 
435
    jz      begincompare                ; if equal, go to long compare
436
 
437
; we will determine the unmatch byte and calculate len (in esi)
; edx holds the XOR of bytes 3..6; the lowest non-zero byte gives the
; first mismatch, hence len = 3, 4, 5 or 6
438
    or      dl,dl
439
    je      eq1rr
440
    mov     esi,3
441
    jmp     trfinval
442
eq1rr:
443
    or      dx,dx
444
    je      eq1
445
 
446
    mov     esi,4
447
    jmp     trfinval
448
eq1:
449
    and     edx,0ffffffh
450
    jz      eq11
451
    mov     esi,5
452
    jmp     trfinval
453
eq11:
454
    mov     esi,6
455
    jmp     trfinval
456
 
457
begincompare:
458
    ; here we know that scan and match begin the same (bytes 0-1 and 3-6
    ; were compared above; byte 2 is not compared in this routine)
    ; NOTE(review): cmpsd relies on the direction flag being clear; it is
    ; not cleared in this routine - confirm against the calling convention
459
    add     edi,6
460
    add     esi,6
461
    mov     ecx,(MAX_MATCH-(2+4))/4     ; scan for at most MAX_MATCH bytes
462
    repe    cmpsd                       ; loop until mismatch
463
 
464
    je      trfin                       ; go to trfin if not unmatch
465
; we determine the unmatch byte
; esi/edi are one dword past the mismatch: step esi back and locate the
; first differing byte inside that dword
466
    sub     esi,4
467
    mov     edx,[edi-4]
468
    xor     edx,[esi]
469
 
470
    or      dl,dl
471
    jnz     trfin
472
    inc     esi
473
 
474
    or      dx,dx
475
    jnz     trfin
476
    inc     esi
477
 
478
    and     edx,0ffffffh
479
    jnz     trfin
480
    inc     esi
481
 
482
trfin:
483
    sub     esi,scan          ; esi = len
484
trfinval:
485
; here we have finished the compare, and esi contains the length of the equal string
486
    cmp     esi,best_len        ; if len > best_len, go newbestlen
487
    ja      short newbestlen
488
; now we restore edx, ecx and esi, for the big loop
489
    mov     esi,prev
490
    mov     ecx,limit
491
    mov     edx,window
492
    jmp     contloop3
493
 
494
newbestlen:
495
    mov     best_len,esi        ; len become best_len
496
 
497
    mov     match_start,eax     ; save new position as match_start
498
    cmp     esi,nice_match      ; if best_len >= nice_match, exit
499
    jae     exitloop
500
; recompute the values that depend on best_len: windowlen and scan_end (bx)
    mov     ecx,scan
501
    mov     edx,window          ; restore edx=window
502
    add     ecx,esi
503
    add     esi,edx
504
 
505
    dec     esi
506
    mov     windowlen,esi       ; windowlen = window + best_len-1
507
    mov     bx,[ecx-1]          ; bx = *(scan+best_len-1) = scan_end
508
 
509
; now we restore ecx and esi, for the big loop :
510
    mov     esi,prev
511
    mov     ecx,limit
512
    jmp     contloop3
513
 
514
exitloop:
515
; exit : s->match_start=match_start
; ebp is reloaded from str_s because bp was used for scan_start in the loop
516
    mov     ebx,match_start
517
    mov     ebp,str_s
518
    mov     ecx,best_len
519
    mov     dword ptr [ebp+dep_match_start],ebx
520
    mov     eax,dword ptr [ebp+dep_lookahead]
521
    cmp     ecx,eax
522
    ja      minexlo
523
    mov     eax,ecx
524
minexlo:
525
; return min(best_len,s->lookahead)  (in eax)
526
 
527
; restore stack and register ebx,esi,edi,ebp
528
    add     esp,NbStackAdd
529
 
530
    pop     ebx
531
    pop     esi
532
    pop     edi
533
    pop     ebp
534
    ret
535
InfoAuthor:
536
; please don't remove this string !
537
; You are free to use gvmat32 in any free or commercial app if you don't remove the string in the binary!
; (the bytes are emitted after the ret, so they are never executed)
538
    db     0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
539
 
540
 
541
 
542
IFDEF NOUNDERLINE
543
longest_match_7fff   endp
544
ELSE
545
_longest_match_7fff  endp
546
ENDIF
547
 
548
 
549
;-----------------------------------------------------------------------
; cpudetect32: classify the processor by testing which EFLAGS bits can
; be toggled. Takes no arguments.
; Out:   eax = 0300h if the AC bit (40000h) cannot be toggled (80386)
;        eax = 0400h if AC toggles but the ID bit (200000h) does not
;        otherwise eax = the value CPUID leaves in eax for input eax=1
; Saves: ebx (pushed/popped here). ecx is overwritten; CPUID, when it
;        is executed, also overwrites ebx, ecx and edx.
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
550
cpudetect32     proc near
551
ELSE
552
_cpudetect32    proc near
553
ENDIF
554
 
555
    push    ebx
556
 
557
    pushfd                  ; push original EFLAGS
558
    pop     eax             ; get original EFLAGS
559
    mov     ecx, eax        ; save original EFLAGS
560
    xor     eax, 40000h     ; flip AC bit in EFLAGS
561
    push    eax             ; save new EFLAGS value on stack
562
    popfd                   ; replace current EFLAGS value
563
    pushfd                  ; get new EFLAGS
564
    pop     eax             ; store new EFLAGS in EAX
565
    xor     eax, ecx        ; can't toggle AC bit, processor=80386
566
    jz      end_cpu_is_386  ; jump if 80386 processor
567
    push    ecx
568
    popfd                   ; restore AC bit in EFLAGS first
569
 
570
; two copies of EFLAGS are pushed: one is popped into ecx for the test,
; the other stays on the stack and is restored by the lone popfd below
    pushfd
571
    pushfd
572
    pop     ecx
573
 
574
    mov     eax, ecx        ; get original EFLAGS
575
    xor     eax, 200000h    ; flip ID bit in EFLAGS
576
    push    eax             ; save new EFLAGS value on stack
577
    popfd                   ; replace current EFLAGS value
578
    pushfd                  ; get new EFLAGS
579
    pop     eax             ; store new EFLAGS in EAX
580
    popfd                   ; restore original EFLAGS
581
    xor     eax, ecx        ; can't toggle ID bit,
582
    je      is_old_486      ; processor=old
583
 
584
; CPUID is available: query leaf 1. The opcode is emitted as raw bytes,
; presumably because the assembler mode selected by .386p has no CPUID mnemonic.
    mov     eax,1
585
    db      0fh,0a2h        ;CPUID
586
 
587
exitcpudetect:
588
    pop ebx
589
    ret
590
 
591
end_cpu_is_386:
592
    mov     eax,0300h
593
    jmp     exitcpudetect
594
 
595
is_old_486:
596
    mov     eax,0400h
597
    jmp     exitcpudetect
598
 
599
IFDEF NOUNDERLINE
600
cpudetect32     endp
601
ELSE
602
_cpudetect32    endp
603
ENDIF
604
ENDIF
605
 
606
MAX_MATCH       equ     258
607
MIN_MATCH       equ     3
608
MIN_LOOKAHEAD   equ     (MAX_MATCH + MIN_MATCH + 1)
609
; MAX_MATCH rounded up to a multiple of 8 (258 -> 264 = 0108h). This is the
; value the compare loop of longest_match_686 hard-codes as 0108h and, negated,
; as 0fffffef8h. The mask must clear only the low three bits (0FFF8h); the
; previous mask 0FFF0h cleared four bits and evaluated to 256.
MAX_MATCH_8_     equ     ((MAX_MATCH + 7) AND 0FFF8h)
610
 
611
 
612
;;; stack frame offsets
;;; (for longest_match_686: esp-relative, valid after the four pushes and
;;; "sub esp, LocalVarsSize". Unlike the equates used by longest_match_7fff
;;; these are bare address expressions and are used inside brackets, e.g.
;;; "mov eax, [window]".)
;;; NOTE(review): "window" and "scan" are also defined near the top of the
;;; file for longest_match_7fff; from this point on the definitions below
;;; apply. This presumably relies on MASM allowing text equates to be
;;; redefined - confirm before reordering the two routines.
613
 
614
chainlenwmask   equ  esp + 0    ; high word: current chain len
615
                    ; low word: s->wmask
616
window      equ  esp + 4    ; local copy of s->window
617
windowbestlen   equ  esp + 8    ; s->window + bestlen
618
scanstart   equ  esp + 16   ; first two bytes of string
619
scanend     equ  esp + 12   ; last two bytes of string
620
scanalign   equ  esp + 20   ; dword-misalignment of string
621
nicematch   equ  esp + 24   ; a good enough match size
622
bestlen     equ  esp + 28   ; size of best match so far
623
scan        equ  esp + 32   ; ptr to string wanting match
624
 
625
LocalVarsSize   equ 36
626
;   saved ebx   byte esp + 36
627
;   saved edi   byte esp + 40
628
;   saved esi   byte esp + 44
629
;   saved ebp   byte esp + 48
630
;   return address  byte esp + 52
631
deflatestate    equ  esp + 56   ; the function arguments
632
curmatch    equ  esp + 60
633
 
634
;;; Offsets for fields in the deflate_state structure. These numbers
635
;;; are calculated from the definition of deflate_state, with the
636
;;; assumption that the compiler will dword-align the fields. (Thus,
637
;;; changing the definition of deflate_state could easily cause this
638
;;; program to crash horribly, without so much as a warning at
639
;;; compile time. Sigh.)
640
 
641
dsWSize     equ 36+zlib1222add
642
dsWMask     equ 44+zlib1222add
643
dsWindow    equ 48+zlib1222add
644
dsPrev      equ 56+zlib1222add
645
dsMatchLen  equ 88+zlib1222add
646
dsPrevMatch equ 92+zlib1222add
647
dsStrStart  equ 100+zlib1222add
648
dsMatchStart    equ 104+zlib1222add
649
dsLookahead equ 108+zlib1222add
650
dsPrevLen   equ 112+zlib1222add
651
dsMaxChainLen   equ 116+zlib1222add
652
dsGoodMatch equ 132+zlib1222add
653
dsNiceMatch equ 136+zlib1222add
654
 
655
 
656
;;; match.asm -- Pentium-Pro-optimized version of longest_match()
657
;;; Written for zlib 1.1.2
658
;;; Copyright (C) 1998 Brian Raiter 
659
;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
660
;;;
661
;;; This is free software; you can redistribute it and/or modify it
662
;;; under the terms of the GNU General Public License.
663
 
664
;GLOBAL _longest_match, _match_init
665
 
666
 
667
;SECTION    .text
668
 
669
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
670
 
671
;_longest_match:
672
;-----------------------------------------------------------------------
; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
; Assembled as longest_match when NOOLDPENTIUMCODE is defined, otherwise
; as longest_match_686 (with a leading underscore unless NOUNDERLINE is
; defined).
; In:    two stack arguments, read as [deflatestate] and [curmatch]
;        after the frame is set up
; Out:   eax = min(bestlen, s->lookahead); s->match_start is written
;        whenever a longer match is accepted
; Saves: ebp, edi, esi, ebx (pushed here, popped at exit);
;        eax, ecx, edx are used as scratch
;-----------------------------------------------------------------------
IFDEF NOOLDPENTIUMCODE
673
    IFDEF NOUNDERLINE
674
    longest_match       proc near
675
    ELSE
676
    _longest_match      proc near
677
    ENDIF
678
ELSE
679
    IFDEF NOUNDERLINE
680
    longest_match_686   proc near
681
    ELSE
682
    _longest_match_686  proc near
683
    ENDIF
684
ENDIF
685
 
686
;;; Save registers that the compiler may be using, and adjust esp to
687
;;; make room for our stack frame.
688
 
689
        push    ebp
690
        push    edi
691
        push    esi
692
        push    ebx
693
        sub esp, LocalVarsSize
694
 
695
;;; Retrieve the function arguments. ecx will hold cur_match
696
;;; throughout the entire function. edx will hold the pointer to the
697
;;; deflate_state structure during the function's setup (before
698
;;; entering the main loop).
699
 
700
        mov edx, [deflatestate]
701
        mov ecx, [curmatch]
702
 
703
;;; uInt wmask = s->w_mask;
704
;;; unsigned chain_length = s->max_chain_length;
705
;;; if (s->prev_length >= s->good_match) {
706
;;;     chain_length >>= 2;
707
;;; }
;;; The flags set by the cmp survive the two mov instructions, so the jl
;;; still tests prev_length against good_match.
;;; NOTE(review): jl here (and jl/jg/jge further down) are signed tests on
;;; quantities the C code treats as unsigned; the results agree only while
;;; the values stay below 2^31.
708
 
709
        mov eax, [edx + dsPrevLen]
710
        mov ebx, [edx + dsGoodMatch]
711
        cmp eax, ebx
712
        mov eax, [edx + dsWMask]
713
        mov ebx, [edx + dsMaxChainLen]
714
        jl  LastMatchGood
715
        shr ebx, 2
716
LastMatchGood:
717
 
718
;;; chainlen is decremented once beforehand so that the function can
719
;;; use the sign flag instead of the zero flag for the exit test.
720
;;; It is then shifted into the high word, to make room for the wmask
721
;;; value, which it will always accompany.
;;; (This packing requires chainlen to fit in the high word and wmask to
;;; fit in the low word.)
722
 
723
        dec ebx
724
        shl ebx, 16
725
        or  ebx, eax
726
        mov [chainlenwmask], ebx
727
 
728
;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
729
 
730
        mov eax, [edx + dsNiceMatch]
731
        mov ebx, [edx + dsLookahead]
732
        cmp ebx, eax
733
        jl  LookaheadLess
734
        mov ebx, eax
735
LookaheadLess:  mov [nicematch], ebx
736
 
737
;;; register Bytef *scan = s->window + s->strstart;
738
 
739
        mov esi, [edx + dsWindow]
740
        mov [window], esi
741
        mov ebp, [edx + dsStrStart]
742
        lea edi, [esi + ebp]
743
        mov [scan], edi
744
 
745
;;; Determine how many bytes the scan ptr is off from being
746
;;; dword-aligned.
;;; ((-scan) AND 3 = number of bytes up to the next 4-byte boundary.)
747
 
748
        mov eax, edi
749
        neg eax
750
        and eax, 3
751
        mov [scanalign], eax
752
 
753
;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
754
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;
;;; (ebp = strstart on entry to this step, limit afterwards.)
755
 
756
        mov eax, [edx + dsWSize]
757
        sub eax, MIN_LOOKAHEAD
758
        sub ebp, eax
759
        jg  LimitPositive
760
        xor ebp, ebp
761
LimitPositive:
762
 
763
;;; int best_len = s->prev_length;
764
 
765
        mov eax, [edx + dsPrevLen]
766
        mov [bestlen], eax
767
 
768
;;; Store s->window + best_len in the windowbestlen local, and keep it in esi.
769
 
770
        add esi, eax
771
        mov [windowbestlen], esi
772
 
773
;;; register ush scan_start = *(ushf*)scan;
774
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
775
;;; Posf *prev = s->prev;
;;; (Both words are stored zero-extended to 32 bits; scan_end stays in ebx
;;; and prev in edi for the loop.)
776
 
777
        movzx   ebx, word ptr [edi]
778
        mov [scanstart], ebx
779
        movzx   ebx, word ptr [edi + eax - 1]
780
        mov [scanend], ebx
781
        mov edi, [edx + dsPrev]
782
 
783
;;; Jump into the main loop.
784
 
785
        mov edx, [chainlenwmask]
786
        jmp short LoopEntry
787
 
788
align 4
789
 
790
;;; do {
791
;;;     match = s->window + cur_match;
792
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
793
;;;         *(ushf*)match != scan_start) continue;
794
;;;     [...]
795
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
796
;;;          && --chain_length != 0);
797
;;;
798
;;; Here is the inner loop of the function. The function will spend the
799
;;; majority of its time in this loop, and majority of that time will
800
;;; be spent in the first ten instructions.
801
;;;
802
;;; Within this loop:
803
;;; ebx = scanend
804
;;; ecx = curmatch
805
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
806
;;; esi = windowbestlen - i.e., (window + bestlen)
807
;;; edi = prev
808
;;; ebp = limit
;;;
;;; "and ecx, edx" masks curmatch with the low word of edx (wmask); the
;;; chainlen bits in the high word of edx only matter if curmatch has bits
;;; set above bit 15, which is not the case after the movzx below.
;;; The chain counter is decremented by subtracting 10000h from edx; the
;;; loop is left when that turns the sign bit on.
809
 
810
LookupLoop:
811
        and ecx, edx
812
        movzx   ecx, word ptr [edi + ecx*2]
813
        cmp ecx, ebp
814
        jbe LeaveNow
815
        sub edx, 00010000h
816
        js  LeaveNow
817
LoopEntry:  movzx   eax, word ptr [esi + ecx - 1]
818
        cmp eax, ebx
819
        jnz LookupLoop
820
        mov eax, [window]
821
        movzx   eax, word ptr [eax + ecx]
822
        cmp eax, [scanstart]
823
        jnz LookupLoop
824
 
825
;;; Store the current value of chainlen.
;;; (edx is needed as the running offset of the compare loop below.)
826
 
827
        mov [chainlenwmask], edx
828
 
829
;;; Point edi to the string wanting a match (scan), and esi to the
830
;;; candidate (window + curmatch). In actuality, esi and edi are
831
;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
832
;;; initialized to -MAX_MATCH_8, so the first bytes compared are at
;;; offset scanalign and the loop ends when edx reaches zero.
833
 
834
        mov esi, [window]
835
        mov edi, [scan]
836
        add esi, ecx
837
        mov eax, [scanalign]
838
        mov edx, 0fffffef8h; -(MAX_MATCH_8)
839
        lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
840
        lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
841
 
842
;;; Test the strings for equality, 8 bytes at a time. At the end,
843
;;; adjust edx so that it is offset to the exact byte that mismatched.
844
;;;
845
;;; We already know at this point that the first three bytes of the
846
;;; strings match each other, and they can be safely passed over before
847
;;; starting the compare loop. So what this code does is skip over 0-3
848
;;; bytes, as much as necessary in order to dword-align the edi
849
;;; pointer. (esi will still be misaligned three times out of four.)
850
;;;
851
;;; It should be confessed that this loop usually does not represent
852
;;; much of the total running time. Replacing it with a more
853
;;; straightforward "rep cmpsb" would not drastically degrade
854
;;; performance.
855
 
856
LoopCmps:
857
        mov eax, [esi + edx]
858
        xor eax, [edi + edx]
859
        jnz LeaveLoopCmps
860
        mov eax, [esi + edx + 4]
861
        xor eax, [edi + edx + 4]
862
        jnz LeaveLoopCmps4
863
        add edx, 8
864
        jnz LoopCmps
865
        jmp short LenMaximum
;;; eax = XOR of the mismatching dwords. Find the lowest non-zero byte:
;;; skip two bytes if the low word is zero, then add one more (via the
;;; carry from "sub al, 1") if the low byte of what remains is zero.
866
LeaveLoopCmps4: add edx, 4
867
LeaveLoopCmps:  test    eax, 0000FFFFh
868
        jnz LenLower
869
        add edx,  2
870
        shr eax, 16
871
LenLower:   sub al, 1
872
        adc edx, 0
873
 
874
;;; Calculate the length of the match. If it is MAX_MATCH or longer,
876
;;; then automatically accept it as the best possible match and leave.
;;; (edi + edx is the address of the first mismatching byte in the scan
;;; string; subtracting scan gives the length in eax.)
877
 
878
        lea eax, [edi + edx]
879
        mov edi, [scan]
880
        sub eax, edi
881
        cmp eax, MAX_MATCH
882
        jge LenMaximum
883
 
884
;;; If the length of the match is not longer than the best match we
885
;;; have so far, then forget it and return to the lookup loop.
;;; (The loop registers esi, edi, ebx and edx are reloaded first.)
886
 
887
        mov edx, [deflatestate]
888
        mov ebx, [bestlen]
889
        cmp eax, ebx
890
        jg  LongerMatch
891
        mov esi, [windowbestlen]
892
        mov edi, [edx + dsPrev]
893
        mov ebx, [scanend]
894
        mov edx, [chainlenwmask]
895
        jmp LookupLoop
896
 
897
;;;         s->match_start = cur_match;
898
;;;         best_len = len;
899
;;;         if (len >= nice_match) break;
900
;;;         scan_end = *(ushf*)(scan+best_len-1);
;;; (On entry edx = deflate_state pointer, edi = scan, eax = len.
;;; windowbestlen is refreshed as well since it depends on best_len.)
901
 
902
LongerMatch:    mov ebx, [nicematch]
903
        mov [bestlen], eax
904
        mov [edx + dsMatchStart], ecx
905
        cmp eax, ebx
906
        jge LeaveNow
907
        mov esi, [window]
908
        add esi, eax
909
        mov [windowbestlen], esi
910
        movzx   ebx, word ptr [edi + eax - 1]
911
        mov edi, [edx + dsPrev]
912
        mov [scanend], ebx
913
        mov edx, [chainlenwmask]
914
        jmp LookupLoop
914
 
915
;;; Accept the current string, with the maximum possible length.
916
 
917
LenMaximum: mov edx, [deflatestate]
918
        mov dword ptr [bestlen], MAX_MATCH
919
        mov [edx + dsMatchStart], ecx
920
 
921
;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
922
;;; return s->lookahead;
;;; (The result is left in eax.)
923
 
924
LeaveNow:
925
        mov edx, [deflatestate]
926
        mov ebx, [bestlen]
927
        mov eax, [edx + dsLookahead]
928
        cmp ebx, eax
929
        jg  LookaheadRet
930
        mov eax, ebx
931
LookaheadRet:
932
 
933
;;; Restore the stack and return from whence we came.
934
 
935
        add esp, LocalVarsSize
936
        pop ebx
937
        pop esi
938
        pop edi
939
        pop ebp
940
 
941
        ret
942
; please don't remove this string !
943
; You can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!
; (the bytes are emitted after the ret, so they are never executed)
944
    db     0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
945
 
946
 
947
; Close the procedure under the same name it was opened with. When
; NOOLDPENTIUMCODE is defined, also provide match_init, which does nothing
; but return.
IFDEF NOOLDPENTIUMCODE
948
    IFDEF NOUNDERLINE
949
    longest_match       endp
950
    ELSE
951
    _longest_match      endp
952
    ENDIF
953
 
954
    IFDEF NOUNDERLINE
955
    match_init      proc near
956
                    ret
957
    match_init      endp
958
    ELSE
959
    _match_init     proc near
960
                    ret
961
    _match_init     endp
962
    ENDIF
963
ELSE
964
    IFDEF NOUNDERLINE
965
    longest_match_686   endp
966
    ELSE
967
    _longest_match_686  endp
968
    ENDIF
969
ENDIF
970
 
971
_TEXT   ends
972
end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.