OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [zlib/] [contrib/] [masm686/] [match.asm] - Blame information for rev 856

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 745 jeremybenn
 
2
; match.asm -- Pentium-Pro optimized version of longest_match()
3
;
4
; Updated for zlib 1.1.3 and converted to MASM 6.1x
5
; Copyright (C) 2000 Dan Higdon 
6
;                    and Chuck Walbourn 
7
; Corrections by Cosmin Truta 
8
;
9
; This is free software; you can redistribute it and/or modify it
10
; under the terms of the GNU General Public License.
11
 
12
; Based on match.S
13
; Written for zlib 1.1.2
14
; Copyright (C) 1998 Brian Raiter 
15
;
16
; Modified by Gilles Vollant (2005) for add gzhead and gzindex
17
 
18
        .686P
19
        .MODEL  FLAT
20
 
21
;===========================================================================
22
; EQUATES
23
;===========================================================================
24
 
25
MAX_MATCH       EQU 258
26
MIN_MATCH       EQU 3
27
MIN_LOOKAHEAD   EQU (MAX_MATCH + MIN_MATCH + 1)
28
MAX_MATCH_8     EQU ((MAX_MATCH + 7) AND (NOT 7))
29
 
30
;===========================================================================
31
; STRUCTURES
32
;===========================================================================
33
 
34
; This STRUCT assumes a 4-byte alignment
35
 
36
DEFLATE_STATE   STRUCT
37
ds_strm                 dd ?
38
ds_status               dd ?
39
ds_pending_buf          dd ?
40
ds_pending_buf_size     dd ?
41
ds_pending_out          dd ?
42
ds_pending              dd ?
43
ds_wrap                 dd ?
44
; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h)
45
ds_gzhead               dd ?
46
ds_gzindex              dd ?
47
ds_data_type            db ?
48
ds_method               db ?
49
                        db ?    ; padding
50
                        db ?    ; padding
51
ds_last_flush           dd ?
52
ds_w_size               dd ?    ; used
53
ds_w_bits               dd ?
54
ds_w_mask               dd ?    ; used
55
ds_window               dd ?    ; used
56
ds_window_size          dd ?
57
ds_prev                 dd ?    ; used
58
ds_head                 dd ?
59
ds_ins_h                dd ?
60
ds_hash_size            dd ?
61
ds_hash_bits            dd ?
62
ds_hash_mask            dd ?
63
ds_hash_shift           dd ?
64
ds_block_start          dd ?
65
ds_match_length         dd ?    ; used
66
ds_prev_match           dd ?    ; used
67
ds_match_available      dd ?
68
ds_strstart             dd ?    ; used
69
ds_match_start          dd ?    ; used
70
ds_lookahead            dd ?    ; used
71
ds_prev_length          dd ?    ; used
72
ds_max_chain_length     dd ?    ; used
73
ds_max_laxy_match       dd ?
74
ds_level                dd ?
75
ds_strategy             dd ?
76
ds_good_match           dd ?    ; used
77
ds_nice_match           dd ?    ; used
78
 
79
; Don't need anymore of the struct for match
80
DEFLATE_STATE   ENDS
81
 
82
;===========================================================================
83
; CODE
84
;===========================================================================
85
_TEXT   SEGMENT
86
 
87
;---------------------------------------------------------------------------
88
; match_init
89
;---------------------------------------------------------------------------
90
        ALIGN   4
91
PUBLIC  _match_init
92
_match_init     PROC
93
        ; no initialization needed
94
        ret
95
_match_init     ENDP
96
 
97
;---------------------------------------------------------------------------
98
; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
99
;---------------------------------------------------------------------------
100
        ALIGN   4
101
 
102
PUBLIC  _longest_match
103
_longest_match  PROC
104
 
105
; Since this code uses EBP for a scratch register, the stack frame must
106
; be manually constructed and referenced relative to the ESP register.
107
 
108
; Stack image
109
; Variables
110
chainlenwmask   =  0    ; high word: current chain len
111
                        ; low word: s->wmask
112
window          =  4    ; local copy of s->window
113
windowbestlen   =  8    ; s->window + bestlen
114
scanend         = 12    ; last two bytes of string
115
scanstart       = 16    ; first two bytes of string
116
scanalign       = 20    ; dword-misalignment of string
117
nicematch       = 24    ; a good enough match size
118
bestlen         = 28    ; size of best match so far
119
scan            = 32    ; ptr to string wanting match
120
varsize         = 36    ; number of bytes (also offset to last saved register)
121
 
122
; Saved Registers (actually pushed into place)
123
ebx_save        = 36
124
edi_save        = 40
125
esi_save        = 44
126
ebp_save        = 48
127
 
128
; Parameters
129
retaddr         = 52
130
deflatestate    = 56
131
curmatch        = 60
132
 
133
; Save registers that the compiler may be using
134
        push    ebp
135
        push    edi
136
        push    esi
137
        push    ebx
138
 
139
; Allocate local variable space
140
        sub     esp,varsize
141
 
142
; Retrieve the function arguments. ecx will hold cur_match
143
; throughout the entire function. edx will hold the pointer to the
144
; deflate_state structure during the function's setup (before
145
; entering the main loop).
146
 
147
        mov     edx, [esp+deflatestate]
148
ASSUME  edx:PTR DEFLATE_STATE
149
 
150
        mov     ecx, [esp+curmatch]
151
 
152
; uInt wmask = s->w_mask;
153
; unsigned chain_length = s->max_chain_length;
154
; if (s->prev_length >= s->good_match) {
155
;     chain_length >>= 2;
156
; }
157
 
158
        mov     eax, [edx].ds_prev_length
159
        mov     ebx, [edx].ds_good_match
160
        cmp     eax, ebx
161
        mov     eax, [edx].ds_w_mask
162
        mov     ebx, [edx].ds_max_chain_length
163
        jl      SHORT LastMatchGood
164
        shr     ebx, 2
165
LastMatchGood:
166
 
167
; chainlen is decremented once beforehand so that the function can
168
; use the sign flag instead of the zero flag for the exit test.
169
; It is then shifted into the high word, to make room for the wmask
170
; value, which it will always accompany.
171
 
172
        dec     ebx
173
        shl     ebx, 16
174
        or      ebx, eax
175
        mov     [esp+chainlenwmask], ebx
176
 
177
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
178
 
179
        mov     eax, [edx].ds_nice_match
180
        mov     ebx, [edx].ds_lookahead
181
        cmp     ebx, eax
182
        jl      SHORT LookaheadLess
183
        mov     ebx, eax
184
LookaheadLess:
185
        mov     [esp+nicematch], ebx
186
 
187
;/* register Bytef *scan = s->window + s->strstart;                     */
188
 
189
        mov     esi, [edx].ds_window
190
        mov     [esp+window], esi
191
        mov     ebp, [edx].ds_strstart
192
        lea     edi, [esi+ebp]
193
        mov     [esp+scan],edi
194
 
195
;/* Determine how many bytes the scan ptr is off from being             */
196
;/* dword-aligned.                                                      */
197
 
198
        mov     eax, edi
199
        neg     eax
200
        and     eax, 3
201
        mov     [esp+scanalign], eax
202
 
203
;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ?                      */
204
;/*     s->strstart - (IPos)MAX_DIST(s) : NIL;                          */
205
 
206
        mov     eax, [edx].ds_w_size
207
        sub     eax, MIN_LOOKAHEAD
208
        sub     ebp, eax
209
        jg      SHORT LimitPositive
210
        xor     ebp, ebp
211
LimitPositive:
212
 
213
;/* int best_len = s->prev_length;                                      */
214
 
215
        mov     eax, [edx].ds_prev_length
216
        mov     [esp+bestlen], eax
217
 
218
;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
219
 
220
        add     esi, eax
221
        mov     [esp+windowbestlen], esi
222
 
223
;/* register ush scan_start = *(ushf*)scan;                             */
224
;/* register ush scan_end   = *(ushf*)(scan+best_len-1);                */
225
;/* Posf *prev = s->prev;                                               */
226
 
227
        movzx   ebx, WORD PTR[edi]
228
        mov     [esp+scanstart], ebx
229
        movzx   ebx, WORD PTR[eax+edi-1]
230
        mov     [esp+scanend], ebx
231
        mov     edi, [edx].ds_prev
232
 
233
;/* Jump into the main loop.                                            */
234
 
235
        mov     edx, [esp+chainlenwmask]
236
        jmp     SHORT LoopEntry
237
 
238
;/* do {
239
; *     match = s->window + cur_match;
240
; *     if (*(ushf*)(match+best_len-1) != scan_end ||
241
; *         *(ushf*)match != scan_start) continue;
242
; *     [...]
243
; * } while ((cur_match = prev[cur_match & wmask]) > limit
244
; *          && --chain_length != 0);
245
; *
246
; * Here is the inner loop of the function. The function will spend the
247
; * majority of its time in this loop, and majority of that time will
248
; * be spent in the first ten instructions.
249
; *
250
; * Within this loop:
251
; * %ebx = scanend
252
; * %ecx = curmatch
253
; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
254
; * %esi = windowbestlen - i.e., (window + bestlen)
255
; * %edi = prev
256
; * %ebp = limit
257
; */
258
 
259
        ALIGN   4
260
LookupLoop:
261
        and     ecx, edx
262
        movzx   ecx, WORD PTR[edi+ecx*2]
263
        cmp     ecx, ebp
264
        jbe     LeaveNow
265
        sub     edx, 000010000H
266
        js      LeaveNow
267
 
268
LoopEntry:
269
        movzx   eax, WORD PTR[esi+ecx-1]
270
        cmp     eax, ebx
271
        jnz     SHORT LookupLoop
272
 
273
        mov     eax, [esp+window]
274
        movzx   eax, WORD PTR[eax+ecx]
275
        cmp     eax, [esp+scanstart]
276
        jnz     SHORT LookupLoop
277
 
278
;/* Store the current value of chainlen.                                */
279
 
280
        mov     [esp+chainlenwmask], edx
281
 
282
;/* Point %edi to the string under scrutiny, and %esi to the string we  */
283
;/* are hoping to match it up with. In actuality, %esi and %edi are     */
284
;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is     */
285
;/* initialized to -(MAX_MATCH_8 - scanalign).                          */
286
 
287
        mov     esi, [esp+window]
288
        mov     edi, [esp+scan]
289
        add     esi, ecx
290
        mov     eax, [esp+scanalign]
291
        mov     edx, -MAX_MATCH_8
292
        lea     edi, [edi+eax+MAX_MATCH_8]
293
        lea     esi, [esi+eax+MAX_MATCH_8]
294
 
295
;/* Test the strings for equality, 8 bytes at a time. At the end,
296
; * adjust %edx so that it is offset to the exact byte that mismatched.
297
; *
298
; * We already know at this point that the first three bytes of the
299
; * strings match each other, and they can be safely passed over before
300
; * starting the compare loop. So what this code does is skip over 0-3
301
; * bytes, as much as necessary in order to dword-align the %edi
302
; * pointer. (%esi will still be misaligned three times out of four.)
303
; *
304
; * It should be confessed that this loop usually does not represent
305
; * much of the total running time. Replacing it with a more
306
; * straightforward "rep cmpsb" would not drastically degrade
307
; * performance.
308
; */
309
 
310
LoopCmps:
311
        mov     eax, DWORD PTR[esi+edx]
312
        xor     eax, DWORD PTR[edi+edx]
313
        jnz     SHORT LeaveLoopCmps
314
 
315
        mov     eax, DWORD PTR[esi+edx+4]
316
        xor     eax, DWORD PTR[edi+edx+4]
317
        jnz     SHORT LeaveLoopCmps4
318
 
319
        add     edx, 8
320
        jnz     SHORT LoopCmps
321
        jmp     LenMaximum
322
        ALIGN   4
323
 
324
LeaveLoopCmps4:
325
        add     edx, 4
326
 
327
LeaveLoopCmps:
328
        test    eax, 00000FFFFH
329
        jnz     SHORT LenLower
330
 
331
        add     edx, 2
332
        shr     eax, 16
333
 
334
LenLower:
335
        sub     al, 1
336
        adc     edx, 0
337
 
338
;/* Calculate the length of the match. If it is longer than MAX_MATCH,  */
339
;/* then automatically accept it as the best possible match and leave.  */
340
 
341
        lea     eax, [edi+edx]
342
        mov     edi, [esp+scan]
343
        sub     eax, edi
344
        cmp     eax, MAX_MATCH
345
        jge     SHORT LenMaximum
346
 
347
;/* If the length of the match is not longer than the best match we     */
348
;/* have so far, then forget it and return to the lookup loop.          */
349
 
350
        mov     edx, [esp+deflatestate]
351
        mov     ebx, [esp+bestlen]
352
        cmp     eax, ebx
353
        jg      SHORT LongerMatch
354
        mov     esi, [esp+windowbestlen]
355
        mov     edi, [edx].ds_prev
356
        mov     ebx, [esp+scanend]
357
        mov     edx, [esp+chainlenwmask]
358
        jmp     LookupLoop
359
        ALIGN   4
360
 
361
;/*         s->match_start = cur_match;                                 */
362
;/*         best_len = len;                                             */
363
;/*         if (len >= nice_match) break;                               */
364
;/*         scan_end = *(ushf*)(scan+best_len-1);                       */
365
 
366
LongerMatch:
367
        mov     ebx, [esp+nicematch]
368
        mov     [esp+bestlen], eax
369
        mov     [edx].ds_match_start, ecx
370
        cmp     eax, ebx
371
        jge     SHORT LeaveNow
372
        mov     esi, [esp+window]
373
        add     esi, eax
374
        mov     [esp+windowbestlen], esi
375
        movzx   ebx, WORD PTR[edi+eax-1]
376
        mov     edi, [edx].ds_prev
377
        mov     [esp+scanend], ebx
378
        mov     edx, [esp+chainlenwmask]
379
        jmp     LookupLoop
380
        ALIGN   4
381
 
382
;/* Accept the current string, with the maximum possible length.        */
383
 
384
LenMaximum:
385
        mov     edx, [esp+deflatestate]
386
        mov     DWORD PTR[esp+bestlen], MAX_MATCH
387
        mov     [edx].ds_match_start, ecx
388
 
389
;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len;          */
390
;/* return s->lookahead;                                                */
391
 
392
LeaveNow:
393
        mov     edx, [esp+deflatestate]
394
        mov     ebx, [esp+bestlen]
395
        mov     eax, [edx].ds_lookahead
396
        cmp     ebx, eax
397
        jg      SHORT LookaheadRet
398
        mov     eax, ebx
399
LookaheadRet:
400
 
401
; Restore the stack and return from whence we came.
402
 
403
        add     esp, varsize
404
        pop     ebx
405
        pop     esi
406
        pop     edi
407
        pop     ebp
408
        ret
409
 
410
_longest_match  ENDP
411
 
412
_TEXT   ENDS
413
END

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.