OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [zlib/] [contrib/] [inflate86/] [inffas86.c] - Blame information for rev 867

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 745 jeremybenn
/* inffas86.c is a hand-tuned assembler version of
2
 *
3
 * inffast.c -- fast decoding
4
 * Copyright (C) 1995-2003 Mark Adler
5
 * For conditions of distribution and use, see copyright notice in zlib.h
6
 *
7
 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
8
 * Please use the copyright conditions above.
9
 *
10
 * Dec-29-2003 -- I added AMD64 inflate asm support.  This version is also
11
 * slightly quicker on x86 systems because, instead of using rep movsb to copy
12
 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
13
 * bytes.  I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
14
 * from http://fedora.linux.duke.edu/fc1_x86_64,
15
 * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
16
 * 1GB ram.  The 64-bit version is about 4% faster than the 32-bit version,
17
 * when decompressing mozilla-source-1.3.tar.gz.
18
 *
19
 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
20
 * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
21
 * the moment.  I have successfully compiled and tested this code with gcc2.96,
22
 * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
23
 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
24
 * enabled.  I will attempt to merge the MMX code into this version.  Newer
25
 * versions of this and inffast.S can be found at
26
 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
27
 */
28
 
29
#include "zutil.h"
30
#include "inftrees.h"
31
#include "inflate.h"
32
#include "inffast.h"
33
 
34
/* Mark Adler's comments from inffast.c: */
35
 
36
/*
37
   Decode literal, length, and distance codes and write out the resulting
38
   literal and match bytes until either not enough input or output is
39
   available, an end-of-block is encountered, or a data error is encountered.
40
   When large enough input and output buffers are supplied to inflate(), for
41
   example, a 16K input buffer and a 64K output buffer, more than 95% of the
42
   inflate execution time is spent in this routine.
43
 
44
   Entry assumptions:
45
 
46
        state->mode == LEN
47
        strm->avail_in >= 6
48
        strm->avail_out >= 258
49
        start >= strm->avail_out
50
        state->bits < 8
51
 
52
   On return, state->mode is one of:
53
 
54
        LEN -- not enough output space or available input remains
55
        TYPE -- reached end of block code, inflate() to interpret next block
56
        BAD -- error in block data
57
 
58
   Notes:
59
 
60
    - The maximum input bits used by a length/distance pair is 15 bits for the
61
      length code, 5 bits for the length extra, 15 bits for the distance code,
62
      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
63
      Therefore if strm->avail_in >= 6, then there is enough input to avoid
64
      checking for available input while decoding.
65
 
66
    - The maximum bytes that a single length/distance pair can output is 258
67
      bytes, which is the maximum length that can be coded.  inflate_fast()
68
      requires strm->avail_out >= 258 for each loop to avoid checking for
69
      output space.
70
 */
71
void inflate_fast(strm, start)
72
z_streamp strm;
73
unsigned start;         /* inflate()'s starting value for strm->avail_out */
74
{
75
    struct inflate_state FAR *state;
76
    struct inffast_ar {
77
/* 64   32                               x86  x86_64 */
78
/* ar offset                              register */
79
/*  0    0 */ void *esp;                /* esp save */
80
/*  8    4 */ void *ebp;                /* ebp save */
81
/* 16    8 */ unsigned char FAR *in;    /* esi rsi  local strm->next_in */
82
/* 24   12 */ unsigned char FAR *last;  /*     r9   while in < last */
83
/* 32   16 */ unsigned char FAR *out;   /* edi rdi  local strm->next_out */
84
/* 40   20 */ unsigned char FAR *beg;   /*          inflate()'s init next_out */
85
/* 48   24 */ unsigned char FAR *end;   /*     r10  while out < end */
86
/* 56   28 */ unsigned char FAR *window;/*          size of window, wsize!=0 */
87
/* 64   32 */ code const FAR *lcode;    /* ebp rbp  local strm->lencode */
88
/* 72   36 */ code const FAR *dcode;    /*     r11  local strm->distcode */
89
/* 80   40 */ unsigned long hold;       /* edx rdx  local strm->hold */
90
/* 88   44 */ unsigned bits;            /* ebx rbx  local strm->bits */
91
/* 92   48 */ unsigned wsize;           /*          window size */
92
/* 96   52 */ unsigned write;           /*          window write index */
93
/*100   56 */ unsigned lmask;           /*     r12  mask for lcode */
94
/*104   60 */ unsigned dmask;           /*     r13  mask for dcode */
95
/*108   64 */ unsigned len;             /*     r14  match length */
96
/*112   68 */ unsigned dist;            /*     r15  match distance */
97
/*116   72 */ unsigned status;          /*          set when state chng*/
98
    } ar;
99
 
100
#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
101
#define PAD_AVAIL_IN 6
102
#define PAD_AVAIL_OUT 258
103
#else
104
#define PAD_AVAIL_IN 5
105
#define PAD_AVAIL_OUT 257
106
#endif
107
 
108
    /* copy state to local variables */
109
    state = (struct inflate_state FAR *)strm->state;
110
    ar.in = strm->next_in;
111
    ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
112
    ar.out = strm->next_out;
113
    ar.beg = ar.out - (start - strm->avail_out);
114
    ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
115
    ar.wsize = state->wsize;
116
    ar.write = state->write;
117
    ar.window = state->window;
118
    ar.hold = state->hold;
119
    ar.bits = state->bits;
120
    ar.lcode = state->lencode;
121
    ar.dcode = state->distcode;
122
    ar.lmask = (1U << state->lenbits) - 1;
123
    ar.dmask = (1U << state->distbits) - 1;
124
 
125
    /* decode literals and length/distances until end-of-block or not enough
126
       input data or output space */
127
 
128
    /* align in on 1/2 hold size boundary */
129
    while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
130
        ar.hold += (unsigned long)*ar.in++ << ar.bits;
131
        ar.bits += 8;
132
    }
133
 
134
#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
135
    __asm__ __volatile__ (
136
"        leaq    %0, %%rax\n"
137
"        movq    %%rbp, 8(%%rax)\n"       /* save regs rbp and rsp */
138
"        movq    %%rsp, (%%rax)\n"
139
"        movq    %%rax, %%rsp\n"          /* make rsp point to &ar */
140
"        movq    16(%%rsp), %%rsi\n"      /* rsi  = in */
141
"        movq    32(%%rsp), %%rdi\n"      /* rdi  = out */
142
"        movq    24(%%rsp), %%r9\n"       /* r9   = last */
143
"        movq    48(%%rsp), %%r10\n"      /* r10  = end */
144
"        movq    64(%%rsp), %%rbp\n"      /* rbp  = lcode */
145
"        movq    72(%%rsp), %%r11\n"      /* r11  = dcode */
146
"        movq    80(%%rsp), %%rdx\n"      /* rdx  = hold */
147
"        movl    88(%%rsp), %%ebx\n"      /* ebx  = bits */
148
"        movl    100(%%rsp), %%r12d\n"    /* r12d = lmask */
149
"        movl    104(%%rsp), %%r13d\n"    /* r13d = dmask */
150
                                          /* r14d = len */
151
                                          /* r15d = dist */
152
"        cld\n"
153
"        cmpq    %%rdi, %%r10\n"
154
"        je      .L_one_time\n"           /* if only one decode left */
155
"        cmpq    %%rsi, %%r9\n"
156
"        je      .L_one_time\n"
157
"        jmp     .L_do_loop\n"
158
 
159
".L_one_time:\n"
160
"        movq    %%r12, %%r8\n"           /* r8 = lmask */
161
"        cmpb    $32, %%bl\n"
162
"        ja      .L_get_length_code_one_time\n"
163
 
164
"        lodsl\n"                         /* eax = *(uint *)in++ */
165
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
166
"        addb    $32, %%bl\n"             /* bits += 32 */
167
"        shlq    %%cl, %%rax\n"
168
"        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
169
"        jmp     .L_get_length_code_one_time\n"
170
 
171
".align 32,0x90\n"
172
".L_while_test:\n"
173
"        cmpq    %%rdi, %%r10\n"
174
"        jbe     .L_break_loop\n"
175
"        cmpq    %%rsi, %%r9\n"
176
"        jbe     .L_break_loop\n"
177
 
178
".L_do_loop:\n"
179
"        movq    %%r12, %%r8\n"           /* r8 = lmask */
180
"        cmpb    $32, %%bl\n"
181
"        ja      .L_get_length_code\n"    /* if (32 < bits) */
182
 
183
"        lodsl\n"                         /* eax = *(uint *)in++ */
184
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
185
"        addb    $32, %%bl\n"             /* bits += 32 */
186
"        shlq    %%cl, %%rax\n"
187
"        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
188
 
189
".L_get_length_code:\n"
190
"        andq    %%rdx, %%r8\n"            /* r8 &= hold */
191
"        movl    (%%rbp,%%r8,4), %%eax\n"  /* eax = lcode[hold & lmask] */
192
 
193
"        movb    %%ah, %%cl\n"            /* cl = this.bits */
194
"        subb    %%ah, %%bl\n"            /* bits -= this.bits */
195
"        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
196
 
197
"        testb   %%al, %%al\n"
198
"        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
199
 
200
"        movq    %%r12, %%r8\n"            /* r8 = lmask */
201
"        shrl    $16, %%eax\n"            /* output this.val char */
202
"        stosb\n"
203
 
204
".L_get_length_code_one_time:\n"
205
"        andq    %%rdx, %%r8\n"            /* r8 &= hold */
206
"        movl    (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
207
 
208
".L_dolen:\n"
209
"        movb    %%ah, %%cl\n"            /* cl = this.bits */
210
"        subb    %%ah, %%bl\n"            /* bits -= this.bits */
211
"        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
212
 
213
"        testb   %%al, %%al\n"
214
"        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
215
 
216
"        shrl    $16, %%eax\n"            /* output this.val char */
217
"        stosb\n"
218
"        jmp     .L_while_test\n"
219
 
220
".align 32,0x90\n"
221
".L_test_for_length_base:\n"
222
"        movl    %%eax, %%r14d\n"         /* len = this */
223
"        shrl    $16, %%r14d\n"           /* len = this.val */
224
"        movb    %%al, %%cl\n"
225
 
226
"        testb   $16, %%al\n"
227
"        jz      .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
228
"        andb    $15, %%cl\n"             /* op &= 15 */
229
"        jz      .L_decode_distance\n"    /* if (!op) */
230
 
231
".L_add_bits_to_len:\n"
232
"        subb    %%cl, %%bl\n"
233
"        xorl    %%eax, %%eax\n"
234
"        incl    %%eax\n"
235
"        shll    %%cl, %%eax\n"
236
"        decl    %%eax\n"
237
"        andl    %%edx, %%eax\n"          /* eax &= hold */
238
"        shrq    %%cl, %%rdx\n"
239
"        addl    %%eax, %%r14d\n"         /* len += hold & mask[op] */
240
 
241
".L_decode_distance:\n"
242
"        movq    %%r13, %%r8\n"           /* r8 = dmask */
243
"        cmpb    $32, %%bl\n"
244
"        ja      .L_get_distance_code\n"  /* if (32 < bits) */
245
 
246
"        lodsl\n"                         /* eax = *(uint *)in++ */
247
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
248
"        addb    $32, %%bl\n"             /* bits += 32 */
249
"        shlq    %%cl, %%rax\n"
250
"        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
251
 
252
".L_get_distance_code:\n"
253
"        andq    %%rdx, %%r8\n"           /* r8 &= hold */
254
"        movl    (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
255
 
256
".L_dodist:\n"
257
"        movl    %%eax, %%r15d\n"         /* dist = this */
258
"        shrl    $16, %%r15d\n"           /* dist = this.val */
259
"        movb    %%ah, %%cl\n"
260
"        subb    %%ah, %%bl\n"            /* bits -= this.bits */
261
"        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
262
"        movb    %%al, %%cl\n"            /* cl = this.op */
263
 
264
"        testb   $16, %%al\n"             /* if ((op & 16) == 0) */
265
"        jz      .L_test_for_second_level_dist\n"
266
"        andb    $15, %%cl\n"             /* op &= 15 */
267
"        jz      .L_check_dist_one\n"
268
 
269
".L_add_bits_to_dist:\n"
270
"        subb    %%cl, %%bl\n"
271
"        xorl    %%eax, %%eax\n"
272
"        incl    %%eax\n"
273
"        shll    %%cl, %%eax\n"
274
"        decl    %%eax\n"                 /* (1 << op) - 1 */
275
"        andl    %%edx, %%eax\n"          /* eax &= hold */
276
"        shrq    %%cl, %%rdx\n"
277
"        addl    %%eax, %%r15d\n"         /* dist += hold & ((1 << op) - 1) */
278
 
279
".L_check_window:\n"
280
"        movq    %%rsi, %%r8\n"           /* save in so from can use it's reg */
281
"        movq    %%rdi, %%rax\n"
282
"        subq    40(%%rsp), %%rax\n"      /* nbytes = out - beg */
283
 
284
"        cmpl    %%r15d, %%eax\n"
285
"        jb      .L_clip_window\n"        /* if (dist > nbytes) 4.2% */
286
 
287
"        movl    %%r14d, %%ecx\n"         /* ecx = len */
288
"        movq    %%rdi, %%rsi\n"
289
"        subq    %%r15, %%rsi\n"          /* from = out - dist */
290
 
291
"        sarl    %%ecx\n"
292
"        jnc     .L_copy_two\n"           /* if len % 2 == 0 */
293
 
294
"        rep     movsw\n"
295
"        movb    (%%rsi), %%al\n"
296
"        movb    %%al, (%%rdi)\n"
297
"        incq    %%rdi\n"
298
 
299
"        movq    %%r8, %%rsi\n"           /* move in back to %rsi, toss from */
300
"        jmp     .L_while_test\n"
301
 
302
".L_copy_two:\n"
303
"        rep     movsw\n"
304
"        movq    %%r8, %%rsi\n"           /* move in back to %rsi, toss from */
305
"        jmp     .L_while_test\n"
306
 
307
".align 32,0x90\n"
308
".L_check_dist_one:\n"
309
"        cmpl    $1, %%r15d\n"            /* if dist 1, is a memset */
310
"        jne     .L_check_window\n"
311
"        cmpq    %%rdi, 40(%%rsp)\n"      /* if out == beg, outside window */
312
"        je      .L_check_window\n"
313
 
314
"        movl    %%r14d, %%ecx\n"         /* ecx = len */
315
"        movb    -1(%%rdi), %%al\n"
316
"        movb    %%al, %%ah\n"
317
 
318
"        sarl    %%ecx\n"
319
"        jnc     .L_set_two\n"
320
"        movb    %%al, (%%rdi)\n"
321
"        incq    %%rdi\n"
322
 
323
".L_set_two:\n"
324
"        rep     stosw\n"
325
"        jmp     .L_while_test\n"
326
 
327
".align 32,0x90\n"
328
".L_test_for_second_level_length:\n"
329
"        testb   $64, %%al\n"
330
"        jnz     .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
331
 
332
"        xorl    %%eax, %%eax\n"
333
"        incl    %%eax\n"
334
"        shll    %%cl, %%eax\n"
335
"        decl    %%eax\n"
336
"        andl    %%edx, %%eax\n"         /* eax &= hold */
337
"        addl    %%r14d, %%eax\n"        /* eax += len */
338
"        movl    (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
339
"        jmp     .L_dolen\n"
340
 
341
".align 32,0x90\n"
342
".L_test_for_second_level_dist:\n"
343
"        testb   $64, %%al\n"
344
"        jnz     .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
345
 
346
"        xorl    %%eax, %%eax\n"
347
"        incl    %%eax\n"
348
"        shll    %%cl, %%eax\n"
349
"        decl    %%eax\n"
350
"        andl    %%edx, %%eax\n"         /* eax &= hold */
351
"        addl    %%r15d, %%eax\n"        /* eax += dist */
352
"        movl    (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
353
"        jmp     .L_dodist\n"
354
 
355
".align 32,0x90\n"
356
".L_clip_window:\n"
357
"        movl    %%eax, %%ecx\n"         /* ecx = nbytes */
358
"        movl    92(%%rsp), %%eax\n"     /* eax = wsize, prepare for dist cmp */
359
"        negl    %%ecx\n"                /* nbytes = -nbytes */
360
 
361
"        cmpl    %%r15d, %%eax\n"
362
"        jb      .L_invalid_distance_too_far\n" /* if (dist > wsize) */
363
 
364
"        addl    %%r15d, %%ecx\n"         /* nbytes = dist - nbytes */
365
"        cmpl    $0, 96(%%rsp)\n"
366
"        jne     .L_wrap_around_window\n" /* if (write != 0) */
367
 
368
"        movq    56(%%rsp), %%rsi\n"     /* from  = window */
369
"        subl    %%ecx, %%eax\n"         /* eax  -= nbytes */
370
"        addq    %%rax, %%rsi\n"         /* from += wsize - nbytes */
371
 
372
"        movl    %%r14d, %%eax\n"        /* eax = len */
373
"        cmpl    %%ecx, %%r14d\n"
374
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
375
 
376
"        subl    %%ecx, %%eax\n"         /* eax -= nbytes */
377
"        rep     movsb\n"
378
"        movq    %%rdi, %%rsi\n"
379
"        subq    %%r15, %%rsi\n"         /* from = &out[ -dist ] */
380
"        jmp     .L_do_copy\n"
381
 
382
".align 32,0x90\n"
383
".L_wrap_around_window:\n"
384
"        movl    96(%%rsp), %%eax\n"     /* eax = write */
385
"        cmpl    %%eax, %%ecx\n"
386
"        jbe     .L_contiguous_in_window\n" /* if (write >= nbytes) */
387
 
388
"        movl    92(%%rsp), %%esi\n"     /* from  = wsize */
389
"        addq    56(%%rsp), %%rsi\n"     /* from += window */
390
"        addq    %%rax, %%rsi\n"         /* from += write */
391
"        subq    %%rcx, %%rsi\n"         /* from -= nbytes */
392
"        subl    %%eax, %%ecx\n"         /* nbytes -= write */
393
 
394
"        movl    %%r14d, %%eax\n"        /* eax = len */
395
"        cmpl    %%ecx, %%eax\n"
396
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
397
 
398
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
399
"        rep     movsb\n"
400
"        movq    56(%%rsp), %%rsi\n"     /* from = window */
401
"        movl    96(%%rsp), %%ecx\n"     /* nbytes = write */
402
"        cmpl    %%ecx, %%eax\n"
403
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
404
 
405
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
406
"        rep     movsb\n"
407
"        movq    %%rdi, %%rsi\n"
408
"        subq    %%r15, %%rsi\n"         /* from = out - dist */
409
"        jmp     .L_do_copy\n"
410
 
411
".align 32,0x90\n"
412
".L_contiguous_in_window:\n"
413
"        movq    56(%%rsp), %%rsi\n"     /* rsi = window */
414
"        addq    %%rax, %%rsi\n"
415
"        subq    %%rcx, %%rsi\n"         /* from += write - nbytes */
416
 
417
"        movl    %%r14d, %%eax\n"        /* eax = len */
418
"        cmpl    %%ecx, %%eax\n"
419
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
420
 
421
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
422
"        rep     movsb\n"
423
"        movq    %%rdi, %%rsi\n"
424
"        subq    %%r15, %%rsi\n"         /* from = out - dist */
425
"        jmp     .L_do_copy\n"           /* if (nbytes >= len) */
426
 
427
".align 32,0x90\n"
428
".L_do_copy:\n"
429
"        movl    %%eax, %%ecx\n"         /* ecx = len */
430
"        rep     movsb\n"
431
 
432
"        movq    %%r8, %%rsi\n"          /* move in back to %esi, toss from */
433
"        jmp     .L_while_test\n"
434
 
435
".L_test_for_end_of_block:\n"
436
"        testb   $32, %%al\n"
437
"        jz      .L_invalid_literal_length_code\n"
438
"        movl    $1, 116(%%rsp)\n"
439
"        jmp     .L_break_loop_with_status\n"
440
 
441
".L_invalid_literal_length_code:\n"
442
"        movl    $2, 116(%%rsp)\n"
443
"        jmp     .L_break_loop_with_status\n"
444
 
445
".L_invalid_distance_code:\n"
446
"        movl    $3, 116(%%rsp)\n"
447
"        jmp     .L_break_loop_with_status\n"
448
 
449
".L_invalid_distance_too_far:\n"
450
"        movl    $4, 116(%%rsp)\n"
451
"        jmp     .L_break_loop_with_status\n"
452
 
453
".L_break_loop:\n"
454
"        movl    $0, 116(%%rsp)\n"
455
 
456
".L_break_loop_with_status:\n"
457
/* put in, out, bits, and hold back into ar and pop esp */
458
"        movq    %%rsi, 16(%%rsp)\n"     /* in */
459
"        movq    %%rdi, 32(%%rsp)\n"     /* out */
460
"        movl    %%ebx, 88(%%rsp)\n"     /* bits */
461
"        movq    %%rdx, 80(%%rsp)\n"     /* hold */
462
"        movq    (%%rsp), %%rax\n"       /* restore rbp and rsp */
463
"        movq    8(%%rsp), %%rbp\n"
464
"        movq    %%rax, %%rsp\n"
465
          :
466
          : "m" (ar)
467
          : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
468
            "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
469
    );
470
#elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
471
    __asm__ __volatile__ (
472
"        leal    %0, %%eax\n"
473
"        movl    %%esp, (%%eax)\n"        /* save esp, ebp */
474
"        movl    %%ebp, 4(%%eax)\n"
475
"        movl    %%eax, %%esp\n"
476
"        movl    8(%%esp), %%esi\n"       /* esi = in */
477
"        movl    16(%%esp), %%edi\n"      /* edi = out */
478
"        movl    40(%%esp), %%edx\n"      /* edx = hold */
479
"        movl    44(%%esp), %%ebx\n"      /* ebx = bits */
480
"        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
481
 
482
"        cld\n"
483
"        jmp     .L_do_loop\n"
484
 
485
".align 32,0x90\n"
486
".L_while_test:\n"
487
"        cmpl    %%edi, 24(%%esp)\n"      /* out < end */
488
"        jbe     .L_break_loop\n"
489
"        cmpl    %%esi, 12(%%esp)\n"      /* in < last */
490
"        jbe     .L_break_loop\n"
491
 
492
".L_do_loop:\n"
493
"        cmpb    $15, %%bl\n"
494
"        ja      .L_get_length_code\n"    /* if (15 < bits) */
495
 
496
"        xorl    %%eax, %%eax\n"
497
"        lodsw\n"                         /* al = *(ushort *)in++ */
498
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
499
"        addb    $16, %%bl\n"             /* bits += 16 */
500
"        shll    %%cl, %%eax\n"
501
"        orl     %%eax, %%edx\n"        /* hold |= *((ushort *)in)++ << bits */
502
 
503
".L_get_length_code:\n"
504
"        movl    56(%%esp), %%eax\n"      /* eax = lmask */
505
"        andl    %%edx, %%eax\n"          /* eax &= hold */
506
"        movl    (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
507
 
508
".L_dolen:\n"
509
"        movb    %%ah, %%cl\n"            /* cl = this.bits */
510
"        subb    %%ah, %%bl\n"            /* bits -= this.bits */
511
"        shrl    %%cl, %%edx\n"           /* hold >>= this.bits */
512
 
513
"        testb   %%al, %%al\n"
514
"        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
515
 
516
"        shrl    $16, %%eax\n"            /* output this.val char */
517
"        stosb\n"
518
"        jmp     .L_while_test\n"
519
 
520
".align 32,0x90\n"
521
".L_test_for_length_base:\n"
522
"        movl    %%eax, %%ecx\n"          /* len = this */
523
"        shrl    $16, %%ecx\n"            /* len = this.val */
524
"        movl    %%ecx, 64(%%esp)\n"      /* save len */
525
"        movb    %%al, %%cl\n"
526
 
527
"        testb   $16, %%al\n"
528
"        jz      .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
529
"        andb    $15, %%cl\n"             /* op &= 15 */
530
"        jz      .L_decode_distance\n"    /* if (!op) */
531
"        cmpb    %%cl, %%bl\n"
532
"        jae     .L_add_bits_to_len\n"    /* if (op <= bits) */
533
 
534
"        movb    %%cl, %%ch\n"            /* stash op in ch, freeing cl */
535
"        xorl    %%eax, %%eax\n"
536
"        lodsw\n"                         /* al = *(ushort *)in++ */
537
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
538
"        addb    $16, %%bl\n"             /* bits += 16 */
539
"        shll    %%cl, %%eax\n"
540
"        orl     %%eax, %%edx\n"         /* hold |= *((ushort *)in)++ << bits */
541
"        movb    %%ch, %%cl\n"            /* move op back to ecx */
542
 
543
".L_add_bits_to_len:\n"
544
"        subb    %%cl, %%bl\n"
545
"        xorl    %%eax, %%eax\n"
546
"        incl    %%eax\n"
547
"        shll    %%cl, %%eax\n"
548
"        decl    %%eax\n"
549
"        andl    %%edx, %%eax\n"          /* eax &= hold */
550
"        shrl    %%cl, %%edx\n"
551
"        addl    %%eax, 64(%%esp)\n"      /* len += hold & mask[op] */
552
 
553
".L_decode_distance:\n"
554
"        cmpb    $15, %%bl\n"
555
"        ja      .L_get_distance_code\n"  /* if (15 < bits) */
556
 
557
"        xorl    %%eax, %%eax\n"
558
"        lodsw\n"                         /* al = *(ushort *)in++ */
559
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
560
"        addb    $16, %%bl\n"             /* bits += 16 */
561
"        shll    %%cl, %%eax\n"
562
"        orl     %%eax, %%edx\n"         /* hold |= *((ushort *)in)++ << bits */
563
 
564
".L_get_distance_code:\n"
565
"        movl    60(%%esp), %%eax\n"      /* eax = dmask */
566
"        movl    36(%%esp), %%ecx\n"      /* ecx = dcode */
567
"        andl    %%edx, %%eax\n"          /* eax &= hold */
568
"        movl    (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
569
 
570
".L_dodist:\n"
571
"        movl    %%eax, %%ebp\n"          /* dist = this */
572
"        shrl    $16, %%ebp\n"            /* dist = this.val */
573
"        movb    %%ah, %%cl\n"
574
"        subb    %%ah, %%bl\n"            /* bits -= this.bits */
575
"        shrl    %%cl, %%edx\n"           /* hold >>= this.bits */
576
"        movb    %%al, %%cl\n"            /* cl = this.op */
577
 
578
"        testb   $16, %%al\n"             /* if ((op & 16) == 0) */
579
"        jz      .L_test_for_second_level_dist\n"
580
"        andb    $15, %%cl\n"             /* op &= 15 */
581
"        jz      .L_check_dist_one\n"
582
"        cmpb    %%cl, %%bl\n"
583
"        jae     .L_add_bits_to_dist\n"   /* if (op <= bits) 97.6% */
584
 
585
"        movb    %%cl, %%ch\n"            /* stash op in ch, freeing cl */
586
"        xorl    %%eax, %%eax\n"
587
"        lodsw\n"                         /* al = *(ushort *)in++ */
588
"        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
589
"        addb    $16, %%bl\n"             /* bits += 16 */
590
"        shll    %%cl, %%eax\n"
591
"        orl     %%eax, %%edx\n"        /* hold |= *((ushort *)in)++ << bits */
592
"        movb    %%ch, %%cl\n"            /* move op back to ecx */
593
 
594
".L_add_bits_to_dist:\n"
595
"        subb    %%cl, %%bl\n"
596
"        xorl    %%eax, %%eax\n"
597
"        incl    %%eax\n"
598
"        shll    %%cl, %%eax\n"
599
"        decl    %%eax\n"                 /* (1 << op) - 1 */
600
"        andl    %%edx, %%eax\n"          /* eax &= hold */
601
"        shrl    %%cl, %%edx\n"
602
"        addl    %%eax, %%ebp\n"          /* dist += hold & ((1 << op) - 1) */
603
 
604
".L_check_window:\n"
605
"        movl    %%esi, 8(%%esp)\n"       /* save in so from can use it's reg */
606
"        movl    %%edi, %%eax\n"
607
"        subl    20(%%esp), %%eax\n"      /* nbytes = out - beg */
608
 
609
"        cmpl    %%ebp, %%eax\n"
610
"        jb      .L_clip_window\n"        /* if (dist > nbytes) 4.2% */
611
 
612
"        movl    64(%%esp), %%ecx\n"      /* ecx = len */
613
"        movl    %%edi, %%esi\n"
614
"        subl    %%ebp, %%esi\n"          /* from = out - dist */
615
 
616
"        sarl    %%ecx\n"
617
"        jnc     .L_copy_two\n"           /* if len % 2 == 0 */
618
 
619
"        rep     movsw\n"
620
"        movb    (%%esi), %%al\n"
621
"        movb    %%al, (%%edi)\n"
622
"        incl    %%edi\n"
623
 
624
"        movl    8(%%esp), %%esi\n"       /* move in back to %esi, toss from */
625
"        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
626
"        jmp     .L_while_test\n"
627
 
628
".L_copy_two:\n"
629
"        rep     movsw\n"
630
"        movl    8(%%esp), %%esi\n"       /* move in back to %esi, toss from */
631
"        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
632
"        jmp     .L_while_test\n"
633
 
634
".align 32,0x90\n"
635
".L_check_dist_one:\n"
636
"        cmpl    $1, %%ebp\n"            /* if dist 1, is a memset */
637
"        jne     .L_check_window\n"
638
"        cmpl    %%edi, 20(%%esp)\n"
639
"        je      .L_check_window\n"      /* out == beg, if outside window */
640
 
641
"        movl    64(%%esp), %%ecx\n"      /* ecx = len */
642
"        movb    -1(%%edi), %%al\n"
643
"        movb    %%al, %%ah\n"
644
 
645
"        sarl    %%ecx\n"
646
"        jnc     .L_set_two\n"
647
"        movb    %%al, (%%edi)\n"
648
"        incl    %%edi\n"
649
 
650
".L_set_two:\n"
651
"        rep     stosw\n"
652
"        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
653
"        jmp     .L_while_test\n"
654
 
655
".align 32,0x90\n"
656
".L_test_for_second_level_length:\n"
657
"        testb   $64, %%al\n"
658
"        jnz     .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
659
 
660
"        xorl    %%eax, %%eax\n"
661
"        incl    %%eax\n"
662
"        shll    %%cl, %%eax\n"
663
"        decl    %%eax\n"
664
"        andl    %%edx, %%eax\n"         /* eax &= hold */
665
"        addl    64(%%esp), %%eax\n"     /* eax += len */
666
"        movl    (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
667
"        jmp     .L_dolen\n"
668
 
669
".align 32,0x90\n"
670
".L_test_for_second_level_dist:\n"
671
"        testb   $64, %%al\n"
672
"        jnz     .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
673
 
674
"        xorl    %%eax, %%eax\n"
675
"        incl    %%eax\n"
676
"        shll    %%cl, %%eax\n"
677
"        decl    %%eax\n"
678
"        andl    %%edx, %%eax\n"         /* eax &= hold */
679
"        addl    %%ebp, %%eax\n"         /* eax += dist */
680
"        movl    36(%%esp), %%ecx\n"     /* ecx = dcode */
681
"        movl    (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
682
"        jmp     .L_dodist\n"
683
 
684
".align 32,0x90\n"
685
".L_clip_window:\n"
686
"        movl    %%eax, %%ecx\n"
687
"        movl    48(%%esp), %%eax\n"     /* eax = wsize */
688
"        negl    %%ecx\n"                /* nbytes = -nbytes */
689
"        movl    28(%%esp), %%esi\n"     /* from = window */
690
 
691
"        cmpl    %%ebp, %%eax\n"
692
"        jb      .L_invalid_distance_too_far\n" /* if (dist > wsize) */
693
 
694
"        addl    %%ebp, %%ecx\n"         /* nbytes = dist - nbytes */
695
"        cmpl    $0, 52(%%esp)\n"
696
"        jne     .L_wrap_around_window\n" /* if (write != 0) */
697
 
698
"        subl    %%ecx, %%eax\n"
699
"        addl    %%eax, %%esi\n"         /* from += wsize - nbytes */
700
 
701
"        movl    64(%%esp), %%eax\n"     /* eax = len */
702
"        cmpl    %%ecx, %%eax\n"
703
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
704
 
705
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
706
"        rep     movsb\n"
707
"        movl    %%edi, %%esi\n"
708
"        subl    %%ebp, %%esi\n"         /* from = out - dist */
709
"        jmp     .L_do_copy\n"
710
 
711
".align 32,0x90\n"
712
".L_wrap_around_window:\n"
713
"        movl    52(%%esp), %%eax\n"     /* eax = write */
714
"        cmpl    %%eax, %%ecx\n"
715
"        jbe     .L_contiguous_in_window\n" /* if (write >= nbytes) */
716
 
717
"        addl    48(%%esp), %%esi\n"     /* from += wsize */
718
"        addl    %%eax, %%esi\n"         /* from += write */
719
"        subl    %%ecx, %%esi\n"         /* from -= nbytes */
720
"        subl    %%eax, %%ecx\n"         /* nbytes -= write */
721
 
722
"        movl    64(%%esp), %%eax\n"     /* eax = len */
723
"        cmpl    %%ecx, %%eax\n"
724
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
725
 
726
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
727
"        rep     movsb\n"
728
"        movl    28(%%esp), %%esi\n"     /* from = window */
729
"        movl    52(%%esp), %%ecx\n"     /* nbytes = write */
730
"        cmpl    %%ecx, %%eax\n"
731
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
732
 
733
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
734
"        rep     movsb\n"
735
"        movl    %%edi, %%esi\n"
736
"        subl    %%ebp, %%esi\n"         /* from = out - dist */
737
"        jmp     .L_do_copy\n"
738
 
739
".align 32,0x90\n"
740
".L_contiguous_in_window:\n"
741
"        addl    %%eax, %%esi\n"
742
"        subl    %%ecx, %%esi\n"         /* from += write - nbytes */
743
 
744
"        movl    64(%%esp), %%eax\n"     /* eax = len */
745
"        cmpl    %%ecx, %%eax\n"
746
"        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
747
 
748
"        subl    %%ecx, %%eax\n"         /* len -= nbytes */
749
"        rep     movsb\n"
750
"        movl    %%edi, %%esi\n"
751
"        subl    %%ebp, %%esi\n"         /* from = out - dist */
752
"        jmp     .L_do_copy\n"           /* if (nbytes >= len) */
753
 
754
".align 32,0x90\n"
755
".L_do_copy:\n"
756
"        movl    %%eax, %%ecx\n"
757
"        rep     movsb\n"
758
 
759
"        movl    8(%%esp), %%esi\n"      /* move in back to %esi, toss from */
760
"        movl    32(%%esp), %%ebp\n"     /* ebp = lcode */
761
"        jmp     .L_while_test\n"
762
 
763
".L_test_for_end_of_block:\n"
764
"        testb   $32, %%al\n"
765
"        jz      .L_invalid_literal_length_code\n"
766
"        movl    $1, 72(%%esp)\n"
767
"        jmp     .L_break_loop_with_status\n"
768
 
769
".L_invalid_literal_length_code:\n"
770
"        movl    $2, 72(%%esp)\n"
771
"        jmp     .L_break_loop_with_status\n"
772
 
773
".L_invalid_distance_code:\n"
774
"        movl    $3, 72(%%esp)\n"
775
"        jmp     .L_break_loop_with_status\n"
776
 
777
".L_invalid_distance_too_far:\n"
778
"        movl    8(%%esp), %%esi\n"
779
"        movl    $4, 72(%%esp)\n"
780
"        jmp     .L_break_loop_with_status\n"
781
 
782
".L_break_loop:\n"
783
"        movl    $0, 72(%%esp)\n"
784
 
785
".L_break_loop_with_status:\n"
786
/* put in, out, bits, and hold back into ar and pop esp */
787
"        movl    %%esi, 8(%%esp)\n"      /* save in */
788
"        movl    %%edi, 16(%%esp)\n"     /* save out */
789
"        movl    %%ebx, 44(%%esp)\n"     /* save bits */
790
"        movl    %%edx, 40(%%esp)\n"     /* save hold */
791
"        movl    4(%%esp), %%ebp\n"      /* restore esp, ebp */
792
"        movl    (%%esp), %%esp\n"
793
          :
794
          : "m" (ar)
795
          : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
796
    );
797
#elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
798
    __asm {
799
        lea     eax, ar
800
        mov     [eax], esp         /* save esp, ebp */
801
        mov     [eax+4], ebp
802
        mov     esp, eax
803
        mov     esi, [esp+8]       /* esi = in */
804
        mov     edi, [esp+16]      /* edi = out */
805
        mov     edx, [esp+40]      /* edx = hold */
806
        mov     ebx, [esp+44]      /* ebx = bits */
807
        mov     ebp, [esp+32]      /* ebp = lcode */
808
 
809
        cld
810
        jmp     L_do_loop
811
 
812
ALIGN 4
813
L_while_test:
814
        cmp     [esp+24], edi
815
        jbe     L_break_loop
816
        cmp     [esp+12], esi
817
        jbe     L_break_loop
818
 
819
L_do_loop:
820
        cmp     bl, 15
821
        ja      L_get_length_code    /* if (15 < bits) */
822
 
823
        xor     eax, eax
824
        lodsw                         /* al = *(ushort *)in++ */
825
        mov     cl, bl            /* cl = bits, needs it for shifting */
826
        add     bl, 16             /* bits += 16 */
827
        shl     eax, cl
828
        or      edx, eax        /* hold |= *((ushort *)in)++ << bits */
829
 
830
L_get_length_code:
831
        mov     eax, [esp+56]      /* eax = lmask */
832
        and     eax, edx          /* eax &= hold */
833
        mov     eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
834
 
835
L_dolen:
836
        mov     cl, ah            /* cl = this.bits */
837
        sub     bl, ah            /* bits -= this.bits */
838
        shr     edx, cl           /* hold >>= this.bits */
839
 
840
        test    al, al
841
        jnz     L_test_for_length_base /* if (op != 0) 45.7% */
842
 
843
        shr     eax, 16            /* output this.val char */
844
        stosb
845
        jmp     L_while_test
846
 
847
ALIGN 4
848
L_test_for_length_base:
849
        mov     ecx, eax          /* len = this */
850
        shr     ecx, 16            /* len = this.val */
851
        mov     [esp+64], ecx      /* save len */
852
        mov     cl, al
853
 
854
        test    al, 16
855
        jz      L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
856
        and     cl, 15             /* op &= 15 */
857
        jz      L_decode_distance    /* if (!op) */
858
        cmp     bl, cl
859
        jae     L_add_bits_to_len    /* if (op <= bits) */
860
 
861
        mov     ch, cl            /* stash op in ch, freeing cl */
862
        xor     eax, eax
863
        lodsw                         /* al = *(ushort *)in++ */
864
        mov     cl, bl            /* cl = bits, needs it for shifting */
865
        add     bl, 16             /* bits += 16 */
866
        shl     eax, cl
867
        or      edx, eax         /* hold |= *((ushort *)in)++ << bits */
868
        mov     cl, ch            /* move op back to ecx */
869
 
870
L_add_bits_to_len:
871
        sub     bl, cl
872
        xor     eax, eax
873
        inc     eax
874
        shl     eax, cl
875
        dec     eax
876
        and     eax, edx          /* eax &= hold */
877
        shr     edx, cl
878
        add     [esp+64], eax      /* len += hold & mask[op] */
879
 
880
L_decode_distance:
881
        cmp     bl, 15
882
        ja      L_get_distance_code  /* if (15 < bits) */
883
 
884
        xor     eax, eax
885
        lodsw                         /* al = *(ushort *)in++ */
886
        mov     cl, bl            /* cl = bits, needs it for shifting */
887
        add     bl, 16             /* bits += 16 */
888
        shl     eax, cl
889
        or      edx, eax         /* hold |= *((ushort *)in)++ << bits */
890
 
891
L_get_distance_code:
892
        mov     eax, [esp+60]      /* eax = dmask */
893
        mov     ecx, [esp+36]      /* ecx = dcode */
894
        and     eax, edx          /* eax &= hold */
895
        mov     eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
896
 
897
L_dodist:
898
        mov     ebp, eax          /* dist = this */
899
        shr     ebp, 16            /* dist = this.val */
900
        mov     cl, ah
901
        sub     bl, ah            /* bits -= this.bits */
902
        shr     edx, cl           /* hold >>= this.bits */
903
        mov     cl, al            /* cl = this.op */
904
 
905
        test    al, 16             /* if ((op & 16) == 0) */
906
        jz      L_test_for_second_level_dist
907
        and     cl, 15             /* op &= 15 */
908
        jz      L_check_dist_one
909
        cmp     bl, cl
910
        jae     L_add_bits_to_dist   /* if (op <= bits) 97.6% */
911
 
912
        mov     ch, cl            /* stash op in ch, freeing cl */
913
        xor     eax, eax
914
        lodsw                         /* al = *(ushort *)in++ */
915
        mov     cl, bl            /* cl = bits, needs it for shifting */
916
        add     bl, 16             /* bits += 16 */
917
        shl     eax, cl
918
        or      edx, eax        /* hold |= *((ushort *)in)++ << bits */
919
        mov     cl, ch            /* move op back to ecx */
920
 
921
L_add_bits_to_dist:
922
        sub     bl, cl
923
        xor     eax, eax
924
        inc     eax
925
        shl     eax, cl
926
        dec     eax                 /* (1 << op) - 1 */
927
        and     eax, edx          /* eax &= hold */
928
        shr     edx, cl
929
        add     ebp, eax          /* dist += hold & ((1 << op) - 1) */
930
 
931
L_check_window:
932
        mov     [esp+8], esi       /* save in so from can use it's reg */
933
        mov     eax, edi
934
        sub     eax, [esp+20]      /* nbytes = out - beg */
935
 
936
        cmp     eax, ebp
937
        jb      L_clip_window        /* if (dist > nbytes) 4.2% */
938
 
939
        mov     ecx, [esp+64]      /* ecx = len */
940
        mov     esi, edi
941
        sub     esi, ebp          /* from = out - dist */
942
 
943
        sar     ecx, 1
944
        jnc     L_copy_two
945
 
946
        rep     movsw
947
        mov     al, [esi]
948
        mov     [edi], al
949
        inc     edi
950
 
951
        mov     esi, [esp+8]      /* move in back to %esi, toss from */
952
        mov     ebp, [esp+32]     /* ebp = lcode */
953
        jmp     L_while_test
954
 
955
L_copy_two:
956
        rep     movsw
957
        mov     esi, [esp+8]      /* move in back to %esi, toss from */
958
        mov     ebp, [esp+32]     /* ebp = lcode */
959
        jmp     L_while_test
960
 
961
ALIGN 4
962
L_check_dist_one:
963
        cmp     ebp, 1            /* if dist 1, is a memset */
964
        jne     L_check_window
965
        cmp     [esp+20], edi
966
        je      L_check_window    /* out == beg, if outside window */
967
 
968
        mov     ecx, [esp+64]     /* ecx = len */
969
        mov     al, [edi-1]
970
        mov     ah, al
971
 
972
        sar     ecx, 1
973
        jnc     L_set_two
974
        mov     [edi], al         /* memset out with from[-1] */
975
        inc     edi
976
 
977
L_set_two:
978
        rep     stosw
979
        mov     ebp, [esp+32]     /* ebp = lcode */
980
        jmp     L_while_test
981
 
982
ALIGN 4
983
L_test_for_second_level_length:
984
        test    al, 64
985
        jnz     L_test_for_end_of_block /* if ((op & 64) != 0) */
986
 
987
        xor     eax, eax
988
        inc     eax
989
        shl     eax, cl
990
        dec     eax
991
        and     eax, edx         /* eax &= hold */
992
        add     eax, [esp+64]     /* eax += len */
993
        mov     eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
994
        jmp     L_dolen
995
 
996
ALIGN 4
997
L_test_for_second_level_dist:
998
        test    al, 64
999
        jnz     L_invalid_distance_code /* if ((op & 64) != 0) */
1000
 
1001
        xor     eax, eax
1002
        inc     eax
1003
        shl     eax, cl
1004
        dec     eax
1005
        and     eax, edx         /* eax &= hold */
1006
        add     eax, ebp         /* eax += dist */
1007
        mov     ecx, [esp+36]     /* ecx = dcode */
1008
        mov     eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
1009
        jmp     L_dodist
1010
 
1011
ALIGN 4
1012
L_clip_window:
1013
        mov     ecx, eax
1014
        mov     eax, [esp+48]     /* eax = wsize */
1015
        neg     ecx                /* nbytes = -nbytes */
1016
        mov     esi, [esp+28]     /* from = window */
1017
 
1018
        cmp     eax, ebp
1019
        jb      L_invalid_distance_too_far /* if (dist > wsize) */
1020
 
1021
        add     ecx, ebp         /* nbytes = dist - nbytes */
1022
        cmp     dword ptr [esp+52], 0
1023
        jne     L_wrap_around_window /* if (write != 0) */
1024
 
1025
        sub     eax, ecx
1026
        add     esi, eax         /* from += wsize - nbytes */
1027
 
1028
        mov     eax, [esp+64]    /* eax = len */
1029
        cmp     eax, ecx
1030
        jbe     L_do_copy          /* if (nbytes >= len) */
1031
 
1032
        sub     eax, ecx         /* len -= nbytes */
1033
        rep     movsb
1034
        mov     esi, edi
1035
        sub     esi, ebp         /* from = out - dist */
1036
        jmp     L_do_copy
1037
 
1038
ALIGN 4
1039
L_wrap_around_window:
1040
        mov     eax, [esp+52]    /* eax = write */
1041
        cmp     ecx, eax
1042
        jbe     L_contiguous_in_window /* if (write >= nbytes) */
1043
 
1044
        add     esi, [esp+48]    /* from += wsize */
1045
        add     esi, eax         /* from += write */
1046
        sub     esi, ecx         /* from -= nbytes */
1047
        sub     ecx, eax         /* nbytes -= write */
1048
 
1049
        mov     eax, [esp+64]    /* eax = len */
1050
        cmp     eax, ecx
1051
        jbe     L_do_copy          /* if (nbytes >= len) */
1052
 
1053
        sub     eax, ecx         /* len -= nbytes */
1054
        rep     movsb
1055
        mov     esi, [esp+28]     /* from = window */
1056
        mov     ecx, [esp+52]     /* nbytes = write */
1057
        cmp     eax, ecx
1058
        jbe     L_do_copy          /* if (nbytes >= len) */
1059
 
1060
        sub     eax, ecx         /* len -= nbytes */
1061
        rep     movsb
1062
        mov     esi, edi
1063
        sub     esi, ebp         /* from = out - dist */
1064
        jmp     L_do_copy
1065
 
1066
ALIGN 4
1067
L_contiguous_in_window:
1068
        add     esi, eax
1069
        sub     esi, ecx         /* from += write - nbytes */
1070
 
1071
        mov     eax, [esp+64]    /* eax = len */
1072
        cmp     eax, ecx
1073
        jbe     L_do_copy          /* if (nbytes >= len) */
1074
 
1075
        sub     eax, ecx         /* len -= nbytes */
1076
        rep     movsb
1077
        mov     esi, edi
1078
        sub     esi, ebp         /* from = out - dist */
1079
        jmp     L_do_copy
1080
 
1081
ALIGN 4
1082
L_do_copy:
1083
        mov     ecx, eax
1084
        rep     movsb
1085
 
1086
        mov     esi, [esp+8]      /* move in back to %esi, toss from */
1087
        mov     ebp, [esp+32]     /* ebp = lcode */
1088
        jmp     L_while_test
1089
 
1090
L_test_for_end_of_block:
1091
        test    al, 32
1092
        jz      L_invalid_literal_length_code
1093
        mov     dword ptr [esp+72], 1
1094
        jmp     L_break_loop_with_status
1095
 
1096
L_invalid_literal_length_code:
1097
        mov     dword ptr [esp+72], 2
1098
        jmp     L_break_loop_with_status
1099
 
1100
L_invalid_distance_code:
1101
        mov     dword ptr [esp+72], 3
1102
        jmp     L_break_loop_with_status
1103
 
1104
L_invalid_distance_too_far:
1105
        mov     esi, [esp+8]      /* reload in (parked at [esp+8] by L_check_window); [esp+4] is the saved ebp */
1106
        mov     dword ptr [esp+72], 4  /* status = 4 (distance too far back) */
1107
        jmp     L_break_loop_with_status
1108
 
1109
L_break_loop:
1110
        mov     dword ptr [esp+72], 0
1111
 
1112
L_break_loop_with_status:
1113
/* put in, out, bits, and hold back into ar and pop esp */
1114
        mov     [esp+8], esi     /* save in */
1115
        mov     [esp+16], edi    /* save out */
1116
        mov     [esp+44], ebx    /* save bits */
1117
        mov     [esp+40], edx    /* save hold */
1118
        mov     ebp, [esp+4]     /* restore esp, ebp */
1119
        mov     esp, [esp]
1120
    }
1121
#else
1122
#error "x86 architecture not defined"
1123
#endif
1124
 
1125
    if (ar.status > 1) {
1126
        if (ar.status == 2)
1127
            strm->msg = "invalid literal/length code";
1128
        else if (ar.status == 3)
1129
            strm->msg = "invalid distance code";
1130
        else
1131
            strm->msg = "invalid distance too far back";
1132
        state->mode = BAD;
1133
    }
1134
    else if ( ar.status == 1 ) {
1135
        state->mode = TYPE;
1136
    }
1137
 
1138
    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
1139
    ar.len = ar.bits >> 3;
1140
    ar.in -= ar.len;
1141
    ar.bits -= ar.len << 3;
1142
    ar.hold &= (1U << ar.bits) - 1;
1143
 
1144
    /* update state and return */
1145
    strm->next_in = ar.in;
1146
    strm->next_out = ar.out;
1147
    strm->avail_in = (unsigned)(ar.in < ar.last ?
1148
                                PAD_AVAIL_IN + (ar.last - ar.in) :
1149
                                PAD_AVAIL_IN - (ar.in - ar.last));
1150
    strm->avail_out = (unsigned)(ar.out < ar.end ?
1151
                                 PAD_AVAIL_OUT + (ar.end - ar.out) :
1152
                                 PAD_AVAIL_OUT - (ar.out - ar.end));
1153
    state->hold = ar.hold;
1154
    state->bits = ar.bits;
1155
    return;
1156
}
1157
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.