OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [zlib/] [contrib/] [inflate86/] [inffast.S] - Blame information for rev 746

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 745 jeremybenn
/*
2
 * inffast.S is a hand tuned assembler version of:
3
 *
4
 * inffast.c -- fast decoding
5
 * Copyright (C) 1995-2003 Mark Adler
6
 * For conditions of distribution and use, see copyright notice in zlib.h
7
 *
8
 * Copyright (C) 2003 Chris Anderson 
9
 * Please use the copyright conditions above.
10
 *
11
 * This version (Jan-23-2003) of inflate_fast was coded and tested under
12
 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution.  On that
13
 * machine, I found that gzip style archives decompressed about 20% faster than
14
 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version.  Your results will
15
 * depend on how large of a buffer is used for z_stream.next_in & next_out
16
 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17
 * stream processing I/O and crc32/adler32.  In my case, this routine used
18
 * 70% of the cpu time and crc32 used 20%.
19
 *
20
 * I am confident that this version will work in the general case, but I have
21
 * not tested a wide variety of datasets or a wide variety of platforms.
22
 *
23
 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24
 * It should be a runtime flag instead of compile time flag...
25
 *
26
 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
27
 * With -DUSE_MMX, only MMX code is compiled.  With -DNO_MMX, only non-MMX code
28
 * is compiled.  Without either option, runtime detection is enabled.  Runtime
29
 * detection should work on all modern cpus and the recommended algorithm (flip
30
 * ID bit on eflags and then use the cpuid instruction) is used in many
31
 * multimedia applications.  Tested under win2k with gcc-2.95 and gas-2.12
32
 * distributed with cygwin3.  Compiling with gcc-2.95 -c inffast.S -o
33
 * inffast.obj generates a COFF object which can then be linked with MSVC++
34
 * compiled code.  Tested under FreeBSD 4.7 with gcc-2.95.
35
 *
36
 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
37
 * slower than compiler generated code).  Adjusted cpuid check to use the MMX
38
 * code only for Pentiums < P4 until I have more data on the P4.  Speed
39
 * improvement is only about 15% on the Athlon when compared with code generated
40
 * with MSVC++.  Not sure yet, but I think the P4 will also be slower using the
41
 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
42
 * have less latency than MMX ops.  Added code to buffer the last 11 bytes of
43
 * the input stream since the MMX code grabs bits in chunks of 32, which
44
 * differs from the inffast.c algorithm.  I don't think there would have been
45
 * read overruns where a page boundary was crossed (a segfault), but there
46
 * could have been overruns when next_in ends on unaligned memory (uninitialized
47
 * memory read).
48
 *
49
 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX.  I created a C
50
 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
51
 * structure offsets which are hard coded in this file.  This was last tested
52
 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
53
 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
54
 * http://www.charm.net/~christop/zlib/
55
 */
56
 
57
 
58
/*
59
 * if you have underscore linking problems (_inflate_fast undefined), try
60
 * using -DGAS_COFF
61
 */
62
/*
 * Choose the object format unless the build already forced one with
 * -DGAS_COFF or -DGAS_ELF: COFF for WIN32 / Cygwin, ELF for everything else.
 */
#ifndef GAS_COFF
#ifndef GAS_ELF

#if defined(WIN32) || defined(__CYGWIN__)
#define GAS_COFF
#else
#define GAS_ELF
#endif

#endif /* !GAS_ELF */
#endif /* !GAS_COFF */

/*
 * COFF external symbols carry a leading underscore, so rename the two
 * symbols this file makes global.
 */
#ifdef GAS_COFF
#define inflate_fast _inflate_fast
#define inflate_fast_use_mmx _inflate_fast_use_mmx
#endif /* GAS_COFF */
80
 
81
 
82
.file "inffast.S"

.globl inflate_fast

/*
 * NUL-terminated error message strings.  They are emitted into .text and
 * each one is preceded by a zero-filled ".align 4".
 */
.text

.align 4,0
.L_invalid_literal_length_code_msg:
.asciz "invalid literal/length code"

.align 4,0
.L_invalid_distance_code_msg:
.asciz "invalid distance code"

.align 4,0
.L_invalid_distance_too_far_msg:
.asciz "invalid distance too far back"
98
 
99
#ifndef NO_MMX
/*
 * Bit-mask lookup table, only assembled when MMX code may be built:
 * entry N holds (1 << N) - 1, for N = 0 .. 32 (33 longs).
 */
.align 4,0
.L_mask:
.long 0x00000000, 0x00000001, 0x00000003, 0x00000007  /* N =  0 ..  3 */
.long 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f  /* N =  4 ..  7 */
.long 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff  /* N =  8 .. 11 */
.long 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff  /* N = 12 .. 15 */
.long 0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff  /* N = 16 .. 19 */
.long 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff  /* N = 20 .. 23 */
.long 0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff  /* N = 24 .. 27 */
.long 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff  /* N = 28 .. 31 */
.long 0xffffffff                                      /* N = 32 */
#endif /* !NO_MMX */
136
 
137
.text
138
 
139
/*
 * Hard-coded byte offsets of the z_stream members used here (see zlib.h).
 */
#define next_in_strm    0       /* strm->next_in */
#define avail_in_strm   4       /* strm->avail_in */
#define next_out_strm   12      /* strm->next_out */
#define avail_out_strm  16      /* strm->avail_out */
#define msg_strm        24      /* strm->msg */
#define state_strm      28      /* strm->state */

/*
 * Hard-coded byte offsets of the inflate_state members used here
 * (see inflate.h).
 */
#define mode_state      0       /* state->mode */
#define wsize_state     32      /* state->wsize */
#define write_state     40      /* state->write */
#define window_state    44      /* state->window */
#define hold_state      48      /* state->hold */
#define bits_state      52      /* state->bits */
#define lencode_state   68      /* state->lencode */
#define distcode_state  72      /* state->distcode */
#define lenbits_state   76      /* state->lenbits */
#define distbits_state  80      /* state->distbits */

/*
 * Stack frame of inflate_fast.  The arguments sit above the locals plus
 * 24 bytes: return address, four saved registers and the saved eflags.
 */
#define local_var_size  64      /* bytes reserved for local variables */
#define strm_sp         88      /* arg 1: z_stream *   (local_var_size + 24) */
#define start_sp        92      /* arg 2: unsigned int (local_var_size + 28) */

/*
 * Local variables, as offsets from %esp (listed from the bottom of the
 * frame upwards).
 */
#define lmask           0       /* unsigned int */
#define dmask           4       /* unsigned int */
#define lcode           8       /* code* */
#define dcode           12      /* code* */
#define end             16      /* unsigned char* */
#define last            20      /* unsigned char* */
#define len             24      /* unsigned int */
#define buf             28      /* char[ 12 ] */
#define beg             40      /* unsigned char* */
#define in              44      /* unsigned char* */
#define write           48      /* unsigned int */
#define wsize           52      /* unsigned int */
#define window          56      /* unsigned char* */
#define out             60      /* unsigned char* */

/*
 * Values of state->mode, copied from the inflate_mode enum in inflate.h.
 */
#define INFLATE_MODE_TYPE 11
#define INFLATE_MODE_BAD  26
194
 
195
#if !defined(USE_MMX) && !defined(NO_MMX)

/*
 * Neither -DUSE_MMX nor -DNO_MMX was given: select the code path at run
 * time through the global flag inflate_fast_use_mmx.
 */
#define RUN_TIME_MMX

/* values held by inflate_fast_use_mmx */
#define CHECK_MMX    1  /* initial value: check */
#define DO_USE_MMX   2  /* mmx */
#define DONT_USE_MMX 3  /* no mmx */

.globl inflate_fast_use_mmx

.data

.align 4,0
inflate_fast_use_mmx:
.long CHECK_MMX

#ifdef GAS_ELF
/* ELF symbol info: a 4-byte data object */
.type   inflate_fast_use_mmx,@object
.size   inflate_fast_use_mmx,4
#endif

#endif /* RUN_TIME_MMX */
218
 
219
#ifdef GAS_COFF
/* COFF symbol record for inflate_fast: storage class 2 (extern),
 * type 32 (function) */
.def inflate_fast
.scl 2
.type 32
.endef
#endif
223
 
224
.text
225
 
226
.align 32,0x90
227
/*
 * inflate_fast entry: build the stack frame.
 * Pushes %edi, %esi, %ebp, %ebx and eflags, then reserves local_var_size
 * bytes for locals.  From here on the two stack arguments are addressed as
 * strm_sp(%esp) (z_stream *) and start_sp(%esp) (unsigned int).
 */
inflate_fast:
228
        pushl   %edi
229
        pushl   %esi
230
        pushl   %ebp
231
        pushl   %ebx
232
        pushf   /* save eflags (strm_sp and start_sp assume this pushes 32 bits) */
233
        subl    $local_var_size, %esp
234
        cld     /* string instructions used below (lodsw, stosb, movsb) must count upward */
235
 
236
#define strm_r  %esi
237
#define state_r %edi
238
 
239
        movl    strm_sp(%esp), strm_r
240
        movl    state_strm(strm_r), state_r
241
 
242
        /* in = strm->next_in;
243
         * out = strm->next_out;
244
         * last = in + strm->avail_in - 11;
245
         * beg = out - (start - strm->avail_out);
246
         * end = out + (strm->avail_out - 257);
247
         */
248
        movl    avail_in_strm(strm_r), %edx
249
        movl    next_in_strm(strm_r), %eax
250
 
251
        addl    %eax, %edx      /* avail_in += next_in */
252
        subl    $11, %edx       /* avail_in -= 11 */
253
 
254
        movl    %eax, in(%esp)
255
        movl    %edx, last(%esp)
256
 
257
        movl    start_sp(%esp), %ebp
258
        movl    avail_out_strm(strm_r), %ecx
259
        movl    next_out_strm(strm_r), %ebx
260
 
261
        subl    %ecx, %ebp      /* start -= avail_out */
262
        negl    %ebp            /* start = -start */
263
        addl    %ebx, %ebp      /* start += next_out */
264
 
265
        subl    $257, %ecx      /* avail_out -= 257 */
266
        addl    %ebx, %ecx      /* avail_out += out */
267
 
268
        movl    %ebx, out(%esp)
269
        movl    %ebp, beg(%esp)
270
        movl    %ecx, end(%esp)
271
 
272
        /* wsize = state->wsize;
273
         * write = state->write;
274
         * window = state->window;
275
         * hold = state->hold;
276
         * bits = state->bits;
277
         * lcode = state->lencode;
278
         * dcode = state->distcode;
279
         * lmask = ( 1 << state->lenbits ) - 1;
280
         * dmask = ( 1 << state->distbits ) - 1;
281
         */
282
 
283
        movl    lencode_state(state_r), %eax
284
        movl    distcode_state(state_r), %ecx
285
 
286
        movl    %eax, lcode(%esp)
287
        movl    %ecx, dcode(%esp)
288
 
289
        movl    $1, %eax
290
        movl    lenbits_state(state_r), %ecx
291
        shll    %cl, %eax
292
        decl    %eax
293
        movl    %eax, lmask(%esp)
294
 
295
        movl    $1, %eax
296
        movl    distbits_state(state_r), %ecx
297
        shll    %cl, %eax
298
        decl    %eax
299
        movl    %eax, dmask(%esp)
300
 
301
        movl    wsize_state(state_r), %eax
302
        movl    write_state(state_r), %ecx
303
        movl    window_state(state_r), %edx
304
 
305
        movl    %eax, wsize(%esp)
306
        movl    %ecx, write(%esp)
307
        movl    %edx, window(%esp)
308
 
309
        movl    hold_state(state_r), %ebp
310
        movl    bits_state(state_r), %ebx
311
 
312
#undef strm_r
313
#undef state_r
314
 
315
#define in_r       %esi
316
#define from_r     %esi
317
#define out_r      %edi
318
 
319
        movl    in(%esp), in_r
320
        movl    last(%esp), %ecx
321
        cmpl    in_r, %ecx
322
        ja      .L_align_long           /* if in < last */
323
 
324
        addl    $11, %ecx               /* ecx = &in[ avail_in ] */
325
        subl    in_r, %ecx              /* ecx = avail_in */
326
        movl    $12, %eax
327
        subl    %ecx, %eax              /* eax = 12 - avail_in */
328
        leal    buf(%esp), %edi
329
        rep     movsb                   /* memcpy( buf, in, avail_in ) */
330
        movl    %eax, %ecx
331
        xorl    %eax, %eax
332
        rep     stosb         /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
333
        leal    buf(%esp), in_r         /* in = buf */
334
        movl    in_r, last(%esp)        /* last = in, do just one iteration */
335
        jmp     .L_is_aligned
336
 
337
        /* align in_r on long boundary */
338
.L_align_long:
339
        testl   $3, in_r
340
        jz      .L_is_aligned
341
        xorl    %eax, %eax
342
        movb    (in_r), %al
343
        incl    in_r
344
        movl    %ebx, %ecx
345
        addl    $8, %ebx
346
        shll    %cl, %eax
347
        orl     %eax, %ebp
348
        jmp     .L_align_long
349
 
350
.L_is_aligned:
351
        movl    out(%esp), out_r
352
 
353
#if defined( NO_MMX )
354
        jmp     .L_do_loop
355
#endif
356
 
357
#if defined( USE_MMX )
358
        jmp     .L_init_mmx
359
#endif
360
 
361
/*** Runtime MMX check ***/
362
 
363
#if defined( RUN_TIME_MMX )
364
.L_check_mmx:
365
        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
366
        je      .L_init_mmx
367
        ja      .L_do_loop /* > 2 */
368
 
369
        pushl   %eax
370
        pushl   %ebx
371
        pushl   %ecx
372
        pushl   %edx
373
        pushf
374
        movl    (%esp), %eax      /* copy eflags to eax */
375
        xorl    $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
376
                                   * to see if cpu supports cpuid...
377
                                   * ID bit method not supported by NexGen but
378
                                   * bios may load a cpuid instruction and
379
                                   * cpuid may be disabled on Cyrix 5-6x86 */
380
        popf
381
        pushf
382
        popl    %edx              /* copy new eflags to edx */
383
        xorl    %eax, %edx        /* test if ID bit is flipped */
384
        jz      .L_dont_use_mmx   /* not flipped if zero */
385
        xorl    %eax, %eax
386
        cpuid
387
        cmpl    $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
388
        jne     .L_dont_use_mmx
389
        cmpl    $0x6c65746e, %ecx
390
        jne     .L_dont_use_mmx
391
        cmpl    $0x49656e69, %edx
392
        jne     .L_dont_use_mmx
393
        movl    $1, %eax
394
        cpuid                     /* get cpu features */
395
        shrl    $8, %eax
396
        andl    $15, %eax
397
        cmpl    $6, %eax          /* check for Pentium family, is 0xf for P4 */
398
        jne     .L_dont_use_mmx
399
        testl   $0x800000, %edx   /* test if MMX feature is set (bit 23) */
400
        jnz     .L_use_mmx
401
        jmp     .L_dont_use_mmx
402
.L_use_mmx:
403
        movl    $DO_USE_MMX, inflate_fast_use_mmx
404
        jmp     .L_check_mmx_pop
405
.L_dont_use_mmx:
406
        movl    $DONT_USE_MMX, inflate_fast_use_mmx
407
.L_check_mmx_pop:
408
        popl    %edx
409
        popl    %ecx
410
        popl    %ebx
411
        popl    %eax
412
        jmp     .L_check_mmx
413
#endif
414
 
415
 
416
/*** Non-MMX code ***/
417
 
418
#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
419
 
420
#define hold_r     %ebp
421
#define bits_r     %bl
422
#define bitslong_r %ebx
423
 
424
.align 32,0x90
425
.L_while_test:
426
        /* while (in < last && out < end)
427
         */
428
        cmpl    out_r, end(%esp)
429
        jbe     .L_break_loop           /* if (out >= end) */
430
 
431
        cmpl    in_r, last(%esp)
432
        jbe     .L_break_loop
433
 
434
.L_do_loop:
435
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
436
         *
437
         * do {
438
         *   if (bits < 15) {
439
         *     hold |= *((unsigned short *)in)++ << bits;
440
         *     bits += 16
441
         *   }
442
         *   this = lcode[hold & lmask]
443
         */
444
        cmpb    $15, bits_r
445
        ja      .L_get_length_code      /* if (15 < bits) */
446
 
447
        xorl    %eax, %eax
448
        lodsw                           /* al = *(ushort *)in++ */
449
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
450
        addb    $16, bits_r             /* bits += 16 */
451
        shll    %cl, %eax
452
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
453
 
454
.L_get_length_code:
455
        movl    lmask(%esp), %edx       /* edx = lmask */
456
        movl    lcode(%esp), %ecx       /* ecx = lcode */
457
        andl    hold_r, %edx            /* edx &= hold */
458
        movl    (%ecx,%edx,4), %eax     /* eax = lcode[hold & lmask] */
459
 
460
.L_dolen:
461
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
462
         *
463
         * dolen:
464
         *    bits -= this.bits;
465
         *    hold >>= this.bits
466
         */
467
        movb    %ah, %cl                /* cl = this.bits */
468
        subb    %ah, bits_r             /* bits -= this.bits */
469
        shrl    %cl, hold_r             /* hold >>= this.bits */
470
 
471
        /* check if op is a literal
472
         * if (op == 0) {
473
         *    PUP(out) = this.val;
474
         *  }
475
         */
476
        testb   %al, %al
477
        jnz     .L_test_for_length_base /* if (op != 0) 45.7% */
478
 
479
        shrl    $16, %eax               /* output this.val char */
480
        stosb
481
        jmp     .L_while_test
482
 
483
.L_test_for_length_base:
484
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
485
         *
486
         * else if (op & 16) {
487
         *   len = this.val
488
         *   op &= 15
489
         *   if (op) {
490
         *     if (op > bits) {
491
         *       hold |= *((unsigned short *)in)++ << bits;
492
         *       bits += 16
493
         *     }
494
         *     len += hold & mask[op];
495
         *     bits -= op;
496
         *     hold >>= op;
497
         *   }
498
         */
499
#define len_r %edx
500
        movl    %eax, len_r             /* len = this */
501
        shrl    $16, len_r              /* len = this.val */
502
        movb    %al, %cl
503
 
504
        testb   $16, %al
505
        jz      .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
506
        andb    $15, %cl                /* op &= 15 */
507
        jz      .L_save_len             /* if (!op) */
508
        cmpb    %cl, bits_r
509
        jae     .L_add_bits_to_len      /* if (op <= bits) */
510
 
511
        movb    %cl, %ch                /* stash op in ch, freeing cl */
512
        xorl    %eax, %eax
513
        lodsw                           /* al = *(ushort *)in++ */
514
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
515
        addb    $16, bits_r             /* bits += 16 */
516
        shll    %cl, %eax
517
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
518
        movb    %ch, %cl                /* move op back to ecx */
519
 
520
.L_add_bits_to_len:
521
        movl    $1, %eax
522
        shll    %cl, %eax
523
        decl    %eax
524
        subb    %cl, bits_r
525
        andl    hold_r, %eax            /* eax &= hold */
526
        shrl    %cl, hold_r
527
        addl    %eax, len_r             /* len += hold & mask[op] */
528
 
529
.L_save_len:
530
        movl    len_r, len(%esp)        /* save len */
531
#undef  len_r
532
 
533
.L_decode_distance:
534
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
535
         *
536
         *   if (bits < 15) {
537
         *     hold |= *((unsigned short *)in)++ << bits;
538
         *     bits += 16
539
         *   }
540
         *   this = dcode[hold & dmask];
541
         * dodist:
542
         *   bits -= this.bits;
543
         *   hold >>= this.bits;
544
         *   op = this.op;
545
         */
546
 
547
        cmpb    $15, bits_r
548
        ja      .L_get_distance_code    /* if (15 < bits) */
549
 
550
        xorl    %eax, %eax
551
        lodsw                           /* al = *(ushort *)in++ */
552
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
553
        addb    $16, bits_r             /* bits += 16 */
554
        shll    %cl, %eax
555
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
556
 
557
.L_get_distance_code:
558
        movl    dmask(%esp), %edx       /* edx = dmask */
559
        movl    dcode(%esp), %ecx       /* ecx = dcode */
560
        andl    hold_r, %edx            /* edx &= hold */
561
        movl    (%ecx,%edx,4), %eax     /* eax = dcode[hold & dmask] */
562
 
563
#define dist_r %edx
564
.L_dodist:
565
        movl    %eax, dist_r            /* dist = this */
566
        shrl    $16, dist_r             /* dist = this.val */
567
        movb    %ah, %cl
568
        subb    %ah, bits_r             /* bits -= this.bits */
569
        shrl    %cl, hold_r             /* hold >>= this.bits */
570
 
571
        /* if (op & 16) {
572
         *   dist = this.val
573
         *   op &= 15
574
         *   if (op > bits) {
575
         *     hold |= *((unsigned short *)in)++ << bits;
576
         *     bits += 16
577
         *   }
578
         *   dist += hold & mask[op];
579
         *   bits -= op;
580
         *   hold >>= op;
581
         */
582
        movb    %al, %cl                /* cl = this.op */
583
 
584
        testb   $16, %al                /* if ((op & 16) == 0) */
585
        jz      .L_test_for_second_level_dist
586
        andb    $15, %cl                /* op &= 15 */
587
        jz      .L_check_dist_one
588
        cmpb    %cl, bits_r
589
        jae     .L_add_bits_to_dist     /* if (op <= bits) 97.6% */
590
 
591
        movb    %cl, %ch                /* stash op in ch, freeing cl */
592
        xorl    %eax, %eax
593
        lodsw                           /* al = *(ushort *)in++ */
594
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
595
        addb    $16, bits_r             /* bits += 16 */
596
        shll    %cl, %eax
597
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
598
        movb    %ch, %cl                /* move op back to ecx */
599
 
600
.L_add_bits_to_dist:
        /* regs: %cl = op (number of extra distance bits), %ebp = hold,
         *       %bl = bits, %edx = dist
         *
         *   dist += hold & ((1 << op) - 1);
         *   bits -= op;
         *   hold >>= op;
         */
        movl    $1, %eax
        shll    %cl, %eax
        decl    %eax                    /* (1 << op) - 1 */
        subb    %cl, bits_r             /* bits -= op */
        andl    hold_r, %eax            /* eax &= hold */
        shrl    %cl, hold_r             /* hold >>= op */
        addl    %eax, dist_r            /* dist += hold & ((1 << op) - 1) */
        /* No jump needed: execution falls straight through into
         * .L_check_window, which is the next instruction in the file. */
609
 
610
.L_check_window:
611
        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
612
         *       %ecx = nbytes
613
         *
614
         * nbytes = out - beg;
615
         * if (dist <= nbytes) {
616
         *   from = out - dist;
617
         *   do {
618
         *     PUP(out) = PUP(from);
619
         *   } while (--len > 0) {
620
         * }
621
         */
622
 
623
        movl    in_r, in(%esp)          /* save in so from can use it's reg */
624
        movl    out_r, %eax
625
        subl    beg(%esp), %eax         /* nbytes = out - beg */
626
 
627
        cmpl    dist_r, %eax
628
        jb      .L_clip_window          /* if (dist > nbytes) 4.2% */
629
 
630
        movl    len(%esp), %ecx
631
        movl    out_r, from_r
632
        subl    dist_r, from_r          /* from = out - dist */
633
 
634
        subl    $3, %ecx
635
        movb    (from_r), %al
636
        movb    %al, (out_r)
637
        movb    1(from_r), %al
638
        movb    2(from_r), %dl
639
        addl    $3, from_r
640
        movb    %al, 1(out_r)
641
        movb    %dl, 2(out_r)
642
        addl    $3, out_r
643
        rep     movsb
644
 
645
        movl    in(%esp), in_r          /* move in back to %esi, toss from */
646
        jmp     .L_while_test
647
 
648
.align 16,0x90
649
.L_check_dist_one:
650
        cmpl    $1, dist_r
651
        jne     .L_check_window
652
        cmpl    out_r, beg(%esp)
653
        je      .L_check_window
654
 
655
        decl    out_r
656
        movl    len(%esp), %ecx
657
        movb    (out_r), %al
658
        subl    $3, %ecx
659
 
660
        movb    %al, 1(out_r)
661
        movb    %al, 2(out_r)
662
        movb    %al, 3(out_r)
663
        addl    $4, out_r
664
        rep     stosb
665
 
666
        jmp     .L_while_test
667
 
668
.align 16,0x90
669
.L_test_for_second_level_length:
670
        /* else if ((op & 64) == 0) {
671
         *   this = lcode[this.val + (hold & mask[op])];
672
         * }
673
         */
674
        testb   $64, %al
675
        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
676
 
677
        movl    $1, %eax
678
        shll    %cl, %eax
679
        decl    %eax
680
        andl    hold_r, %eax            /* eax &= hold */
681
        addl    %edx, %eax              /* eax += this.val */
682
        movl    lcode(%esp), %edx       /* edx = lcode */
683
        movl    (%edx,%eax,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
684
        jmp     .L_dolen
685
 
686
.align 16,0x90
687
.L_test_for_second_level_dist:
688
        /* else if ((op & 64) == 0) {
689
         *   this = dcode[this.val + (hold & mask[op])];
690
         * }
691
         */
692
        testb   $64, %al
693
        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
694
 
695
        movl    $1, %eax
696
        shll    %cl, %eax
697
        decl    %eax
698
        andl    hold_r, %eax            /* eax &= hold */
699
        addl    %edx, %eax              /* eax += this.val */
700
        movl    dcode(%esp), %edx       /* edx = dcode */
701
        movl    (%edx,%eax,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
702
        jmp     .L_dodist
703
 
704
.align 16,0x90
705
.L_clip_window:
        /* Reached from .L_check_window when dist > (out - beg), i.e. the
         * match starts in the sliding window rather than in the output
         * produced by this call.
         *
         * entry: %eax = out - beg, %ebp = hold, %bl = bits, %edi = out,
         *        %edx = dist
         * then:  %esi = from, %ecx = nbytes
         *
         * else {
         *   if (dist > wsize) {
         *     invalid distance
         *   }
         *   from = window;
         *   nbytes = dist - nbytes;
         *   if (write == 0) {
         *     from += wsize - nbytes;
         */
#define nbytes_r %ecx
        movl    %eax, nbytes_r
        movl    wsize(%esp), %eax       /* prepare for dist compare */
        negl    nbytes_r                /* nbytes = -nbytes */
        movl    window(%esp), from_r    /* from = window */

        cmpl    dist_r, %eax
        jb      .L_invalid_distance_too_far /* if (dist > wsize) */

        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
        cmpl    $0, write(%esp)
        jne     .L_wrap_around_window   /* if (write != 0) */

        subl    nbytes_r, %eax
        addl    %eax, from_r            /* from += wsize - nbytes */

        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
         *       %ecx = nbytes, %eax = len
         *
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       do {
         *         PUP(out) = PUP(from);
         *       } while (--nbytes);
         *       from = out - dist;
         *     }
         *   }
         */
#define len_r %eax
        movl    len(%esp), len_r
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1             /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy the nbytes that live in the window */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1             /* copy the remaining len bytes */

        /* An unlabelled, unreachable second copy of the seven instructions
         * above used to follow this jmp; it has been removed. */
765
 
766
.L_wrap_around_window:
767
        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
768
         *       %ecx = nbytes, %eax = write, %eax = len
769
         *
770
         *   else if (write < nbytes) {
771
         *     from += wsize + write - nbytes;
772
         *     nbytes -= write;
773
         *     if (nbytes < len) {
774
         *       len -= nbytes;
775
         *       do {
776
         *         PUP(out) = PUP(from);
777
         *       } while (--nbytes);
778
         *       from = window;
779
         *       nbytes = write;
780
         *       if (nbytes < len) {
781
         *         len -= nbytes;
782
         *         do {
783
         *           PUP(out) = PUP(from);
784
         *         } while(--nbytes);
785
         *         from = out - dist;
786
         *       }
787
         *     }
788
         *   }
789
         */
790
#define write_r %eax
791
        movl    write(%esp), write_r
792
        cmpl    write_r, nbytes_r
793
        jbe     .L_contiguous_in_window /* if (write >= nbytes) */
794
 
795
        addl    wsize(%esp), from_r
796
        addl    write_r, from_r
797
        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
798
        subl    write_r, nbytes_r       /* nbytes -= write */
799
#undef write_r
800
 
801
        movl    len(%esp), len_r
802
        cmpl    nbytes_r, len_r
803
        jbe     .L_do_copy1             /* if (nbytes >= len) */
804
 
805
        subl    nbytes_r, len_r         /* len -= nbytes */
806
        rep     movsb
807
        movl    window(%esp), from_r    /* from = window */
808
        movl    write(%esp), nbytes_r   /* nbytes = write */
809
        cmpl    nbytes_r, len_r
810
        jbe     .L_do_copy1             /* if (nbytes >= len) */
811
 
812
        subl    nbytes_r, len_r         /* len -= nbytes */
813
        rep     movsb
814
        movl    out_r, from_r
815
        subl    dist_r, from_r          /* from = out - dist */
816
        jmp     .L_do_copy1
817
 
818
.L_contiguous_in_window:
819
        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
820
         *       %ecx = nbytes, %eax = write, %eax = len
821
         *
822
         *   else {
823
         *     from += write - nbytes;
824
         *     if (nbytes < len) {
825
         *       len -= nbytes;
826
         *       do {
827
         *         PUP(out) = PUP(from);
828
         *       } while (--nbytes);
829
         *       from = out - dist;
830
         *     }
831
         *   }
832
         */
833
#define write_r %eax
834
        addl    write_r, from_r
835
        subl    nbytes_r, from_r        /* from += write - nbytes */
836
#undef write_r
837
 
838
        movl    len(%esp), len_r
839
        cmpl    nbytes_r, len_r
840
        jbe     .L_do_copy1             /* if (nbytes >= len) */
841
 
842
        subl    nbytes_r, len_r         /* len -= nbytes */
843
        rep     movsb
844
        movl    out_r, from_r
845
        subl    dist_r, from_r          /* from = out - dist */
846
 
847
.L_do_copy1:
848
        /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
849
         *       %eax = len
850
         *
851
         *     while (len > 0) {
852
         *       PUP(out) = PUP(from);
853
         *       len--;
854
         *     }
855
         *   }
856
         * } while (in < last && out < end);
857
         */
858
#undef nbytes_r
859
#define in_r %esi
860
        movl    len_r, %ecx
861
        rep     movsb
862
 
863
        movl    in(%esp), in_r          /* move in back to %esi, toss from */
864
        jmp     .L_while_test
865
 
866
#undef len_r
867
#undef dist_r
868
 
869
#endif /* NO_MMX || RUN_TIME_MMX */
870
 
871
 
872
/*** MMX code ***/
873
 
874
#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
875
 
876
.align 32,0x90
877
.L_init_mmx:
878
        emms
879
 
880
#undef  bits_r
881
#undef  bitslong_r
882
#define bitslong_r %ebp
883
#define hold_mm    %mm0
884
        movd    %ebp, hold_mm
885
        movl    %ebx, bitslong_r
886
 
887
#define used_mm   %mm1
888
#define dmask2_mm %mm2
889
#define lmask2_mm %mm3
890
#define lmask_mm  %mm4
891
#define dmask_mm  %mm5
892
#define tmp_mm    %mm6
893
 
894
        movd    lmask(%esp), lmask_mm
895
        movq    lmask_mm, lmask2_mm
896
        movd    dmask(%esp), dmask_mm
897
        movq    dmask_mm, dmask2_mm
898
        pxor    used_mm, used_mm
899
        movl    lcode(%esp), %ebx       /* ebx = lcode */
900
        jmp     .L_do_loop_mmx
901
 
902
.align 32,0x90
903
.L_while_test_mmx:
904
        /* while (in < last && out < end)
905
         */
906
        cmpl    out_r, end(%esp)
907
        jbe     .L_break_loop           /* if (out >= end) */
908
 
909
        cmpl    in_r, last(%esp)
910
        jbe     .L_break_loop
911
 
912
.L_do_loop_mmx:
913
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
914
 
915
        cmpl    $32, bitslong_r
916
        ja      .L_get_length_code_mmx  /* if (32 < bits) */
917
 
918
        movd    bitslong_r, tmp_mm
919
        movd    (in_r), %mm7
920
        addl    $4, in_r
921
        psllq   tmp_mm, %mm7
922
        addl    $32, bitslong_r
923
        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
924
 
925
.L_get_length_code_mmx:
926
        pand    hold_mm, lmask_mm
927
        movd    lmask_mm, %eax
928
        movq    lmask2_mm, lmask_mm
929
        movl    (%ebx,%eax,4), %eax     /* eax = lcode[hold & lmask] */
930
 
931
.L_dolen_mmx:
932
        movzbl  %ah, %ecx               /* ecx = this.bits */
933
        movd    %ecx, used_mm
934
        subl    %ecx, bitslong_r        /* bits -= this.bits */
935
 
936
        testb   %al, %al
937
        jnz     .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
938
 
939
        shrl    $16, %eax               /* output this.val char */
940
        stosb
941
        jmp     .L_while_test_mmx
942
 
943
.L_test_for_length_base_mmx:
944
#define len_r  %edx
945
        movl    %eax, len_r             /* len = this */
946
        shrl    $16, len_r              /* len = this.val */
947
 
948
        testb   $16, %al
949
        jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
950
        andl    $15, %eax               /* op &= 15 */
951
        jz      .L_decode_distance_mmx  /* if (!op) */
952
 
953
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
954
        movd    %eax, used_mm
955
        movd    hold_mm, %ecx
956
        subl    %eax, bitslong_r
957
        andl    .L_mask(,%eax,4), %ecx
958
        addl    %ecx, len_r             /* len += hold & mask[op] */
959
 
960
.L_decode_distance_mmx:
961
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
962
 
963
        cmpl    $32, bitslong_r
964
        ja      .L_get_dist_code_mmx    /* if (32 < bits) */
965
 
966
        movd    bitslong_r, tmp_mm
967
        movd    (in_r), %mm7
968
        addl    $4, in_r
969
        psllq   tmp_mm, %mm7
970
        addl    $32, bitslong_r
971
        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
972
 
973
.L_get_dist_code_mmx:                   /* fetch the distance table entry for the low bits of hold */
974
        movl    dcode(%esp), %ebx       /* ebx = dcode */
975
        pand    hold_mm, dmask_mm       /* dmask_mm = hold & dmask */
976
        movd    dmask_mm, %eax          /* eax = table index */
977
        movq    dmask2_mm, dmask_mm     /* restore dmask_mm from its copy */
978
        movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
979
 
980
.L_dodist_mmx:
981
#define dist_r %ebx
982
        movzbl  %ah, %ecx               /* ecx = this.bits */
983
        movl    %eax, dist_r
984
        shrl    $16, dist_r             /* dist  = this.val */
985
        subl    %ecx, bitslong_r        /* bits -= this.bits */
986
        movd    %ecx, used_mm
987
 
988
        testb   $16, %al                /* if ((op & 16) == 0) */
989
        jz      .L_test_for_second_level_dist_mmx
990
        andl    $15, %eax               /* op &= 15 */
991
        jz      .L_check_dist_one_mmx
992
 
993
.L_add_bits_to_dist_mmx:
994
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
995
        movd    %eax, used_mm           /* save bit length of current op */
996
        movd    hold_mm, %ecx           /* get the next bits on input stream */
997
        subl    %eax, bitslong_r        /* bits -= op bits */
998
        andl    .L_mask(,%eax,4), %ecx  /* ecx   = hold & mask[op] */
999
        addl    %ecx, dist_r            /* dist += hold & mask[op] */
1000
 
1001
.L_check_window_mmx:                    /* copy len bytes from dist bytes back in the output */
1002
        movl    in_r, in(%esp)          /* save in so from can use its reg */
1003
        movl    out_r, %eax
1004
        subl    beg(%esp), %eax         /* nbytes = out - beg */
1005
 
1006
        cmpl    dist_r, %eax
1007
        jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */
1008
 
1009
        movl    len_r, %ecx             /* ecx = len, the rep count */
1010
        movl    out_r, from_r
1011
        subl    dist_r, from_r          /* from = out - dist */
1012
 
1013
        subl    $3, %ecx                /* first 3 bytes are copied by hand below */
1014
        movb    (from_r), %al
1015
        movb    %al, (out_r)            /* out[0] = from[0] */
1016
        movb    1(from_r), %al
1017
        movb    2(from_r), %dl          /* %dl is free: len was already moved to %ecx */
1018
        addl    $3, from_r
1019
        movb    %al, 1(out_r)           /* out[1] = from[1] */
1020
        movb    %dl, 2(out_r)           /* out[2] = from[2] */
1021
        addl    $3, out_r
1022
        rep     movsb                   /* copy the remaining len - 3 bytes */
1023
 
1024
        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1025
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1026
        jmp     .L_while_test_mmx
1027
 
1028
.align 16,0x90
1029
.L_check_dist_one_mmx:                  /* dist == 1: replicate the previous output byte len times */
1030
        cmpl    $1, dist_r
1031
        jne     .L_check_window_mmx     /* if (dist != 1) use the general copy */
1032
        cmpl    out_r, beg(%esp)
1033
        je      .L_check_window_mmx     /* if (out == beg) use the general copy */
1034
 
1035
        decl    out_r                   /* out-- so (out) is the previous output byte */
1036
        movl    len_r, %ecx             /* ecx = len, the rep count */
1037
        movb    (out_r), %al            /* al = byte to replicate */
1038
        subl    $3, %ecx                /* 3 copies are stored by hand below */
1039
 
1040
        movb    %al, 1(out_r)
1041
        movb    %al, 2(out_r)
1042
        movb    %al, 3(out_r)
1043
        addl    $4, out_r               /* undo the decl and skip the 3 stored bytes */
1044
        rep     stosb                   /* store the remaining len - 3 copies of %al */
1045
 
1046
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1047
        jmp     .L_while_test_mmx
1048
 
1049
.align 16,0x90
1050
.L_test_for_second_level_length_mmx:    /* op has bit 16 clear: re-index lcode and retry at .L_dolen_mmx */
1051
        testb   $64, %al
1052
        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
1053
 
1054
        andl    $15, %eax               /* op &= 15 */
1055
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1056
        movd    hold_mm, %ecx
1057
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1058
        addl    len_r, %ecx             /* ecx += len (this.val) */
1059
        movl    (%ebx,%ecx,4), %eax     /* eax = lcode[len + (hold & mask[op])] */
1060
        jmp     .L_dolen_mmx
1061
 
1062
.align 16,0x90
1063
.L_test_for_second_level_dist_mmx:      /* op has bit 16 clear: re-index dcode and retry at .L_dodist_mmx */
1064
        testb   $64, %al
1065
        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
1066
 
1067
        andl    $15, %eax               /* op &= 15 */
1068
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1069
        movd    hold_mm, %ecx
1070
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1071
        movl    dcode(%esp), %eax       /* eax = dcode */
1072
        addl    dist_r, %ecx            /* ecx += dist (this.val) */
1073
        movl    (%eax,%ecx,4), %eax     /* eax = dcode[dist + (hold & mask[op])] */
1074
        jmp     .L_dodist_mmx
1075
 
1076
.align 16,0x90
1077
.L_clip_window_mmx:                     /* dist > out - beg: the copy source starts in the window */
1078
#define nbytes_r %ecx
1079
        movl    %eax, nbytes_r          /* nbytes = out - beg (computed by the caller path) */
1080
        movl    wsize(%esp), %eax       /* prepare for dist compare */
1081
        negl    nbytes_r                /* nbytes = -nbytes */
1082
        movl    window(%esp), from_r    /* from = window */
1083
 
1084
        cmpl    dist_r, %eax
1085
        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
1086
 
1087
        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
1088
        cmpl    $0, write(%esp)
1089
        jne     .L_wrap_around_window_mmx /* if (write != 0) */
1090
 
1091
        subl    nbytes_r, %eax
1092
        addl    %eax, from_r            /* from += wsize - nbytes */
1093
 
1094
        cmpl    nbytes_r, len_r
1095
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1096
 
1097
        subl    nbytes_r, len_r         /* len -= nbytes */
1098
        rep     movsb                   /* copy nbytes bytes out of the window */
1099
        movl    out_r, from_r
1100
        subl    dist_r, from_r          /* from = out - dist */
1101
        jmp     .L_do_copy1_mmx         /* rest of the copy comes from the output */
1111
 
1112
.L_wrap_around_window_mmx:
1113
#define write_r %eax
1114
        movl    write(%esp), write_r
1115
        cmpl    write_r, nbytes_r
1116
        jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */
1117
 
1118
        addl    wsize(%esp), from_r
1119
        addl    write_r, from_r
1120
        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
1121
        subl    write_r, nbytes_r       /* nbytes -= write */
1122
#undef write_r
1123
 
1124
        cmpl    nbytes_r, len_r
1125
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1126
 
1127
        subl    nbytes_r, len_r         /* len -= nbytes */
1128
        rep     movsb
1129
        movl    window(%esp), from_r    /* from = window */
1130
        movl    write(%esp), nbytes_r   /* nbytes = write */
1131
        cmpl    nbytes_r, len_r
1132
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1133
 
1134
        subl    nbytes_r, len_r         /* len -= nbytes */
1135
        rep     movsb
1136
        movl    out_r, from_r
1137
        subl    dist_r, from_r          /* from = out - dist */
1138
        jmp     .L_do_copy1_mmx
1139
 
1140
.L_contiguous_in_window_mmx:
1141
#define write_r %eax
1142
        addl    write_r, from_r
1143
        subl    nbytes_r, from_r        /* from += write - nbytes */
1144
#undef write_r
1145
 
1146
        cmpl    nbytes_r, len_r
1147
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1148
 
1149
        subl    nbytes_r, len_r         /* len -= nbytes */
1150
        rep     movsb
1151
        movl    out_r, from_r
1152
        subl    dist_r, from_r          /* from = out - dist */
1153
 
1154
.L_do_copy1_mmx:
1155
#undef nbytes_r
1156
#define in_r %esi
1157
        movl    len_r, %ecx
1158
        rep     movsb
1159
 
1160
        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1161
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1162
        jmp     .L_while_test_mmx
1163
 
1164
#undef hold_r
1165
#undef bitslong_r
1166
 
1167
#endif /* USE_MMX || RUN_TIME_MMX */
1168
 
1169
 
1170
/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/
1171
 
1172
.L_invalid_distance_code:
1173
        /* else {
1174
         *   strm->msg = "invalid distance code";
1175
         *   state->mode = BAD;
1176
         * }
1177
         */
1178
        movl    $.L_invalid_distance_code_msg, %ecx
1179
        movl    $INFLATE_MODE_BAD, %edx
1180
        jmp     .L_update_stream_state
1181
 
1182
.L_test_for_end_of_block:
1183
        /* else if (op & 32) {
1184
         *   state->mode = TYPE;
1185
         *   break;
1186
         * }
1187
         */
1188
        testb   $32, %al
1189
        jz      .L_invalid_literal_length_code  /* if ((op & 32) == 0) */
1190
 
1191
        movl    $0, %ecx
1192
        movl    $INFLATE_MODE_TYPE, %edx
1193
        jmp     .L_update_stream_state
1194
 
1195
.L_invalid_literal_length_code:
1196
        /* else {
1197
         *   strm->msg = "invalid literal/length code";
1198
         *   state->mode = BAD;
1199
         * }
1200
         */
1201
        movl    $.L_invalid_literal_length_code_msg, %ecx
1202
        movl    $INFLATE_MODE_BAD, %edx
1203
        jmp     .L_update_stream_state
1204
 
1205
.L_invalid_distance_too_far:
1206
        /* strm->msg = "invalid distance too far back";
1207
         * state->mode = BAD;
1208
         */
1209
        movl    in(%esp), in_r          /* from_r has in's reg, put in back */
1210
        movl    $.L_invalid_distance_too_far_msg, %ecx
1211
        movl    $INFLATE_MODE_BAD, %edx
1212
        jmp     .L_update_stream_state
1213
 
1214
.L_update_stream_state:
1215
        /* set strm->msg = %ecx, strm->state->mode = %edx */
1216
        movl    strm_sp(%esp), %eax
1217
        testl   %ecx, %ecx              /* if (msg != NULL) */
1218
        jz      .L_skip_msg
1219
        movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
1220
.L_skip_msg:
1221
        movl    state_strm(%eax), %eax  /* state = strm->state */
1222
        movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
1223
        jmp     .L_break_loop
1224
 
1225
.align 32,0x90
1226
.L_break_loop:
1227
 
1228
/*
1229
 * Regs:
1230
 *
1231
 * bits = %ebp when mmx, and in %ebx when non-mmx
1232
 * hold = hold_mm (%mm0) when mmx, and in %ebp when non-mmx
1233
 * in   = %esi
1234
 * out  = %edi
1235
 */
1236
 
1237
#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1238
 
1239
#if defined( RUN_TIME_MMX )
1240
 
1241
        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
1242
        jne     .L_update_next_in
1243
 
1244
#endif /* RUN_TIME_MMX */
1245
 
1246
        movl    %ebp, %ebx
1247
 
1248
.L_update_next_in:
1249
 
1250
#endif
1251
 
1252
#define strm_r  %eax
1253
#define state_r %edx
1254
 
1255
        /* len = bits >> 3;
1256
         * in -= len;
1257
         * bits -= len << 3;
1258
         * hold &= (1U << bits) - 1;
1259
         * state->hold = hold;
1260
         * state->bits = bits;
1261
         * strm->next_in = in;
1262
         * strm->next_out = out;
1263
         */
1264
        movl    strm_sp(%esp), strm_r
1265
        movl    %ebx, %ecx
1266
        movl    state_strm(strm_r), state_r
1267
        shrl    $3, %ecx
1268
        subl    %ecx, in_r
1269
        shll    $3, %ecx
1270
        subl    %ecx, %ebx
1271
        movl    out_r, next_out_strm(strm_r)
1272
        movl    %ebx, bits_state(state_r)
1273
        movl    %ebx, %ecx
1274
 
1275
        leal    buf(%esp), %ebx
1276
        cmpl    %ebx, last(%esp)
1277
        jne     .L_buf_not_used         /* if buf != last */
1278
 
1279
        subl    %ebx, in_r              /* in -= buf */
1280
        movl    next_in_strm(strm_r), %ebx
1281
        movl    %ebx, last(%esp)        /* last = strm->next_in */
1282
        addl    %ebx, in_r              /* in += strm->next_in */
1283
        movl    avail_in_strm(strm_r), %ebx
1284
        subl    $11, %ebx
1285
        addl    %ebx, last(%esp)    /* last = &strm->next_in[ avail_in - 11 ] */
1286
 
1287
.L_buf_not_used:
1288
        movl    in_r, next_in_strm(strm_r)
1289
 
1290
        movl    $1, %ebx
1291
        shll    %cl, %ebx
1292
        decl    %ebx
1293
 
1294
#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1295
 
1296
#if defined( RUN_TIME_MMX )
1297
 
1298
        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
1299
        jne     .L_update_hold
1300
 
1301
#endif /* RUN_TIME_MMX */
1302
 
1303
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1304
        movd    hold_mm, %ebp
1305
 
1306
        emms
1307
 
1308
.L_update_hold:
1309
 
1310
#endif /* USE_MMX || RUN_TIME_MMX */
1311
 
1312
        andl    %ebx, %ebp
1313
        movl    %ebp, hold_state(state_r)
1314
 
1315
#define last_r %ebx
1316
 
1317
        /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
1318
        movl    last(%esp), last_r
1319
        cmpl    in_r, last_r
1320
        jbe     .L_last_is_smaller     /* if (in >= last) */
1321
 
1322
        subl    in_r, last_r           /* last -= in */
1323
        addl    $11, last_r            /* last += 11 */
1324
        movl    last_r, avail_in_strm(strm_r)
1325
        jmp     .L_fixup_out
1326
.L_last_is_smaller:
1327
        subl    last_r, in_r           /* in -= last */
1328
        negl    in_r                   /* in = -in */
1329
        addl    $11, in_r              /* in += 11 */
1330
        movl    in_r, avail_in_strm(strm_r)
1331
 
1332
#undef last_r
1333
#define end_r %ebx
1334
 
1335
.L_fixup_out:
1336
        /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
1337
        movl    end(%esp), end_r
1338
        cmpl    out_r, end_r
1339
        jbe     .L_end_is_smaller      /* if (out >= end) */
1340
 
1341
        subl    out_r, end_r           /* end -= out */
1342
        addl    $257, end_r            /* end += 257 */
1343
        movl    end_r, avail_out_strm(strm_r)
1344
        jmp     .L_done
1345
.L_end_is_smaller:
1346
        subl    end_r, out_r           /* out -= end */
1347
        negl    out_r                  /* out = -out */
1348
        addl    $257, out_r            /* out += 257 */
1349
        movl    out_r, avail_out_strm(strm_r)
1350
 
1351
#undef end_r
1352
#undef strm_r
1353
#undef state_r
1354
 
1355
.L_done:
1356
        addl    $local_var_size, %esp
1357
        popf
1358
        popl    %ebx
1359
        popl    %ebp
1360
        popl    %esi
1361
        popl    %edi
1362
        ret
1363
 
1364
#if defined( GAS_ELF )
1365
/* elf info */
1366
.type inflate_fast,@function
1367
.size inflate_fast,.-inflate_fast
1368
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.