OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [drivers/] [char/] [ftape/] [compressor/] [lzrw3.c] - Blame information for rev 1275

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * $Source: /home/marcus/revision_ctrl_test/oc_cvs/cvs/or1k/linux/linux-2.4/drivers/char/ftape/compressor/lzrw3.c,v $
3
 * $Revision: 1.1.1.1 $
4
 * $Date: 2004-04-15 02:02:24 $
5
 *
6
 * Implementation of Ross Williams lzrw3 algorithm. Adaption for zftape.
7
 *
8
 */
9
 
10
#include "../compressor/lzrw3.h"       /* Defines single exported function "compress".   */
11
 
12
/******************************************************************************/
13
/*                                                                            */
14
/*                                    LZRW3.C                                 */
15
/*                                                                            */
16
/******************************************************************************/
17
/*                                                                            */
18
/* Author  : Ross Williams.                                                   */
19
/* Date    : 30-Jun-1991.                                                     */
20
/* Release : 1.                                                               */
21
/*                                                                            */
22
/******************************************************************************/
23
/*                                                                            */
24
/* This file contains an implementation of the LZRW3 data compression         */
25
/* algorithm in C.                                                            */
26
/*                                                                            */
27
/* The algorithm is a general purpose compression algorithm that runs fast    */
28
/* and gives reasonable compression. The algorithm is a member of the Lempel  */
29
/* Ziv family of algorithms and bases its compression on the presence in the  */
30
/* data of repeated substrings.                                               */
31
/*                                                                            */
32
/* This algorithm is unpatented and the code is public domain. As the         */
33
/* algorithm is based on the LZ77 class of algorithms, it is unlikely to be   */
34
/* the subject of a patent challenge.                                         */
35
/*                                                                            */
36
/* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is            */
37
/* deterministic and is guaranteed to yield the same compressed               */
38
/* representation for a given file each time it is run.                       */
39
/*                                                                            */
40
/* The LZRW3 algorithm was originally designed and implemented                */
41
/* by Ross Williams on 31-Dec-1990.                                           */
42
/*                                                                            */
43
/* Here are the results of applying this code, compiled under THINK C 4.0     */
44
/* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus.      */
45
/*                                                                            */
46
/*    +----------------------------------------------------------------+      */
47
/*    | DATA COMPRESSION TEST                                          |      */
48
/*    | =====================                                          |      */
49
/*    | Time of run     : Sun 30-Jun-1991 09:31PM                      |      */
50
/*    | Timing accuracy : One part in 100                              |      */
51
/*    | Context length  : 262144 bytes (= 256.0000K)                   |      */
52
/*    | Test suite      : Calgary Corpus Suite                         |      */
53
/*    | Files in suite  : 14                                           |      */
54
/*    | Algorithm       : LZRW3                                        |      */
55
/*    | Note: All averages are calculated from the un-rounded values.  |      */
56
/*    +----------------------------------------------------------------+      */
57
/*    | File Name   Length  CxB  ComLen  %Remn  Bits  Com K/s  Dec K/s |      */
58
/*    | ----------  ------  ---  ------  -----  ----  -------  ------- |      */
59
/*    | rpus:Bib.D  111261    1   55033   49.5  3.96    19.46    32.27 |      */
60
/*    | us:Book1.D  768771    3  467962   60.9  4.87    17.03    31.07 |      */
61
/*    | us:Book2.D  610856    3  317102   51.9  4.15    19.39    34.15 |      */
62
/*    | rpus:Geo.D  102400    1   82424   80.5  6.44    11.65    18.18 |      */
63
/*    | pus:News.D  377109    2  205670   54.5  4.36    17.14    27.47 |      */
64
/*    | pus:Obj1.D   21504    1   13027   60.6  4.85    13.40    18.95 |      */
65
/*    | pus:Obj2.D  246814    1  116286   47.1  3.77    19.31    30.10 |      */
66
/*    | s:Paper1.D   53161    1   27522   51.8  4.14    18.60    31.15 |      */
67
/*    | s:Paper2.D   82199    1   45160   54.9  4.40    18.45    32.84 |      */
68
/*    | rpus:Pic.D  513216    2  122388   23.8  1.91    35.29    51.05 |      */
69
/*    | us:Progc.D   39611    1   19669   49.7  3.97    18.87    30.64 |      */
70
/*    | us:Progl.D   71646    1   28247   39.4  3.15    24.34    40.66 |      */
71
/*    | us:Progp.D   49379    1   19377   39.2  3.14    23.91    39.23 |      */
72
/*    | us:Trans.D   93695    1   33481   35.7  2.86    25.48    40.37 |      */
73
/*    +----------------------------------------------------------------+      */
74
/*    | Average     224401    1  110953   50.0  4.00    20.17    32.72 |      */
75
/*    +----------------------------------------------------------------+      */
76
/*                                                                            */
77
/******************************************************************************/
78
 
79
/******************************************************************************/
80
 
81
/* The following structure is returned by the "compress" function below when  */
82
/* the user asks the function to return identifying information.              */
83
/* The most important field in the record is the working memory field which   */
84
/* tells the calling program how much working memory should be passed to      */
85
/* "compress" when it is called to perform a compression or decompression.    */
86
/* LZRW3 uses the same amount of memory during compression and decompression. */
87
/* For more information on this structure see "compress.h".                   */
88
 
89
#define U(X)            ((ULONG) X)
90
#define SIZE_P_BYTE     (U(sizeof(UBYTE *)))
91
#define SIZE_WORD       (U(sizeof(UWORD  )))
92
#define ALIGNMENT_FUDGE (U(16))
93
#define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE )
94
 
95
static struct compress_identity identity =
96
{
97
 U(0x032DDEA8),                           /* Algorithm identification number. */
98
 MEM_REQ,                                 /* Working memory (bytes) required. */
99
 "LZRW3",                                 /* Name of algorithm.               */
100
 "1.0",                                   /* Version number of algorithm.     */
101
 "31-Dec-1990",                           /* Date of algorithm.               */
102
 "Public Domain",                         /* Copyright notice.                */
103
 "Ross N. Williams",                      /* Author of algorithm.             */
104
 "Renaissance Software",                  /* Affiliation of author.           */
105
 "Public Domain"                          /* Vendor of algorithm.             */
106
};
107
 
108
LOCAL void compress_compress  (UBYTE *,UBYTE *,ULONG,UBYTE *, LONG *);
109
LOCAL void compress_decompress(UBYTE *,UBYTE *,LONG, UBYTE *, ULONG *);
110
 
111
/******************************************************************************/
112
 
113
/* This function is the only function exported by this module.                */
114
/* Depending on its first parameter, the function can be requested to         */
115
/* compress a block of memory, decompress a block of memory, or to identify   */
116
/* itself. For more information, see the specification file "compress.h".     */
117
 
118
EXPORT void lzrw3_compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)
119
UWORD     action;      /* Action to be performed.                             */
120
UBYTE   *wrk_mem;      /* Address of working memory we can use.               */
121
UBYTE   *src_adr;      /* Address of input data.                              */
122
LONG     src_len;      /* Length  of input data.                              */
123
UBYTE   *dst_adr;      /* Address to put output data.                         */
124
void  *p_dst_len;      /* Address of longword for length of output data.      */
125
{
126
 switch (action)
127
   {
128
    case COMPRESS_ACTION_IDENTITY:
129
       *((struct compress_identity **)p_dst_len)= &identity;
130
       break;
131
    case COMPRESS_ACTION_COMPRESS:
132
       compress_compress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len);
133
       break;
134
    case COMPRESS_ACTION_DECOMPRESS:
135
       compress_decompress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len);
136
       break;
137
   }
138
}
139
 
140
/******************************************************************************/
141
/*                                                                            */
142
/* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM                                   */
143
/* ========================================                                   */
144
/* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that      */
145
/* instead of transmitting history offsets, it transmits hash table indexes.  */
146
/* In order to decode the indexes, the decompressor must maintain an          */
147
/* identical hash table. Copy items are straightforward:when the decompressor */
148
/* receives a copy item, it simply looks up the hash table to translate the   */
149
/* index into a pointer into the data already decompressed. To update the     */
150
/* hash table, it replaces the same table entry with a pointer to the start   */
151
/* of the newly decoded phrase. The tricky part is with literal items, for at */
152
/* the time that the decompressor receives a literal item the decompressor    */
153
/* does not have the three bytes in the Ziv (that the compressor has) to      */
154
/* perform the three-byte hash. To solve this problem, in LZRW3, both the     */
155
/* compressor and decompressor are wired up so that they "buffer" these       */
156
/* literals and update their hash tables only when three bytes are available. */
157
/* This makes the maximum buffering 2 bytes.                                  */
158
/*                                                                            */
159
/* Replacement of offsets by hash table indexes yields a few percent extra    */
160
/* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A */
161
/* and LZRW2, but yields better compression.                                  */
162
/*                                                                            */
163
/* Extra compression could be obtained by using a hash table of depth two.    */
164
/* However, increasing the depth above one incurs a significant decrease in   */
165
/* compression speed which was not considered worthwhile. Another reason for  */
166
/* keeping the depth down to one was to allow easy comparison with the        */
167
/* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the  */
168
/* use of direct hash indexes.                                                */
169
/*                                                                            */
170
/*                                  +---+                                     */
171
/*                                  |___|4095                                 */
172
/*                                  |___|                                     */
173
/*              +---------------------*_|<---+   /----+---\                   */
174
/*              |                   |___|    +---|Hash    |                   */
175
/*              |                   |___|        |Function|                   */
176
/*              |                   |___|        \--------/                   */
177
/*              |                   |___|0            ^                       */
178
/*              |                   +---+             |                       */
179
/*              |                   Hash        +-----+                       */
180
/*              |                   Table       |                             */
181
/*              |                              ---                            */
182
/*              v                              ^^^                            */
183
/*      +-------------------------------------|----------------+              */
184
/*      ||||||||||||||||||||||||||||||||||||||||||||||||||||||||              */
185
/*      +-------------------------------------|----------------+              */
186
/*      |                                     |1......18|      |              */
187
/*      |<------- Lempel=History ------------>|<--Ziv-->|      |              */
188
/*      |     (=bytes already processed)      |<-Still to go-->|              */
189
/*      |<-------------------- INPUT BLOCK ------------------->|              */
190
/*                                                                            */
191
/* The diagram above for LZRW3 looks almost identical to the diagram for      */
192
/* LZRW1. The difference is that in LZRW3, the compressor transmits hash      */
193
/* table indices instead of Lempel offsets. For this to work, the             */
194
/* decompressor must maintain a hash table as well as the compressor and both */
195
/* compressor and decompressor must "buffer" literals, as the decompressor    */
196
/* cannot hash phrases commencing with a literal until another two bytes have */
197
/* arrived.                                                                   */
198
/*                                                                            */
199
/*  LZRW3 Algorithm Execution Summary                                         */
200
/*  ---------------------------------                                         */
201
/*  1. Hash the first three bytes of the Ziv to yield a hash table index h.   */
202
/*  2. Look up the hash table yielding history pointer p.                     */
203
/*  3. Match where p points with the Ziv. If there is a match of three or     */
204
/*     more bytes, code those bytes (in the Ziv) as a copy item, otherwise    */
205
/*     code the next byte in the Ziv as a literal item.                       */
206
/*  4. Update the hash table as possible subject to the constraint that only  */
207
/*     phrases commencing three bytes back from the Ziv can be hashed and     */
208
/*     entered into the hash table. (This enables the decompressor to keep    */
209
/*     pace). See the description and code for more details.                  */
210
/*                                                                            */
211
/******************************************************************************/
212
/*                                                                            */
213
/*                     DEFINITION OF COMPRESSED FILE FORMAT                   */
214
/*                     ====================================                   */
215
/*  * A compressed file consists of a COPY FLAG followed by a REMAINDER.      */
216
/*  * The copy flag CF uses up four bytes with the first byte being the       */
217
/*    least significant.                                                      */
218
/*  * If CF=1, then the compressed file represents the remainder of the file  */
219
/*    exactly. Otherwise CF=0 and the remainder of the file consists of zero  */
220
/*    or more GROUPS, each of which represents one or more bytes.             */
221
/*  * Each group consists of two bytes of CONTROL information followed by     */
222
/*    sixteen ITEMs except for the last group which can contain from one      */
223
/*    to sixteen items.                                                       */
224
/*  * An item can be either a LITERAL item or a COPY item.                    */
225
/*  * Each item corresponds to a bit in the control bytes.                    */
226
/*  * The first control byte corresponds to the first 8 items in the group    */
227
/*    with bit 0 corresponding to the first item in the group and bit 7 to    */
228
/*    the eighth item in the group.                                           */
229
/*  * The second control byte corresponds to the second 8 items in the group  */
230
/*    with bit 0 corresponding to the ninth item in the group and bit 7 to    */
231
/*    the sixteenth item in the group.                                        */
232
/*  * A zero bit in a control word means that the corresponding item is a     */
233
/*    literal item. A one bit corresponds to a copy item.                     */
234
/*  * A literal item consists of a single byte which represents itself.       */
235
/*  * A copy item consists of two bytes that represent from 3 to 18 bytes.    */
236
/*  * The first  byte in a copy item will be denoted C1.                      */
237
/*  * The second byte in a copy item will be denoted C2.                      */
238
/*  * Bits will be selected using square brackets.                            */
239
/*    For example: C1[0..3] is the low nibble of the first control byte.      */
240
/*    of copy item C1.                                                        */
241
/*  * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */
242
/*    in the range [3,18].                                                    */
243
/*  * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which   */
244
/*    is a number in the range [0,4095].                                      */
245
/*  * A copy item represents the sequence of bytes                            */
246
/*       text[POS-OFFSET..POS-OFFSET+LENGTH-1] where                          */
247
/*          text   is the entire text of the uncompressed string.             */
248
/*          POS    is the index in the text of the character following the    */
249
/*                   string represented by all the items preceeding the item  */
250
/*                   being defined.                                           */
251
/*          OFFSET is obtained from INDEX by looking up the hash table.       */
252
/*                                                                            */
253
/******************************************************************************/
254
 
255
/* The following #define defines the length of the copy flag that appears at  */
256
/* the start of the compressed file. The value of four bytes was chosen       */
257
/* because the fast_copy routine on my Macintosh runs faster if the source    */
258
/* and destination blocks are relatively longword aligned.                    */
259
/* The actual flag data appears in the first byte. The rest are zeroed so as  */
260
/* to normalize the compressed representation (i.e. not non-deterministic).   */
261
#define FLAG_BYTES 4
262
 
263
/* The following #defines define the meaning of the values of the copy        */
264
/* flag at the start of the compressed file.                                  */
265
#define FLAG_COMPRESS 0     /* Signals that output was result of compression. */
266
#define FLAG_COPY     1     /* Signals that output was simply copied over.    */
267
 
268
/* The 68000 microprocessor (on which this algorithm was originally developed */
269
/* is fussy about non-aligned arrays of words. To avoid these problems the    */
270
/* following macro can be used to "waste" from 0 to 3 bytes so as to align    */
271
/* the argument pointer.                                                      */
272
#define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1))
273
 
274
 
275
/* The following constant defines the maximum length of an uncompressed item. */
276
/* This definition must not be changed; its value is hardwired into the code. */
277
/* The longest number of bytes that can be spanned by a single item is 18     */
278
/* for the longest copy item.                                                 */
279
#define MAX_RAW_ITEM (18)
280
 
281
/* The following constant defines the maximum length of an uncompressed group.*/
282
/* This definition must not be changed; its value is hardwired into the code. */
283
/* A group contains at most 16 items which explains this definition.          */
284
#define MAX_RAW_GROUP (16*MAX_RAW_ITEM)
285
 
286
/* The following constant defines the maximum length of a compressed group.   */
287
/* This definition must not be changed; its value is hardwired into the code. */
288
/* A compressed group consists of two control bytes followed by up to 16      */
289
/* compressed items each of which can have a maximum length of two bytes.     */
290
#define MAX_CMP_GROUP (2+16*2)
291
 
292
/* The following constant defines the number of entries in the hash table.    */
293
/* This definition must not be changed; its value is hardwired into the code. */
294
#define HASH_TABLE_LENGTH (4096)
295
 
296
/* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable    */
297
/* the compressor and decompressor to stay in step maintaining identical hash */
298
/* tables. In an early version of the algorithm, the tables were simply       */
299
/* initialized to zero and a check for zero was included just before the      */
300
/* matching code. However, this test costs time. A better solution is to      */
301
/* initialize all the entries in the hash table to point to a constant        */
302
/* string. The decompressor does the same. This solution requires no extra    */
303
/* test. The contents of the string do not matter so long as the string is    */
304
/* the same for the compressor and decompressor and contains at least         */
305
/* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not */
306
/* have white space problems (e.g. there is no chance that the compiler will  */
307
/* replace more than one space by a TAB) and because they make the length of  */
308
/* the string obvious by inspection.                                          */
309
#define START_STRING_18 ((UBYTE *) "123456789012345678")
310
 
311
/* In this algorithm, hash values have to be calculated at more than one      */
312
/* point. The following macro neatens the code up for this.                   */
313
#define HASH(PTR) \
314
   (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF)
315
 
316
/******************************************************************************/
317
 
318
LOCAL void compress_compress
319
           (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)
320
/* Input  : Hand over the required amount of working memory in p_wrk_mem.     */
321
/* Input  : Specify input block using p_src_first and src_len.                */
322
/* Input  : Point p_dst_first to the start of the output zone (OZ).           */
323
/* Input  : Point p_dst_len to a ULONG to receive the output length.          */
324
/* Input  : Input block and output zone must not overlap.                     */
325
/* Output : Length of output block written to *p_dst_len.                     */
326
/* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May   */
327
/* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/
328
/* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES.        */
329
UBYTE *p_wrk_mem;
330
UBYTE *p_src_first;
331
ULONG  src_len;
332
UBYTE *p_dst_first;
333
LONG  *p_dst_len;
334
{
335
 /* p_src and p_dst step through the source and destination blocks.           */
336
 register UBYTE *p_src = p_src_first;
337
 register UBYTE *p_dst = p_dst_first;
338
 
339
 /* The following variables are never modified and are used in the            */
340
 /* calculations that determine when the main loop terminates.                */
341
 UBYTE *p_src_post  = p_src_first+src_len;
342
 UBYTE *p_dst_post  = p_dst_first+src_len;
343
 UBYTE *p_src_max1  = p_src_first+src_len-MAX_RAW_ITEM;
344
 UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16;
345
 
346
 /* The variables 'p_control' and 'control' are used to buffer control bits.  */
347
 /* Before each group is processed, the next two bytes of the output block    */
348
 /* are set aside for the control word for the group about to be processed.   */
349
 /* 'p_control' is set to point to the first byte of that word. Meanwhile,    */
350
 /* 'control' buffers the control bits being generated during the processing  */
351
 /* of the group. Instead of having a counter to keep track of how many items */
352
 /* have been processed (=the number of bits in the control word), at the     */
353
 /* start of each group, the top word of 'control' is filled with 1 bits.     */
354
 /* As 'control' is shifted for each item, the 1 bits in the top word are     */
355
 /* absorbed or destroyed. When they all run out (i.e. when the top word is   */
356
 /* all zero bits, we know that we are at the end of a group.                 */
357
# define TOPWORD 0xFFFF0000
358
 UBYTE *p_control;
359
 register ULONG control=TOPWORD;
360
 
361
 /* THe variable 'hash' always points to the first element of the hash table. */
362
 UBYTE **hash= (UBYTE **)  ULONG_ALIGN_UP(p_wrk_mem);
363
 
364
 /* The following two variables represent the literal buffer. p_h1 points to  */
365
 /* the hash table entry corresponding to the youngest literal. p_h2 points   */
366
 /* to the hash table entry corresponding to the second youngest literal.     */
367
 /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending      */
368
 /* literal. The variables are initialized to zero meaning an empty "buffer". */
369
 UBYTE **p_h1=0;
370
 UBYTE **p_h2=0;
371
 
372
 /* To start, we write the flag bytes. Being optimistic, we set the flag to   */
373
 /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the      */
374
 /* algorithm deterministic.                                                  */
375
 *p_dst++=FLAG_COMPRESS;
376
 {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;}
377
 
378
 /* Reserve the first word of output as the control word for the first group. */
379
 /* Note: This is undone at the end if the input block is empty.              */
380
 p_control=p_dst; p_dst+=2;
381
 
382
 /* Initialize all elements of the hash table to point to a constant string.  */
383
 /* Use of an unrolled loop speeds this up considerably.                      */
384
 {UWORD i; UBYTE **p_h=hash;
385
#  define ZH *p_h++=START_STRING_18
386
  for (i=0;i<256;i++)     /* 256=HASH_TABLE_LENGTH/16. */
387
    {ZH;ZH;ZH;ZH;
388
     ZH;ZH;ZH;ZH;
389
     ZH;ZH;ZH;ZH;
390
     ZH;ZH;ZH;ZH;}
391
 }
392
 
393
 /* The main loop processes either 1 or 16 items per iteration. As its        */
394
 /* termination logic is complicated, I have opted for an infinite loop       */
395
 /* structure containing 'break' and 'goto' statements.                       */
396
 while (TRUE)
397
   {/* Begin main processing loop. */
398
 
399
    /* Note: All the variables here except unroll should be defined within    */
400
    /*       the inner loop. Unfortunately the loop hasn't got a block.       */
401
     register UBYTE *p;         /* Scans through targ phrase during matching. */
402
     register UBYTE *p_ziv= NULL ;     /* Points to first byte of current Ziv.       */
403
     register UWORD unroll;     /* Loop counter for unrolled inner loop.      */
404
     register UWORD index;      /* Index of current hash table entry.         */
405
     register UBYTE **p_h0 = NULL ;     /* Pointer to current hash table entry.       */
406
 
407
    /* Test for overrun and jump to overrun code if necessary.                */
408
    if (p_dst>p_dst_post)
409
       goto overrun;
410
 
411
    /* The following cascade of if statements efficiently catches and deals   */
412
    /* with varying degrees of closeness to the end of the input block.       */
413
    /* When we get very close to the end, we stop updating the table and      */
414
    /* code the remaining bytes as literals. This makes the code simpler.     */
415
    unroll=16;
416
    if (p_src>p_src_max16)
417
      {
418
       unroll=1;
419
       if (p_src>p_src_max1)
420
         {
421
          if (p_src==p_src_post)
422
             break;
423
          else
424
             goto literal;
425
         }
426
      }
427
 
428
    /* This inner unrolled loop processes 'unroll' (whose value is either 1   */
429
    /* or 16) items. I have chosen to implement this loop with labels and     */
430
    /* gotos to heighten the ease with which the loop may be implemented with */
431
    /* a single decrement and branch instruction in assembly language and     */
432
    /* also because the labels act as highly readable place markers.          */
433
    /* (Also because we jump into the loop for endgame literals (see above)). */
434
 
435
    begin_unrolled_loop:
436
 
437
       /* To process the next phrase, we hash the next three bytes and use    */
438
       /* the resultant hash table index to look up the hash table. A pointer */
439
       /* to the entry is stored in p_h0 so as to avoid an array lookup. The  */
440
       /* hash table entry *p_h0 is looked up yielding a pointer p to a       */
441
       /* potential match of the Ziv in the history.                          */
442
       index=HASH(p_src);
443
       p_h0=&hash[index];
444
       p=*p_h0;
445
 
446
       /* Having looked up the candidate position, we are in a position to    */
447
       /* attempt a match. The match loop has been unrolled using the PS      */
448
       /* macro so that failure within the first three bytes automatically    */
449
       /* results in the literal branch being taken. The coding is simple.    */
450
       /* p_ziv saves p_src so we can let p_src wander.                       */
451
#       define PS *p++!=*p_src++
452
       p_ziv=p_src;
453
       if (PS || PS || PS)
454
         {
455
          /* Literal. */
456
 
457
          /* Code the literal byte as itself and a zero control bit.          */
458
          p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF;
459
 
460
          /* We have just coded a literal. If we had two pending ones, that   */
461
          /* makes three and we can update the hash table.                    */
462
          if (p_h2!=0)
463
             {*p_h2=p_ziv-2;}
464
 
465
          /* In any case, rotate the hash table pointers for next time. */
466
          p_h2=p_h1; p_h1=p_h0;
467
 
468
         }
469
       else
470
         {
471
          /* Copy */
472
 
473
          /* Match up to 15 remaining bytes using an unrolled loop and code. */
474
#if 0
475
          PS || PS || PS || PS || PS || PS || PS || PS ||
476
          PS || PS || PS || PS || PS || PS || PS || p_src++;
477
#else     
478
          if (
479
               !( PS || PS || PS || PS || PS || PS || PS || PS ||
480
                  PS || PS || PS || PS || PS || PS || PS )
481
             ) p_src++;
482
#endif
483
          *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3);
484
          *p_dst++=index&0xFF;
485
 
486
          /* As we have just coded three bytes, we are now in a position to   */
487
          /* update the hash table with the literal bytes that were pending   */
488
          /* upon the arrival of extra context bytes.                         */
489
          if (p_h1!=0)
490
            {
491
             if (p_h2!=0)
492
               {*p_h2=p_ziv-2; p_h2=0;}
493
             *p_h1=p_ziv-1; p_h1=0;
494
            }
495
 
496
          /* In any case, we can update the hash table based on the current   */
497
          /* position as we just coded at least three bytes in a copy items.  */
498
          *p_h0=p_ziv;
499
 
500
         }
501
       control>>=1;
502
 
503
       /* This loop is all set up for a decrement and jump instruction! */
504
#ifndef linux
505
`    end_unrolled_loop: if (--unroll) goto begin_unrolled_loop;
506
#else
507
    /* end_unrolled_loop: */ if (--unroll) goto begin_unrolled_loop;
508
#endif
509
 
510
    /* At this point it will nearly always be the end of a group in which     */
511
    /* case, we have to do some control-word processing. However, near the    */
512
    /* end of the input block, the inner unrolled loop is only executed once. */
513
    /* This necessitates the 'if' test.                                       */
514
    if ((control&TOPWORD)==0)
515
      {
516
       /* Write the control word to the place we saved for it in the output. */
517
       *p_control++=  control     &0xFF;
518
       *p_control  = (control>>8) &0xFF;
519
 
520
       /* Reserve the next word in the output block for the control word */
521
       /* for the group about to be processed.                           */
522
       p_control=p_dst; p_dst+=2;
523
 
524
       /* Reset the control bits buffer. */
525
       control=TOPWORD;
526
      }
527
 
528
   } /* End main processing loop. */
529
 
530
 /* After the main processing loop has executed, all the input bytes have     */
531
 /* been processed. However, the control word has still to be written to the  */
532
 /* word reserved for it in the output at the start of the most recent group. */
533
 /* Before writing, the control word has to be shifted so that all the bits   */
534
 /* are in the right place. The "empty" bit positions are filled with 1s      */
535
 /* which partially fill the top word.                                        */
536
 while(control&TOPWORD) control>>=1;
537
 *p_control++= control     &0xFF;
538
 *p_control++=(control>>8) &0xFF;
539
 
540
 /* If the last group contained no items, delete the control word too.        */
541
 if (p_control==p_dst) p_dst-=2;
542
 
543
 /* Write the length of the output block to the dst_len parameter and return. */
544
 *p_dst_len=p_dst-p_dst_first;
545
 return;
546
 
547
 /* Jump here as soon as an overrun is detected. An overrun is defined to     */
548
 /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the       */
549
 /* length of the output written so far exceeds the length of the input block.*/
550
 /* The algorithm checks for overruns at least at the end of each group       */
551
 /* which means that the maximum overrun is MAX_CMP_GROUP bytes.              */
552
 /* Once an overrun occurs, the only thing to do is to set the copy flag and  */
553
 /* copy the input over.                                                      */
554
 overrun:
555
#if 0
556
 *p_dst_first=FLAG_COPY;
557
 fast_copy(p_src_first,p_dst_first+FLAG_BYTES,src_len);
558
 *p_dst_len=src_len+FLAG_BYTES;
559
#else
560
 fast_copy(p_src_first,p_dst_first,src_len);
561
 *p_dst_len= -src_len; /* return a negative number to indicate uncompressed data */
562
#endif
563
}
564
 
565
/******************************************************************************/
566
 
567
LOCAL void compress_decompress
568
           (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)
569
/* Input  : Hand over the required amount of working memory in p_wrk_mem.     */
570
/* Input  : Specify input block using p_src_first and src_len.                */
571
/* Input  : Point p_dst_first to the start of the output zone.                */
572
/* Input  : Point p_dst_len to a ULONG to receive the output length.          */
573
/* Input  : Input block and output zone must not overlap. User knows          */
574
/* Input  : upperbound on output block length from earlier compression.       */
575
/* Input  : In any case, maximum expansion possible is nine times.            */
576
/* Output : Length of output block written to *p_dst_len.                     */
577
/* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1].       */
578
/* Output : Writes only  in Mem[p_dst_first..p_dst_first+*p_dst_len-1].       */
579
UBYTE *p_wrk_mem;
580
UBYTE *p_src_first;
581
LONG   src_len;
582
UBYTE *p_dst_first;
583
ULONG *p_dst_len;
584
{
585
 /* Byte pointers p_src and p_dst scan through the input and output blocks.   */
586
 register UBYTE *p_src = p_src_first+FLAG_BYTES;
587
 register UBYTE *p_dst = p_dst_first;
588
 /* we need to avoid a SEGV when trying to uncompress corrupt data */
589
 register UBYTE *p_dst_post = p_dst_first + *p_dst_len;
590
 
591
 /* The following two variables are never modified and are used to control    */
592
 /* the main loop.                                                            */
593
 UBYTE *p_src_post  = p_src_first+src_len;
594
 UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2);
595
 
596
 /* The hash table is the only resident of the working memory. The hash table */
597
 /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To  */
598
 /* keep Macintoshes happy, it is longword aligned.                           */
599
 UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem);
600
 
601
 /* The variable 'control' is used to buffer the control bits which appear in */
602
 /* groups of 16 bits (control words) at the start of each compressed group.  */
603
 /* When each group is read, bit 16 of the register is set to one. Whenever   */
604
 /* a new bit is needed, the register is shifted right. When the value of the */
605
 /* register becomes 1, we know that we have reached the end of a group.      */
606
 /* Initializing the register to 1 thus instructs the code to follow that it  */
607
 /* should read a new control word immediately.                               */
608
 register ULONG control=1;
609
 
610
 /* The value of 'literals' is always in the range 0..3. It is the number of  */
611
 /* consecutive literal items just seen. We have to record this number so as  */
612
 /* to know when to update the hash table. When literals gets to 3, there     */
613
 /* have been three consecutive literals and we can update at the position of */
614
 /* the oldest of the three.                                                  */
615
 register UWORD literals=0;
616
 
617
 /* Check the leading copy flag to see if the compressor chose to use a copy  */
618
 /* operation instead of a compression operation. If a copy operation was     */
619
 /* used, then all we need to do is copy the data over, set the output length */
620
 /* and return.                                                               */
621
#if 0
622
 if (*p_src_first==FLAG_COPY)
623
   {
624
    fast_copy(p_src_first+FLAG_BYTES,p_dst_first,src_len-FLAG_BYTES);
625
    *p_dst_len=src_len-FLAG_BYTES;
626
    return;
627
   }
628
#else
629
  if ( src_len < 0 )
630
  {
631
   fast_copy(p_src_first,p_dst_first,-src_len );
632
   *p_dst_len = (ULONG)-src_len;
633
   return;
634
  }
635
#endif
636
 
637
 /* Initialize all elements of the hash table to point to a constant string.  */
638
 /* Use of an unrolled loop speeds this up considerably.                      */
639
 {UWORD i; UBYTE **p_h=hash;
640
#  define ZJ *p_h++=START_STRING_18
641
  for (i=0;i<256;i++)     /* 256=HASH_TABLE_LENGTH/16. */
642
    {ZJ;ZJ;ZJ;ZJ;
643
     ZJ;ZJ;ZJ;ZJ;
644
     ZJ;ZJ;ZJ;ZJ;
645
     ZJ;ZJ;ZJ;ZJ;}
646
 }
647
 
648
 /* The outer loop processes either 1 or 16 items per iteration depending on  */
649
 /* how close p_src is to the end of the input block.                         */
650
 while (p_src!=p_src_post)
651
   {/* Start of outer loop */
652
 
653
    register UWORD unroll;   /* Counts unrolled loop executions.              */
654
 
655
    /* When 'control' has the value 1, it means that the 16 buffered control  */
656
    /* bits that were read in at the start of the current group have all been */
657
    /* shifted out and that all that is left is the 1 bit that was injected   */
658
    /* into bit 16 at the start of the current group. When we reach the end   */
659
    /* of a group, we have to load a new control word and inject a new 1 bit. */
660
    if (control==1)
661
      {
662
       control=0x10000|*p_src++;
663
       control|=(*p_src++)<<8;
664
      }
665
 
666
    /* If it is possible that we are within 16 groups from the end of the     */
667
    /* input, execute the unrolled loop only once, else process a whole group */
668
    /* of 16 items by looping 16 times.                                       */
669
    unroll= p_src<=p_src_max16 ? 16 : 1;
670
 
671
    /* This inner loop processes one phrase (item) per iteration. */
672
    while (unroll--)
673
      { /* Begin unrolled inner loop. */
674
 
675
       /* Process a literal or copy item depending on the next control bit. */
676
       if (control&1)
677
         {
678
          /* Copy item. */
679
 
680
          register UBYTE *p;           /* Points to place from which to copy. */
681
          register UWORD lenmt;        /* Length of copy item minus three.    */
682
          register UBYTE **p_hte;      /* Pointer to current hash table entry.*/
683
          register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv.    */
684
 
685
          /* Read and dismantle the copy word. Work out from where to copy.   */
686
          lenmt=*p_src++;
687
          p_hte=&hash[((lenmt&0xF0)<<4)|*p_src++];
688
          p=*p_hte;
689
          lenmt&=0xF;
690
 
691
          /* Now perform the copy using a half unrolled loop. */
692
          *p_dst++=*p++;
693
          *p_dst++=*p++;
694
          *p_dst++=*p++;
695
          while (lenmt--)
696
             *p_dst++=*p++;
697
 
698
          /* Because we have just received 3 or more bytes in a copy item     */
699
          /* (whose bytes we have just installed in the output), we are now   */
700
          /* in a position to flush all the pending literal hashings that had */
701
          /* been postponed for lack of bytes.                                */
702
          if (literals>0)
703
            {
704
             register UBYTE *r=p_ziv-literals;;
705
             hash[HASH(r)]=r;
706
             if (literals==2)
707
                {r++; hash[HASH(r)]=r;}
708
             literals=0;
709
            }
710
 
711
          /* In any case, we can immediately update the hash table with the   */
712
          /* current position. We don't need to do a HASH(...) to work out    */
713
          /* where to put the pointer, as the compressor just told us!!!      */
714
          *p_hte=p_ziv;
715
 
716
         }
717
       else
718
         {
719
          /* Literal item. */
720
 
721
          /* Copy over the literal byte. */
722
          *p_dst++=*p_src++;
723
 
724
          /* If we now have three literals waiting to be hashed into the hash */
725
          /* table, we can do one of them now (because there are three).      */
726
          if (++literals == 3)
727
             {register UBYTE *p=p_dst-3; hash[HASH(p)]=p; literals=2;}
728
         }
729
 
730
       /* Shift the control buffer so the next control bit is in bit 0. */
731
       control>>=1;
732
#if 1
733
       if (p_dst > p_dst_post)
734
       {
735
               /* Shit: we tried to decompress corrupt data */
736
               *p_dst_len = 0;
737
               return;
738
       }
739
#endif
740
      } /* End unrolled inner loop. */
741
 
742
   } /* End of outer loop */
743
 
744
 /* Write the length of the decompressed data before returning. */
745
  *p_dst_len=p_dst-p_dst_first;
746
}
747
 
748
/******************************************************************************/
749
/*                               End of LZRW3.C                               */
750
/******************************************************************************/

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.