OpenCores
URL https://opencores.org/ocsvn/lzrw1-compressor-core/lzrw1-compressor-core/trunk

Subversion Repositories lzrw1-compressor-core

[/] [lzrw1-compressor-core/] [trunk/] [LZ77.java] - Blame information for rev 2

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 habicht
/**************************************************************************************************************
2
*
3
*    L Z R W 1   E N C O D E R   C O R E
4
*
5
*  A high throughput loss less data compression core.
6
*
7
* Copyright 2012-2013   Lukas Schrittwieser (LS)
8
*
9
*    This program is free software: you can redistribute it and/or modify
10
*    it under the terms of the GNU General Public License as published by
11
*    the Free Software Foundation, either version 2 of the License, or
12
*    (at your option) any later version.
13
*
14
*    This program is distributed in the hope that it will be useful,
15
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
16
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
*    GNU General Public License for more details.
18
*
19
*    You should have received a copy of the GNU General Public License
20
*    along with this program; if not, write to the Free Software
21
*    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
22
*    Or see <http://www.gnu.org/licenses/>
23
*
24
***************************************************************************************************************
25
*
26
* Change Log:
27
*
28
* Version 1.0 - 2013/04/05 - LS
29
*   released
30
*
31
***************************************************************************************************************
32
*
33
* This program implements the same algorithm as the core written in VHDL. Moreover it can decode (decompress)
34
* data compressed by the core (using its testbench for example).
35
*
36
***************************************************************************************************************/
37
 
38
import java.io.BufferedOutputStream;
39
import java.io.BufferedInputStream;
40
import java.io.BufferedReader;
41
import java.io.InputStreamReader;
42
import java.io.FileReader;
43
import java.io.FileOutputStream;
44
import java.io.FileInputStream;
45
import java.io.File;
46
import java.io.IOException;
47
 
48
class LZ77
49
{
50
        final static boolean DEBUG = false;             // set this to true to get verbose output about the algorithm
51
        // configure core properties
52
        final static int HISTORY_LEN = 4096;    // buffer length in bytes 
53
        final static int LOOK_AHEAD_LEN = 16;
54
        final static int HASH_BIT_LEN = 11;     // number of bits for the index in the hash table
55
 
56
        /* configer the number of items (offset-length-tuples or literals) per frame */
57
        public final int ITEMS_PER_FRAME = 8;
58
 
59
        public static void main (String args[])
60
        {
61
                LZ77 compressor = new LZ77();
62
                File dest = null;
63
 
64
                if (args.length < 2)
65
                {
66
                        System.err.println("Usage: LZ77 (encode|decode) in-file [out-file]");
67
                        System.err.println("Usage: LZ77 verify compressed-file clear-text-verify-file");
68
                        return;
69
                }
70
 
71
                if (args.length >= 3)
72
                        dest = new File(args[2]);
73
 
74
                try
75
                {
76
                        if (args[0].compareTo("encode") == 0)
77
                                compressor.encode(new File(args[1]), dest);
78
                        else if (args[0].compareTo("decode") == 0)
79
                                compressor.decode(new File(args[1]), dest, null);
80
                        else if (args[0].compareTo("verify") == 0)
81
                                compressor.decode(new File(args[1]), null, dest);
82
                        else
83
                        {
84
                                System.err.println("Usage: LZ77 (encode|decode) in-file [out-file]");
85
                                System.err.println("Usage: LZ77 verify compressed-file clear-text-verify-file");
86
                        }
87
                }
88
                catch(Exception e)
89
                {
90
                        System.err.println("compression created exception: "+e);
91
                        e.printStackTrace();
92
                }
93
        }
94
 
95
        public LZ77 ()
96
        {
97
                // allocate a buffer and fill it with \0
98
                buffer = new int[HISTORY_LEN];
99
                for (int i=0; i<buffer.length; i++)
100
                        buffer[i] = ' ';
101
 
102
                lookAheadBuf = new int[LOOK_AHEAD_LEN];
103
                lookAheadLen = 0;
104
                hashTbl = new int[(int)Math.ceil(Math.pow(2,HASH_BIT_LEN))];    // does not have to be initialized
105
 
106
                histWrInd = 0;
107
 
108
                frameBuffer = new int[ITEMS_PER_FRAME*2];
109
                frameBufferIndex = 0;
110
 
111
        }
112
 
113
        // compress a file
114
        void encode (File source, File destination) throws IOException
115
        {
116
                BufferedReader br = new BufferedReader(new FileReader(source));
117
                BufferedOutputStream bw = null;
118
                if (destination != null)
119
                        bw = new BufferedOutputStream(new FileOutputStream(destination));
120
 
121
                int [] cand = new int[lookAheadBuf.length];
122
                //FileReader br = new FileReader(f);
123
                int ch = 0;
124
                int h;
125
                int byteCnt = 0; // for debug purposes (better error messages)
126
                supressCnt = 0;
127
                histWrInd = 0;
128
                frameItemCnt = 0;
129
                frameBufferIndex = 0;
130
                validBufferLen = 0;
131
 
132
                // fill the lookahead buffer with data
133
                do
134
                {
135
                        ch = br.read();
136
                        if (ch != -1)
137
                        {
138
                                //buffer[histWrInd++] = ch;
139
                                lookAheadBuf[lookAheadLen] = ch;
140
                                lookAheadLen++;
141
                                insertIntoHistBuf(ch);  // Note: the front the of the history buffer holds a copy of the lookahead buffer
142
                        }
143
                } while ((ch != -1) && (lookAheadLen < lookAheadBuf.length-1));
144
 
145
                while ((ch != -1) || (lookAheadLen > 0))
146
                {
147
 
148
                        ch = br.read();
149
                        if (ch != -1)
150
                        {
151
                                // enter this char into the stream history buffer
152
                                insertIntoHistBuf(ch);
153
                                // store it in look ahead shift register
154
                                lookAheadBuf[lookAheadLen] = ch;
155
                                lookAheadLen++;
156
                        }
157
                        if (DEBUG)
158
                                System.out.println("lookAheadLen: "+lookAheadLen);
159
 
160
                        byteCnt++;
161
                        if (DEBUG)
162
                                System.out.println("processing byte number "+byteCnt);
163
                        dbgPrint();
164
 
165
                        // get the hash for the current look ahead buffer
166
                        h = hash(lookAheadBuf[0], lookAheadBuf[1], lookAheadBuf[2]);
167
                        // get match candidate index from table
168
                        int cInd = hashTbl[h];
169
                        // save new pointer in hash table
170
                        int x = histWrInd - lookAheadLen;       //
171
                        if (x < 0)
172
                                x += buffer.length;
173
                        hashTbl[h] = x;
174
                        //System.out.println("h: "+h+"  cInd: "+cInd+"   histWrInd: "+histWrInd);
175
                        // load the candidate from the stream history buffer
176
                        for (int i=0; i<lookAheadLen; i++)
177
                        {
178
                                int ind = cInd + i;
179
                                if (ind >= buffer.length)
180
                                        ind -= buffer.length;
181
                                cand[i] = buffer[ind];
182
                        }
183
                        if (DEBUG)
184
                        {
185
                                System.out.print("candidate: <");
186
                                for (int i=0; i<lookAheadLen; i++)
187
                                {
188
                                        if (cand[i] != '\n')
189
                                                System.out.print(((char)cand[i]));
190
                                        else
191
                                                System.out.print("\\n");
192
                                }
193
                                System.out.println(">  len:"+lookAheadLen);
194
                        }
195
 
196
                        int mLen = checkCandidate(cand);
197
 
198
                        // the hardware has a limitation on the candidate length depending on the candidate address, we model this here
199
                        int maxLen=0;
200
                        if (lookAheadLen == 16)
201
                                maxLen = lookAheadLen - (cInd&0x03);
202
                        else
203
                                maxLen = lookAheadLen;
204
 
205
                        if (mLen > maxLen)
206
                                mLen = maxLen;
207
                        if (DEBUG)
208
                                System.out.println("mLen: "+mLen+"     maxLen: "+maxLen);
209
 
210
                        int lit = lookAheadBuf[0];
211
                        shiftLookAhead(1);
212
                        lookAheadLen--;
213
 
214
                        if (supressCnt > 0)
215
                        {
216
                                // this byte has already been encoded. Discard it
217
                                supressCnt--;
218
                                if (DEBUG)
219
                                        System.out.println("data output supressed");
220
                                continue;
221
                        }
222
 
223
                        if (mLen < 3)
224
                        {
225
                                // we have no mtach, simply output a literal
226
                                output(bw, 0,1,lit);
227
                                continue;
228
                        }
229
 
230
                        // we have a match, calculate the offset
231
                        if (DEBUG)
232
                        {
233
                                System.out.println("wrInd: "+histWrInd+"    lookAheadLen: "+lookAheadLen+ "   cInd: "+cInd);
234
                        }
235
                        int offset = histWrInd - (lookAheadLen+1) - cInd - 1;
236
                        if (offset < 0)
237
                                offset += buffer.length ;
238
                        // check offset
239
                        if (offset >= (validBufferLen))
240
                        {
241
                                System.out.println("offset "+offset+" is too old");
242
                                // this match is too old and therefore not valid (we have already overwritten this data with the lookAhead data)
243
                                output(bw, 0,1,lit);
244
                                continue;
245
                        }
246
 
247
                        if (offset < 0)
248
                        {
249
                                System.out.println("offset "+offset+" is negative!");
250
                                output(bw, 0,1,lit);
251
                                continue;
252
                        }
253
                        output(bw, offset, mLen-1, 0); // take length -1 to save one bit (we can't have length 0 but 16 becomes 15)
254
                        supressCnt = mLen-1;    // supress output for the next mLen-1 bytes (as we already encoded them)
255
                }
256
                br.close();
257
 
258
                // create 'end of data' flag
259
                output(bw, 0, 0, 0);
260
 
261
                if (bw != null)
262
                        bw.close();
263
        }
264
 
265
 
266
        void decode (File source, File destination, File verify) throws IOException
267
        {
268
                BufferedInputStream bi = new BufferedInputStream(new FileInputStream(source));
269
                BufferedInputStream verifyStream = null;
270
                BufferedOutputStream bo = null;
271
                if (destination != null)
272
                        bo = new BufferedOutputStream(new FileOutputStream(destination));
273
                if (verify != null)
274
                        verifyStream = new BufferedInputStream(new FileInputStream(verify));
275
 
276
                int ch = 0;
277
                int flags = 0;
278
                int highByte = 0;
279
                boolean isPair = false;
280
                histWrInd = 0;
281
                frameItemCnt = 0;
282
                boolean verifyFailed = false;
283
 
284
                // we keep a statistic of the match length values we encounter during decoding
285
                int [] mLenCnt = new int[16];
286
                for (int i=0; i<mLenCnt.length; i++)
287
                        mLenCnt[0] = 0;
288
 
289
                while (ch != -1)
290
                {
291
                        ch = bi.read();
292
 
293
                        if (ch == -1)
294
                        {
295
                                System.err.println("corrupt file: EOF without an end of data marker");
296
                                break;
297
                        }
298
 
299
                        if (isPair)
300
                        {
301
                                // this is the second byte of a length-distance pair, processes it
302
                                isPair = false;
303
                                int len = (highByte>>4) & 0x0f;
304
                                int offset = ((highByte<<8)&0x0f00) + (ch&0xff);
305
                                // increment the counter for this match length
306
                                mLenCnt[len]++;
307
 
308
                                // check for the end of data signal
309
                                if ((len==0) && (offset==0))
310
                                {
311
                                        if (DEBUG)
312
                                                System.out.println("found end of data");
313
                                        while((ch=bi.read()) != -1)
314
                                        {
315
                                                System.err.printf("found extra byte: '%c'=%02x\n",ch,ch);
316
                                                verifyFailed = true;
317
                                        }
318
                                        break;
319
                                }
320
                                if (DEBUG)
321
                                        System.out.print("copy o="+offset+" l="+len+": ");
322
                                for (int i=0; i<=len; i++)
323
                                {
324
                                        // load the byte from the history buffer
325
                                        int ind = histWrInd-offset-1;
326
                                        if (ind < 0)
327
                                                ind += HISTORY_LEN;
328
                                        if (ind < 0)
329
                                        {
330
                                                // index is still zero, this is error in the encoded data
331
                                                System.err.println("Error in encoded data, offset is too long");
332
                                                verifyFailed = true;
333
                                                return;
334
                                        }
335
                                        ch = buffer[ind];
336
 
337
                                        if (DEBUG)
338
                                                System.out.printf(" '%c'=%x",ch,ch);
339
                                        if (bo != null)
340
                                                bo.write(ch);
341
                                        // save this byte in the history buffer
342
                                        buffer[histWrInd++] = ch;
343
                                        if (histWrInd >= buffer.length)
344
                                                histWrInd = 0;
345
 
346
                                        // check with verify file
347
                                        if (verifyStream != null)
348
                                        {
349
                                                int vCh = verifyStream.read();
350
                                                if (vCh < 0)
351
                                                {
352
                                                        System.err.printf("verify failed: decoded '%c'=%02x,  verify file is empty\n", ch, ch);
353
                                                        verifyFailed = true;
354
                                                }
355
                                                else if (vCh != ch)
356
                                                {
357
                                                        System.err.printf("verify failed: decoded '%c'=%02x,  expected '%c'=%02x\n", ch, ch, vCh, vCh);
358
                                                        verifyFailed = true;
359
                                                }
360
                                        }
361
                                }
362
                                if (DEBUG)
363
                                        System.out.println(" ");
364
 
365
                                continue;
366
                        }
367
 
368
                        // check whether we have a new header
369
                        if (frameItemCnt==0)
370
                        {
371
                                flags = ch;
372
                                if (DEBUG)
373
                                        System.out.printf("found header: %02x\n",ch);
374
                                frameItemCnt = 8;
375
                                continue;
376
                        }
377
 
378
                        if ((flags&0x01) == 1)
379
                        {
380
                                // this is a length-offset pair, save first byte for later processing
381
                                highByte = ch;
382
                                isPair = true;
383
                        }
384
                        else
385
                        {
386
                                isPair = false;
387
                                // this is a literal
388
                                if (DEBUG)
389
                                        System.out.printf("lit: '%c'=%02x\n",ch,ch);
390
                                if (bo != null)
391
                                        bo.write(ch);
392
                                // count this as a literal in the frequency counter for the statistics
393
                                mLenCnt[0]++;
394
                                // save this byte in the history buffer
395
                                buffer[histWrInd++] = ch;
396
                                if (histWrInd >= buffer.length)
397
                                        histWrInd = 0;
398
                                // check with verify file
399
                                if (verifyStream != null)
400
                                {
401
                                        int vCh = verifyStream.read();
402
                                        if (vCh < 0)
403
                                        {
404
                                                System.err.printf("verify failed: decoded '%c'=%02x,  verify file is empty\n", ch, ch);
405
                                                verifyFailed = true;
406
                                        }
407
                                        else if (vCh != ch)
408
                                        {
409
                                                System.err.printf("verify failed: decoded '%c'=%02x,  expected '%c'=%02x\n", ch, ch, vCh, vCh);
410
                                                verifyFailed = true;
411
                                        }
412
                                }
413
                        }
414
                        // shift header for next byte
415
                        frameItemCnt--;
416
                        flags >>= 1;
417
                }
418
 
419
                bi.close();
420
 
421
                if (bo != null)
422
                        bo.close();
423
 
424
                // print some statistics
425
                System.out.println("Percentages of different match lengths:");
426
                int total = 0;
427
                for (int i=0; i<mLenCnt.length; i++)
428
                        total += mLenCnt[i];
429
                for (int i=0; i<mLenCnt.length; i++)
430
                        System.out.println("match len "+(i+1)+": \t"+(mLenCnt[i]/(float)total));
431
 
432
                if (verifyStream != null)
433
                {
434
                        while((ch=verifyStream.read()) != -1)
435
                        {
436
                                verifyFailed = true;
437
                                System.err.printf("found extra byte in verify file: '%c'=%02x\n",ch,ch);
438
                        }
439
                        verifyStream.close();
440
                        if (verifyFailed)
441
                                System.err.println("verify failed");
442
                        else
443
                                System.err.println("verify passed");
444
                }
445
 
446
 
447
        }
448
 
449
        private void insertIntoHistBuf (int ch)
450
        {
451
                buffer[histWrInd++] = ch;
452
                if (histWrInd >= buffer.length)
453
                        histWrInd = 0;
454
                // count the number of valid bytes in the histroy buffer
455
                if (validBufferLen < (HISTORY_LEN - LOOK_AHEAD_LEN))
456
                        validBufferLen++;
457
        }
458
 
459
        /* checks a candidate to look ahead buffer
460
           returns the length of the match in bytes (0 means no match)*/
461
        private int checkCandidate(int [] cand)
462
        {
463
                int i;
464
                for (i=0; i<lookAheadLen; i++)
465
                {
466
                        if (cand[i] != lookAheadBuf[i])
467
                                return i;
468
                }
469
                return lookAheadLen;    // all chars matched
470
        }
471
 
472
        // look ahead is implemented as shift to left register
473
        private void shiftLookAhead(int cnt)
474
        {
475
                for (int i=0; i<(LOOK_AHEAD_LEN-cnt); i++)
476
                        lookAheadBuf[i] = lookAheadBuf[i+cnt];
477
        }
478
 
479
        private int hash(int k0, int k1, int k2)
480
        {
481
                int mask = 0;
482
                for (int i=0; i<HASH_BIT_LEN; i++)
483
                {
484
                        mask <<= 1;
485
                        mask |= 1;
486
                }
487
                //System.out.println("mask: "+mask);
488
                // original hash function as used by LZRW
489
                return ( (40543*((((k0<<4)^k1)<<4)^k2)) >>4 ) & mask;
490
        }
491
 
492
        private void dbgPrint()
493
        {
494
                if (DEBUG)
495
                {
496
                        int ind = histWrInd;
497
                        System.out.print('|');
498
                        for (int i=0; i<buffer.length; i++)
499
                        {
500
                                char c = (char)buffer[ind++];
501
                                if (c == '\n')
502
                                        System.out.print("\\n");
503
                                else
504
                                        System.out.print(c);
505
                        //      System.out.print(" ");
506
                                if (ind >= buffer.length)
507
                                        ind = 0;
508
                        }
509
                        System.out.print("|   |");
510
                        for (int i=0; i<lookAheadLen; i++)
511
                        {
512
                                char c = (char)lookAheadBuf[i];
513
                                if (c == '\n')
514
                                        System.out.print("\\n");
515
                                else
516
                                        System.out.print(c);
517
                        //      System.out.print(" ");
518
                        }
519
                        System.out.println("|");
520
                }
521
        }
522
 
523
        private void output (BufferedOutputStream bw, int offset, int len, int ch) throws IOException
524
        {
525
                if (DEBUG)
526
                {
527
                        if (len<=1)
528
                        {
529
                                System.out.print("lit: '");
530
                                if (ch == '\n')
531
                                        System.out.print("\\n");
532
                                else
533
                                        System.out.print((char)ch);
534
                                System.out.println("'");
535
 
536
                        }
537
                        else
538
                                System.out.println("off: "+offset+"\t  len:"+len);
539
                        if (len == 0)
540
                                System.out.println("creating EOD");
541
                }
542
 
543
                if (bw != null)
544
                {
545
                        assert(len < 16);
546
                        assert(len >= 0);
547
                        assert(ch >= 0);
548
                        assert(ch <= 255);
549
 
550
                        if (len == 1)
551
                        {
552
                                // output a string literal
553
                                header &= (~(1<<frameItemCnt));         // clear flag bit in header to indicate a literal
554
                                frameBuffer[frameBufferIndex++] = ch; // copy literal to buffer
555
                        }
556
 
557
                        if (len >= 2)
558
                        {
559
                                header |= (1<<frameItemCnt);         // set flag bit in header to indicate a pair
560
                                int code = (len<<12) | offset;
561
                                // we use big endian encoding
562
                                frameBuffer[frameBufferIndex++] = (code>>8) & 0xff;
563
                                frameBuffer[frameBufferIndex++] = (code & 0xff);
564
                        }
565
 
566
                        if (len == 0)
567
                        {
568
                                // create an end of data code (pair with length of zero)
569
                                header |= (1<<frameItemCnt);         // set flag bit in header to indicate a pair
570
                                frameBuffer[frameBufferIndex++] = 0;
571
                                frameBuffer[frameBufferIndex++] = 0;
572
                        }
573
 
574
                        frameItemCnt++;
575
                        //if (DEBUG)
576
                        //      System.out.println("frame item cnt: "+frameItemCnt);
577
 
578
                        // send the buffer if the frame is full or if we have to send an end of data signal
579
                        if ((frameItemCnt==8) || (len==0))
580
                        {
581
                                header &= 0xff;
582
                                bw.write(header);
583
                                for (int i=0; i<frameBufferIndex; i++)
584
                                        bw.write(frameBuffer[i] & 0xff);
585
                                frameBufferIndex = 0;
586
                                frameItemCnt = 0;
587
                                /*header = 0;
588
                                bw.flush();
589
                                BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
590
                                in.readLine();*/
591
                        }
592
                }
593
        }
594
 
595
        int [] buffer;
596
        int validBufferLen;
597
        int [] hashTbl;
598
        int [] lookAheadBuf;
599
        int lookAheadLen;
600
        int histWrInd;
601
        int supressCnt;
602
        int header;
603
        int frameItemCnt;
604
        int [] frameBuffer;
605
        int frameBufferIndex;
606
 
607
 
608
}
609
 
610
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.