OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [compress/] [flate/] [huffman_bit_writer.go] - Blame information for rev 747

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2009 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
package flate
6
 
7
import (
8
        "io"
9
        "math"
10
        "strconv"
11
)
12
 
13
const (
14
        // The largest offset code.
15
        offsetCodeCount = 30
16
 
17
        // The special code used to mark the end of a block.
18
        endBlockMarker = 256
19
 
20
        // The first length code.
21
        lengthCodesStart = 257
22
 
23
        // The number of codegen codes.
24
        codegenCodeCount = 19
25
        badCode          = 255
26
)
27
 
28
// The number of extra bits needed by length code X - LENGTH_CODES_START.
29
var lengthExtraBits = []int8{
30
        /* 257 */ 0, 0, 0,
31
        /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
32
        /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
33
        /* 280 */ 4, 5, 5, 5, 5, 0,
34
}
35
 
36
// The length indicated by length code X - LENGTH_CODES_START.
37
var lengthBase = []uint32{
38
        0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
39
        12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
40
        64, 80, 96, 112, 128, 160, 192, 224, 255,
41
}
42
 
43
// offset code word extra bits.
44
var offsetExtraBits = []int8{
45
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
46
        4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
47
        9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
48
        /* extended window */
49
        14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
50
}
51
 
52
var offsetBase = []uint32{
53
        /* normal deflate */
54
        0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
55
        0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
56
        0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
57
        0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
58
        0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
59
        0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
60
 
61
        /* extended window */
62
        0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
63
        0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
64
        0x100000, 0x180000, 0x200000, 0x300000,
65
}
66
 
67
// The odd order in which the codegen code sizes are written.
68
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
69
 
70
type huffmanBitWriter struct {
71
        w io.Writer
72
        // Data waiting to be written is bytes[0:nbytes]
73
        // and then the low nbits of bits.
74
        bits            uint32
75
        nbits           uint32
76
        bytes           [64]byte
77
        nbytes          int
78
        literalFreq     []int32
79
        offsetFreq      []int32
80
        codegen         []uint8
81
        codegenFreq     []int32
82
        literalEncoding *huffmanEncoder
83
        offsetEncoding  *huffmanEncoder
84
        codegenEncoding *huffmanEncoder
85
        err             error
86
}
87
 
88
type WrongValueError struct {
89
        name  string
90
        from  int32
91
        to    int32
92
        value int32
93
}
94
 
95
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
96
        return &huffmanBitWriter{
97
                w:               w,
98
                literalFreq:     make([]int32, maxLit),
99
                offsetFreq:      make([]int32, offsetCodeCount),
100
                codegen:         make([]uint8, maxLit+offsetCodeCount+1),
101
                codegenFreq:     make([]int32, codegenCodeCount),
102
                literalEncoding: newHuffmanEncoder(maxLit),
103
                offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
104
                codegenEncoding: newHuffmanEncoder(codegenCodeCount),
105
        }
106
}
107
 
108
func (err WrongValueError) Error() string {
109
        return "huffmanBitWriter: " + err.name + " should belong to [" + strconv.FormatInt(int64(err.from), 10) + ";" +
110
                strconv.FormatInt(int64(err.to), 10) + "] but actual value is " + strconv.FormatInt(int64(err.value), 10)
111
}
112
 
113
func (w *huffmanBitWriter) flushBits() {
114
        if w.err != nil {
115
                w.nbits = 0
116
                return
117
        }
118
        bits := w.bits
119
        w.bits >>= 16
120
        w.nbits -= 16
121
        n := w.nbytes
122
        w.bytes[n] = byte(bits)
123
        w.bytes[n+1] = byte(bits >> 8)
124
        if n += 2; n >= len(w.bytes) {
125
                _, w.err = w.w.Write(w.bytes[0:])
126
                n = 0
127
        }
128
        w.nbytes = n
129
}
130
 
131
func (w *huffmanBitWriter) flush() {
132
        if w.err != nil {
133
                w.nbits = 0
134
                return
135
        }
136
        n := w.nbytes
137
        if w.nbits > 8 {
138
                w.bytes[n] = byte(w.bits)
139
                w.bits >>= 8
140
                w.nbits -= 8
141
                n++
142
        }
143
        if w.nbits > 0 {
144
                w.bytes[n] = byte(w.bits)
145
                w.nbits = 0
146
                n++
147
        }
148
        w.bits = 0
149
        _, w.err = w.w.Write(w.bytes[0:n])
150
        w.nbytes = 0
151
}
152
 
153
func (w *huffmanBitWriter) writeBits(b, nb int32) {
154
        w.bits |= uint32(b) << w.nbits
155
        if w.nbits += uint32(nb); w.nbits >= 16 {
156
                w.flushBits()
157
        }
158
}
159
 
160
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
161
        if w.err != nil {
162
                return
163
        }
164
        n := w.nbytes
165
        if w.nbits == 8 {
166
                w.bytes[n] = byte(w.bits)
167
                w.nbits = 0
168
                n++
169
        }
170
        if w.nbits != 0 {
171
                w.err = InternalError("writeBytes with unfinished bits")
172
                return
173
        }
174
        if n != 0 {
175
                _, w.err = w.w.Write(w.bytes[0:n])
176
                if w.err != nil {
177
                        return
178
                }
179
        }
180
        w.nbytes = 0
181
        _, w.err = w.w.Write(bytes)
182
}
183
 
184
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
185
// the literal and offset lengths arrays (which are concatenated into a single
186
// array).  This method generates that run-length encoding.
187
//
188
// The result is written into the codegen array, and the frequencies
189
// of each code is written into the codegenFreq array.
190
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
191
// information.  Code badCode is an end marker
192
//
193
//  numLiterals      The number of literals in literalEncoding
194
//  numOffsets       The number of offsets in offsetEncoding
195
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) {
196
        for i := range w.codegenFreq {
197
                w.codegenFreq[i] = 0
198
        }
199
        // Note that we are using codegen both as a temporary variable for holding
200
        // a copy of the frequencies, and as the place where we put the result.
201
        // This is fine because the output is always shorter than the input used
202
        // so far.
203
        codegen := w.codegen // cache
204
        // Copy the concatenated code sizes to codegen.  Put a marker at the end.
205
        copy(codegen[0:numLiterals], w.literalEncoding.codeBits)
206
        copy(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits)
207
        codegen[numLiterals+numOffsets] = badCode
208
 
209
        size := codegen[0]
210
        count := 1
211
        outIndex := 0
212
        for inIndex := 1; size != badCode; inIndex++ {
213
                // INVARIANT: We have seen "count" copies of size that have not yet
214
                // had output generated for them.
215
                nextSize := codegen[inIndex]
216
                if nextSize == size {
217
                        count++
218
                        continue
219
                }
220
                // We need to generate codegen indicating "count" of size.
221
                if size != 0 {
222
                        codegen[outIndex] = size
223
                        outIndex++
224
                        w.codegenFreq[size]++
225
                        count--
226
                        for count >= 3 {
227
                                n := 6
228
                                if n > count {
229
                                        n = count
230
                                }
231
                                codegen[outIndex] = 16
232
                                outIndex++
233
                                codegen[outIndex] = uint8(n - 3)
234
                                outIndex++
235
                                w.codegenFreq[16]++
236
                                count -= n
237
                        }
238
                } else {
239
                        for count >= 11 {
240
                                n := 138
241
                                if n > count {
242
                                        n = count
243
                                }
244
                                codegen[outIndex] = 18
245
                                outIndex++
246
                                codegen[outIndex] = uint8(n - 11)
247
                                outIndex++
248
                                w.codegenFreq[18]++
249
                                count -= n
250
                        }
251
                        if count >= 3 {
252
                                // count >= 3 && count <= 10
253
                                codegen[outIndex] = 17
254
                                outIndex++
255
                                codegen[outIndex] = uint8(count - 3)
256
                                outIndex++
257
                                w.codegenFreq[17]++
258
                                count = 0
259
                        }
260
                }
261
                count--
262
                for ; count >= 0; count-- {
263
                        codegen[outIndex] = size
264
                        outIndex++
265
                        w.codegenFreq[size]++
266
                }
267
                // Set up invariant for next time through the loop.
268
                size = nextSize
269
                count = 1
270
        }
271
        // Marker indicating the end of the codegen.
272
        codegen[outIndex] = badCode
273
}
274
 
275
func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) {
276
        if w.err != nil {
277
                return
278
        }
279
        w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal]))
280
}
281
 
282
// Write the header of a dynamic Huffman block to the output stream.
283
//
284
//  numLiterals  The number of literals specified in codegen
285
//  numOffsets   The number of offsets specified in codegen
286
//  numCodegens  The number of codegens used in codegen
287
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
288
        if w.err != nil {
289
                return
290
        }
291
        var firstBits int32 = 4
292
        if isEof {
293
                firstBits = 5
294
        }
295
        w.writeBits(firstBits, 3)
296
        w.writeBits(int32(numLiterals-257), 5)
297
        w.writeBits(int32(numOffsets-1), 5)
298
        w.writeBits(int32(numCodegens-4), 4)
299
 
300
        for i := 0; i < numCodegens; i++ {
301
                value := w.codegenEncoding.codeBits[codegenOrder[i]]
302
                w.writeBits(int32(value), 3)
303
        }
304
 
305
        i := 0
306
        for {
307
                var codeWord int = int(w.codegen[i])
308
                i++
309
                if codeWord == badCode {
310
                        break
311
                }
312
                // The low byte contains the actual code to generate.
313
                w.writeCode(w.codegenEncoding, uint32(codeWord))
314
 
315
                switch codeWord {
316
                case 16:
317
                        w.writeBits(int32(w.codegen[i]), 2)
318
                        i++
319
                        break
320
                case 17:
321
                        w.writeBits(int32(w.codegen[i]), 3)
322
                        i++
323
                        break
324
                case 18:
325
                        w.writeBits(int32(w.codegen[i]), 7)
326
                        i++
327
                        break
328
                }
329
        }
330
}
331
 
332
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
333
        if w.err != nil {
334
                return
335
        }
336
        var flag int32
337
        if isEof {
338
                flag = 1
339
        }
340
        w.writeBits(flag, 3)
341
        w.flush()
342
        w.writeBits(int32(length), 16)
343
        w.writeBits(int32(^uint16(length)), 16)
344
}
345
 
346
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
347
        if w.err != nil {
348
                return
349
        }
350
        // Indicate that we are a fixed Huffman block
351
        var value int32 = 2
352
        if isEof {
353
                value = 3
354
        }
355
        w.writeBits(value, 3)
356
}
357
 
358
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
359
        if w.err != nil {
360
                return
361
        }
362
        for i := range w.literalFreq {
363
                w.literalFreq[i] = 0
364
        }
365
        for i := range w.offsetFreq {
366
                w.offsetFreq[i] = 0
367
        }
368
 
369
        n := len(tokens)
370
        tokens = tokens[0 : n+1]
371
        tokens[n] = endBlockMarker
372
 
373
        for _, t := range tokens {
374
                switch t.typ() {
375
                case literalType:
376
                        w.literalFreq[t.literal()]++
377
                case matchType:
378
                        length := t.length()
379
                        offset := t.offset()
380
                        w.literalFreq[lengthCodesStart+lengthCode(length)]++
381
                        w.offsetFreq[offsetCode(offset)]++
382
                }
383
        }
384
 
385
        // get the number of literals
386
        numLiterals := len(w.literalFreq)
387
        for w.literalFreq[numLiterals-1] == 0 {
388
                numLiterals--
389
        }
390
        // get the number of offsets
391
        numOffsets := len(w.offsetFreq)
392
        for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
393
                numOffsets--
394
        }
395
        if numOffsets == 0 {
396
                // We haven't found a single match. If we want to go with the dynamic encoding,
397
                // we should count at least one offset to be sure that the offset huffman tree could be encoded.
398
                w.offsetFreq[0] = 1
399
                numOffsets = 1
400
        }
401
 
402
        w.literalEncoding.generate(w.literalFreq, 15)
403
        w.offsetEncoding.generate(w.offsetFreq, 15)
404
 
405
        storedBytes := 0
406
        if input != nil {
407
                storedBytes = len(input)
408
        }
409
        var extraBits int64
410
        var storedSize int64 = math.MaxInt64
411
        if storedBytes <= maxStoreBlockSize && input != nil {
412
                storedSize = int64((storedBytes + 5) * 8)
413
                // We only bother calculating the costs of the extra bits required by
414
                // the length of offset fields (which will be the same for both fixed
415
                // and dynamic encoding), if we need to compare those two encodings
416
                // against stored encoding.
417
                for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
418
                        // First eight length codes have extra size = 0.
419
                        extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart])
420
                }
421
                for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
422
                        // First four offset codes have extra size = 0.
423
                        extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode])
424
                }
425
        }
426
 
427
        // Figure out smallest code.
428
        // Fixed Huffman baseline.
429
        var size = int64(3) +
430
                fixedLiteralEncoding.bitLength(w.literalFreq) +
431
                fixedOffsetEncoding.bitLength(w.offsetFreq) +
432
                extraBits
433
        var literalEncoding = fixedLiteralEncoding
434
        var offsetEncoding = fixedOffsetEncoding
435
 
436
        // Dynamic Huffman?
437
        var numCodegens int
438
 
439
        // Generate codegen and codegenFrequencies, which indicates how to encode
440
        // the literalEncoding and the offsetEncoding.
441
        w.generateCodegen(numLiterals, numOffsets)
442
        w.codegenEncoding.generate(w.codegenFreq, 7)
443
        numCodegens = len(w.codegenFreq)
444
        for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
445
                numCodegens--
446
        }
447
        dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
448
                w.codegenEncoding.bitLength(w.codegenFreq) +
449
                int64(extraBits) +
450
                int64(w.codegenFreq[16]*2) +
451
                int64(w.codegenFreq[17]*3) +
452
                int64(w.codegenFreq[18]*7)
453
        dynamicSize := dynamicHeader +
454
                w.literalEncoding.bitLength(w.literalFreq) +
455
                w.offsetEncoding.bitLength(w.offsetFreq)
456
 
457
        if dynamicSize < size {
458
                size = dynamicSize
459
                literalEncoding = w.literalEncoding
460
                offsetEncoding = w.offsetEncoding
461
        }
462
 
463
        // Stored bytes?
464
        if storedSize < size {
465
                w.writeStoredHeader(storedBytes, eof)
466
                w.writeBytes(input[0:storedBytes])
467
                return
468
        }
469
 
470
        // Huffman.
471
        if literalEncoding == fixedLiteralEncoding {
472
                w.writeFixedHeader(eof)
473
        } else {
474
                w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
475
        }
476
        for _, t := range tokens {
477
                switch t.typ() {
478
                case literalType:
479
                        w.writeCode(literalEncoding, t.literal())
480
                        break
481
                case matchType:
482
                        // Write the length
483
                        length := t.length()
484
                        lengthCode := lengthCode(length)
485
                        w.writeCode(literalEncoding, lengthCode+lengthCodesStart)
486
                        extraLengthBits := int32(lengthExtraBits[lengthCode])
487
                        if extraLengthBits > 0 {
488
                                extraLength := int32(length - lengthBase[lengthCode])
489
                                w.writeBits(extraLength, extraLengthBits)
490
                        }
491
                        // Write the offset
492
                        offset := t.offset()
493
                        offsetCode := offsetCode(offset)
494
                        w.writeCode(offsetEncoding, offsetCode)
495
                        extraOffsetBits := int32(offsetExtraBits[offsetCode])
496
                        if extraOffsetBits > 0 {
497
                                extraOffset := int32(offset - offsetBase[offsetCode])
498
                                w.writeBits(extraOffset, extraOffsetBits)
499
                        }
500
                        break
501
                default:
502
                        panic("unknown token type: " + string(t))
503
                }
504
        }
505
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.