| 1 | 747 | jeremybenn | // Copyright 2009 The Go Authors. All rights reserved.
 | 
      
         | 2 |  |  | // Use of this source code is governed by a BSD-style
 | 
      
         | 3 |  |  | // license that can be found in the LICENSE file.
 | 
      
         | 4 |  |  |  
 | 
      
         | 5 |  |  | package flate
 | 
      
         | 6 |  |  |  
 | 
      
         | 7 |  |  | import (
 | 
      
         | 8 |  |  |         "math"
 | 
      
         | 9 |  |  |         "sort"
 | 
      
         | 10 |  |  | )
 | 
      
         | 11 |  |  |  
 | 
      
         | 12 |  |  | type huffmanEncoder struct {
 | 
      
         | 13 |  |  |         codeBits []uint8
 | 
      
         | 14 |  |  |         code     []uint16
 | 
      
         | 15 |  |  | }
 | 
      
         | 16 |  |  |  
 | 
      
         | 17 |  |  | type literalNode struct {
 | 
      
         | 18 |  |  |         literal uint16
 | 
      
         | 19 |  |  |         freq    int32
 | 
      
         | 20 |  |  | }
 | 
      
         | 21 |  |  |  
 | 
      
         | 22 |  |  | type chain struct {
 | 
      
         | 23 |  |  |         // The sum of the leaves in this tree
 | 
      
         | 24 |  |  |         freq int32
 | 
      
         | 25 |  |  |  
 | 
      
         | 26 |  |  |         // The number of literals to the left of this item at this level
 | 
      
         | 27 |  |  |         leafCount int32
 | 
      
         | 28 |  |  |  
 | 
      
         | 29 |  |  |         // The right child of this chain in the previous level.
 | 
      
         | 30 |  |  |         up *chain
 | 
      
         | 31 |  |  | }
 | 
      
         | 32 |  |  |  
 | 
      
         | 33 |  |  | type levelInfo struct {
 | 
      
         | 34 |  |  |         // Our level.  for better printing
 | 
      
         | 35 |  |  |         level int32
 | 
      
         | 36 |  |  |  
 | 
      
         | 37 |  |  |         // The most recent chain generated for this level
 | 
      
         | 38 |  |  |         lastChain *chain
 | 
      
         | 39 |  |  |  
 | 
      
         | 40 |  |  |         // The frequency of the next character to add to this level
 | 
      
         | 41 |  |  |         nextCharFreq int32
 | 
      
         | 42 |  |  |  
 | 
      
         | 43 |  |  |         // The frequency of the next pair (from level below) to add to this level.
 | 
      
         | 44 |  |  |         // Only valid if the "needed" value of the next lower level is 0.
 | 
      
         | 45 |  |  |         nextPairFreq int32
 | 
      
         | 46 |  |  |  
 | 
      
         | 47 |  |  |         // The number of chains remaining to generate for this level before moving
 | 
      
         | 48 |  |  |         // up to the next level
 | 
      
         | 49 |  |  |         needed int32
 | 
      
         | 50 |  |  |  
 | 
      
         | 51 |  |  |         // The levelInfo for level+1
 | 
      
         | 52 |  |  |         up *levelInfo
 | 
      
         | 53 |  |  |  
 | 
      
         | 54 |  |  |         // The levelInfo for level-1
 | 
      
         | 55 |  |  |         down *levelInfo
 | 
      
         | 56 |  |  | }
 | 
      
         | 57 |  |  |  
 | 
      
         | 58 |  |  | func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
 | 
      
         | 59 |  |  |  
 | 
      
         | 60 |  |  | func newHuffmanEncoder(size int) *huffmanEncoder {
 | 
      
         | 61 |  |  |         return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
 | 
      
         | 62 |  |  | }
 | 
      
         | 63 |  |  |  
 | 
      
         | 64 |  |  | // Generates a HuffmanCode corresponding to the fixed literal table
 | 
      
         | 65 |  |  | func generateFixedLiteralEncoding() *huffmanEncoder {
 | 
      
         | 66 |  |  |         h := newHuffmanEncoder(maxLit)
 | 
      
         | 67 |  |  |         codeBits := h.codeBits
 | 
      
         | 68 |  |  |         code := h.code
 | 
      
         | 69 |  |  |         var ch uint16
 | 
      
         | 70 |  |  |         for ch = 0; ch < maxLit; ch++ {
 | 
      
         | 71 |  |  |                 var bits uint16
 | 
      
         | 72 |  |  |                 var size uint8
 | 
      
         | 73 |  |  |                 switch {
 | 
      
         | 74 |  |  |                 case ch < 144:
 | 
      
         | 75 |  |  |                         // size 8, 000110000  .. 10111111
 | 
      
         | 76 |  |  |                         bits = ch + 48
 | 
      
         | 77 |  |  |                         size = 8
 | 
      
         | 78 |  |  |                         break
 | 
      
         | 79 |  |  |                 case ch < 256:
 | 
      
         | 80 |  |  |                         // size 9, 110010000 .. 111111111
 | 
      
         | 81 |  |  |                         bits = ch + 400 - 144
 | 
      
         | 82 |  |  |                         size = 9
 | 
      
         | 83 |  |  |                         break
 | 
      
         | 84 |  |  |                 case ch < 280:
 | 
      
         | 85 |  |  |                         // size 7, 0000000 .. 0010111
 | 
      
         | 86 |  |  |                         bits = ch - 256
 | 
      
         | 87 |  |  |                         size = 7
 | 
      
         | 88 |  |  |                         break
 | 
      
         | 89 |  |  |                 default:
 | 
      
         | 90 |  |  |                         // size 8, 11000000 .. 11000111
 | 
      
         | 91 |  |  |                         bits = ch + 192 - 280
 | 
      
         | 92 |  |  |                         size = 8
 | 
      
         | 93 |  |  |                 }
 | 
      
         | 94 |  |  |                 codeBits[ch] = size
 | 
      
         | 95 |  |  |                 code[ch] = reverseBits(bits, size)
 | 
      
         | 96 |  |  |         }
 | 
      
         | 97 |  |  |         return h
 | 
      
         | 98 |  |  | }
 | 
      
         | 99 |  |  |  
 | 
      
         | 100 |  |  | func generateFixedOffsetEncoding() *huffmanEncoder {
 | 
      
         | 101 |  |  |         h := newHuffmanEncoder(30)
 | 
      
         | 102 |  |  |         codeBits := h.codeBits
 | 
      
         | 103 |  |  |         code := h.code
 | 
      
         | 104 |  |  |         for ch := uint16(0); ch < 30; ch++ {
 | 
      
         | 105 |  |  |                 codeBits[ch] = 5
 | 
      
         | 106 |  |  |                 code[ch] = reverseBits(ch, 5)
 | 
      
         | 107 |  |  |         }
 | 
      
         | 108 |  |  |         return h
 | 
      
         | 109 |  |  | }
 | 
      
         | 110 |  |  |  
 | 
      
         | 111 |  |  | var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
 | 
      
         | 112 |  |  | var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
 | 
      
         | 113 |  |  |  
 | 
      
         | 114 |  |  | func (h *huffmanEncoder) bitLength(freq []int32) int64 {
 | 
      
         | 115 |  |  |         var total int64
 | 
      
         | 116 |  |  |         for i, f := range freq {
 | 
      
         | 117 |  |  |                 if f != 0 {
 | 
      
         | 118 |  |  |                         total += int64(f) * int64(h.codeBits[i])
 | 
      
         | 119 |  |  |                 }
 | 
      
         | 120 |  |  |         }
 | 
      
         | 121 |  |  |         return total
 | 
      
         | 122 |  |  | }
 | 
      
         | 123 |  |  |  
 | 
      
         | 124 |  |  | // Return the number of literals assigned to each bit size in the Huffman encoding
 | 
      
         | 125 |  |  | //
 | 
      
         | 126 |  |  | // This method is only called when list.length >= 3
 | 
      
         | 127 |  |  | // The cases of 0, 1, and 2 literals are handled by special case code.
 | 
      
         | 128 |  |  | //
 | 
      
         | 129 |  |  | // list  An array of the literals with non-zero frequencies
 | 
      
         | 130 |  |  | //             and their associated frequencies.  The array is in order of increasing
 | 
      
         | 131 |  |  | //             frequency, and has as its last element a special element with frequency
 | 
      
         | 132 |  |  | //             MaxInt32
 | 
      
         | 133 |  |  | // maxBits     The maximum number of bits that should be used to encode any literal.
 | 
      
         | 134 |  |  | // return      An integer array in which array[i] indicates the number of literals
 | 
      
         | 135 |  |  | //             that should be encoded in i bits.
 | 
      
         | 136 |  |  | func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
 | 
      
         | 137 |  |  |         n := int32(len(list))
 | 
      
         | 138 |  |  |         list = list[0 : n+1]
 | 
      
         | 139 |  |  |         list[n] = maxNode()
 | 
      
         | 140 |  |  |  
 | 
      
         | 141 |  |  |         // The tree can't have greater depth than n - 1, no matter what.  This
 | 
      
         | 142 |  |  |         // saves a little bit of work in some small cases
 | 
      
         | 143 |  |  |         if maxBits > n-1 {
 | 
      
         | 144 |  |  |                 maxBits = n - 1
 | 
      
         | 145 |  |  |         }
 | 
      
         | 146 |  |  |  
 | 
      
         | 147 |  |  |         // Create information about each of the levels.
 | 
      
         | 148 |  |  |         // A bogus "Level 0" whose sole purpose is so that
 | 
      
         | 149 |  |  |         // level1.prev.needed==0.  This makes level1.nextPairFreq
 | 
      
         | 150 |  |  |         // be a legitimate value that never gets chosen.
 | 
      
         | 151 |  |  |         top := &levelInfo{needed: 0}
 | 
      
         | 152 |  |  |         chain2 := &chain{list[1].freq, 2, new(chain)}
 | 
      
         | 153 |  |  |         for level := int32(1); level <= maxBits; level++ {
 | 
      
         | 154 |  |  |                 // For every level, the first two items are the first two characters.
 | 
      
         | 155 |  |  |                 // We initialize the levels as if we had already figured this out.
 | 
      
         | 156 |  |  |                 top = &levelInfo{
 | 
      
         | 157 |  |  |                         level:        level,
 | 
      
         | 158 |  |  |                         lastChain:    chain2,
 | 
      
         | 159 |  |  |                         nextCharFreq: list[2].freq,
 | 
      
         | 160 |  |  |                         nextPairFreq: list[0].freq + list[1].freq,
 | 
      
         | 161 |  |  |                         down:         top,
 | 
      
         | 162 |  |  |                 }
 | 
      
         | 163 |  |  |                 top.down.up = top
 | 
      
         | 164 |  |  |                 if level == 1 {
 | 
      
         | 165 |  |  |                         top.nextPairFreq = math.MaxInt32
 | 
      
         | 166 |  |  |                 }
 | 
      
         | 167 |  |  |         }
 | 
      
         | 168 |  |  |  
 | 
      
         | 169 |  |  |         // We need a total of 2*n - 2 items at top level and have already generated 2.
 | 
      
         | 170 |  |  |         top.needed = 2*n - 4
 | 
      
         | 171 |  |  |  
 | 
      
         | 172 |  |  |         l := top
 | 
      
         | 173 |  |  |         for {
 | 
      
         | 174 |  |  |                 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
 | 
      
         | 175 |  |  |                         // We've run out of both leafs and pairs.
 | 
      
         | 176 |  |  |                         // End all calculations for this level.
 | 
      
         | 177 |  |  |                         // To m sure we never come back to this level or any lower level,
 | 
      
         | 178 |  |  |                         // set nextPairFreq impossibly large.
 | 
      
         | 179 |  |  |                         l.lastChain = nil
 | 
      
         | 180 |  |  |                         l.needed = 0
 | 
      
         | 181 |  |  |                         l = l.up
 | 
      
         | 182 |  |  |                         l.nextPairFreq = math.MaxInt32
 | 
      
         | 183 |  |  |                         continue
 | 
      
         | 184 |  |  |                 }
 | 
      
         | 185 |  |  |  
 | 
      
         | 186 |  |  |                 prevFreq := l.lastChain.freq
 | 
      
         | 187 |  |  |                 if l.nextCharFreq < l.nextPairFreq {
 | 
      
         | 188 |  |  |                         // The next item on this row is a leaf node.
 | 
      
         | 189 |  |  |                         n := l.lastChain.leafCount + 1
 | 
      
         | 190 |  |  |                         l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up}
 | 
      
         | 191 |  |  |                         l.nextCharFreq = list[n].freq
 | 
      
         | 192 |  |  |                 } else {
 | 
      
         | 193 |  |  |                         // The next item on this row is a pair from the previous row.
 | 
      
         | 194 |  |  |                         // nextPairFreq isn't valid until we generate two
 | 
      
         | 195 |  |  |                         // more values in the level below
 | 
      
         | 196 |  |  |                         l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain}
 | 
      
         | 197 |  |  |                         l.down.needed = 2
 | 
      
         | 198 |  |  |                 }
 | 
      
         | 199 |  |  |  
 | 
      
         | 200 |  |  |                 if l.needed--; l.needed == 0 {
 | 
      
         | 201 |  |  |                         // We've done everything we need to do for this level.
 | 
      
         | 202 |  |  |                         // Continue calculating one level up.  Fill in nextPairFreq
 | 
      
         | 203 |  |  |                         // of that level with the sum of the two nodes we've just calculated on
 | 
      
         | 204 |  |  |                         // this level.
 | 
      
         | 205 |  |  |                         up := l.up
 | 
      
         | 206 |  |  |                         if up == nil {
 | 
      
         | 207 |  |  |                                 // All done!
 | 
      
         | 208 |  |  |                                 break
 | 
      
         | 209 |  |  |                         }
 | 
      
         | 210 |  |  |                         up.nextPairFreq = prevFreq + l.lastChain.freq
 | 
      
         | 211 |  |  |                         l = up
 | 
      
         | 212 |  |  |                 } else {
 | 
      
         | 213 |  |  |                         // If we stole from below, move down temporarily to replenish it.
 | 
      
         | 214 |  |  |                         for l.down.needed > 0 {
 | 
      
         | 215 |  |  |                                 l = l.down
 | 
      
         | 216 |  |  |                         }
 | 
      
         | 217 |  |  |                 }
 | 
      
         | 218 |  |  |         }
 | 
      
         | 219 |  |  |  
 | 
      
         | 220 |  |  |         // Somethings is wrong if at the end, the top level is null or hasn't used
 | 
      
         | 221 |  |  |         // all of the leaves.
 | 
      
         | 222 |  |  |         if top.lastChain.leafCount != n {
 | 
      
         | 223 |  |  |                 panic("top.lastChain.leafCount != n")
 | 
      
         | 224 |  |  |         }
 | 
      
         | 225 |  |  |  
 | 
      
         | 226 |  |  |         bitCount := make([]int32, maxBits+1)
 | 
      
         | 227 |  |  |         bits := 1
 | 
      
         | 228 |  |  |         for chain := top.lastChain; chain.up != nil; chain = chain.up {
 | 
      
         | 229 |  |  |                 // chain.leafCount gives the number of literals requiring at least "bits"
 | 
      
         | 230 |  |  |                 // bits to encode.
 | 
      
         | 231 |  |  |                 bitCount[bits] = chain.leafCount - chain.up.leafCount
 | 
      
         | 232 |  |  |                 bits++
 | 
      
         | 233 |  |  |         }
 | 
      
         | 234 |  |  |         return bitCount
 | 
      
         | 235 |  |  | }
 | 
      
         | 236 |  |  |  
 | 
      
         | 237 |  |  | // Look at the leaves and assign them a bit count and an encoding as specified
 | 
      
         | 238 |  |  | // in RFC 1951 3.2.2
 | 
      
         | 239 |  |  | func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
 | 
      
         | 240 |  |  |         code := uint16(0)
 | 
      
         | 241 |  |  |         for n, bits := range bitCount {
 | 
      
         | 242 |  |  |                 code <<= 1
 | 
      
         | 243 |  |  |                 if n == 0 || bits == 0 {
 | 
      
         | 244 |  |  |                         continue
 | 
      
         | 245 |  |  |                 }
 | 
      
         | 246 |  |  |                 // The literals list[len(list)-bits] .. list[len(list)-bits]
 | 
      
         | 247 |  |  |                 // are encoded using "bits" bits, and get the values
 | 
      
         | 248 |  |  |                 // code, code + 1, ....  The code values are
 | 
      
         | 249 |  |  |                 // assigned in literal order (not frequency order).
 | 
      
         | 250 |  |  |                 chunk := list[len(list)-int(bits):]
 | 
      
         | 251 |  |  |                 sortByLiteral(chunk)
 | 
      
         | 252 |  |  |                 for _, node := range chunk {
 | 
      
         | 253 |  |  |                         h.codeBits[node.literal] = uint8(n)
 | 
      
         | 254 |  |  |                         h.code[node.literal] = reverseBits(code, uint8(n))
 | 
      
         | 255 |  |  |                         code++
 | 
      
         | 256 |  |  |                 }
 | 
      
         | 257 |  |  |                 list = list[0 : len(list)-int(bits)]
 | 
      
         | 258 |  |  |         }
 | 
      
         | 259 |  |  | }
 | 
      
         | 260 |  |  |  
 | 
      
         | 261 |  |  | // Update this Huffman Code object to be the minimum code for the specified frequency count.
 | 
      
         | 262 |  |  | //
 | 
      
         | 263 |  |  | // freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
 | 
      
         | 264 |  |  | // maxBits  The maximum number of bits to use for any literal.
 | 
      
         | 265 |  |  | func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
 | 
      
         | 266 |  |  |         list := make([]literalNode, len(freq)+1)
 | 
      
         | 267 |  |  |         // Number of non-zero literals
 | 
      
         | 268 |  |  |         count := 0
 | 
      
         | 269 |  |  |         // Set list to be the set of all non-zero literals and their frequencies
 | 
      
         | 270 |  |  |         for i, f := range freq {
 | 
      
         | 271 |  |  |                 if f != 0 {
 | 
      
         | 272 |  |  |                         list[count] = literalNode{uint16(i), f}
 | 
      
         | 273 |  |  |                         count++
 | 
      
         | 274 |  |  |                 } else {
 | 
      
         | 275 |  |  |                         h.codeBits[i] = 0
 | 
      
         | 276 |  |  |                 }
 | 
      
         | 277 |  |  |         }
 | 
      
         | 278 |  |  |         // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
 | 
      
         | 279 |  |  |         h.codeBits = h.codeBits[0:len(freq)]
 | 
      
         | 280 |  |  |         list = list[0:count]
 | 
      
         | 281 |  |  |         if count <= 2 {
 | 
      
         | 282 |  |  |                 // Handle the small cases here, because they are awkward for the general case code.  With
 | 
      
         | 283 |  |  |                 // two or fewer literals, everything has bit length 1.
 | 
      
         | 284 |  |  |                 for i, node := range list {
 | 
      
         | 285 |  |  |                         // "list" is in order of increasing literal value.
 | 
      
         | 286 |  |  |                         h.codeBits[node.literal] = 1
 | 
      
         | 287 |  |  |                         h.code[node.literal] = uint16(i)
 | 
      
         | 288 |  |  |                 }
 | 
      
         | 289 |  |  |                 return
 | 
      
         | 290 |  |  |         }
 | 
      
         | 291 |  |  |         sortByFreq(list)
 | 
      
         | 292 |  |  |  
 | 
      
         | 293 |  |  |         // Get the number of literals for each bit count
 | 
      
         | 294 |  |  |         bitCount := h.bitCounts(list, maxBits)
 | 
      
         | 295 |  |  |         // And do the assignment
 | 
      
         | 296 |  |  |         h.assignEncodingAndSize(bitCount, list)
 | 
      
         | 297 |  |  | }
 | 
      
         | 298 |  |  |  
 | 
      
         | 299 |  |  | type literalNodeSorter struct {
 | 
      
         | 300 |  |  |         a    []literalNode
 | 
      
         | 301 |  |  |         less func(i, j int) bool
 | 
      
         | 302 |  |  | }
 | 
      
         | 303 |  |  |  
 | 
      
         | 304 |  |  | func (s literalNodeSorter) Len() int { return len(s.a) }
 | 
      
         | 305 |  |  |  
 | 
      
         | 306 |  |  | func (s literalNodeSorter) Less(i, j int) bool {
 | 
      
         | 307 |  |  |         return s.less(i, j)
 | 
      
         | 308 |  |  | }
 | 
      
         | 309 |  |  |  
 | 
      
         | 310 |  |  | func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
 | 
      
         | 311 |  |  |  
 | 
      
         | 312 |  |  | func sortByFreq(a []literalNode) {
 | 
      
         | 313 |  |  |         s := &literalNodeSorter{a, func(i, j int) bool {
 | 
      
         | 314 |  |  |                 if a[i].freq == a[j].freq {
 | 
      
         | 315 |  |  |                         return a[i].literal < a[j].literal
 | 
      
         | 316 |  |  |                 }
 | 
      
         | 317 |  |  |                 return a[i].freq < a[j].freq
 | 
      
         | 318 |  |  |         }}
 | 
      
         | 319 |  |  |         sort.Sort(s)
 | 
      
         | 320 |  |  | }
 | 
      
         | 321 |  |  |  
 | 
      
         | 322 |  |  | func sortByLiteral(a []literalNode) {
 | 
      
         | 323 |  |  |         s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
 | 
      
         | 324 |  |  |         sort.Sort(s)
 | 
      
         | 325 |  |  | }
 |