1 |
745 |
jeremybenn |
/* fitblk.c: example of fitting compressed output to a specified size
|
2 |
|
|
Not copyrighted -- provided to the public domain
|
3 |
|
|
Version 1.1 25 November 2004 Mark Adler */
|
4 |
|
|
|
5 |
|
|
/* Version history:
|
6 |
|
|
1.0 24 Nov 2004 First version
|
7 |
|
|
1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
|
8 |
|
|
Use fixed-size, stack-allocated raw buffers
|
9 |
|
|
Simplify code moving compression to subroutines
|
10 |
|
|
Use assert() for internal errors
|
11 |
|
|
Add detailed description of approach
|
12 |
|
|
*/
|
13 |
|
|
|
14 |
|
|
/* Approach to just fitting a requested compressed size:
|
15 |
|
|
|
16 |
|
|
fitblk performs three compression passes on a portion of the input
|
17 |
|
|
data in order to determine how much of that input will compress to
|
18 |
|
|
nearly the requested output block size. The first pass generates
|
19 |
|
|
enough deflate blocks to produce output to fill the requested
|
20 |
|
|
output size plus a specified excess amount (see the EXCESS define
|
21 |
|
|
below). The last deflate block may go quite a bit past that, but
|
22 |
|
|
is discarded. The second pass decompresses and recompresses just
|
23 |
|
|
the compressed data that fit in the requested plus excess sized
|
24 |
|
|
buffer. The deflate process is terminated after that amount of
|
25 |
|
|
input, which is less than the amount consumed on the first pass.
|
26 |
|
|
The last deflate block of the result will be of a comparable size
|
27 |
|
|
to the final product, so that the header for that deflate block and
|
28 |
|
|
the compression ratio for that block will be about the same as in
|
29 |
|
|
the final product. The third compression pass decompresses the
|
30 |
|
|
result of the second step, but only the compressed data up to the
|
31 |
|
|
requested size minus an amount to allow the compressed stream to
|
32 |
|
|
complete (see the MARGIN define below). That will result in a
|
33 |
|
|
final compressed stream whose length is less than or equal to the
|
34 |
|
|
requested size. Assuming sufficient input and a requested size
|
35 |
|
|
greater than a few hundred bytes, the shortfall will typically be
|
36 |
|
|
less than ten bytes.
|
37 |
|
|
|
38 |
|
|
If the input is short enough that the first compression completes
|
39 |
|
|
before filling the requested output size, then that compressed
|
40 |
|
|
stream is returned with no recompression.
|
41 |
|
|
|
42 |
|
|
EXCESS is chosen to be just greater than the shortfall seen in a
|
43 |
|
|
two pass approach similar to the above. That shortfall is due to
|
44 |
|
|
the last deflate block compressing more efficiently with a smaller
|
45 |
|
|
header on the second pass. EXCESS is set to be large enough so
|
46 |
|
|
that there is enough uncompressed data for the second pass to fill
|
47 |
|
|
out the requested size, and small enough so that the final deflate
|
48 |
|
|
block of the second pass will be close in size to the final deflate
|
49 |
|
|
block of the third and final pass. MARGIN is chosen to be just
|
50 |
|
|
large enough to assure that the final compression has enough room
|
51 |
|
|
to complete in all cases.
|
52 |
|
|
*/
|
53 |
|
|
|
54 |
|
|
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include "zlib.h"
|
58 |
|
|
|
59 |
|
|
#define local static
|
60 |
|
|
|
61 |
|
|
/* print nastygram and leave */
|
62 |
|
|
local void quit(char *why)
|
63 |
|
|
{
|
64 |
|
|
fprintf(stderr, "fitblk abort: %s\n", why);
|
65 |
|
|
exit(1);
|
66 |
|
|
}
|
67 |
|
|
|
68 |
|
|
/* size in bytes of the stack buffer used to stage uncompressed data */
#define RAWLEN 4096
|
69 |
|
|
|
70 |
|
|
/* compress from file to def until provided buffer is full or end of
|
71 |
|
|
input reached; return last deflate() return value, or Z_ERRNO if
|
72 |
|
|
there was read error on the file */
|
73 |
|
|
local int partcompress(FILE *in, z_streamp def)
|
74 |
|
|
{
|
75 |
|
|
int ret, flush;
|
76 |
|
|
unsigned char raw[RAWLEN];
|
77 |
|
|
|
78 |
|
|
flush = Z_NO_FLUSH;
|
79 |
|
|
do {
|
80 |
|
|
def->avail_in = fread(raw, 1, RAWLEN, in);
|
81 |
|
|
if (ferror(in))
|
82 |
|
|
return Z_ERRNO;
|
83 |
|
|
def->next_in = raw;
|
84 |
|
|
if (feof(in))
|
85 |
|
|
flush = Z_FINISH;
|
86 |
|
|
ret = deflate(def, flush);
|
87 |
|
|
assert(ret != Z_STREAM_ERROR);
|
88 |
|
|
} while (def->avail_out != 0 && flush == Z_NO_FLUSH);
|
89 |
|
|
return ret;
|
90 |
|
|
}
|
91 |
|
|
|
92 |
|
|
/* recompress from inf's input to def's output; the input for inf and
|
93 |
|
|
the output for def are set in those structures before calling;
|
94 |
|
|
return last deflate() return value, or Z_MEM_ERROR if inflate()
|
95 |
|
|
was not able to allocate enough memory when it needed to */
|
96 |
|
|
local int recompress(z_streamp inf, z_streamp def)
|
97 |
|
|
{
|
98 |
|
|
int ret, flush;
|
99 |
|
|
unsigned char raw[RAWLEN];
|
100 |
|
|
|
101 |
|
|
flush = Z_NO_FLUSH;
|
102 |
|
|
do {
|
103 |
|
|
/* decompress */
|
104 |
|
|
inf->avail_out = RAWLEN;
|
105 |
|
|
inf->next_out = raw;
|
106 |
|
|
ret = inflate(inf, Z_NO_FLUSH);
|
107 |
|
|
assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
|
108 |
|
|
ret != Z_NEED_DICT);
|
109 |
|
|
if (ret == Z_MEM_ERROR)
|
110 |
|
|
return ret;
|
111 |
|
|
|
112 |
|
|
/* compress what was decompresed until done or no room */
|
113 |
|
|
def->avail_in = RAWLEN - inf->avail_out;
|
114 |
|
|
def->next_in = raw;
|
115 |
|
|
if (inf->avail_out != 0)
|
116 |
|
|
flush = Z_FINISH;
|
117 |
|
|
ret = deflate(def, flush);
|
118 |
|
|
assert(ret != Z_STREAM_ERROR);
|
119 |
|
|
} while (ret != Z_STREAM_END && def->avail_out != 0);
|
120 |
|
|
return ret;
|
121 |
|
|
}
|
122 |
|
|
|
123 |
|
|
/* extra bytes, beyond the requested size, that the first two compression
   passes are allowed to produce (empirically determined stream overage) */
#define EXCESS 256
|
124 |
|
|
/* bytes held back from the requested size when feeding the final pass, so
   that the final compressed stream has room to complete */
#define MARGIN 8
|
125 |
|
|
|
126 |
|
|
/* compress from stdin to fixed-size block on stdout */
|
127 |
|
|
int main(int argc, char **argv)
|
128 |
|
|
{
|
129 |
|
|
int ret; /* return code */
|
130 |
|
|
unsigned size; /* requested fixed output block size */
|
131 |
|
|
unsigned have; /* bytes written by deflate() call */
|
132 |
|
|
unsigned char *blk; /* intermediate and final stream */
|
133 |
|
|
unsigned char *tmp; /* close to desired size stream */
|
134 |
|
|
z_stream def, inf; /* zlib deflate and inflate states */
|
135 |
|
|
|
136 |
|
|
/* get requested output size */
|
137 |
|
|
if (argc != 2)
|
138 |
|
|
quit("need one argument: size of output block");
|
139 |
|
|
ret = strtol(argv[1], argv + 1, 10);
|
140 |
|
|
if (argv[1][0] != 0)
|
141 |
|
|
quit("argument must be a number");
|
142 |
|
|
if (ret < 8) /* 8 is minimum zlib stream size */
|
143 |
|
|
quit("need positive size of 8 or greater");
|
144 |
|
|
size = (unsigned)ret;
|
145 |
|
|
|
146 |
|
|
/* allocate memory for buffers and compression engine */
|
147 |
|
|
blk = malloc(size + EXCESS);
|
148 |
|
|
def.zalloc = Z_NULL;
|
149 |
|
|
def.zfree = Z_NULL;
|
150 |
|
|
def.opaque = Z_NULL;
|
151 |
|
|
ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
|
152 |
|
|
if (ret != Z_OK || blk == NULL)
|
153 |
|
|
quit("out of memory");
|
154 |
|
|
|
155 |
|
|
/* compress from stdin until output full, or no more input */
|
156 |
|
|
def.avail_out = size + EXCESS;
|
157 |
|
|
def.next_out = blk;
|
158 |
|
|
ret = partcompress(stdin, &def);
|
159 |
|
|
if (ret == Z_ERRNO)
|
160 |
|
|
quit("error reading input");
|
161 |
|
|
|
162 |
|
|
/* if it all fit, then size was undersubscribed -- done! */
|
163 |
|
|
if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
|
164 |
|
|
/* write block to stdout */
|
165 |
|
|
have = size + EXCESS - def.avail_out;
|
166 |
|
|
if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
|
167 |
|
|
quit("error writing output");
|
168 |
|
|
|
169 |
|
|
/* clean up and print results to stderr */
|
170 |
|
|
ret = deflateEnd(&def);
|
171 |
|
|
assert(ret != Z_STREAM_ERROR);
|
172 |
|
|
free(blk);
|
173 |
|
|
fprintf(stderr,
|
174 |
|
|
"%u bytes unused out of %u requested (all input)\n",
|
175 |
|
|
size - have, size);
|
176 |
|
|
return 0;
|
177 |
|
|
}
|
178 |
|
|
|
179 |
|
|
/* it didn't all fit -- set up for recompression */
|
180 |
|
|
inf.zalloc = Z_NULL;
|
181 |
|
|
inf.zfree = Z_NULL;
|
182 |
|
|
inf.opaque = Z_NULL;
|
183 |
|
|
inf.avail_in = 0;
|
184 |
|
|
inf.next_in = Z_NULL;
|
185 |
|
|
ret = inflateInit(&inf);
|
186 |
|
|
tmp = malloc(size + EXCESS);
|
187 |
|
|
if (ret != Z_OK || tmp == NULL)
|
188 |
|
|
quit("out of memory");
|
189 |
|
|
ret = deflateReset(&def);
|
190 |
|
|
assert(ret != Z_STREAM_ERROR);
|
191 |
|
|
|
192 |
|
|
/* do first recompression close to the right amount */
|
193 |
|
|
inf.avail_in = size + EXCESS;
|
194 |
|
|
inf.next_in = blk;
|
195 |
|
|
def.avail_out = size + EXCESS;
|
196 |
|
|
def.next_out = tmp;
|
197 |
|
|
ret = recompress(&inf, &def);
|
198 |
|
|
if (ret == Z_MEM_ERROR)
|
199 |
|
|
quit("out of memory");
|
200 |
|
|
|
201 |
|
|
/* set up for next reocmpression */
|
202 |
|
|
ret = inflateReset(&inf);
|
203 |
|
|
assert(ret != Z_STREAM_ERROR);
|
204 |
|
|
ret = deflateReset(&def);
|
205 |
|
|
assert(ret != Z_STREAM_ERROR);
|
206 |
|
|
|
207 |
|
|
/* do second and final recompression (third compression) */
|
208 |
|
|
inf.avail_in = size - MARGIN; /* assure stream will complete */
|
209 |
|
|
inf.next_in = tmp;
|
210 |
|
|
def.avail_out = size;
|
211 |
|
|
def.next_out = blk;
|
212 |
|
|
ret = recompress(&inf, &def);
|
213 |
|
|
if (ret == Z_MEM_ERROR)
|
214 |
|
|
quit("out of memory");
|
215 |
|
|
assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
|
216 |
|
|
|
217 |
|
|
/* done -- write block to stdout */
|
218 |
|
|
have = size - def.avail_out;
|
219 |
|
|
if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
|
220 |
|
|
quit("error writing output");
|
221 |
|
|
|
222 |
|
|
/* clean up and print results to stderr */
|
223 |
|
|
free(tmp);
|
224 |
|
|
ret = inflateEnd(&inf);
|
225 |
|
|
assert(ret != Z_STREAM_ERROR);
|
226 |
|
|
ret = deflateEnd(&def);
|
227 |
|
|
assert(ret != Z_STREAM_ERROR);
|
228 |
|
|
free(blk);
|
229 |
|
|
fprintf(stderr,
|
230 |
|
|
"%u bytes unused out of %u requested (%lu input)\n",
|
231 |
|
|
size - have, size, def.total_in);
|
232 |
|
|
return 0;
|
233 |
|
|
}
|