URL https://opencores.org/ocsvn/mpeg2fpga/mpeg2fpga/trunk

Subversion Repositories mpeg2fpga

[/] [mpeg2fpga/] [trunk/] [tools/] [ieee1180/] [ieee1180/] [jrevdct.c] - Blame information for rev 2

Details | Compare with Previous | View Log


/*
 * jrevdct.c
 *
 * Copyright (C) 1991, 1992, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains the basic inverse-DCT transformation subroutine.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 */
 
#include "dct.h"
#include <stdio.h>
 
/* We assume that right shift corresponds to signed division by 2 with
 * rounding towards minus infinity.  This is correct for typical "arithmetic
 * shift" instructions that shift in copies of the sign bit.  But some
 * C compilers implement >> with an unsigned shift.  For these machines you
 * must define RIGHT_SHIFT_IS_UNSIGNED.
 * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
 * It is only applied with constant shift counts.  SHIFT_TEMPS must be
 * included in the variables of any routine using RIGHT_SHIFT.
 */
 
#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define SHIFT_TEMPS     INT32 shift_temp;
#define RIGHT_SHIFT(x,shft)  \
        ((shift_temp = (x)) < 0 ? \
         (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
         (shift_temp >> (shft)))
#else
#define SHIFT_TEMPS
#define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
#endif
 
 
/*
 * This routine is specialized to the case DCTSIZE = 8.
 */
 
#if DCTSIZE != 8
  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
#endif
 
 
/*
 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs.  The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm.  The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic.  We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants).  After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output.  This division can be done
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision.  These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
 * shows that the values given below are the most effective.
 */
 
#ifdef EIGHT_BIT_SAMPLES
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
#endif
 
#define ONE     ((INT32) 1)
 
#define CONST_SCALE (ONE << CONST_BITS)
 
/* Convert a positive real constant to an integer scaled by CONST_SCALE. */
 
#define FIX(x)  ((INT32) ((x) * CONST_SCALE + 0.5))
 
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */
 
#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
 
 
/* Descale and correctly round an INT32 value that's scaled by N bits.
 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 * the fudge factor is correct for either sign of X.
 */
 
#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
 * this provides a useful speedup on many machines.
 * There is no way to specify a 16x16->32 multiply in portable C, but
 * some C compilers will do the right thing if you provide the correct
 * combination of casts.
 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
 */
 
#ifdef EIGHT_BIT_SAMPLES
#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
#define MULTIPLY(var,const)  (((INT16) (var)) * ((INT16) (const)))
#endif
#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
#define MULTIPLY(var,const)  (((INT16) (var)) * ((INT32) (const)))
#endif
#endif
 
#ifndef MULTIPLY                /* default definition */
#define MULTIPLY(var,const)  ((var) * (const))
#endif
 
 
/*
 * Perform the inverse DCT on one block of coefficients.
 */
 
void
j_rev_dct (DCTBLOCK data)
{
  register DCTELEM *dataptr;
  int i, j;
  FILE *idctdat;
  SHIFT_TEMPS
 
 
  dataptr = data;
  idctdat = fopen("idct-in", "w");
  for (i = 0; i <= 63; i++) {
    j = ((INT32)dataptr[i]) & 4095;
    fprintf(idctdat, "%x\n", j);
  }
  fclose(idctdat);
  system("./idct-verilog > idct-out");
  idctdat = fopen("idct-out", "r");
  for (i = 0; i <= 63; i++) {
    fscanf(idctdat, "%d", &j);
    dataptr[i] = j;
  }
  fclose(idctdat);
}

Browse

Tools

Subversion Repositories mpeg2fpga

[/] [mpeg2fpga/] [trunk/] [tools/] [ieee1180/] [ieee1180/] [jrevdct.c] - Blame information for rev 2

Line No.	Rev	Author	Line
1	2	kdv	`/*`
2			`* jrevdct.c`
3			`*`
4			`* Copyright (C) 1991, 1992, Thomas G. Lane.`
5			`* This file is part of the Independent JPEG Group's software.`
6			`* For conditions of distribution and use, see the accompanying README file.`
7			`*`
8			`* This file contains the basic inverse-DCT transformation subroutine.`
9			`*`
10			`* This implementation is based on an algorithm described in`
11			`* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT`
12			`* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,`
13			`* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.`
14			`* The primary algorithm described there uses 11 multiplies and 29 adds.`
15			`* We use their alternate method with 12 multiplies and 32 adds.`
16			`* The advantage of this method is that no data path contains more than one`
17			`* multiplication; this allows a very simple and accurate implementation in`
18			`* scaled fixed-point arithmetic, with a minimal number of shifts.`
19			`*/`
20
21			`#include "dct.h"`
22			`#include <stdio.h>`
23
24			`/* We assume that right shift corresponds to signed division by 2 with`
25			`* rounding towards minus infinity. This is correct for typical "arithmetic`
26			`* shift" instructions that shift in copies of the sign bit. But some`
27			`* C compilers implement >> with an unsigned shift. For these machines you`
28			`* must define RIGHT_SHIFT_IS_UNSIGNED.`
29			`* RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.`
30			`* It is only applied with constant shift counts. SHIFT_TEMPS must be`
31			`* included in the variables of any routine using RIGHT_SHIFT.`
32			`*/`
33
34			`#ifdef RIGHT_SHIFT_IS_UNSIGNED`
35			`#define SHIFT_TEMPS INT32 shift_temp;`
36			`#define RIGHT_SHIFT(x,shft) \`
37			`((shift_temp = (x)) < 0 ? \`
38			`(shift_temp >> (shft)) \| ((~((INT32) 0)) << (32-(shft))) : \`
39			`(shift_temp >> (shft)))`
40			`#else`
41			`#define SHIFT_TEMPS`
42			`#define RIGHT_SHIFT(x,shft) ((x) >> (shft))`
43			`#endif`
44
45
46			`/*`
47			`* This routine is specialized to the case DCTSIZE = 8.`
48			`*/`
49
50			`#if DCTSIZE != 8`
51			`Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */`
52			`#endif`
53
54
55			`/*`
56			`* A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT`
57			`* on each column. Direct algorithms are also available, but they are`
58			`* much more complex and seem not to be any faster when reduced to code.`
59			`*`
60			`* The poop on this scaling stuff is as follows:`
61			`*`
62			`* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)`
63			`* larger than the true IDCT outputs. The final outputs are therefore`
64			`* a factor of N larger than desired; since N=8 this can be cured by`
65			`* a simple right shift at the end of the algorithm. The advantage of`
66			`* this arrangement is that we save two multiplications per 1-D IDCT,`
67			`* because the y0 and y4 inputs need not be divided by sqrt(N).`
68			`*`
69			`* We have to do addition and subtraction of the integer inputs, which`
70			`* is no problem, and multiplication by fractional constants, which is`
71			`* a problem to do in integer arithmetic. We multiply all the constants`
72			`* by CONST_SCALE and convert them to integer constants (thus retaining`
73			`* CONST_BITS bits of precision in the constants). After doing a`
74			`* multiplication we have to divide the product by CONST_SCALE, with proper`
75			`* rounding, to produce the correct output. This division can be done`
76			`* cheaply as a right shift of CONST_BITS bits. We postpone shifting`
77			`* as long as possible so that partial sums can be added together with`
78			`* full fractional precision.`
79			`*`
80			`* The outputs of the first pass are scaled up by PASS1_BITS bits so that`
81			`* they are represented to better-than-integral precision. These outputs`
82			`* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word`
83			`* with the recommended scaling. (To scale up 12-bit sample data further, an`
84			`* intermediate INT32 array would be needed.)`
85			`*`
86			`* To avoid overflow of the 32-bit intermediate results in pass 2, we must`
87			`* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis`
88			`* shows that the values given below are the most effective.`
89			`*/`
90
91			`#ifdef EIGHT_BIT_SAMPLES`
92			`#define CONST_BITS 13`
93			`#define PASS1_BITS 2`
94			`#else`
95			`#define CONST_BITS 13`
96			`#define PASS1_BITS 1 /* lose a little precision to avoid overflow */`
97			`#endif`
98
99			`#define ONE ((INT32) 1)`
100
101			`#define CONST_SCALE (ONE << CONST_BITS)`
102
103			`/* Convert a positive real constant to an integer scaled by CONST_SCALE. */`
104
105			`#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))`
106
107			`/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus`
108			`* causing a lot of useless floating-point operations at run time.`
109			`* To get around this we use the following pre-calculated constants.`
110			`* If you change CONST_BITS you may want to add appropriate values.`
111			`* (With a reasonable C compiler, you can just rely on the FIX() macro...)`
112			`*/`
113
114			`#if CONST_BITS == 13`
115			`#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */`
116			`#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */`
117			`#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */`
118			`#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */`
119			`#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */`
120			`#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */`
121			`#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */`
122			`#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */`
123			`#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */`
124			`#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */`
125			`#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */`
126			`#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */`
127			`#else`
128			`#define FIX_0_298631336 FIX(0.298631336)`
129			`#define FIX_0_390180644 FIX(0.390180644)`
130			`#define FIX_0_541196100 FIX(0.541196100)`
131			`#define FIX_0_765366865 FIX(0.765366865)`
132			`#define FIX_0_899976223 FIX(0.899976223)`
133			`#define FIX_1_175875602 FIX(1.175875602)`
134			`#define FIX_1_501321110 FIX(1.501321110)`
135			`#define FIX_1_847759065 FIX(1.847759065)`
136			`#define FIX_1_961570560 FIX(1.961570560)`
137			`#define FIX_2_053119869 FIX(2.053119869)`
138			`#define FIX_2_562915447 FIX(2.562915447)`
139			`#define FIX_3_072711026 FIX(3.072711026)`
140			`#endif`
141
142
143			`/* Descale and correctly round an INT32 value that's scaled by N bits.`
144			`* We assume RIGHT_SHIFT rounds towards minus infinity, so adding`
145			`* the fudge factor is correct for either sign of X.`
146			`*/`
147
148			`#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)`
149
150			`/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.`
151			`* For 8-bit samples with the recommended scaling, all the variable`
152			`* and constant values involved are no more than 16 bits wide, so a`
153			`* 16x16->32 bit multiply can be used instead of a full 32x32 multiply;`
154			`* this provides a useful speedup on many machines.`
155			`* There is no way to specify a 16x16->32 multiply in portable C, but`
156			`* some C compilers will do the right thing if you provide the correct`
157			`* combination of casts.`
158			`* NB: for 12-bit samples, a full 32-bit multiplication will be needed.`
159			`*/`
160
161			`#ifdef EIGHT_BIT_SAMPLES`
162			`#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */`
163			`#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))`
164			`#endif`
165			`#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */`
166			`#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const)))`
167			`#endif`
168			`#endif`
169
170			`#ifndef MULTIPLY /* default definition */`
171			`#define MULTIPLY(var,const) ((var) * (const))`
172			`#endif`
173
174
175			`/*`
176			`* Perform the inverse DCT on one block of coefficients.`
177			`*/`
178
179			`void`
180			`j_rev_dct (DCTBLOCK data)`
181			`{`
182			`register DCTELEM *dataptr;`
183			`int i, j;`
184			`FILE *idctdat;`
185			`SHIFT_TEMPS`
186
187
188			`dataptr = data;`
189			`idctdat = fopen("idct-in", "w");`
190			`for (i = 0; i <= 63; i++) {`
191			`j = ((INT32)dataptr[i]) & 4095;`
192			`fprintf(idctdat, "%x\n", j);`
193			`}`
194			`fclose(idctdat);`
195			`system("./idct-verilog > idct-out");`
196			`idctdat = fopen("idct-out", "r");`
197			`for (i = 0; i <= 63; i++) {`
198			`fscanf(idctdat, "%d", &j);`
199			`dataptr[i] = j;`
200			`}`
201			`fclose(idctdat);`
202			`}`