1 
2 
kdv 
/*

2 


* jrevdct.c

3 


*

4 


* Copyright (C) 1991, 1992, Thomas G. Lane.

5 


* This file is part of the Independent JPEG Group's software.

6 


* For conditions of distribution and use, see the accompanying README file.

7 


*

8 


* This file contains the basic inverseDCT transformation subroutine.

9 


*

10 


* This implementation is based on an algorithm described in

11 


* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1D DCT

12 


* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,

13 


* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988991.

14 


* The primary algorithm described there uses 11 multiplies and 29 adds.

15 


* We use their alternate method with 12 multiplies and 32 adds.

16 


* The advantage of this method is that no data path contains more than one

17 


* multiplication; this allows a very simple and accurate implementation in

18 


* scaled fixedpoint arithmetic, with a minimal number of shifts.

19 


*/

20 



21 


#include "dct.h"

22 


#include <stdio.h>

23 



24 


/* We assume that right shift corresponds to signed division by 2 with

25 


* rounding towards minus infinity. This is correct for typical "arithmetic

26 


* shift" instructions that shift in copies of the sign bit. But some

27 


* C compilers implement >> with an unsigned shift. For these machines you

28 


* must define RIGHT_SHIFT_IS_UNSIGNED.

29 


* RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.

30 


* It is only applied with constant shift counts. SHIFT_TEMPS must be

31 


* included in the variables of any routine using RIGHT_SHIFT.

32 


*/

33 



34 


#ifdef RIGHT_SHIFT_IS_UNSIGNED

35 


#define SHIFT_TEMPS INT32 shift_temp;

36 


#define RIGHT_SHIFT(x,shft) \

37 


((shift_temp = (x)) < 0 ? \

38 


(shift_temp >> (shft))  ((~((INT32) 0)) << (32(shft))) : \

39 


(shift_temp >> (shft)))

40 


#else

41 


#define SHIFT_TEMPS

42 


#define RIGHT_SHIFT(x,shft) ((x) >> (shft))

43 


#endif

44 



45 



46 


/*

47 


* This routine is specialized to the case DCTSIZE = 8.

48 


*/

49 



50 


#if DCTSIZE != 8

51 


Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */

52 


#endif

53 



54 



55 


/*

56 


* A 2D IDCT can be done by 1D IDCT on each row followed by 1D IDCT

57 


* on each column. Direct algorithms are also available, but they are

58 


* much more complex and seem not to be any faster when reduced to code.

59 


*

60 


* The poop on this scaling stuff is as follows:

61 


*

62 


* Each 1D IDCT step produces outputs which are a factor of sqrt(N)

63 


* larger than the true IDCT outputs. The final outputs are therefore

64 


* a factor of N larger than desired; since N=8 this can be cured by

65 


* a simple right shift at the end of the algorithm. The advantage of

66 


* this arrangement is that we save two multiplications per 1D IDCT,

67 


* because the y0 and y4 inputs need not be divided by sqrt(N).

68 


*

69 


* We have to do addition and subtraction of the integer inputs, which

70 


* is no problem, and multiplication by fractional constants, which is

71 


* a problem to do in integer arithmetic. We multiply all the constants

72 


* by CONST_SCALE and convert them to integer constants (thus retaining

73 


* CONST_BITS bits of precision in the constants). After doing a

74 


* multiplication we have to divide the product by CONST_SCALE, with proper

75 


* rounding, to produce the correct output. This division can be done

76 


* cheaply as a right shift of CONST_BITS bits. We postpone shifting

77 


* as long as possible so that partial sums can be added together with

78 


* full fractional precision.

79 


*

80 


* The outputs of the first pass are scaled up by PASS1_BITS bits so that

81 


* they are represented to betterthanintegral precision. These outputs

82 


* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16bit word

83 


* with the recommended scaling. (To scale up 12bit sample data further, an

84 


* intermediate INT32 array would be needed.)

85 


*

86 


* To avoid overflow of the 32bit intermediate results in pass 2, we must

87 


* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis

88 


* shows that the values given below are the most effective.

89 


*/

90 



91 


#ifdef EIGHT_BIT_SAMPLES

92 


#define CONST_BITS 13

93 


#define PASS1_BITS 2

94 


#else

95 


#define CONST_BITS 13

96 


#define PASS1_BITS 1 /* lose a little precision to avoid overflow */

97 


#endif

98 



99 


#define ONE ((INT32) 1)

100 



101 


#define CONST_SCALE (ONE << CONST_BITS)

102 



103 


/* Convert a positive real constant to an integer scaled by CONST_SCALE. */

104 



105 


#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))

106 



107 


/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus

108 


* causing a lot of useless floatingpoint operations at run time.

109 


* To get around this we use the following precalculated constants.

110 


* If you change CONST_BITS you may want to add appropriate values.

111 


* (With a reasonable C compiler, you can just rely on the FIX() macro...)

112 


*/

113 



114 


#if CONST_BITS == 13

115 


#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */

116 


#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */

117 


#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */

118 


#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */

119 


#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */

120 


#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */

121 


#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */

122 


#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */

123 


#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */

124 


#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */

125 


#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */

126 


#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */

127 


#else

128 


#define FIX_0_298631336 FIX(0.298631336)

129 


#define FIX_0_390180644 FIX(0.390180644)

130 


#define FIX_0_541196100 FIX(0.541196100)

131 


#define FIX_0_765366865 FIX(0.765366865)

132 


#define FIX_0_899976223 FIX(0.899976223)

133 


#define FIX_1_175875602 FIX(1.175875602)

134 


#define FIX_1_501321110 FIX(1.501321110)

135 


#define FIX_1_847759065 FIX(1.847759065)

136 


#define FIX_1_961570560 FIX(1.961570560)

137 


#define FIX_2_053119869 FIX(2.053119869)

138 


#define FIX_2_562915447 FIX(2.562915447)

139 


#define FIX_3_072711026 FIX(3.072711026)

140 


#endif

141 



142 



143 


/* Descale and correctly round an INT32 value that's scaled by N bits.

144 


* We assume RIGHT_SHIFT rounds towards minus infinity, so adding

145 


* the fudge factor is correct for either sign of X.

146 


*/

147 



148 


#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)1)), n)

149 



150 


/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.

151 


* For 8bit samples with the recommended scaling, all the variable

152 


* and constant values involved are no more than 16 bits wide, so a

153 


* 16x16>32 bit multiply can be used instead of a full 32x32 multiply;

154 


* this provides a useful speedup on many machines.

155 


* There is no way to specify a 16x16>32 multiply in portable C, but

156 


* some C compilers will do the right thing if you provide the correct

157 


* combination of casts.

158 


* NB: for 12bit samples, a full 32bit multiplication will be needed.

159 


*/

160 



161 


#ifdef EIGHT_BIT_SAMPLES

162 


#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */

163 


#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))

164 


#endif

165 


#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */

166 


#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const)))

167 


#endif

168 


#endif

169 



170 


#ifndef MULTIPLY /* default definition */

171 


#define MULTIPLY(var,const) ((var) * (const))

172 


#endif

173 



174 



175 


/*

176 


* Perform the inverse DCT on one block of coefficients.

177 


*/

178 



179 


void

180 


j_rev_dct (DCTBLOCK data)

181 


{

182 


register DCTELEM *dataptr;

183 


int i, j;

184 


FILE *idctdat;

185 


SHIFT_TEMPS

186 



187 



188 


dataptr = data;

189 


idctdat = fopen("idctin", "w");

190 


for (i = 0; i <= 63; i++) {

191 


j = ((INT32)dataptr[i]) & 4095;

192 


fprintf(idctdat, "%x\n", j);

193 


}

194 


fclose(idctdat);

195 


system("./idctverilog > idctout");

196 


idctdat = fopen("idctout", "r");

197 


for (i = 0; i <= 63; i++) {

198 


fscanf(idctdat, "%d", &j);

199 


dataptr[i] = j;

200 


}

201 


fclose(idctdat);

202 


}
