1 |
62 |
marcus.erl |
/* gf128mul.h - GF(2^128) multiplication functions
|
2 |
|
|
*
|
3 |
|
|
* Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
|
4 |
|
|
* Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org>
|
5 |
|
|
*
|
6 |
|
|
* Based on Dr Brian Gladman's (GPL'd) work published at
|
7 |
|
|
* http://fp.gladman.plus.com/cryptography_technology/index.htm
|
8 |
|
|
* See the original copyright notice below.
|
9 |
|
|
*
|
10 |
|
|
* This program is free software; you can redistribute it and/or modify it
|
11 |
|
|
* under the terms of the GNU General Public License as published by the Free
|
12 |
|
|
* Software Foundation; either version 2 of the License, or (at your option)
|
13 |
|
|
* any later version.
|
14 |
|
|
*/
|
15 |
|
|
/*
|
16 |
|
|
---------------------------------------------------------------------------
|
17 |
|
|
Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
|
18 |
|
|
|
19 |
|
|
LICENSE TERMS
|
20 |
|
|
|
21 |
|
|
The free distribution and use of this software in both source and binary
|
22 |
|
|
form is allowed (with or without changes) provided that:
|
23 |
|
|
|
24 |
|
|
1. distributions of this source code include the above copyright
|
25 |
|
|
notice, this list of conditions and the following disclaimer;
|
26 |
|
|
|
27 |
|
|
2. distributions in binary form include the above copyright
|
28 |
|
|
notice, this list of conditions and the following disclaimer
|
29 |
|
|
in the documentation and/or other associated materials;
|
30 |
|
|
|
31 |
|
|
3. the copyright holder's name is not used to endorse products
|
32 |
|
|
built using this software without specific written permission.
|
33 |
|
|
|
34 |
|
|
ALTERNATIVELY, provided that this notice is retained in full, this product
|
35 |
|
|
may be distributed under the terms of the GNU General Public License (GPL),
|
36 |
|
|
in which case the provisions of the GPL apply INSTEAD OF those given above.
|
37 |
|
|
|
38 |
|
|
DISCLAIMER
|
39 |
|
|
|
40 |
|
|
This software is provided 'as is' with no explicit or implied warranties
|
41 |
|
|
in respect of its properties, including, but not limited to, correctness
|
42 |
|
|
and/or fitness for purpose.
|
43 |
|
|
---------------------------------------------------------------------------
|
44 |
|
|
Issue Date: 31/01/2006
|
45 |
|
|
|
46 |
|
|
An implementation of field multiplication in Galois Field GF(128)
|
47 |
|
|
*/
|
48 |
|
|
|
49 |
|
|
#ifndef _CRYPTO_GF128MUL_H
|
50 |
|
|
#define _CRYPTO_GF128MUL_H
|
51 |
|
|
|
52 |
|
|
#include <crypto/b128ops.h>
|
53 |
|
|
#include <linux/slab.h>
|
54 |
|
|
|
55 |
|
|
/* Comment by Rik:
|
56 |
|
|
*
|
57 |
|
|
* For some background on GF(2^128) see for example: http://-
|
58 |
|
|
* csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf
|
59 |
|
|
*
|
60 |
|
|
* The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can
|
61 |
|
|
* be mapped to computer memory in a variety of ways. Let's examine
|
62 |
|
|
* three common cases.
|
63 |
|
|
*
|
64 |
|
|
* Take a look at the 16 binary octets below in memory order. The msb's
|
65 |
|
|
* are left and the lsb's are right. char b[16] is an array and b[0] is
|
66 |
|
|
* the first octet.
|
67 |
|
|
*
|
68 |
|
|
* 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
|
69 |
|
|
* b[0] b[1] b[2] b[3] b[13] b[14] b[15]
|
70 |
|
|
*
|
71 |
|
|
* Every bit is a coefficient of some power of X. We can store the bits
|
72 |
|
|
* in every byte in little-endian order and the bytes themselves also in
|
73 |
|
|
* little endian order. I will call this lle (little-little-endian).
|
74 |
|
|
* The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks
|
75 |
|
|
* like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }.
|
76 |
|
|
* This format was originally implemented in gf128mul and is used
|
77 |
|
|
* in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length).
|
78 |
|
|
*
|
79 |
|
|
* Another convention says: store the bits in bigendian order and the
|
80 |
|
|
* bytes also. This is bbe (big-big-endian). Now the buffer above
|
81 |
|
|
* represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111,
|
82 |
|
|
* b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe
|
83 |
|
|
* is partly implemented.
|
84 |
|
|
*
|
85 |
|
|
* Both of the above formats are easy to implement on big-endian
|
86 |
|
|
* machines.
|
87 |
|
|
*
|
88 |
|
|
* EME (which is patent encumbered) uses the ble format (bits are stored
|
89 |
|
|
* in big endian order and the bytes in little endian). The above buffer
|
90 |
|
|
* represents X^7 in this case and the primitive polynomial is b[0] = 0x87.
|
91 |
|
|
*
|
92 |
|
|
* The common machine word-size is smaller than 128 bits, so to make
|
93 |
|
|
* an efficient implementation we must split into machine word sizes.
|
94 |
|
|
* This file uses one 32bit for the moment. Machine endianness comes into
|
95 |
|
|
* play. The lle format in relation to machine endianness is discussed
|
96 |
|
|
* below by the original author of gf128mul Dr Brian Gladman.
|
97 |
|
|
*
|
98 |
|
|
* Let's look at the bbe and ble format on a little endian machine.
|
99 |
|
|
*
|
100 |
|
|
* bbe on a little endian machine u32 x[4]:
|
101 |
|
|
*
|
102 |
|
|
* MS x[0] LS MS x[1] LS
|
103 |
|
|
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
104 |
|
|
* 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88
|
105 |
|
|
*
|
106 |
|
|
* MS x[2] LS MS x[3] LS
|
107 |
|
|
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
108 |
|
|
* 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24
|
109 |
|
|
*
|
110 |
|
|
* ble on a little endian machine
|
111 |
|
|
*
|
112 |
|
|
* MS x[0] LS MS x[1] LS
|
113 |
|
|
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
114 |
|
|
* 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32
|
115 |
|
|
*
|
116 |
|
|
* MS x[2] LS MS x[3] LS
|
117 |
|
|
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
118 |
|
|
* 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96
|
119 |
|
|
*
|
120 |
|
|
* Multiplications in GF(2^128) are mostly bit-shifts, so you see why
|
121 |
|
|
* ble (and lbe also) are easier to implement on a little-endian
|
122 |
|
|
* machine than on a big-endian machine. The converse holds for bbe
|
123 |
|
|
* and lle.
|
124 |
|
|
*
|
125 |
|
|
* Note: to have good alignment, it seems to me that it is sufficient
|
126 |
|
|
* to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize
|
127 |
|
|
* machines this will automatically aligned to wordsize and on a 64-bit
|
128 |
|
|
* machine also.
|
129 |
|
|
*/
|
130 |
|
|
/* Multiply a GF128 field element by x. Field elements are held in arrays
|
131 |
|
|
of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower
|
132 |
|
|
indexed bits placed in the more numerically significant bit positions
|
133 |
|
|
within bytes.
|
134 |
|
|
|
135 |
|
|
On little endian machines the bit indexes translate into the bit
|
136 |
|
|
positions within four 32-bit words in the following way
|
137 |
|
|
|
138 |
|
|
MS x[0] LS MS x[1] LS
|
139 |
|
|
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
140 |
|
|
24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39
|
141 |
|
|
|
142 |
|
|
MS x[2] LS MS x[3] LS
|
143 |
|
|
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
144 |
|
|
88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103
|
145 |
|
|
|
146 |
|
|
On big endian machines the bit indexes translate into the bit
|
147 |
|
|
positions within four 32-bit words in the following way
|
148 |
|
|
|
149 |
|
|
MS x[0] LS MS x[1] LS
|
150 |
|
|
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
151 |
|
|
00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63
|
152 |
|
|
|
153 |
|
|
MS x[2] LS MS x[3] LS
|
154 |
|
|
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
|
155 |
|
|
64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127
|
156 |
|
|
*/
|
157 |
|
|
|
158 |
|
|
/* A slow generic version of gf_mul, implemented for lle and bbe
|
159 |
|
|
* It multiplies a and b and puts the result in a */
|
160 |
|
|
void gf128mul_lle(be128 *a, const be128 *b);
|
161 |
|
|
|
162 |
|
|
void gf128mul_bbe(be128 *a, const be128 *b);
|
163 |
|
|
|
164 |
|
|
/* multiply by x in ble format, needed by XTS */
|
165 |
|
|
void gf128mul_x_ble(be128 *a, const be128 *b);
|
166 |
|
|
|
167 |
|
|
/* 4k table optimization */
|
168 |
|
|
|
169 |
|
|
struct gf128mul_4k {
|
170 |
|
|
be128 t[256];
|
171 |
|
|
};
|
172 |
|
|
|
173 |
|
|
struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
|
174 |
|
|
struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
|
175 |
|
|
void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t);
|
176 |
|
|
void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t);
|
177 |
|
|
|
178 |
|
|
static inline void gf128mul_free_4k(struct gf128mul_4k *t)
|
179 |
|
|
{
|
180 |
|
|
kfree(t);
|
181 |
|
|
}
|
182 |
|
|
|
183 |
|
|
|
184 |
|
|
/* 64k table optimization, implemented for lle and bbe */
|
185 |
|
|
|
186 |
|
|
struct gf128mul_64k {
|
187 |
|
|
struct gf128mul_4k *t[16];
|
188 |
|
|
};
|
189 |
|
|
|
190 |
|
|
/* first initialize with the constant factor with which you
|
191 |
|
|
* want to multiply and then call gf128_64k_lle with the other
|
192 |
|
|
* factor in the first argument, the table in the second and a
|
193 |
|
|
* scratch register in the third. Afterwards *a = *r. */
|
194 |
|
|
struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g);
|
195 |
|
|
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
|
196 |
|
|
void gf128mul_free_64k(struct gf128mul_64k *t);
|
197 |
|
|
void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t);
|
198 |
|
|
void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t);
|
199 |
|
|
|
200 |
|
|
#endif /* _CRYPTO_GF128MUL_H */
|