OpenCores
URL https://opencores.org/ocsvn/aes_highthroughput_lowarea/aes_highthroughput_lowarea/trunk

Subversion Repositories aes_highthroughput_lowarea

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /aes_highthroughput_lowarea/trunk
    from Rev 1 to Rev 2
    Reverse comparison

Rev 1 → Rev 2

/rtl/sbox.v
0,0 → 1,216
//////////////////////////////////////////////////////////////////////
//// ////
//// Sub Bytes Box file ////
//// ////
//// Description: ////
//// Implement sub byte box look up table ////
//// ////
//// To Do: ////
//// - done ////
//// ////
//// Author(s): ////
//// - Luo Dongjun, dongjun_luo@hotmail.com ////
//// ////
//////////////////////////////////////////////////////////////////////
// sbox: one-byte substitution box built from composite-field arithmetic
// instead of a 256-entry table, with two internal register stages.
//
// Ports:
//   clk, reset_n : clock and asynchronous active-low reset of both stages
//   enable       : both pipeline registers only advance while this is high
//   din          : input byte
//   ende         : 0 = encryption path, 1 = decryption path (selects whether
//                  INV_AFFINE is applied to din before the field mapping)
//   en_dout      : AFFINE() of the mapped-back inversion result
//   de_dout      : mapped-back inversion result without AFFINE()
//
// Both outputs are driven combinationally from the second pipeline register,
// so they reflect the din/ende values sampled two enabled clock edges earlier.
// Note that en_dout and de_dout are always both driven; which one is
// meaningful follows from the ende value that was applied with din.
module sbox(
clk,
reset_n,
enable,
din,
ende,
en_dout,
de_dout);

input clk;
input reset_n;
input enable;
input [7:0] din;
input ende; //0: encryption; 1: decryption
output [7:0] en_dout;
output [7:0] de_dout;

wire [7:0] first_matrix_out,first_matrix_in,last_matrix_out_enc,last_matrix_out_dec;
wire [3:0] p,q,p2,q2,sumpq,sump2q2,inv_sump2q2,p_new,q_new,mulpq,q2B;
reg [7:0] first_matrix_out_L;
reg [3:0] p_new_L,q_new_L;

// GF(256) to GF(16) transformation
// For decryption the inverse affine step is applied first, on the raw input.
assign first_matrix_in[7:0] = ende ? INV_AFFINE(din[7:0]): din[7:0];
assign first_matrix_out[7:0] = GF256_TO_GF16(first_matrix_in[7:0]);

// pipeline 1
// Registers the mapped byte; holds its value while enable is low.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
first_matrix_out_L[7:0] <= 8'b0;
else if (enable)
first_matrix_out_L[7:0] <= first_matrix_out[7:0];
end

/*****************************************************************************/
// GF16 inverse logic
/*****************************************************************************/
// p+q _____
// \
// p --> p2 ___ \
// \ \ x --> p_new
// x -> p*q -- + --> inverse -/
// / / \
// q --> q2*B-/ x --> q_new
// \___________________________/
//
// p is the low nibble and q the high nibble of the registered mapped byte.
assign p[3:0] = first_matrix_out_L[3:0];
assign q[3:0] = first_matrix_out_L[7:4];
assign p2[3:0] = SQUARE(p[3:0]);
assign q2[3:0] = SQUARE(q[3:0]);
//p+q
assign sumpq[3:0] = p[3:0] ^ q[3:0];
//p*q
assign mulpq[3:0] = MUL(p[3:0],q[3:0]);
//q2B calculation
// (the "q2*B" node of the diagram, written out as per-bit XORs of q2)
assign q2B[0]=q2[1]^q2[2]^q2[3];
assign q2B[1]=q2[0]^q2[1];
assign q2B[2]=q2[0]^q2[1]^q2[2];
assign q2B[3]=q2[0]^q2[1]^q2[2]^q2[3];
//p2+p*q+q2B
assign sump2q2[3:0] = q2B[3:0] ^ mulpq[3:0] ^ p2[3:0];
// inverse p2+pq+q2B
assign inv_sump2q2[3:0] = INVERSE(sump2q2[3:0]);
// results
assign p_new[3:0] = MUL(sumpq[3:0],inv_sump2q2[3:0]);
assign q_new[3:0] = MUL(q[3:0],inv_sump2q2[3:0]);

// pipeline 2
// Registers both result nibbles; holds its value while enable is low.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
{p_new_L[3:0],q_new_L[3:0]} <= 8'b0;
else if (enable)
{p_new_L[3:0],q_new_L[3:0]} <= {p_new[3:0],q_new[3:0]};
end

// GF(16) to GF(256) transformation
// The decryption output is the mapped-back value itself; the encryption
// output additionally passes through AFFINE().
assign last_matrix_out_dec[7:0] = GF16_TO_GF256(p_new_L[3:0],q_new_L[3:0]);
assign last_matrix_out_enc[7:0] = AFFINE(last_matrix_out_dec[7:0]);
assign en_dout[7:0] = last_matrix_out_enc[7:0];
assign de_dout[7:0] = last_matrix_out_dec[7:0];

/*****************************************************************************/
// Functions
/*****************************************************************************/

// convert GF(256) to GF(16)
// Pure XOR network on the 8 input bits; a, b and c are shared sub-terms.
// Result bits [3:0] feed p and bits [7:4] feed q above.
function [7:0] GF256_TO_GF16;
input [7:0] data;
reg a,b,c;
begin
a = data[1]^data[7];
b = data[5]^data[7];
c = data[4]^data[6];
GF256_TO_GF16[0] = c^data[0]^data[5];
GF256_TO_GF16[1] = data[1]^data[2];
GF256_TO_GF16[2] = a;
GF256_TO_GF16[3] = data[2]^data[4];
GF256_TO_GF16[4] = c^data[5];
GF256_TO_GF16[5] = a^c;
GF256_TO_GF16[6] = b^data[2]^data[3];
GF256_TO_GF16[7] = b;
end
endfunction

// square
// 4-bit squaring expressed as a fixed XOR network of the input bits.
function [3:0] SQUARE;
input [3:0] data;
begin
SQUARE[0] = data[0]^data[2];
SQUARE[1] = data[2];
SQUARE[2] = data[1]^data[3];
SQUARE[3] = data[3];
end
endfunction

// inverse
// 4-bit inversion written as AND/XOR equations; a is a term shared by
// output bits 0 and 3. An all-zero input yields an all-zero output.
function [3:0] INVERSE;
input [3:0] data;
reg a;
begin
a=data[1]^data[2]^data[3]^(data[1]&data[2]&data[3]);
INVERSE[0]=a^data[0]^(data[0]&data[2])^(data[1]&data[2])^(data[0]&data[1]&data[2]);
INVERSE[1]=(data[0]&data[1])^(data[0]&data[2])^(data[1]&data[2])^data[3]^
(data[1]&data[3])^(data[0]&data[1]&data[3]);
INVERSE[2]=(data[0]&data[1])^data[2]^(data[0]&data[2])^data[3]^
(data[0]&data[3])^(data[0]&data[2]&data[3]);
INVERSE[3]=a^(data[0]&data[3])^(data[1]&data[3])^(data[2]&data[3]);
end
endfunction

// multiply
// 4-bit by 4-bit multiplication; a and b are sums of d1 bits reused by
// several partial products.
function [3:0] MUL;
input [3:0] d1,d2;
reg a,b;
begin
a=d1[0]^d1[3];
b=d1[2]^d1[3];

MUL[0]=(d1[0]&d2[0])^(d1[3]&d2[1])^(d1[2]&d2[2])^(d1[1]&d2[3]);
MUL[1]=(d1[1]&d2[0])^(a&d2[1])^(b&d2[2])^((d1[1]^d1[2])&d2[3]);
MUL[2]=(d1[2]&d2[0])^(d1[1]&d2[1])^(a&d2[2])^(b&d2[3]);
MUL[3]=(d1[3]&d2[0])^(d1[2]&d2[1])^(d1[1]&d2[2])^(a&d2[3]);
end
endfunction

// GF16 to GF256 transform
// Maps the two result nibbles (p, q) back to one byte with a pure XOR
// network; a and b are shared sub-terms.
function [7:0] GF16_TO_GF256;
input [3:0] p,q;
reg a,b;
begin
a=p[1]^q[3];
b=q[0]^q[1];

GF16_TO_GF256[0]=p[0]^q[0];
GF16_TO_GF256[1]=b^q[3];
GF16_TO_GF256[2]=a^b;
GF16_TO_GF256[3]=b^p[1]^q[2];
GF16_TO_GF256[4]=a^b^p[3];
GF16_TO_GF256[5]=b^p[2];
GF16_TO_GF256[6]=a^p[2]^p[3]^q[0];
GF16_TO_GF256[7]=b^p[2]^q[3];
end
endfunction

// affine transformation
// Each output bit is the XOR of five input bits. The logically inverted
// terms on output bits 0, 1, 5 and 6 are equivalent to XORing the result
// with the constant 8'h63.
function [7:0] AFFINE;
input [7:0] data;
begin
//affine transformation
AFFINE[0]=(!data[0])^data[4]^data[5]^data[6]^data[7];
AFFINE[1]=(!data[0])^data[1]^data[5]^data[6]^data[7];
AFFINE[2]=data[0]^data[1]^data[2]^data[6]^data[7];
AFFINE[3]=data[0]^data[1]^data[2]^data[3]^data[7];
AFFINE[4]=data[0]^data[1]^data[2]^data[3]^data[4];
AFFINE[5]=(!data[1])^data[2]^data[3]^data[4]^data[5];
AFFINE[6]=(!data[2])^data[3]^data[4]^data[5]^data[6];
AFFINE[7]=data[3]^data[4]^data[5]^data[6]^data[7];
end
endfunction

// inverse affine transformation
// Each output bit is the XOR of three input bits (a..d are shared pairs).
// The inverted terms on output bits 0 and 2 are equivalent to XORing the
// result with the constant 8'h05.
function [7:0] INV_AFFINE;
input [7:0] data;
reg a,b,c,d;
begin
a=data[0]^data[5];
b=data[1]^data[4];
c=data[2]^data[7];
d=data[3]^data[6];
INV_AFFINE[0]=(!data[5])^c;
INV_AFFINE[1]=data[0]^d;
INV_AFFINE[2]=(!data[7])^b;
INV_AFFINE[3]=data[2]^a;
INV_AFFINE[4]=data[1]^d;
INV_AFFINE[5]=data[4]^c;
INV_AFFINE[6]=data[3]^a;
INV_AFFINE[7]=data[6]^b;
end
endfunction
endmodule
/rtl/mix_columns.v
0,0 → 1,152
//////////////////////////////////////////////////////////////////////
//// ////
//// Mix Columns File ////
//// ////
//// Description: ////
//// Includes functions for mix columns and inverse mix columns ////
//// ////
//// To Do: ////
//// - done ////
//// ////
//// Author(s): ////
//// - Luo Dongjun, dongjun_luo@hotmail.com ////
//// ////
//////////////////////////////////////////////////////////////////////
// Mix columns size: 4 words
function [127:0] mix_columns;
  input [127:0] si;
  begin
    // The 128-bit state is handled as four independent 32-bit words,
    // most significant word first; each goes through word_mix_columns.
    mix_columns[127:0] = { word_mix_columns(si[127:96]),
                           word_mix_columns(si[95:64]),
                           word_mix_columns(si[63:32]),
                           word_mix_columns(si[31:0]) };
  end
endfunction
 
// Inverse Mix columns size: 4 words
function [127:0] inv_mix_columns;
  input [127:0] si;
  begin
    // Same word-wise split as mix_columns, but every 32-bit word is passed
    // through inv_word_mix_columns instead.
    inv_mix_columns[127:0] = { inv_word_mix_columns(si[127:96]),
                               inv_word_mix_columns(si[95:64]),
                               inv_word_mix_columns(si[63:32]),
                               inv_word_mix_columns(si[31:0]) };
  end
endfunction
 
// Mix Columns for encryption word
function [31:0] word_mix_columns;
  input [31:0] si;
  reg [7:0] b0, b1, b2, b3;   // input bytes, b0 = most significant
  begin
    {b0, b1, b2, b3} = si[31:0];
    // Output byte k is byte_mix_columns applied to the input bytes rotated
    // left by k positions.
    word_mix_columns[31:0] = { byte_mix_columns(b0, b1, b2, b3),
                               byte_mix_columns(b1, b2, b3, b0),
                               byte_mix_columns(b2, b3, b0, b1),
                               byte_mix_columns(b3, b0, b1, b2) };
  end
endfunction
 
// inverse Mix Columns for decryption word
function [31:0] inv_word_mix_columns;
  input [31:0] si;
  reg [7:0] b0, b1, b2, b3;   // input bytes, b0 = most significant
  begin
    {b0, b1, b2, b3} = si[31:0];
    // Output byte k is inv_byte_mix_columns applied to the input bytes
    // rotated left by k positions.
    inv_word_mix_columns[31:0] = { inv_byte_mix_columns(b0, b1, b2, b3),
                                   inv_byte_mix_columns(b1, b2, b3, b0),
                                   inv_byte_mix_columns(b2, b3, b0, b1),
                                   inv_byte_mix_columns(b3, b0, b1, b2) };
  end
endfunction
 
// One output byte of the forward mix: 2*a + 3*b + c + d, where "+" is XOR
// and the constant multiplications are done by MUL2 / MUL3.
function [7:0] byte_mix_columns;
  input [7:0] a,b,c,d;
  reg [7:0] scaled, plain;
  begin
    scaled = MUL2(a[7:0]) ^ MUL3(b[7:0]);   // terms with a coefficient
    plain  = c[7:0] ^ d[7:0];               // terms taken as-is
    byte_mix_columns[7:0] = scaled ^ plain;
  end
endfunction
 
// One output byte of the inverse mix: E*a + B*b + D*c + 9*d, where "+" is
// XOR and the constant multiplications are done by MULE / MULB / MULD / MUL9.
function [7:0] inv_byte_mix_columns;
  input [7:0] a,b,c,d;
  reg [7:0] ta, tb, tc, td;
  begin
    ta = MULE(a[7:0]);
    tb = MULB(b[7:0]);
    tc = MULD(c[7:0]);
    td = MUL9(d[7:0]);
    inv_byte_mix_columns[7:0] = ta ^ tb ^ tc ^ td;
  end
endfunction
 
//xtimes
function [7:0] xtimes;
  input [7:0] d;
  begin
    // Shift left by one; when the bit shifted out (d[7]) is set, fold it
    // back in by XORing the constant 8'h1b into the low bits.
    xtimes[7:0] = {d[6:0], 1'b0} ^ ({8{d[7]}} & 8'h1b);
  end
endfunction
 
// multiply 2
function [7:0] MUL2;
  input [7:0] d;
  reg [7:0] doubled;
  begin
    // Multiplying by 2 is exactly one xtimes step.
    doubled = xtimes(d[7:0]);
    MUL2[7:0] = doubled;
  end
endfunction
 
// multiply 3
function [7:0] MUL3;
  input [7:0] d;
  reg [7:0] doubled;
  begin
    // 3*d = 2*d + d, with "+" being XOR.
    doubled = xtimes(d[7:0]);
    MUL3[7:0] = d[7:0] ^ doubled;
  end
endfunction
 
// multiply e
function [7:0] MULE;
  input [7:0] d;
  reg [7:0] x2, x4, x8;
  begin
    // Build 2*d, 4*d and 8*d by chaining xtimes, then E = 8 + 4 + 2.
    x2 = xtimes(d[7:0]);
    x4 = xtimes(x2);
    x8 = xtimes(x4);
    MULE[7:0] = x8 ^ x4 ^ x2;
  end
endfunction
 
// multiply b
function [7:0] MULB;
  input [7:0] d;
  reg [7:0] x2, x8;
  begin
    // B = 8 + 2 + 1: combine 8*d, 2*d and d itself.
    x2 = xtimes(d[7:0]);
    x8 = xtimes(xtimes(x2));
    MULB[7:0] = x8 ^ x2 ^ d[7:0];
  end
endfunction
 
// multiply D
function [7:0] MULD;
  input [7:0] d;
  reg [7:0] x4, x8;
  begin
    // D = 8 + 4 + 1: combine 8*d, 4*d and d itself.
    x4 = xtimes(xtimes(d[7:0]));
    x8 = xtimes(x4);
    MULD[7:0] = x8 ^ x4 ^ d[7:0];
  end
endfunction
 
// multiply 9
function [7:0] MUL9;
  input [7:0] d;
  reg [7:0] x8;
  begin
    // 9 = 8 + 1: three xtimes steps give 8*d, then add d itself.
    x8 = xtimes(xtimes(xtimes(d[7:0])));
    MUL9[7:0] = d[7:0] ^ x8;
  end
endfunction
/rtl/key_exp.v
0,0 → 1,340
//////////////////////////////////////////////////////////////////////
//// ////
//// Key expansion module ////
//// ////
//// Description: ////
//// Used to expand the key based on key expansion procedure ////
//// ////
//// To Do: ////
//// - done ////
//// ////
//// Author(s): ////
//// - Luo Dongjun, dongjun_luo@hotmail.com ////
//// ////
//////////////////////////////////////////////////////////////////////
// key_exp: iterative key-schedule generator.
//
// A key_start pulse loads key_in into the working words w0..w7 and restarts
// the schedule. The initial key and every newly generated group of words are
// streamed out 64 bits at a time on wr / wr_addr / wr_data. key_ready goes
// high once wr_addr[4:1] reaches max_round_p1 and stays high until the next
// key_start.
//
// Ports:
//   clk, reset_n : clock and asynchronous active-low reset
//   key_in       : initial key; for the shorter modes only the upper bits
//                  are written out (see init_wr3 / init_wr4)
//   key_mode     : 0 = 128 bit, 1 = 192 bit, anything else is handled as 256
//   key_start    : starts (or restarts) an expansion
//   wr, wr_addr, wr_data : write port towards the round-key storage
//   key_ready    : expansion finished
module key_exp (
clk,
reset_n,
key_in,
key_mode,
key_start,
wr,
wr_addr,
wr_data,
key_ready
);

input clk;
input reset_n;
input [255:0] key_in; // initial key value
input [1:0] key_mode; // 0:128, 1:192, 2:256
input key_start;// start key expansion
output wr; // key expansion ram interface
output [4:0] wr_addr;
output [63:0] wr_data;
output key_ready;

reg [31:0] rcon;
reg rcon_is_1b;
reg [1:0] state,nstate,pstate;
reg [3:0] round;
reg sbox_in_valid;
reg [31:0] sbox_in;
reg [4:0] valid;
wire sbox_out_valid;
wire [31:0] sbox_out;
wire [31:0] w0_next,w1_next,w2_next,w3_next,w4_next1,w5_next1,w6_next,w7_next;
wire [31:0] w4_next2,w5_next2;
reg [31:0] w0,w1,w2,w3,w4,w5,w6,w7;
wire wr1,wr2,wr3,init_wr1,init_wr2,init_wr3,init_wr4;
reg wr;
wire [63:0] wr_data1,wr_data2,wr_data3;
reg key_start_L,key_start_L2,key_start_L3;
reg wr_256;
reg [4:0] wr_addr;
reg [63:0] wr_data;
reg key_ready;
wire [3:0] max_round_p1;

// FSM encoding:
//   IDLE       : waiting for key_start
//   START      : one cycle; launches the rotated word into the sboxes
//   GENKEY1    : waiting for the sbox result that updates w0..w3 (and w4/w5
//                in 192-bit mode)
//   GENKEY_256 : 256-bit mode only; second sbox pass that updates w4..w7
parameter IDLE = 2'b00,
START = 2'b01,
GENKEY1 = 2'b10,
GENKEY_256 = 2'b11;

// 11 / 13 / 15 for key_mode 0 / 1 / other. Compared against wr_addr[4:1]
// (i.e. the count of 128-bit entries written) to raise key_ready.
assign max_round_p1[3:0] = (key_mode == 2'b00) ? 4'd11 : (key_mode == 2'b01 ? 4'd13 : 4'd15);

// rcon generation
// The constant lives in the top byte. It is re-initialised to 01 by reset or
// key_start and advances each time a GENKEY1 sbox result is consumed:
// shift left while bit 31 is clear, then 1b, then 36 (where it stays, since
// rcon_is_1b remains set).
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
rcon[31:0] <= 32'h01000000;
rcon_is_1b <= 1'b0;
end
else if (key_start)
begin
rcon[31:0] <= 32'h01000000;
rcon_is_1b <= 1'b0;
end
else if (sbox_out_valid && (state[1:0] == GENKEY1))
begin
if (rcon[31])
begin
rcon[31:0] <= 32'h1b000000;
rcon_is_1b <= 1'b1;
end
else if (rcon_is_1b)
begin
rcon[31:0] <= 32'h36000000;
rcon_is_1b <= 1'b1;
end
else
rcon[31:0] <= {rcon[30:0],1'b0};
end
end

/*****************************************************************************/
// State machine for Key expansion
//
//
// pstate is state delayed by one cycle; it is used below to detect the
// first cycle spent in GENKEY_256.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
state[1:0] <= IDLE;
pstate[1:0] <= IDLE;
end
else
begin
state[1:0] <= nstate[1:0];
pstate[1:0] <= state[1:0];
end
end

// Next-state logic. nstate defaults to the current state, so every branch
// that assigns nothing means "stay".
always @ (*)
begin
nstate[1:0] = state[1:0];
case (state[1:0])
IDLE:
if (key_start) nstate[1:0] = START;
START:
begin
nstate[1:0] = GENKEY1;
end
GENKEY1:
begin
// Leave only when the sbox result is available; the round value at which
// the schedule ends depends on the key mode.
if (sbox_out_valid)
begin
if (key_mode == 2'b00) //128 bit mode 4 x 10 + 4
if (round[3:0] == 4'd10) nstate[1:0] = IDLE;
else nstate[1:0] = START;
else if (key_mode == 2'b01) // 192 bit mode 6 + 6 x 8 = 54 > 52
if (round[3:0] == 4'd8) nstate[1:0] = IDLE;
else nstate[1:0] = START;
else if (round[3:0] == 4'd7)// 256 bit mode 8 + 8 x 7 = 64 > 60
nstate[1:0] = IDLE;
else
nstate[1:0] = GENKEY_256;
end
end
GENKEY_256:
begin
if (sbox_out_valid)
nstate[1:0] = START;
end
endcase
end

// round counter: +1 for every cycle spent in START, cleared whenever the
// next state is IDLE. The FSM above ends the schedule at round 10 / 8 / 7
// for the 128 / 192 / 256-bit modes.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
round[3:0] <= 1'b0;
else if (nstate[1:0] == IDLE)
round[3:0] <= 4'b0;
else if (state[1:0] == START)
round[3:0] <= round[3:0] + 1'b1;
end

// sbox input selection. sbox_in_valid is a one-cycle pulse that marks a new
// word being launched into the four sbox instances:
//  - in START: the last key word of the current mode, rotated left by 8 bits
//  - in the first cycle of GENKEY_256: w3 unrotated (w3 was updated by the
//    GENKEY1 result on the preceding clock edge)
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
sbox_in_valid <= 1'b0;
sbox_in[31:0] <= 32'b0;
end
else if (state[1:0] == START) // rotword
begin
sbox_in_valid <= 1'b1;
if (key_mode == 2'b00) //128
sbox_in[31:0] <= {w3[23:0],w3[31:24]};
else if (key_mode == 2'b01) //192
sbox_in[31:0] <= {w5[23:0],w5[31:24]};
else //256
sbox_in[31:0] <= {w7[23:0],w7[31:24]};
end
else if ((state[1:0] == GENKEY_256) && (pstate[1:0] ==GENKEY1))
begin
sbox_in_valid <= 1'b1;
sbox_in[31:0] <= w3[31:0];
end
else
sbox_in_valid <= 1'b0;
end

// Delay line for sbox_in_valid. Tap 1 (two clocks after sbox_in_valid)
// qualifies sbox_out, matching the two register stages inside sbox; taps
// 2..4 time the RAM writes (wr1 / wr2 / wr3 below).
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
valid[4:0] <= 5'b0;
else
valid[4:0] <= {valid[3:0],sbox_in_valid};
end
assign sbox_out_valid = valid[1];

// Four byte-wide sboxes, permanently enabled and fixed to the encryption
// path (ende = 0); the de_dout outputs are left unconnected.
sbox u_0(.clk(clk),.reset_n(reset_n),.enable(1'b1),.din(sbox_in[7:0]),.ende(1'b0),.en_dout(sbox_out[7:0]),.de_dout());
sbox u_1(.clk(clk),.reset_n(reset_n),.enable(1'b1),.din(sbox_in[15:8]),.ende(1'b0),.en_dout(sbox_out[15:8]),.de_dout());
sbox u_2(.clk(clk),.reset_n(reset_n),.enable(1'b1),.din(sbox_in[23:16]),.ende(1'b0),.en_dout(sbox_out[23:16]),.de_dout());
sbox u_3(.clk(clk),.reset_n(reset_n),.enable(1'b1),.din(sbox_in[31:24]),.ende(1'b0),.en_dout(sbox_out[31:24]),.de_dout());

/*****************************************************************************/
// key expansion calculation
//
//
// w0_next..w3_next form an XOR chain seeded by sbox_out ^ rcon.
// w4_next1 / w5_next1 continue that chain (used in 192-bit mode).
// w4_next2..w7_next form a second chain seeded by sbox_out alone, without
// rcon (used for the GENKEY_256 pass).
assign w0_next[31:0] = sbox_out[31:0]^rcon[31:0]^w0[31:0];
assign w1_next[31:0] = w0_next[31:0]^w1[31:0];
assign w2_next[31:0] = w1_next[31:0]^w2[31:0];
assign w3_next[31:0] = w2_next[31:0]^w3[31:0];
assign w4_next1[31:0] = w3_next[31:0] ^ w4[31:0];
assign w5_next1[31:0] = w4_next1[31:0]^w5[31:0];
assign w4_next2[31:0] = sbox_out[31:0] ^ w4[31:0];
assign w5_next2[31:0] = w4_next2[31:0]^w5[31:0];
assign w6_next[31:0] = w5_next2[31:0]^w6[31:0];
assign w7_next[31:0] = w6_next[31:0]^w7[31:0];

// Working key words. key_start has priority and reloads the whole key.
// Otherwise an update happens only on sbox_out_valid:
//  - key_mode 2'b10: w0..w3 in GENKEY1, w4..w7 in any other state
//  - other modes   : w0..w3 always, plus w4/w5 when key_mode is 2'b01
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
{w0[31:0],w1[31:0],w2[31:0],w3[31:0],w4[31:0],w5[31:0],w6[31:0],w7[31:0]} <= 256'b0;
end
else if (key_start)
begin
{w0[31:0],w1[31:0],w2[31:0],w3[31:0],w4[31:0],w5[31:0],w6[31:0],w7[31:0]} <= key_in[255:0];
end
else if ((key_mode[1:0] == 2'b10) && sbox_out_valid)
begin
if (state[1:0] == GENKEY1)
begin
w0[31:0] <= w0_next[31:0];
w1[31:0] <= w1_next[31:0];
w2[31:0] <= w2_next[31:0];
w3[31:0] <= w3_next[31:0];
end
else
begin
w4[31:0] <= w4_next2[31:0];
w5[31:0] <= w5_next2[31:0];
w6[31:0] <= w6_next[31:0];
w7[31:0] <= w7_next[31:0];
end
end
else if (sbox_out_valid)
begin
w0[31:0] <= w0_next[31:0];
w1[31:0] <= w1_next[31:0];
w2[31:0] <= w2_next[31:0];
w3[31:0] <= w3_next[31:0];
if (key_mode[1:0] == 2'b01)
begin
w4[31:0] <= w4_next1[31:0];
w5[31:0] <= w5_next1[31:0];
end
end
end

// write to external ram
// init_wr1..4 stream out the initial key on consecutive cycles after
// key_start: two 64-bit words for 128-bit keys, three for 192, four for 256.
// wr1 / wr2 / wr3 stream out freshly computed words, one, two and three
// cycles after sbox_out_valid respectively.
assign init_wr1 = key_start;
assign init_wr2 = key_start_L;
assign init_wr3 = key_start_L2 && (key_mode[1:0] != 2'b00);
assign init_wr4 = key_start_L3 && (key_mode[1:0] == 2'b10);
assign wr1 = valid[2];
assign wr2 = valid[3];
assign wr3 = valid[4] && (key_mode[1:0] == 2'b01) && (state[1:0] != IDLE); // remove the last write

// wr_256 selects which half of the working words (w4..w7 or w0..w3) is
// written by wr1 / wr2; wr3 always carries w4/w5.
assign wr_data1[63:0] = wr_256 ?{w4[31:0],w5[31:0]} : {w0[31:0],w1[31:0]};
assign wr_data2[63:0] = wr_256 ?{w6[31:0],w7[31:0]} : {w2[31:0],w3[31:0]};
assign wr_data3[63:0] = {w4[31:0],w5[31:0]};

// wr_256 remembers whether the most recent sbox result belonged to the
// GENKEY_256 pass (set) or to any other pass (cleared).
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
wr_256 <= 1'b0;
else if (key_start)
wr_256 <= 1'b0;
else if (sbox_out_valid && (state[1:0] == GENKEY_256))
wr_256 <= 1'b1;
else if (sbox_out_valid)
wr_256 <= 1'b0;
end

// key_start delayed by 1, 2 and 3 cycles (drives init_wr2..4).
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
{key_start_L3,key_start_L2,key_start_L} <= 3'b0;
else
{key_start_L3,key_start_L2,key_start_L} <= {key_start_L2,key_start_L,key_start};
end

// Registered write strobe: any of the seven write requests.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
wr <= 1'b0;
else
wr <= wr1 || wr2 || wr3 || init_wr1 || init_wr2 || init_wr3 || init_wr4;
end

// Registered write data, selected with a fixed priority among the same
// requests; holds its value when no request is active.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
wr_data[63:0] <= 64'b0;
end
else
begin
if (init_wr1)
wr_data[63:0] <= key_in[255:192];
else if (init_wr2)
wr_data[63:0] <= key_in[191:128];
else if (init_wr3)
wr_data[63:0] <= key_in[127:64];
else if (init_wr4)
wr_data[63:0] <= key_in[63:0];
else if (wr1)
wr_data[63:0] <= wr_data1[63:0];
else if (wr2)
wr_data[63:0] <= wr_data2[63:0];
else if (wr3)
wr_data[63:0] <= wr_data3[63:0];
end
end

// Write address: cleared by key_start, then post-incremented after each
// cycle in which wr is high, so wr_addr is the address of the word
// currently presented on wr_data.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
wr_addr[4:0] <= 5'b0;
else if (key_start)
wr_addr[4:0] <= 5'd0;
else if (wr)
wr_addr[4:0] <= wr_addr[4:0] + 1'b1;
end

// key_ready: cleared by key_start, set (and held) once the number of
// complete 128-bit entries written equals max_round_p1.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
key_ready <= 1'b0;
else if (key_start)
key_ready <= 1'b0;
else if (wr_addr[4:1] == max_round_p1[3:0])
key_ready <= 1'b1;
end
endmodule
/rtl/ram_16x64.v
0,0 → 1,38
//////////////////////////////////////////////////////////////////////
//// ////
//// Ram module ////
//// ////
//// Description: ////
//// this is 16x64, we can use a 16x128 to replace two of this ////
//// module, also, can use specific foundry libs instead ////
//// ////
//// To Do: ////
//// - done ////
//// ////
//// Author(s): ////
//// - Luo Dongjun, dongjun_luo@hotmail.com ////
//// ////
//////////////////////////////////////////////////////////////////////
// Behavioral 16-entry x 64-bit memory with one synchronous write port and
// one synchronous (registered) read port.
//   wr / wr_addr / wr_data : write wr_data into entry wr_addr on the clock edge
//   rd / rd_addr / rd_data : capture entry rd_addr into rd_data on the clock
//                            edge; rd_data holds its value while rd is low
module ram_16x64 (clk,wr,wr_addr,wr_data,rd,rd_addr,rd_data);

  input         clk;
  input         wr;
  input         rd;
  input  [3:0]  wr_addr;
  input  [3:0]  rd_addr;
  input  [63:0] wr_data;
  output [63:0] rd_data;

  reg    [63:0] rd_data;
  reg    [63:0] mem[15:0];

  // Both ports share one clocked process. Non-blocking assignments keep the
  // ports independent: a read of the entry being written in the same cycle
  // still captures the previous contents.
  always @ (posedge clk)
  begin
    if (rd)
      rd_data <= mem[rd_addr];
    if (wr)
      mem[wr_addr] <= wr_data;
  end

endmodule
/rtl/shift_rows.v
0,0 → 1,39
//////////////////////////////////////////////////////////////////////
//// ////
//// shift rows file ////
//// ////
//// Description: ////
//// Include functions for shift rows and inverse shift rows ////
//// ////
//// To Do: ////
//// - done ////
//// ////
//// Author(s): ////
//// - Luo Dongjun, dongjun_luo@hotmail.com ////
//// ////
//////////////////////////////////////////////////////////////////////
// shift rows for encryption
function [127:0] shift_rows;
  input [127:0] si;
  begin
    // Pure byte permutation. Each line below is one 32-bit output word
    // (most significant first); every entry is an 8-bit slice of si given
    // by its low bit index.
    shift_rows[127:0] = {
      si[120 +: 8], si[ 80 +: 8], si[ 40 +: 8], si[  0 +: 8],
      si[ 88 +: 8], si[ 48 +: 8], si[  8 +: 8], si[ 96 +: 8],
      si[ 56 +: 8], si[ 16 +: 8], si[104 +: 8], si[ 64 +: 8],
      si[ 24 +: 8], si[112 +: 8], si[ 72 +: 8], si[ 32 +: 8]
    };
  end
endfunction
 
// inverse shift rows for decryption
function [127:0] inv_shift_rows;
  input [127:0] si;
  begin
    // Pure byte permutation, the reverse of shift_rows. Each line below is
    // one 32-bit output word (most significant first); every entry is an
    // 8-bit slice of si given by its low bit index.
    inv_shift_rows[127:0] = {
      si[120 +: 8], si[ 16 +: 8], si[ 40 +: 8], si[ 64 +: 8],
      si[ 88 +: 8], si[112 +: 8], si[  8 +: 8], si[ 32 +: 8],
      si[ 56 +: 8], si[ 80 +: 8], si[104 +: 8], si[  0 +: 8],
      si[ 24 +: 8], si[ 48 +: 8], si[ 72 +: 8], si[ 96 +: 8]
    };
  end
endfunction
/rtl/aes.v
0,0 → 1,234
//////////////////////////////////////////////////////////////////////
//// ////
//// AES top file ////
//// ////
//// Description: ////
//// AES top ////
//// ////
//// To Do: ////
//// - done ////
//// ////
//// Author(s): ////
//// - Luo Dongjun, dongjun_luo@hotmail.com ////
//// ////
//////////////////////////////////////////////////////////////////////
// aes: top level of the core. It contains the round datapath (16 sboxes,
// shift rows, mix columns, add round key), the round sequencing counters,
// two 16x64 round-key RAMs and the key_exp key-schedule generator.
//
// o_data doubles as the round state register: it is fed back into the
// sboxes each round, and o_data_valid marks the cycle in which it holds the
// final result.
module aes (
clk,
reset_n,
i_start,
i_enable,
i_ende,
i_key,
i_key_mode,
i_data,
i_data_valid,
o_ready,
o_data,
o_data_valid,
o_key_ready
);

input clk;
input reset_n;
input i_start;
input i_enable;
input [1:0] i_key_mode; // 0: 128; 1: 192; 2: 256
input [255:0] i_key; // if key size is 128/192, upper bits are the inputs
input [127:0] i_data;
input i_data_valid;
input i_ende; // 0: encryption; 1: decryption
output o_ready; // user shall not input data if IP is not ready
output [127:0] o_data; // output data
output o_data_valid;
output o_key_ready; // key expansion procedure completes

// shift_rows / inv_shift_rows and mix_columns / inv_mix_columns are plain
// functions pulled in textually.
`include "shift_rows.v"
`include "mix_columns.v"

genvar i;
wire final_round;
reg [3:0] max_round;
wire [127:0] en_sb_data,de_sb_data,sr_data,mc_data,imc_data,ark_data;
reg [127:0] sb_data,o_data,i_data_L;
reg i_data_valid_L;
reg round_valid;
reg [2:0] sb_valid;
reg o_data_valid;
reg [3:0] round_cnt,sb_round_cnt1,sb_round_cnt2,sb_round_cnt3;
wire [3:0] rd_addr;
wire [127:0] round_key;
wire [63:0] rd_data0,rd_data1;
wire wr;
wire [4:0] wr_addr;
wire [63:0] wr_data;
wire [127:0] imc_round_key,en_ark_data,de_ark_data,ark_data_final,ark_data_init;

// The round number travelling with the data at the last sbox stage tells
// whether this is the final round (which skips the mix-columns step, see
// ark_data below).
assign final_round = sb_round_cnt3[3:0] == max_round[3:0];
//assign o_ready = ~sb_valid[1]; // if ready is asserted, user can input data for the same cycle
assign o_ready = ~sb_valid[0]; // if ready is asserted, user can input data for the next cycle

// max_round: number of the last round, 10 / 12 / 14 for i_key_mode
// 0 / 1 / other. It is compared against sb_round_cnt3 for final_round and
// used as the highest round-key address when decrypting.
always @ (*)
begin
case (i_key_mode)
2'b00: max_round[3:0] = 4'd10;
2'b01: max_round[3:0] = 4'd12;
default: max_round[3:0] = 4'd14;
endcase
end

/*****************************************************************************/
// Sub Bytes
//
//
// One sbox per state byte, fed directly from the state register o_data.
// Each sbox has two internal register stages gated by i_enable.
generate
for (i=0;i<16;i=i+1)
begin : sbox_block
sbox u_sbox (
.clk(clk),
.reset_n(reset_n),
.enable(i_enable),
.ende(i_ende),
.din(o_data[i*8+7:i*8]),
.en_dout(en_sb_data[i*8+7:i*8]),
.de_dout(de_sb_data[i*8+7:i*8])
);
end
endgenerate

// Third register stage of the sub-bytes step: picks the sbox output that
// matches the current direction.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
sb_data[127:0] <= 128'b0;
else if (i_enable)
sb_data[127:0] <= i_ende ? de_sb_data[127:0] : en_sb_data[127:0];
end

/*****************************************************************************/
// Shift Rows
//
//
assign sr_data[127:0] = i_ende ? inv_shift_rows(sb_data[127:0]) : shift_rows(sb_data[127:0]);

/*****************************************************************************/
// Mix Columns
//
//
assign mc_data[127:0] = mix_columns(sr_data[127:0]);

// Input block and its valid flag delayed by one cycle. Note that this
// register is not gated by i_enable.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
i_data_valid_L <= 1'b0;
i_data_L[127:0] <= 128'b0;
end
else
begin
i_data_valid_L <= i_data_valid;
i_data_L[127:0] <=i_data[127:0];
end
end

/*****************************************************************************/
// Inverse Mix Columns
//
//
assign imc_data[127:0] = inv_mix_columns(sr_data[127:0]);
/*****************************************************************************/
// add round key for decryption
//
// For decryption the round key is also passed through inv_mix_columns so it
// can be added after the data has gone through inv_mix_columns.
assign imc_round_key[127:0] = inv_mix_columns(round_key[127:0]);
assign ark_data_final[127:0] = sr_data[127:0] ^ round_key[127:0];
assign ark_data_init[127:0] = i_data_L[127:0] ^ round_key[127:0];
assign en_ark_data[127:0] = mc_data[127:0] ^ round_key[127:0];
assign de_ark_data[127:0] = imc_data[127:0] ^ imc_round_key[127:0];
// Next state, in priority order:
//   new input block  -> input XOR round key
//   final round      -> shift-rows result XOR round key (no mix columns)
//   any other round  -> direction-specific mix-columns result XOR key
assign ark_data[127:0] = i_data_valid_L ? ark_data_init[127:0] :
(final_round ? ark_data_final[127:0] :
(i_ende ? de_ark_data[127:0] : en_ark_data[127:0]));

/*****************************************************************************/
// Data outputs after each round
//
// The state register is updated when a new block arrives or when data
// leaves the last sub-bytes stage (sb_valid[2]), and only while i_enable
// is high.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
o_data[127:0] <= 128'b0;
else if (i_enable && (i_data_valid_L || sb_valid[2]))
o_data[127:0] <= ark_data[127:0];
end

/*****************************************************************************/
// in sbox, we have 3 stages (sb_valid),
// before the end of each round, we have another stage (round_valid)
//
// round_valid marks valid data in the state register; sb_valid[0..2] follow
// it through the three sub-bytes stages. Data leaving the last stage either
// re-enters the loop (round_valid) or is flagged as output (o_data_valid)
// when it was the final round.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
round_valid <= 1'b0;
sb_valid[2:0] <= 3'b0;
o_data_valid <= 1'b0;
end
else if (i_enable)
begin
o_data_valid <= sb_valid[2] && final_round;
round_valid <= (sb_valid[2] && !final_round) || i_data_valid_L;
sb_valid[2:0] <= {sb_valid[1:0],round_valid};
end
end

// Round number belonging to the data in the state register: 1 for a newly
// loaded block, otherwise the round number leaving the sub-bytes pipeline
// plus one. The load branch is not gated by i_enable.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n) round_cnt[3:0] <= 4'd0;
else if (i_data_valid_L) round_cnt[3:0] <= 4'd1;
else if (i_enable && sb_valid[2]) round_cnt[3:0] <= sb_round_cnt3[3:0] + 1'b1;
end

// Round number carried alongside the data through the three sub-bytes
// stages; each copy advances only when the preceding stage holds valid data.
always @ (posedge clk or negedge reset_n)
begin
if (!reset_n)
begin
sb_round_cnt1[3:0] <= 4'd0;
sb_round_cnt2[3:0] <= 4'd0;
sb_round_cnt3[3:0] <= 4'd0;
end
else if (i_enable)
begin
if (round_valid) sb_round_cnt1[3:0] <= round_cnt[3:0];
if (sb_valid[0]) sb_round_cnt2[3:0] <= sb_round_cnt1[3:0];
if (sb_valid[1]) sb_round_cnt3[3:0] <= sb_round_cnt2[3:0];
end
end

/*****************************************************************************/
// round key generation: the expansion keys are stored in 4 16*32 rams or
// 2 16*64 rams or 1 16*128 rams
//
// Read address: while i_data_valid is high the key for the initial add is
// fetched (entry 0 when encrypting, entry max_round when decrypting).
// Otherwise the round number at the second sub-bytes stage selects the
// entry, counted downwards from max_round when decrypting.
//assign rd_addr[3:0] = i_ende ? (max_round[3:0] - sb_round_cnt2[3:0]) : sb_round_cnt2[3:0];
assign rd_addr[3:0] = i_ende ? (i_data_valid ? max_round[3:0] : (max_round[3:0] - sb_round_cnt2[3:0])) :
(i_data_valid ? 4'b0 : sb_round_cnt2[3:0]);
assign round_key[127:0] = {rd_data0[63:0],rd_data1[63:0]};

// wr_addr[0] steers each 64-bit write: even addresses go to u_ram_0 (upper
// half of round_key), odd addresses to u_ram_1 (lower half). Both RAMs are
// read together with the same address.
ram_16x64 u_ram_0 (.clk(clk),.wr(wr&!wr_addr[0]),.wr_addr(wr_addr[4:1]),.wr_data(wr_data[63:0]),
.rd_addr(rd_addr[3:0]),.rd_data(rd_data0[63:0]),.rd(sb_valid[1]|i_data_valid));
ram_16x64 u_ram_1 (.clk(clk),.wr(wr&wr_addr[0]),.wr_addr(wr_addr[4:1]),.wr_data(wr_data[63:0]),
.rd_addr(rd_addr[3:0]),.rd_data(rd_data1[63:0]),.rd(sb_valid[1]|i_data_valid));

/*****************************************************************************/
// Key Expansion module
//
//
// Started by i_start; fills the two RAMs above through wr / wr_addr /
// wr_data and reports completion on o_key_ready.
key_exp u_key_exp (
.clk(clk),
.reset_n(reset_n),
.key_in(i_key[255:0]),
.key_mode(i_key_mode[1:0]),
.key_start(i_start),
.wr(wr),
.wr_addr(wr_addr[4:0]),
.wr_data(wr_data[63:0]),
.key_ready(o_key_ready)
);

endmodule
/rtl.fl
0,0 → 1,5
+incdir+rtl
./rtl/sbox.v
./rtl/ram_16x64.v
./rtl/key_exp.v
./rtl/aes.v
/tb.v
0,0 → 1,119
`timescale 1ns / 10ps
// tb: simple directed testbench for the aes top level.
//
// As wired below the DUT runs in decryption mode (i_ende = 1) with a
// 192-bit key (i_key_mode = 2'b01). The bench pulses key_start, waits for
// key_ready, presents one data block for one clock, and prints every value
// of data_out seen while data_out_valid is high. There is no self-checking;
// the printed result has to be compared by hand.
// NOTE(review): the active key/data pair looks like the AES-192 example
// vector from FIPS-197 Appendix C.2 (expected plaintext
// 00112233445566778899aabbccddeeff) - confirm against the standard.
module tb ();

reg clk;
reg reset_n;
// din / dout only serve the commented-out stand-alone sbox instance further
// down; dout is never driven.
reg [7:0] din;
wire [7:0] dout;

reg key_start;
reg [255:0] key_in;
reg data_in_valid;
reg [127:0] data_in;
wire key_ready;
wire ready_out;
reg enable;
initial
begin
// Initial values, with reset asserted for the first 100 ns.
clk = 1'b1;
key_in = 1'b0;
key_start = 1'b0;
data_in_valid = 1'b0;
reset_n = 1'b0;
enable = 1;
#100;
reset_n = 1'b1;
#100;
din = 8'hae;
// One-clock key_start pulse with the key applied at the same time. Only
// one key_in assignment is active; the others are alternative vectors.
@ (posedge clk);
key_start <= 1'b1;
//key_in[255:128] = 128'h2b7e151628aed2a6abf7158809cf4f3c;
//key_in[255:64] = 192'h8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b;
//key_in[255:0] = 256'h603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4;
key_in[255:64] = 192'h000102030405060708090a0b0c0d0e0f1011121314151617;
//key_in[255:0] = 256'h000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f;
@ (posedge clk);
key_start <= 1'b0;
// Key expansion must finish before data is presented.
wait (key_ready);
data_in_valid <= 1'b1;
//data_in[127:0] = 128'h3243f6a8885a308d313198a2e0370731;
//data_in[127:0] = 128'h00112233445566778899aabbccddeeff;
//data_in[127:0] = 128'h8ea2b7ca516745bfeafc49904b496089;
data_in[127:0] = 128'hdda97ca4864cdfe06eaf70a0ec0d7191;
//@ (posedge clk);
//data_in[127:0] = 128'h3243f6a8885a308d313198a2e0370734;
//@ (posedge clk);
//data_in_valid <= 1'b0;
//data_in[127:0] = 128'h3243f6a8885a308d313198a2e0370731;
//@ (posedge clk);
//data_in_valid <= 1'b1;
//data_in[127:0] = 128'h3243f6a8885a308d313198a2e0370734;
//data_in[127:0] = 128'h00112233445566778899aabbccddeeff;
// data_in_valid is dropped at the next clock edge, so exactly one block
// is submitted.
@ (posedge clk);
data_in_valid <= 1'b0;
repeat (3) @ (posedge clk);
wait (ready_out);
@ (posedge clk);
// Second block submission is disabled; the sequence below only idles.
//data_in_valid <= 1'b1;
@ (posedge clk);
data_in_valid <= 1'b0;
repeat (6) @(posedge clk);
//enable <= 0;
//repeat (15) @(posedge clk);
//enable <= 1;
#200;
//$display("dout is %h",dout);
din = 8'h1e;
// Leave enough time for the result to appear before ending the run.
#2000;
//$display("dout is %h",dout);
#100;
$finish;
end

/*sbox u_sbox(
.clk(clk),
.reset_n(reset_n),
.din(din),
.ende(1'b1),
.dout(dout));*/
//wire wr;
//wire [4:0] wr_addr;
//wire [63:0] wr_data;

wire [127:0] data_out;
// Device under test. data_out_valid is not declared in this module; it is
// created as an implicit 1-bit net by the port connection below.
aes dut(
.clk(clk),
.reset_n(reset_n),
.i_start(key_start),
.i_enable(enable), //TBD
.i_ende(1'b1),
.i_key(key_in),
.i_key_mode(2'b01),
.i_data(data_in),
.i_data_valid(data_in_valid),
.o_ready(ready_out),
.o_data(data_out),
.o_data_valid(data_out_valid),
.o_key_ready(key_ready)
);

/*key_exp u_key_exp (
.clk(clk),
.reset_n(reset_n),
.key_in(key_in),
.key_mode(2'b10),
.key_start(key_start),
.wr(wr),
.wr_addr(wr_addr),
.wr_data(wr_data)
);*/

// Result monitor: prints data_out on every clock where the DUT flags it valid.
always @ (posedge clk)
if (data_out_valid)
$display("DATA: %16h",data_out);


// Free-running clock: toggles every 10 time units (20 ns period with the
// 1 ns timescale above).
always
#10 clk = ~clk;

endmodule
/timescale.v
0,0 → 1,119
`timescale 1ns / 10ps
/readme.txt
0,0 → 1,34
____________________________________________________________________________
General Description:
This is a high performance AES core. It supports 128/192/256 key size
modes for encryption and decryption.
____________________________________________________________________________
Clock Speed:
It can reach more than 300 MHz under 65nm process.
____________________________________________________________________________
Gatecount:
Around 35K NAND2 gates;
____________________________________________________________________________
Performance:
Clock Frequency * 128 / Round number, under 200 MHz, it is:
128 bit -> 2.5Gbps;
192 bit -> 2.1Gbps;
256 bit -> 1.8Gbps;
____________________________________________________________________________
Some notes for the interface:
1. After asserting i_start (a one-cycle pulse), please wait for o_key_ready to go high
2. For decryption, don't input data before o_key_ready is high
3. For encryption, data can be input 1 cycle after the i_start pulse
4. Don't input data if previous cycle's o_ready is low
5. Don't input data if i_enable is low
6. Keep i_key_mode and i_key stable until o_key_ready is high
7. i_enable is used to pause the core for any purpose
8. Basically, you can feed four 128-bit data blocks into the core before the first valid
output data, because there are 4 pipeline stages inside. Then you need to wait Nr*4 cycles
for the output data. (o_ready actually reflects this)
9. key expansion will take 30~40 cycles based on key modes (o_key_ready marks it).
10. Currently, there are 2 16x64 rams, with minor modifications, can change to
1 16x128 ram or 4 16x32 rams or 8 16x16 rams
11. In 128/192 mode, only the higher bits of i_key are valid
____________________________________________________________________________
Any questions, please contact dongjun_luo@hotmail.com
/test_imc.v
0,0 → 1,29
`include "timescale.v"
// Scratch bench for the inverse mix-columns / inverse shift-rows functions:
// drives four bytes, then prints the byte-level result, the word-level
// result and one fixed inv_shift_rows example. Nothing is checked
// automatically.
module top();
`include "mix_columns.v"
`include "shift_rows.v"

  // Stimulus bytes (undriven until the first #10 has elapsed).
  reg [7:0] a,b,c,d;

  // Results, continuously evaluated from the stimulus.
  wire [7:0]   datao  = inv_byte_mix_columns(a,b,c,d);
  wire [31:0]  data2o = inv_word_mix_columns({a,b,c,d});
  // Alternative 128-bit experiment, kept disabled:
  //   inv_mix_columns(128'h2c21a820306f154ab712c75eee0da04f)
  wire [127:0] data3o = inv_shift_rows(128'haa5ece06ee6e3c56dde68bac2621bebf);

  initial
  begin
    #10 {a, b, c, d} = {8'h2c, 8'h21, 8'ha8, 8'h20};
    // Give the continuous assignments time to settle before printing.
    #10 $display("datao is %h",datao);
    $display("data2o is %h",data2o);
    $display("data3o is %h",data3o);
    $finish;
  end

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.