OpenCores
URL https://opencores.org/ocsvn/arm4u/arm4u/trunk

Subversion Repositories arm4u

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /arm4u
    from Rev 1 to Rev 2
    Reverse comparison

Rev 1 → Rev 2

/trunk/test_program/arm_test.s
0,0 → 1,1255
@ This file is part of ARM4U CPU
@
@ This is a creation of the Laboratory of Processor Architecture
@ of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch )
@
@ arm_test.s --- Test program which uses all the instruction set
@ to be assembled with GCC assembler
@
@ Written By - Jonathan Masur and Xavier Jimenez (2013)
@
@ This program is free software; you can redistribute it and/or modify it
@ under the terms of the GNU General Public License as published by the
@ Free Software Foundation; either version 2, or (at your option) any
@ later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ In other words, you are welcome to use, share and improve this program.
@ You are forbidden to forbid anyone else to use, share and improve
@ what you give them. Help stamp out software-hoarding!
 
.text
.global test_cond, test_fwd, test_bshift, test_logic, test_adder, test_bshift_reg, test_load
.global test_store, test_byte, test_cpsr, test_mul, test_ldmstm, test_r15jumps, test_rti
 
@ Test harness entry point.
@
@ Calls every test routine in turn. Each routine hands back its verdict in
@ r0: zero means "all sub-tests passed", any other value is the sub-test
@ counter that was live when the routine bailed out.
@
@ After each call the harness checks r0 and, when it is non-zero, spins
@ forever on the matching failN label. The address the CPU is stuck at
@ therefore identifies the failing routine and r0 the failing sub-test.
@ Reaching `passed` means every routine returned zero.
_start:
        bl      test_cond
fail1:
        teq     r0, #0
        bne     fail1

        bl      test_fwd
fail2:
        teq     r0, #0
        bne     fail2

        bl      test_bshift
fail3:
        teq     r0, #0
        bne     fail3

        bl      test_logic
fail4:
        teq     r0, #0
        bne     fail4

        bl      test_adder
fail5:
        teq     r0, #0
        bne     fail5

        bl      test_bshift_reg
fail6:
        teq     r0, #0
        bne     fail6

        bl      test_load
fail7:
        teq     r0, #0
        bne     fail7

        bl      test_store
fail8:
        teq     r0, #0
        bne     fail8

        bl      test_byte
fail9:
        teq     r0, #0
        bne     fail9

        bl      test_cpsr
fail10:
        teq     r0, #0
        bne     fail10

        bl      test_mul
fail11:
        teq     r0, #0
        bne     fail11

        bl      test_ldmstm
fail12:
        teq     r0, #0
        bne     fail12

        bl      test_r15jumps
fail13:
        teq     r0, #0
        bne     fail13

        @ The result of the last routine must be checked as well; without
        @ this loop a failing test_rti would still end up in `passed`.
        bl      test_rti
fail14:
        teq     r0, #0
        bne     fail14

        @ Every routine returned 0: park here to signal overall success.
passed:
        b       passed
 
@ test_cond -- conditional execution driven by the N and Z flags.
@
@ Out:   r0 = 0 when every sub-test passed, otherwise the number of the
@        sub-test that was running when the check failed.
@ Uses:  r5 as scratch, flags.
@ Note:  the `fail` label at the bottom is the common "return with the
@        current r0" exit; it just executes bx lr.
test_cond:
        mov     r0, #1

        @ Sub-test 1: MOVS with a zero result sets Z and clears N.
        movs    r5, #0
        bne     fail
        bmi     fail
        add     r0, r0, #1

        @ Sub-test 2: an instruction without the S suffix leaves the flags
        @ alone (the plain MOV of zero must not set Z).
        movs    r5, #1
        mov     r5, #0
        beq     fail
        bmi     fail
        add     r0, r0, #1

        @ Sub-test 3: MOVS with a negative result sets N.
        movs    r5, #-2
        mov     r5, #0
        beq     fail
        bpl     fail
        add     r0, r0, #1

        @ Sub-test 4: a conditional MOVS whose condition is false is skipped
        @ entirely -- neither the register nor the flags may change.
        movs    r5, #1
        movpls  r5, #0                  @ PL holds: executes, sets Z
        movnes  r5, #1                  @ NE is now false: must be skipped
        movmis  r5, #2                  @ MI is false: must be skipped
        bne     fail
        cmp     r5, #0
        bne     fail
        add     r0, r0, #1

        @ Sub-test 5: nothing between a taken branch and its target may have
        @ any effect. Z is still set and N clear from the CMP above.
        b       .cond_landing
        movs    r5, #-1
        movs    r5, #-2
        movs    r5, #-3
.cond_landing:
        bne     fail
        bmi     fail

        @ All condition sub-tests passed.
        mov     r0, #0
fail:
        bx      lr
 
@ test_fwd -- back-to-back register dependencies.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1, flags.
@ The instruction sequences are deliberately dense: every instruction
@ reads or overwrites the register written by the one just before it.
test_fwd:
        mov     r0, #1

        @ Sub-test 1: a chain of dependent ADDs; each one consumes the
        @ result of its predecessor as operand A. 1 + 5 must give 6.
        mov     r1, #1
        add     r1, r1, #1
        add     r1, r1, #1
        add     r1, r1, #1
        add     r1, r1, #1
        add     r1, r1, #1
        cmp     r1, #6
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: several in-flight writes to the same register; the
        @ compare must observe the most recent one (5).
        mov     r1, #1
        mov     r1, #2
        mov     r1, #3
        mov     r1, #4
        mov     r1, #5
        cmp     r1, #5
        bne     fail
        add     r0, r0, #1

        @ Forwarding test passed.
        mov     r0, #0
        bx      lr
 
@ test_bshift -- barrel shifter, all modes, shift amount given as a
@ literal constant.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r5, flags.
test_bshift:
        mov     r0, #1

        @ Sub-test 1: LSL result. 0x0f << 28 == 0xf0000000.
        movs    r5, #0xf0000000
        mov     r1, #0x0f
        mov     r2, r1, lsl #28
        cmp     r5, r2
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: ROR result. 0x0f rotated right by 4 == 0xf0000000.
        mov     r3, r1, ror #4
        cmp     r5, r3
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: LSR result. 0xf0000000 >> 28 == 0x0f.
        mov     r4, r5, lsr #28
        cmp     r4, r1
        bne     fail
        add     r0, r0, #1

        @ Sub-test 4: ASR replicates the sign bit. 0x80000000 asr 3 ==
        @ 0xf0000000.
        mov     r1, #0x80000000
        mov     r2, r1, asr #3
        cmp     r5, r2
        bne     fail
        add     r0, r0, #1

        @ Sub-test 5: RRX result and carry. Bit 0 of the operand goes to C,
        @ the old C enters at bit 31.
        mov     r1, #1
        movs    r1, r1, rrx             @ bit 0 was 1: C must be set
        bcc     fail
        movs    r1, r1, rrx             @ non-zero result, bit 0 was 0
        beq     fail
        bcs     fail
        add     r0, r0, #1

        @ Sub-test 6: carry out produced by a rotated immediate.
        movs    r5, #0xf0000000         @ rotated constant, bit 31 set
        bcc     fail
        movs    r5, #0xf                @ must leave C set
        bcc     fail
        movs    r5, #0x100              @ rotated constant, bit 31 clear
        bcs     fail
        add     r0, r0, #1

        @ Sub-test 7: carry out of LSL is the last bit shifted out.
        mov     r5, #0x1
        movs    r5, r5, lsl #1
        bcs     fail
        mov     r5, #0x80000000
        movs    r5, r5, lsl #1
        bcc     fail
        add     r0, r0, #1

        @ Sub-test 8: carry out of LSR. 2 -> 1 (C clear) -> 0 (C set, Z set).
        mov     r5, #2
        movs    r5, r5, lsr #1
        bcs     fail
        movs    r5, r5, lsr #1
        bcc     fail
        bne     fail
        add     r0, r0, #1

        @ Sub-test 9: carry out of ASR, starting from 0xfffffffe.
        mvn     r5, #0x01
        movs    r5, r5, asr #1
        bcs     fail
        movs    r5, r5, asr #1
        bcc     fail
        add     r0, r0, #1

        @ Sub-test 10: LSR #32 -- carry out is bit 31 of the operand.
        mov     r1, #0xa5000000         @ bit 31 set
        mvn     r2, r1                  @ bit 31 clear
        movs    r3, r1, lsr #32
        bcc     fail
        movs    r3, r2, lsr #32
        bcs     fail
        add     r0, r0, #1

        @ Sub-test 11: ASR #32 -- result is all sign bits, carry is bit 31.
        movs    r3, r1, asr #32
        bcc     fail
        cmp     r3, #-1
        bne     fail
        movs    r3, r2, asr #32
        bcs     fail
        bne     fail

        @ Barrel shifter test passed.
        mov     r0, #0
        bx      lr
 
@ test_logic -- bitwise operations (MVN, AND, ORR, EOR, TST, TEQ, BIC).
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r5, flags.
test_logic:
        mov     r0, #1

        @ Sub-test 1: NOT. ~0xffffffff must be zero.
        mov     r5, #-1
        mvns    r5, r5
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: AND. Operands used from here on:
        @   r5 = 0xa0, r1 = 0x0b, r2 = 0xab, r3 = 0xba
        mov     r5, #0xa0
        mov     r1, #0x0b
        mov     r2, #0xab
        mov     r3, #0xba

        ands    r4, r5, r1              @ disjoint bits: result 0
        bne     fail
        ands    r4, r5, r2              @ 0xa0 & 0xab == 0xa0
        cmp     r4, r5
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: ORR and EOR. 0xa0 | 0x0b == 0xab, and x ^ x == 0.
        orr     r4, r5, r1
        eors    r4, r2, r4
        bne     fail
        orr     r4, r1, r5
        teq     r4, r2
        bne     fail
        add     r0, r0, #1

        @ Sub-test 4: TST sets Z from the AND of its operands.
        tst     r1, r5                  @ no common bits: Z set
        bne     fail
        tst     r4, r2                  @ common bits: Z clear
        beq     fail
        add     r0, r0, #1

        @ Sub-test 5: BIC. 0xab & ~0xba == 0x01.
        bics    r4, r2, r3
        cmp     r4, #1
        bne     fail

        @ Logic test passed.
        mov     r0, #0
        bx      lr
 
@ test_adder -- adder / subtracter and the C and V flags.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r3, r5, flags.
@ Reminder: for subtractions C is "no borrow", i.e. it is set when the
@ unsigned result did not wrap.
test_adder:
        mov     r0, #1

        @ Sub-test 1: carry when adding.
        mov     r5, #0xf0000000
        mvn     r1, r5                  @ r1 = 0x0fffffff
        adds    r2, r1, r5              @ 0xffffffff: no carry, no overflow
        bcs     fail
        bvs     fail

        adds    r2, r2, #1              @ wraps to 0: carry, no overflow
        bcc     fail
        bvs     fail

        adc     r2, r2, #120            @ 0 + 120 + C(1) = 121
        cmp     r2, #121
        bne     fail
        bvs     fail
        add     r0, r0, #1

        @ Sub-test 2: signed overflow when adding.
        mov     r3, #0x8fffffff         @ two large negative numbers become positive
        adds    r3, r3, r5
        bvc     fail
        bcc     fail
        bmi     fail

        mov     r3, #0x10000000
        adds    r3, r3, r1              @ r3 = 0x1fffffff
        bvs     fail
        bcs     fail

        adds    r3, r3, #0x60000001     @ two large positive numbers become negative
        bvc     fail
        bpl     fail

        add     r0, r0, #1

        @ Sub-test 3: carry (borrow) when subtracting.
        mov     r5, #0x10000000
        subs    r2, r5, r1              @ 0x10000000 - 0x0fffffff = 1
        bcc     fail
        bvs     fail

        subs    r2, r2, #1              @ 1 - 1 = 0, no borrow
        bcc     fail
        bvs     fail

        subs    r2, r2, #1              @ 0 - 1 borrows: C clear
        bcs     fail
        bvs     fail

        add     r0, r0, #1

        @ Sub-test 4: signed overflow when subtracting.
        mov     r3, #0x90000000
        subs    r3, r3, r5              @ 0x80000000: still negative, no overflow
        bvs     fail
        bcc     fail

        subs    r3, r3, #1              @ subtracting a positive number from a large negative one gives a positive result
        bvc     fail
        bcc     fail

        @ Keep the sub-test counter in step with the numbering below, so a
        @ failure in sub-test 5 or 6 is not reported one number too low.
        add     r0, r0, #1

        @ Sub-test 5: carry (borrow) when reverse subtracting.
        mov     r3, #1
        rsbs    r2, r1, r5              @ r5 - r1 = 1
        bcc     fail
        bvs     fail
        rsbs    r2, r3, r2              @ r2 - r3 = 0, no borrow
        bcc     fail
        bvs     fail
        rscs    r2, r3, r2              @ C is set, so 0 - 1 - 0 borrows
        bcs     fail
        bvs     fail

        add     r0, r0, #1

        @ Sub-test 6: no overflow when reverse subtracting.
        mov     r2, #0x80000000
        mov     r1, #-1
        rsbs    r2, r2, r1              @ -1 - 0x80000000 = 0x7fffffff
        bvs     fail
        bmi     fail
        bcc     fail

        @ Adder test passed.
        mov     r0, #0
        bx      lr
 
@ test_bshift_reg -- barrel shifter with the shift amount taken from a
@ register (LSL and LSR modes).
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r9, flags.
@ Shift-amount registers set up below:
@   r1 = 0, r2 = 7 (4 from sub-test 4 on), r3 = 32, r4 = 33, r5 = 127,
@   r6 = 256, r7 = 256 + r2's value, r8 = 0xff000000 (test pattern)
test_bshift_reg:
        mov     r0, #1

        mov     r1, #0
        mov     r2, #7
        mov     r3, #32
        mov     r4, #33
        mov     r5, #127
        mov     r6, #256
        add     r7, r6, #7
        mov     r8, #0xff000000

        @ Sub-test 1: LSL by a register.
        movs    r9, r8, lsl r2          @ 0xff000000 << 7 = 0x80000000, C set
        bpl     fail
        bcc     fail
        movs    r9, r2, lsl r1          @ shift by 0 must not touch the carry
        bcc     fail
        mov     r9, r2, lsl r2          @ same register as value and amount
        cmp     r9, #0x380
        bne     fail

        add     r0, r0, #1

        @ Sub-test 2: LSL by more than 31.
        movs    r9, r2, lsl r3          @ by 32: result 0, C = old bit 0 (1)
        bcc     fail
        bne     fail
        movs    r9, r2, lsl r4          @ by 33: result 0, C clear
        bcs     fail
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: LSL by 256 or more -- only the low 8 bits of the
        @ amount register are used.
        movs    r9, r2, lsl r6          @ 256 acts as 0: value and C unchanged
        bcs     fail
        cmp     r9, #7
        bne     fail

        mov     r9, r2, lsl r7          @ 263 acts as 7
        cmp     r9, #0x380
        bne     fail

        movs    r9, r8, lsl r7
        bpl     fail
        bcc     fail

        add     r0, r0, #1

        @ Sub-test 4: LSR by a register.
        mov     r2, #4
        add     r7, r6, #4

        movs    r9, r8, lsr r2          @ 0x0ff00000, C clear
        bmi     fail
        bcs     fail
        movs    r9, r2, lsr r1          @ shift by 0 must not touch the carry
        bcs     fail
        cmp     r9, #4
        bne     fail

        movs    r9, r8, lsr r2
        bcs     fail
        cmp     r9, #0xff00000
        bne     fail

        add     r0, r0, #1

        @ Sub-test 5: LSR by more than 31.
        movs    r9, r8, lsr r3          @ by 32: result 0, C = old bit 31 (1)
        bcc     fail
        bne     fail
        movs    r9, r8, lsr r4          @ by 33: result 0, C clear
        bcs     fail
        bne     fail
        add     r0, r0, #1

        @ Sub-test 6: LSR by 256 or more -- only the low 8 bits are used.
        movs    r9, r8, lsr r6          @ 256 acts as 0
        bcs     fail
        cmp     r9, #0xff000000
        bne     fail

        movs    r9, r8, lsr r7          @ 260 acts as 4
        cmp     r9, #0xff00000
        bne     fail

        @ Register-controlled shift test passed.
        mov     r0, #0
        bx      lr
 
@ Data shared by the load/store tests: 32 consecutive words holding their
@ own index (array[i] == i). array2 starts at element 16.
array:
        .word   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
array2:
        .word   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31

@ test_load -- word loads with every addressing mode.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1 = &array, r2 = &array2, r3 = loop counter / offset,
@        r4 = loaded value, r5 = moving base pointer, r6, flags.
test_load:
        mov     r0, #1

        @ Sub-test 1: plain register-indirect loads.
        ldr     r1, .larray1
        ldr     r2, .larray2

        ldr     r3, [r1]
        teq     r3, #0
        bne     fail

        ldr     r3, [r2]
        teq     r3, #16
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: immediate offsets, negative and positive.
        ldr     r3, [r2, #-60]          @ array2 - 60 bytes = &array[1]
        teq     r3, #1
        bne     fail

        ldr     r3, [r1, #20]           @ &array[5]
        teq     r3, #5
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: positive register offset; byte offset 124 down to 0.
        mov     r3, #124
.ld_reg_pos:
        ldr     r4, [r1, r3]
        cmp     r4, r3, lsr #2          @ expected value = byte offset / 4
        bne     fail
        subs    r3, r3, #4
        bpl     .ld_reg_pos
        add     r0, r0, #1

        @ Sub-test 4: negative register offset from array2; 64 down to 4.
        mov     r3, #64
.ld_reg_neg:
        ldr     r4, [r2, -r3]
        rsb     r4, r4, #0x10           @ 16 - value = distance below array2
        cmp     r4, r3, lsr #2
        bne     fail
        subs    r3, r3, #4
        bne     .ld_reg_neg
        add     r0, r0, #1

        @ Sub-test 5: positive scaled register offset; index 0 up to 31.
        mov     r3, #0
.ld_scaled_pos:
        ldr     r4, [r1, r3, lsl #2]
        cmp     r4, r3
        bne     fail
        add     r3, r3, #1
        cmp     r3, #32
        bne     .ld_scaled_pos
        add     r0, r0, #1

        @ Sub-test 6: negative scaled register offset; index 0 up to 15.
        mov     r3, #0
.ld_scaled_neg:
        ldr     r4, [r2, -r3, lsl #2]
        rsb     r4, r4, #0x10
        cmp     r4, r3
        bne     fail
        add     r3, r3, #1
        cmp     r3, #16
        bne     .ld_scaled_neg
        add     r0, r0, #1

        @ Sub-test 7: pre-increment with write-back; reads array[1..31].
        mov     r3, #31
        mov     r5, r1
.ld_pre_inc:
        ldr     r4, [r5, #4]!
        rsb     r4, r4, #32
        cmp     r4, r3
        bne     fail
        subs    r3, r3, #1
        bne     .ld_pre_inc
        add     r0, r0, #1

        @ Sub-test 8: pre-decrement with write-back; reads array[31..0].
        mov     r3, #31
        add     r5, r1, #128
.ld_pre_dec:
        ldr     r4, [r5, #-4]!
        cmp     r4, r3
        bne     fail
        subs    r3, r3, #1
        bpl     .ld_pre_dec
        add     r0, r0, #1

        @ Sub-test 9: post-increment; reads array[0..31].
        mov     r3, #32
        mov     r5, r1
.ld_post_inc:
        ldr     r4, [r5], #4
        rsb     r4, r4, #32
        cmp     r4, r3
        bne     fail
        subs    r3, r3, #1
        bne     .ld_post_inc
        add     r0, r0, #1

        @ Sub-test 10: post-decrement; reads array[31..0].
        mov     r3, #31
        add     r5, r1, #124
.ld_post_dec:
        ldr     r4, [r5], #-4
        cmp     r3, r4
        bne     fail
        subs    r3, r3, #1
        bpl     .ld_post_dec
        add     r0, r0, #1

        @ Sub-test 11: post-indexed by a shifted register holding a negative
        @ value. 0xfffffff0 asr 2 = -4, so the pointer walks down one word
        @ per load; reads array2[0] (16) down to array[0] (0).
        mov     r6, #0xfffffff0
        mov     r5, r2
        mov     r3, #16
.ld_post_negreg:
        ldr     r4, [r5], r6, asr #2
        cmp     r4, r3
        bne     fail
        subs    r3, r3, #1
        bpl     .ld_post_negreg

        @ Load test passed.
        mov     r0, #0
        bx      lr

@ Literal pool: addresses of the two data tables.
.larray1:
        .word   array
.larray2:
        .word   array2
 
@ test_store -- word stores, verified by reading the data back.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1 = pointer into array, r2-r4, flags.
@ Side effect: overwrites the first words of `array`.
test_store:
        mov     r0, #1

        @ Sub-test 1: basic store operation, then read it back.
        ldr     r1, .larray1
        mov     r2, #0x24
        str     r2, [r1]
        ldr     r2, [r1]
        cmp     r2, #0x24
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: stores with pre-increment and post-increment
        @ write-back, read back with pre-decrement loads.
        mov     r2, #0xab
        mov     r3, #0xbc
        str     r2, [r1, #4]!           @ array[1] = 0xab, r1 -> array[1]
        str     r3, [r1], #4            @ array[1] = 0xbc, r1 -> array[2]
        ldr     r2, [r1, #-4]!          @ reads 0xbc from array[1]
        ldr     r3, [r1, #-4]!          @ reads 0x24 from array[0]
        cmp     r3, #0x24
        bne     fail
        cmp     r2, #0xbc
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: register post-increment. Two stores step r4 by 8 bytes
        @ each, so array[0] = 8, array[2] = 20 and r4 ends 16 bytes on.
        mov     r2, #8
        mov     r3, #20
        mov     r4, r1
        str     r2, [r4], r2
        str     r3, [r4], r2
        sub     r4, r4, #16
        cmp     r4, r1
        bne     fail
        ldr     r2, [r1]
        cmp     r2, #8
        bne     fail
        ldr     r2, [r1, #8]
        cmp     r2, #20
        bne     fail

        @ Store test passed.
        mov     r0, #0
        bx      lr
 
@ test_byte -- byte stores and byte loads.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1 = pointer into array, r2, r3, flags.
@ Side effect: overwrites the first 8 bytes of `array`.
test_byte:
        mov     r0, #1

        @ Sub-test 1: store the bytes 8, 7, ... 1 at ascending addresses,
        @ then compare the two resulting words with the reference table.
        ldr     r1, .larray1
        mov     r2, #8
.byte_fill:
        strb    r2, [r1], #1
        subs    r2, r2, #1
        bne     .byte_fill

        ldr     r2, .ref_words+4        @ expected second word
        ldr     r3, [r1, #-4]!
        cmp     r2, r3
        bne     fail

        ldr     r2, .ref_words          @ expected first word
        ldr     r3, [r1, #-4]!
        cmp     r2, r3
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: read the same 8 bytes back one at a time; r1 is back
        @ at the start of the array here.
        mov     r2, #8
.byte_check:
        ldrb    r3, [r1], #1
        cmp     r3, r2
        bne     fail
        subs    r2, r2, #1
        bne     .byte_check

        @ Byte access test passed.
        mov     r0, #0
        bx      lr

.ref_words:
        @ Reference words for ARMs that access bytes in little-endian order
        .word   0x05060708, 0x01020304

        @ Reference words for ARMs that access bytes in big-endian order
        @ .word 0x08070605, 0x04030201
 
@ Good source for flags info :
@ http://blogs.arm.com/software-enablement/206-condition-codes-1-condition-flags-and-codes/
@
@ test_cpsr -- condition flags written through MSR, and register banking
@ in the FIQ, SUP, UND and IRQ processor modes.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r14, flags, CPSR mode bits.
@ From sub-test 2 on, r1 holds the return address and r2 the caller's
@ stack pointer, because r13/r14 themselves are used as test registers.
@ Those sub-tests therefore leave through .cpsr_exit, not through `fail`.
test_cpsr:
        mov     r0, #1

        @ Sub-test 1: walk NZCV through a set of values and check that every
        @ condition which must be false really is not taken.
        mrs     r1, cpsr
        and     r1, r1, #0x000000ff     @ keep the low byte, zero the flags
        msr     cpsr_flg, r1
        @ NZCV = {0000}
        bvs     fail
        bcs     fail
        beq     fail
        bmi     fail
        bhi     fail                    @ bhi <-> bls
        blt     fail                    @ blt <-> bge
        ble     fail                    @ ble <-> bgt

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {0001}
        bvc     fail
        bhi     fail
        bge     fail
        bgt     fail

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {0010}
        bvs     fail
        bcc     fail
        bls     fail

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {0011}
        bls     fail
        bge     fail
        bgt     fail

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {0100}
        bne     fail
        bhi     fail
        bgt     fail

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {0101}
        bgt     fail

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {0110}
        bhi     fail

        add     r1, r1, #0x20000000
        msr     cpsr, r1
        @ NZCV = {1000}
        bpl     fail
        bge     fail
        bgt     fail

        add     r1, r1, #0x10000000
        msr     cpsr, r1
        @ NZCV = {1001}
        blt     fail

        add     r1, r1, #0x30000000
        msr     cpsr, r1
        @ NZCV = {1100}
        bgt     fail

        add     r0, r0, #1

        @ Sub-test 2: FIQ mode. r8-r14 written in FIQ mode must not disturb
        @ the values seen after leaving it, while r3-r7 are shared.
        mov     r1, r14                 @ save our link register and stack pointer
        mov     r2, r13
        mov     r3, #30
        mov     r4, #40
        mov     r5, #50
        mov     r6, #60
        mov     r7, #70
        mov     r8, #80
        mov     r9, #90
        mov     r10, #100
        mov     r11, #110
        mov     r12, #120
        mov     r13, #130
        mov     r14, #140

        msr     cpsr, #0xd1             @ go into FIQ mode, disable all interrupts (F and I bits set)
        cmp     r3, #30                 @ r3 is not banked: still visible
        bne     .cpsr_exit
        mov     r8, #8                  @ overwrite the FIQ registers...
        mov     r9, #9
        mov     r10, #10
        mov     r11, #11
        mov     r12, #12
        mov     r13, #13
        mov     r14, #14
        mov     r3, #3                  @ ...and also some shared registers
        mov     r4, #4
        mov     r5, #5
        mov     r6, #6
        mov     r7, #7
        msr     cpsr, #0x10             @ back to user mode
        cmp     r3, #3                  @ r3-r7 should have been affected, but not r8-r14
        bne     .cpsr_exit
        cmp     r4, #4
        bne     .cpsr_exit
        cmp     r5, #5
        bne     .cpsr_exit
        cmp     r6, #6
        bne     .cpsr_exit
        cmp     r7, #7
        bne     .cpsr_exit
        cmp     r8, #80
        bne     .cpsr_exit
        cmp     r9, #90
        bne     .cpsr_exit
        cmp     r10, #100
        bne     .cpsr_exit
        cmp     r11, #110
        bne     .cpsr_exit
        cmp     r12, #120
        bne     .cpsr_exit
        cmp     r13, #130
        bne     .cpsr_exit
        cmp     r14, #140
        bne     .cpsr_exit
        add     r0, r0, #1


        @ Sub-test 3: SUP mode. Only r13 and r14 are expected to be banked;
        @ r12 is shared.
        mov     r12, #120
        mov     r13, #130
        mov     r14, #140
        msr     cpsr, #0x13             @ enter SUP mode
        cmp     r12, #120
        bne     .cpsr_exit
        mov     r12, #12
        mov     r13, #13
        mov     r14, #14
        msr     cpsr, #0x10             @ back into user mode
        cmp     r12, #12
        bne     .cpsr_exit
        cmp     r13, #130
        bne     .cpsr_exit
        cmp     r14, #140
        bne     .cpsr_exit
        add     r0, r0, #1

        @ Sub-test 4: UND mode, same checks as for SUP mode.
        mov     r12, #120
        mov     r13, #130
        mov     r14, #140
        msr     cpsr, #0x1b             @ enter UND mode
        cmp     r12, #120
        bne     .cpsr_exit
        mov     r12, #12
        mov     r13, #13
        mov     r14, #14
        msr     cpsr, #0x10             @ back into user mode
        cmp     r12, #12
        bne     .cpsr_exit
        cmp     r13, #130
        bne     .cpsr_exit
        cmp     r14, #140
        bne     .cpsr_exit
        add     r0, r0, #1

        @ Sub-test 5: IRQ mode, same checks as for SUP mode.
        mov     r12, #120
        mov     r13, #130
        mov     r14, #140
        msr     cpsr, #0x92             @ enter IRQ mode, IRQ disabled
        cmp     r12, #120
        bne     .cpsr_exit
        mov     r12, #12
        mov     r13, #13
        mov     r14, #14
        msr     cpsr, #0x10             @ back into user mode
        cmp     r12, #12
        bne     .cpsr_exit
        cmp     r13, #130
        bne     .cpsr_exit
        cmp     r14, #140
        bne     .cpsr_exit

        @ CPSR test passed.
        mov     r0, #0

        @ Common exit for sub-tests 2-5: force user mode, restore the stack
        @ pointer and return through the saved link register.
.cpsr_exit:
        msr     cpsr, #0x10             @ back into user mode
        mov     r13, r2
        bx      r1                      @ return
 
@ test_mul -- multiplier (MUL, MLA) and its effect on the N and Z flags.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r5, flags.
test_mul:
        mov     r0, #1

        @ Sub-test 1: MUL instruction.
        mov     r1, #0
        mov     r2, #2
        mov     r3, #3
        mul     r4, r2, r3              @ 2 * 3 = 6
        cmp     r4, #6
        bne     fail
        bmi     fail

        muls    r5, r1, r2              @ 0 * 2 = 0: Z set, N clear
        bne     fail
        bmi     fail

        muls    r4, r2                  @ two-operand form: r4 = 6 * 2
        cmp     r4, #12
        bne     fail
        bmi     fail

        @ mul r3, r3, r4 @ no joke, verified to fail on a real ARM !
        @ cmp r4, #36
        @ bne fail

        mov     r3, #-3                 @ positive * negative
        muls    r5, r2, r3
        bpl     fail
        cmp     r5, #-6
        bne     fail

        mov     r2, #-2                 @ negative * negative
        muls    r5, r2, r3
        bmi     fail
        cmp     r5, #6
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: MLA instruction.
        mov     r1, #10
        mov     r2, #2
        mov     r3, #5
        mlas    r4, r1, r2, r3          @ 2*10 + 5 = 25
        bmi     fail
        @ bcs fail @ on a real ARM, C flag after MLA is unpredictable
        bvs     fail
        cmp     r4, #25
        bne     fail

        mov     r1, #-10
        mlas    r4, r1, r2, r3          @ 2*-10 + 5 = -15
        bpl     fail
        bvs     fail
        cmp     r4, #-15
        bne     fail

        mov     r3, #0x80000001         @ makes the accumulate step wrap around
        mlas    r4, r1, r2, r3
        bmi     fail
        @ bvc fail @ on a real ARM, V flag is not updated ?

        @ Multiplier test passed.
        mov     r0, #0
        bx      lr
 
@ test_ldmstm -- load-multiple and store-multiple in all four addressing
@ modes (IA, IB, DB, DA).
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r6, flags.
@ Side effect: the STM sub-tests overwrite the first four words of `array`.
test_ldmstm:
        mov     r0, #1

        @ Sub-test 1: STMIA with write-back. Stores r1-r4 at base+0..+12 and
        @ advances the base by 16.
        mov     r1, #1
        mov     r2, #2
        mov     r3, #3
        mov     r4, #4
        ldr     r5, .larray1
        mov     r6, r5

        stmia   r6!, {r1-r4}
        sub     r6, r6, r5
        cmp     r6, #16
        bne     fail

        ldr     r6, [r5]
        cmp     r6, #1
        bne     fail
        ldr     r6, [r5, #4]
        cmp     r6, #2
        bne     fail
        ldr     r6, [r5, #8]
        cmp     r6, #3
        bne     fail
        ldr     r6, [r5, #12]
        cmp     r6, #4
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: STMIB with write-back. Stores r1-r3 at base+4..+12 and
        @ advances the base by 12.
        mov     r6, r5
        stmib   r6!, {r1-r3}
        sub     r6, r6, r5
        cmp     r6, #12
        bne     fail

        ldr     r6, [r5, #4]
        cmp     r6, #1
        bne     fail
        ldr     r6, [r5, #8]
        cmp     r6, #2
        bne     fail
        ldr     r6, [r5, #12]
        cmp     r6, #3
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: STMDB with write-back. Starting 12 bytes up, stores
        @ r1-r3 at array+0..+8 and leaves the base at the array start.
        add     r6, r5, #12
        stmdb   r6!, {r1-r3}
        cmp     r6, r5
        bne     fail

        ldr     r6, [r5]
        cmp     r6, #1
        bne     fail
        ldr     r6, [r5, #8]
        cmp     r6, #3
        bne     fail
        add     r0, r0, #1

        @ Sub-test 4: STMDA with write-back. Starting 12 bytes up, stores
        @ r1-r3 at array+4..+12 and leaves the base at the array start.
        add     r6, r5, #12
        stmda   r6!, {r1-r3}
        cmp     r6, r5
        bne     fail
        ldr     r6, [r5, #4]
        cmp     r6, #1
        bne     fail
        ldr     r6, [r5, #12]
        cmp     r6, #3
        bne     fail
        add     r0, r0, #1

        @ Sub-test 5: LDMIA without write-back; reads array2[0..3].
        ldr     r5, .larray2
        ldmia   r5, {r1-r4}
        cmp     r1, #16
        bne     fail
        cmp     r2, #17
        bne     fail
        cmp     r3, #18
        bne     fail
        cmp     r4, #19
        bne     fail
        add     r0, r0, #1

        @ Sub-test 6: LDMIB with write-back; reads array2[1..4] and leaves
        @ r5 pointing at array2[4].
        ldmib   r5!, {r1-r4}
        cmp     r1, #17
        bne     fail
        cmp     r2, #18
        bne     fail
        cmp     r3, #19
        bne     fail
        cmp     r4, #20
        bne     fail
        add     r0, r0, #1

        @ Sub-test 7: LDMDB with write-back; reads array2[1..3] and leaves
        @ r5 pointing at array2[1].
        ldmdb   r5!, {r1-r3}
        cmp     r3, #19
        bne     fail
        cmp     r2, #18
        bne     fail
        cmp     r1, #17
        bne     fail
        add     r0, r0, #1

        @ Sub-test 8: LDMDA without write-back; reads array2[0..1].
        ldmda   r5, {r1-r2}
        cmp     r1, #16
        bne     fail
        cmp     r2, #17
        bne     fail

        @ LDM/STM test passed.
        mov     r0, #0
        bx      lr
 
@ test_r15jumps -- control transfers made by writing r15 (the PC) with
@ ordinary data-processing and load instructions.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  r1-r8, flags.
@ In every sub-test the instructions placed between the jump and its
@ target would spoil r1/r2 or the flags if they were executed.
test_r15jumps:
        mov     r0, #1

        @ Sub-test 1: conditional MOV into r15, condition true.
        ldr     r3, .llabels            @ r3 = &.label1
        mov     r1, #0
        movs    r2, #0
        moveq   r15, r3                 @ jump to label 1
        movs    r2, #12
        movs    r1, #13                 @ already fetched/decoded: must not execute
.label1:
        bne     fail
        cmp     r1, #0
        bne     fail
        cmp     r2, #0
        bne     fail
        add     r0, r0, #1

        @ Sub-test 2: conditional MOV into r15, condition false -- execution
        @ must simply fall through.
        ldr     r3, .llabels+4          @ r3 = &.label2
        movs    r2, #12
        moveq   r15, r3
        movs    r2, #0
.label2:
        cmp     r2, #0
        bne     fail
        add     r0, r0, #1

        @ Sub-test 3: ADD computing the new address.
        ldr     r3, .llabels+8          @ r3 = &.label3
        movs    r1, #0
        movs    r2, #0
        add     r15, r3, #8             @ go 2 instructions after label 3
.label3:
        movs    r1, #12
        movs    r2, #13
        bne     fail                    @ program execution continues here
        bne     fail
        add     r0, r0, #1

        @ Sub-test 4: ADD using r15 itself as an operand (reads as PC+8).
        movs    r2, #0
        movs    r1, #0
        add     r15, r15, #4            @ Skip 2 instructions. This could actually be used for a nice jump table if a register were used instead of #4
        movs    r1, #1
        movs    r2, #2
        bne     fail
        bne     fail
        add     r0, r0, #1

        @ Sub-test 5: load r15 directly from memory.
        movs    r1, #1
        movs    r2, #2
        ldrne   r15, .llabels+12        @ taken: code after a ldr r15 must not execute
        movs    r1, #0
        movs    r2, #0
.label4:
        beq     fail
        beq     fail

        ldreq   r15, .llabels+16        @ not taken: execution must continue normally
        movs    r2, #-2
.label5:
        bpl     fail
        cmp     r2, #-2
        bne     fail
        add     r0, r0, #1

        @ Sub-test 6: r15 loaded as the last register of an LDM.
        ldr     r3, .llabels + 6*4      @ 7th table entry: address of the table itself
        movs    r1, #0
        movs    r2, #0
        ldmia   r3, {r4-r8, r15}        @ 6th word goes to r15: jump to label6
        movs    r1, #4
        movs    r2, #2
.label6:
        bne     fail
        bne     fail

        @ r15 jump test passed.
        mov     r0, #0
        bx      lr

@ Jump table: the six target labels followed by the table's own address.
@ NOTE(review): in GNU as for ARM the .align operand is a power of two, so
@ this asks for 256-byte alignment -- confirm that this is intended.
        .align  8
.llabels:
        .word   .label1, .label2, .label3, .label4, .label5, .label6, .llabels
 
@ test_rti -- return-from-interrupt behaviour: instructions that copy the
@ SPSR into the CPSR while loading the PC, and LDM with the ^ (S) bit.
@
@ Out:   r0 = 0 on success, otherwise the number of the failing sub-test.
@ Uses:  FIQ-bank r8-r11, user r9/r10, flags, CPSR and SPSR.
@ Every exit goes through .rtifail, which forces user mode before
@ returning, so the routine can bail out while still in FIQ mode.
test_rti:
        mov     r0, #1

        @ Sub-test 1: normal return from interrupt (MOVS pc, Rm).
        msr     cpsr, #0xd1             @ enter FIQ mode (interrupts disabled)
        msr     spsr, #0x40000010       @ emulate a saved CPSR in user mode, with NZCV = {0100}

        movs    r8, #-12                @ now the FIQ sets its CPSR to NZCV = {1000}
        ldr     r8, .rtilabels          @ simulate an interrupt return
        movs    r15, r8                 @ return from interrupt and move SPSR to CPSR

.rtilabel1:
        bmi     .rtifail                @ N must have been replaced by the saved value (0)
        bne     .rtifail                @ Z must have been restored (1)
        add     r0, r0, #1

        @ Sub-test 2: LDM with the S flag and without r15 in the list loads
        @ the user-bank registers instead of the current (FIQ) bank.
        @ NOTE(review): the ARM architecture manual warns against touching
        @ banked registers right after a user-bank LDM; the cmp directly
        @ below does so -- confirm this is a deliberate stress of the core.
        msr     cpsr, #0xd1
        ldr     r8, .rtilabels + 20     @ 6th table entry: address of the table itself
        ldmib   r8!, {r9, r10}          @ fiq_r9 = 1, fiq_r10 = 2
        ldmib   r8, {r9, r10}^          @ usr_r9 = 3, usr_r10 = 4 ( ^ => load to user registers )
        cmp     r9, #1
        bne     .rtifail
        cmp     r10, #2
        bne     .rtifail
        msr     cpsr, #0x10
        cmp     r9, #3
        bne     .rtifail
        cmp     r10, #4
        bne     .rtifail
        add     r0, r0, #1

        @ Sub-test 3: LDM with the S flag and r15 in the list returns from
        @ the interrupt, copying SPSR to CPSR.
        msr     cpsr, #0xd1             @ FIQ mode, NZCV = {0000}
        @ Write the flag field as well as the control field. The `_c` suffix
        @ would select the control byte only and leave the saved flags at
        @ whatever sub-test 1 put there.
        msr     spsr, #0x80000010       @ saved state is user mode with NZCV = {1000}

        ldr     r8, .rtilabels + 20
        add     r8, r8, #8

        movs    r9, #0                  @ NZCV = {0100}
        ldmib   r8, {r9-r11, r15}^      @ This should return to user mode and restore CPSR to NZCV = {1000}

.rtilabel2:
        bpl     .rtifail
        beq     .rtifail

        @ Return-from-interrupt test passed.
        mov     r0, #0

.rtifail:
        msr     cpsr, #0x10
        bx      lr


@ Data for the tests above: first return target, four data words, the
@ table's own address, second return target.
.rtilabels:
        .word   .rtilabel1, 1, 2, 3, 4, .rtilabels, .rtilabel2
-- SVN diff residue kept for reference:
--   trunk/test_program/arm_test.s -- property added: svn:executable = *
--   Index: trunk/hdl/alu.vhd (nonexistent -> revision 2, 170 lines added)
--
-- This file is part of ARM4U CPU
--
-- This is a creation of the Laboratory of Processor Architecture
-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch )
--
-- alu.vhd -- Hardware description of the ALU unit (inside Execute pipeline stage)
--
-- Written By - Jonathan Masur and Xavier Jimenez (2013)
--
-- This program is free software; you can redistribute it and/or modify it
-- under the terms of the GNU General Public License as published by the
-- Free Software Foundation; either version 2, or (at your option) any
-- later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- In other words, you are welcome to use, share and improve this program.
-- You are forbidden to forbid anyone else to use, share and improve
-- what you give them. Help stamp out software-hoarding!

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.arm_types.all;

-- Combinational ALU: computes the result and the next flag values for the
-- operation selected by exe_alu_operation.
entity alu is
    port (
        exe_alu_operation : in ALU_OPERATION;
        alu_o : out unsigned(31 downto 0);
        alu_opb, alu_opa : in unsigned(31 downto 0);
        n, z, c, v, barrelshift_c : in std_logic;
        lowflags : in std_logic_vector(5 downto 0);
        next_n, next_z, next_c, next_v : out std_logic;
        next_lowflags : out std_logic_vector(5 downto 0)
    );
end;

architecture rtl of alu is
    signal alu_out : unsigned(31 downto 0);

    signal adder_a, adder_b, adder_out : unsigned(31 downto 0);
    signal adder_cout, adder_vout : std_logic;
    signal adder_cin : unsigned(0 downto 0);

begin
    -- alu_out is an internal copy because the output port cannot be read back
    alu_o <= alu_out;

    -- 32 bit adder with carry in and carry out
    adder : process(adder_a, adder_b, adder_cin, adder_out) is
        variable add33 :unsigned(32 downto 0);
    begin
        add33 := ('0' & adder_a) + ('0' & adder_b) + adder_cin;
        adder_out <= add33(31 downto 0);

        -- carry out is bit 32 of the result
        adder_cout <= add33(32);

        -- overflow true if both operands were the same sign and the result is not the same sign
        adder_vout <= (add33(31) and not adder_a(31) and not adder_b(31)) or (not adder_out(31) and adder_a(31) and adder_b(31));
    end process;

    -- 32-bit ALU
    alu : process(exe_alu_operation, alu_out, alu_opb, alu_opa, n, z, c, v, lowflags, barrelshift_c, adder_out, adder_cout, adder_vout) is
        variable carry : unsigned(0 downto 0);
    begin
        -- adder inputs are don't-care unless an arithmetic operation drives them
        adder_a <= (others => '-');
        adder_b <= (others => '-');
        adder_cin <= "-";

        -- convert the carry flag to a 1-bit unsigned so it can be added
        if c = '1' then carry := "1"; else carry := "0"; end if;

        -- default values for nzvc and low flags (v and lowflags don't change by default)
        next_n <= alu_out(31);
        if alu_out = X"00000000"
        then
            next_z <= '1';
        else
            next_z <= '0';
        end if;
        next_v <= v;
        next_c <= barrelshift_c;
        next_lowflags <= lowflags;

        case exe_alu_operation is
            when ALU_NOP => -- no ALU operation
                alu_out <= alu_opb;

            when ALU_NOT => -- one's complement operation
                alu_out <= not alu_opb;
            when ALU_ORR =>
                alu_out <= alu_opa or alu_opb;
            when ALU_AND =>
                alu_out <= alu_opa and alu_opb;
            when ALU_EOR =>
                alu_out <= alu_opa xor alu_opb;
            when ALU_BIC => -- bit clear
                alu_out <= alu_opa and not alu_opb;

            when ALU_RWF => -- read/write flags
                next_n <= alu_opb(31);
                next_z <= alu_opb(30);
                next_c <= alu_opb(29);
                next_v <= alu_opb(28);
                -- I and F flags
                next_lowflags(5 downto 4) <= std_logic_vector(alu_opb(7 downto 6));
                -- mode flags
                next_lowflags(3 downto 0) <= std_logic_vector(alu_opb(3 downto 0));

                -- read (old) flags
                alu_out <= unsigned( n & z & c & v & (27 downto 8 => '0') & lowflags(5 downto 4) & '0' & '1' & lowflags(3 downto 0) );

            when ALU_ADD => -- addition without carry
                adder_a <= alu_opa;
                adder_b <= alu_opb;
                adder_cin <= "0";

                next_v <= adder_vout;
                alu_out <= adder_out;
                next_c <= adder_cout;

            when ALU_ADC => -- addition with carry
                adder_a <= alu_opa;
                adder_b <= alu_opb;
                adder_cin <= carry;

                next_v <= adder_vout;
                alu_out <= adder_out;
                next_c <= adder_cout;

            when ALU_SUB => -- subtraction without carry
                adder_a <= alu_opa;
                adder_b <= not alu_opb;
                adder_cin <= "1";

                next_v <= adder_vout;
                alu_out <= adder_out;
                next_c <= adder_cout;

            when ALU_SBC => -- subtraction with carry
                adder_a <= alu_opa;
                adder_b <= not alu_opb;
                adder_cin <= carry;

                next_v <= adder_vout;
                alu_out <= adder_out;
                next_c <= adder_cout;

            when ALU_RSB => -- reverse subtraction without carry
                adder_a <= not alu_opa;
                adder_b <= alu_opb;
                adder_cin <= "1";

                next_v <= adder_vout;
                alu_out <= adder_out;
                next_c <= adder_cout;

            when ALU_RSC => -- reverse subtraction with carry
                adder_a <= not alu_opa;
                adder_b <= alu_opb;
                adder_cin <= carry;

                next_v <= adder_vout;
                alu_out <= adder_out;
                next_c <= adder_cout;
        end case;
    end process;
end;
-- SVN diff residue kept for reference:
--   trunk/hdl/alu.vhd -- property added: svn:executable = *
--   Index: trunk/hdl/execute.vhd (nonexistent -> revision 2, 304 lines added)
--
-- This file is part of ARM4U CPU
--
-- This is a creation of the Laboratory of Processor Architecture
-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch )
--
-- execute.vhd -- Description of the execute pipeline stage
--
-- Written By - Jonathan Masur and Xavier Jimenez (2013)
--
-- This program is free software; you can redistribute it and/or modify it
-- under the terms of the GNU General Public License as published by the
-- Free Software Foundation; either version 2, or (at your option) any
-- later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- In other words, you are welcome to use, share and improve this program.
-- You are forbidden to forbid anyone else to use, share and improve
-- what you give them. Help stamp out software-hoarding!

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.arm_types.all;

entity execute is
    port(
        clk : in std_logic;
        n_reset : in std_logic;

        exe_A_adr, exe_B_adr, exe_C_adr : in std_logic_vector(5 downto 0);
        exe_stage_valid : in std_logic;
        exe_barrelshift_operand : in std_logic;
        exe_barrelshift_type : in std_logic_vector(1 downto 0);
        exe_literal_shift_amnt : in std_logic_vector(4 downto 0);
        exe_literal_data : in std_logic_vector(23 downto 0);
        exe_opb_is_literal : in std_logic;
        exe_opb_sel : in std_logic;
        exe_alu_operation : in ALU_OPERATION;
        exe_condition : in std_logic_vector(3 downto 0);
        exe_affect_sflags : in std_logic;
        exe_data_sel : in std_logic;
        exe_rdest_wren : in std_logic;
        exe_rdest_adr : in std_logic_vector(4 downto 0);
        exe_branch_en : in std_logic;
        exe_wb_sel : in std_logic;
        exe_mem_ctrl : in MEM_OPERATION;
        exe_mem_burstcount : in std_logic_vector(3 downto 0);

        exe_pc_plus_4 : in unsigned(31 downto 0);
        exe_pc_plus_8 : in unsigned(31 downto 0);
        --- forwarding signals to come here

        rfile_A_data : in std_logic_vector(31 downto 0);
        rfile_B_data : in std_logic_vector(31 downto 0);
        rfile_C_data : in std_logic_vector(31 downto 0);

        fwd_wb2_enable : in std_logic;
        fwd_wb2_address : in std_logic_vector(4 downto 0);
        fwd_wb2_data : in std_logic_vector(31 downto 0);
        fwd_wb1_enable : in std_logic;
        fwd_wb1_address : in std_logic_vector(4 downto 0);
        fwd_wb1_data : in std_logic_vector(31 downto 0);
        fwd_wb1_is_invalid : in std_logic;
        fwd_mem_enable : in std_logic;
        fwd_mem_address : in std_logic_vector(4 downto 0);
        fwd_mem_data : in std_logic_vector(31 downto 0);
        fwd_mem_is_invalid : in std_logic;

        mem_stage_valid : out std_logic;
        mem_rdest_wren : out std_logic;
        mem_rdest_adr : out std_logic_vector(4 downto 0);
        mem_branch_en : out std_logic;
        mem_wb_sel : out std_logic;
        mem_exe_data : out std_logic_vector(31 downto 0);
        mem_wrdata : out std_logic_vector(31 downto 0);
        mem_mem_ctrl : out MEM_OPERATION;
        mem_mem_burstcount : out std_logic_vector(3 downto 0);

        low_flags : out std_logic_vector(5 downto 0);
        exe_PC_wrdata : out unsigned(31 downto 0);
        exe_blocked_n : out std_logic;
        exe_PC_wr : out std_logic;
        exe_latch_enable : in std_logic
    );
end entity;

architecture rtl of execute is

    signal exe_data : std_logic_vector(31 downto 0);
    signal stage_active, forward_ok, forward_a_ok, forward_b_ok, forward_c_ok, condition_is_true : std_logic;
    signal barrelshift_out, alu_out, mult_out, alu_opb, op_a_data, op_b_data, op_c_data : unsigned(31 downto 0);

    signal n, z, v, c : std_logic;
    signal next_n, next_z, next_v, next_c, barrelshift_c : std_logic;
    signal lowflags, next_lowflags : std_logic_vector(5 downto 0);

begin

    -- output latch: the valid bit is the only output register with a reset
    process(clk, n_reset) is
    begin
        if n_reset = '0'
        then
            mem_stage_valid <= '0';
        elsif rising_edge(clk)
        then
            if exe_latch_enable = '1'
            then
                mem_stage_valid <= stage_active;
            end if;
        end if;
    end process;

    -- output latch: remaining signals handed to the memory stage
    process(clk) is
    begin
        if rising_edge(clk)
        then
            if exe_latch_enable = '1'
            then
                mem_rdest_wren <= exe_rdest_wren;
                mem_rdest_adr <= exe_rdest_adr;
                mem_branch_en <= exe_branch_en;
                mem_wb_sel <= exe_wb_sel;
                mem_exe_data <= exe_data;
                mem_wrdata <= std_logic_vector(op_c_data);
                mem_mem_ctrl <= exe_mem_ctrl;
                mem_mem_burstcount <= exe_mem_burstcount;
            end if;
        end if;
    end process;

    low_flags <= lowflags;

    -- enable stage condition
    stage_active <= exe_stage_valid and forward_ok and condition_is_true;

    exe_data <= std_logic_vector(alu_out) when exe_data_sel = '1' else std_logic_vector(exe_pc_plus_4);
    exe_pc_wrdata <= alu_out;
    exe_pc_wr <= exe_branch_en and (not exe_wb_sel) and stage_active;

    exe_blocked_n <= forward_ok or not (exe_stage_valid and condition_is_true);

    -- forwarding for operand a
    fwa : entity work.forwarding(rtl) port map
    (
        reg => exe_A_adr,

        fwd_wb2_enable => fwd_wb2_enable,
        fwd_wb2_address => fwd_wb2_address,
        fwd_wb2_data => fwd_wb2_data,
        fwd_wb1_enable => fwd_wb1_enable,
        fwd_wb1_address => fwd_wb1_address,
        fwd_wb1_data => fwd_wb1_data,
        fwd_wb1_is_invalid => fwd_wb1_is_invalid,
        fwd_mem_enable => fwd_mem_enable,
        fwd_mem_address => fwd_mem_address,
        fwd_mem_data => fwd_mem_data,
        fwd_mem_is_invalid => fwd_mem_is_invalid,

        exe_pc_plus_8 => exe_pc_plus_8,
        rfile_data => rfile_a_data,

        forward_ok => forward_a_ok,
        op_data => op_a_data
    );

    -- forwarding for operand b
    fwb : entity work.forwarding(rtl) port map
    (
        reg => exe_B_adr,

        fwd_wb2_enable => fwd_wb2_enable,
        fwd_wb2_address => fwd_wb2_address,
        fwd_wb2_data => fwd_wb2_data,
        fwd_wb1_enable => fwd_wb1_enable,
        fwd_wb1_address => fwd_wb1_address,
        fwd_wb1_data => fwd_wb1_data,
        fwd_wb1_is_invalid => fwd_wb1_is_invalid,
        fwd_mem_enable => fwd_mem_enable,
        fwd_mem_address => fwd_mem_address,
        fwd_mem_data => fwd_mem_data,
        fwd_mem_is_invalid => fwd_mem_is_invalid,

        exe_pc_plus_8 => exe_pc_plus_8,
        rfile_data => rfile_b_data,

        forward_ok => forward_b_ok,
        op_data => op_b_data
    );

    -- forwarding for operand c
    fwc : entity work.forwarding(rtl) port map
    (
        reg => exe_C_adr,

        fwd_wb2_enable => fwd_wb2_enable,
        fwd_wb2_address => fwd_wb2_address,
        fwd_wb2_data => fwd_wb2_data,
        fwd_wb1_enable => fwd_wb1_enable,
        fwd_wb1_address => fwd_wb1_address,
        fwd_wb1_data => fwd_wb1_data,
        fwd_wb1_is_invalid => fwd_wb1_is_invalid,
        fwd_mem_enable => fwd_mem_enable,
        fwd_mem_address => fwd_mem_address,
        fwd_mem_data => fwd_mem_data,
        fwd_mem_is_invalid => fwd_mem_is_invalid,

        exe_pc_plus_8 => exe_pc_plus_8,
        rfile_data => rfile_c_data,

        forward_ok => forward_c_ok,
        op_data => op_c_data
    );
    -- the stage may only proceed when all 3 operands are available
    forward_ok <= forward_a_ok and forward_b_ok and forward_c_ok;

    -- check if the condition is true
    with exe_condition select
        condition_is_true <=
            z when COND_EQ,
            not z when COND_NE,
            c when COND_CS,
            not c when COND_CC,
            n when COND_MI,
            not n when COND_PL,
            v when COND_VS,
            not v when COND_VC,
            c and not z when COND_HI,
            z or not c when COND_LS,
            n xnor v when COND_GE,
            n xor v when COND_LT,
            (not z) and (n xnor v) when COND_GT,
            z or (n xor v) when COND_LE,
            '1' when COND_AL,
            '-' when others;

    -- barrel shifter (external component)
    bs : entity work.barrelshift(optimized) port map
    (
        c => c,
        exe_barrelshift_operand => exe_barrelshift_operand,
        exe_barrelshift_type => exe_barrelshift_type,
        exe_literal_shift_amnt => exe_literal_shift_amnt,
        exe_literal_data => exe_literal_data,
        exe_opb_is_literal => exe_opb_is_literal,
        op_b_data => op_b_data,
        op_c_data => op_c_data,
        barrelshift_c => barrelshift_c,
        barrelshift_out => barrelshift_out
    );

    -- multiplier unit: only the low 32 bits of the 64-bit product are kept
    multiplier : process(op_b_data, op_c_data) is
        variable mult_dummy : unsigned(63 downto 0);
    begin
        mult_dummy := op_b_data * op_c_data;
        mult_out <= mult_dummy(31 downto 0);
    end process;

    -- end process;

    -- alu opb multiplexer
    alu_opb <= mult_out when exe_opb_sel = '1' else barrelshift_out;

    -- alu
    alu : entity work.alu(rtl) port map
    (
        exe_alu_operation => exe_alu_operation,
        alu_o => alu_out,
        alu_opb => alu_opb,
        alu_opa => op_a_data,
        n => n,
        z => z,
        c => c,
        v => v,
        lowflags => lowflags,
        barrelshift_c => barrelshift_c,
        next_n => next_n,
        next_z => next_z,
        next_c => next_c,
        next_v => next_v,
        next_lowflags => next_lowflags
    );

    -- flags flip flops: updated only by an active, flag-affecting instruction
    -- NOTE(review): n_reset is in the sensitivity list but is not used in
    -- the process body -- confirm the flags are meant to have no reset.
    process(clk, n_reset) is
    begin
        if rising_edge(clk)
        then
            if exe_affect_sflags = '1' and stage_active = '1' and exe_latch_enable = '1'
            then
                n <= next_n;
                z <= next_z;
                v <= next_v;
                c <= next_c;
                lowflags <= next_lowflags;
            end if;
        end if;
    end process;

end architecture;
trunk/hdl/execute.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/forwarding.vhd =================================================================== --- trunk/hdl/forwarding.vhd (nonexistent) +++ trunk/hdl/forwarding.vhd (revision 2) @@ -0,0 +1,85 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- forwarding.vhd -- Describes the unit capable of detecting data hazards and forwarding +-- register values from memory and writeback pipeline stages into the execute stage +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.arm_types.all;

-- Operand forwarding unit.
--
-- Selects the value of one source operand from, in priority order:
--   1. PC+8, when bit 5 of "reg" is set (the operand is the program counter)
--   2. the memory stage result        (fwd_mem_*)
--   3. the first writeback stage      (fwd_wb1_*)
--   4. the second writeback stage     (fwd_wb2_*)
--   5. the register file read port    (rfile_data)
--
-- A pipeline stage matches when its enable is high and its 5-bit destination
-- address equals reg(4 downto 0).  forward_ok is driven low only when the
-- selected stage (memory or first writeback) flags its data as invalid.
entity forwarding is
    port (
        -- operand selector: bit 5 = operand is PC, bits 4..0 = register address
        reg                : in  std_logic_vector(5 downto 0);

        -- second writeback stage (lowest forwarding priority, never invalid)
        fwd_wb2_enable     : in  std_logic;
        fwd_wb2_address    : in  std_logic_vector(4 downto 0);
        fwd_wb2_data       : in  std_logic_vector(31 downto 0);
        -- first writeback stage
        fwd_wb1_enable     : in  std_logic;
        fwd_wb1_address    : in  std_logic_vector(4 downto 0);
        fwd_wb1_data       : in  std_logic_vector(31 downto 0);
        fwd_wb1_is_invalid : in  std_logic;
        -- memory stage (highest forwarding priority)
        fwd_mem_enable     : in  std_logic;
        fwd_mem_address    : in  std_logic_vector(4 downto 0);
        fwd_mem_data       : in  std_logic_vector(31 downto 0);
        fwd_mem_is_invalid : in  std_logic;

        -- fallback sources
        exe_pc_plus_8      : in  unsigned(31 downto 0);
        rfile_data         : in  std_logic_vector(31 downto 0);

        -- selected operand value and its validity
        op_data            : out unsigned(31 downto 0);
        forward_ok         : out std_logic
    );
end;

architecture rtl of forwarding is
    -- named views of the two fields packed into "reg"
    alias operand_is_pc : std_logic is reg(5);
    alias operand_index : std_logic_vector(4 downto 0) is reg(4 downto 0);
begin

    -- operand value: first matching source in priority order wins
    op_data <=
        exe_pc_plus_8          when operand_is_pc = '1' else
        unsigned(fwd_mem_data) when fwd_mem_enable = '1' and fwd_mem_address = operand_index else
        unsigned(fwd_wb1_data) when fwd_wb1_enable = '1' and fwd_wb1_address = operand_index else
        unsigned(fwd_wb2_data) when fwd_wb2_enable = '1' and fwd_wb2_address = operand_index else
        unsigned(rfile_data);

    -- operand validity: only the memory and first writeback stages can report
    -- invalid data; PC, second writeback and register file are always valid
    forward_ok <=
        '1'                    when operand_is_pc = '1' else
        not fwd_mem_is_invalid when fwd_mem_enable = '1' and fwd_mem_address = operand_index else
        not fwd_wb1_is_invalid when fwd_wb1_enable = '1' and fwd_wb1_address = operand_index else
        '1';

end;
trunk/hdl/forwarding.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/arm_types.vhd =================================================================== --- trunk/hdl/arm_types.vhd (nonexistent) +++ trunk/hdl/arm_types.vhd (revision 2) @@ -0,0 +1,95 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- arm_types.vhd -- Package containing types for the whole project +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
library ieee;
use ieee.std_logic_1164.all;

-- Shared constants and enumeration types used by the pipeline stages.
package arm_types is

    -- Condition codes (top 4 bits of an ARM instruction).
    -- Encoding "1111" has no constant here; the execute stage treats any
    -- code outside this list as don't-care.
    constant COND_EQ : std_logic_vector(3 downto 0) := "0000";
    constant COND_NE : std_logic_vector(3 downto 0) := "0001";
    constant COND_CS : std_logic_vector(3 downto 0) := "0010";
    constant COND_CC : std_logic_vector(3 downto 0) := "0011";
    constant COND_MI : std_logic_vector(3 downto 0) := "0100";
    constant COND_PL : std_logic_vector(3 downto 0) := "0101";
    constant COND_VS : std_logic_vector(3 downto 0) := "0110";
    constant COND_VC : std_logic_vector(3 downto 0) := "0111";
    constant COND_HI : std_logic_vector(3 downto 0) := "1000";
    constant COND_LS : std_logic_vector(3 downto 0) := "1001";
    constant COND_GE : std_logic_vector(3 downto 0) := "1010";
    constant COND_LT : std_logic_vector(3 downto 0) := "1011";
    constant COND_GT : std_logic_vector(3 downto 0) := "1100";
    constant COND_LE : std_logic_vector(3 downto 0) := "1101";
    constant COND_AL : std_logic_vector(3 downto 0) := "1110";

    -- Register re-mapping at decode stage: 5-bit addresses into the register
    -- file.  The decode stage translates a 4-bit architectural register
    -- number plus the current processor mode into one of these.
    -- Addresses 0..14: r0-r14 of user mode (r0-r7 are shared by every mode).
    constant r0 : std_logic_vector(4 downto 0) := "00000";
    constant r1 : std_logic_vector(4 downto 0) := "00001";
    constant r2 : std_logic_vector(4 downto 0) := "00010";
    constant r3 : std_logic_vector(4 downto 0) := "00011";
    constant r4 : std_logic_vector(4 downto 0) := "00100";
    constant r5 : std_logic_vector(4 downto 0) := "00101";
    constant r6 : std_logic_vector(4 downto 0) := "00110";
    constant r7 : std_logic_vector(4 downto 0) := "00111";
    constant r8 : std_logic_vector(4 downto 0) := "01000";
    constant r9 : std_logic_vector(4 downto 0) := "01001";
    constant r10 : std_logic_vector(4 downto 0) := "01010";
    constant r11 : std_logic_vector(4 downto 0) := "01011";
    constant r12 : std_logic_vector(4 downto 0) := "01100";
    constant r13 : std_logic_vector(4 downto 0) := "01101";
    constant r14 : std_logic_vector(4 downto 0) := "01110";
    -- Addresses 15..19: FIQ-banked copies of r8-r12.
    constant fiq_r8 : std_logic_vector(4 downto 0) := "01111";
    constant fiq_r9 : std_logic_vector(4 downto 0) := "10000";
    constant fiq_r10 : std_logic_vector(4 downto 0) := "10001";
    constant fiq_r11 : std_logic_vector(4 downto 0) := "10010";
    constant fiq_r12 : std_logic_vector(4 downto 0) := "10011";
    -- Addresses 20..23: banked r13 for FIQ, supervisor, IRQ and undefined modes.
    constant fiq_r13 : std_logic_vector(4 downto 0) := "10100";
    constant sup_r13 : std_logic_vector(4 downto 0) := "10101";
    constant irq_r13 : std_logic_vector(4 downto 0) := "10110";
    constant und_r13 : std_logic_vector(4 downto 0) := "10111";
    -- Addresses 24..27: banked r14 for the same four modes.
    constant fiq_r14 : std_logic_vector(4 downto 0) := "11000";
    constant sup_r14 : std_logic_vector(4 downto 0) := "11001";
    constant irq_r14 : std_logic_vector(4 downto 0) := "11010";
    constant und_r14 : std_logic_vector(4 downto 0) := "11011";
    -- Addresses 28..31: per-mode SPSR, held in the same address space as the
    -- general-purpose registers.
    constant fiq_spsr : std_logic_vector(4 downto 0) := "11100";
    constant sup_spsr : std_logic_vector(4 downto 0) := "11101";
    constant irq_spsr : std_logic_vector(4 downto 0) := "11110";
    constant und_spsr : std_logic_vector(4 downto 0) := "11111";

    -- Finite state machine inside the Decode pipeline stage.
    -- MAIN_STATE is the state entered on reset.
    type DECODE_FSM is (MAIN_STATE, RETURN_FROM_EXCEPTION, TWO_LATENCY_CYCLES, ONE_LATENCY_CYCLE, LOADSTORE_WRITEBACK,
                        LDMSTM_TRANSFER, LDMSTM_RETURN_FROM_EXCEPTION, LDMSTM_WRITEBACK,
                        RESET_CYCLE2, UNDEF_CYCLE2, SWI_CYCLE2, IRQ_CYCLE2, FIQ_CYCLE2);

    -- List of arithmetic and logical operations which can be performed in the execute pipeline stage.
    -- ALU_RWF is the operation the decode stage selects when the status flags
    -- are read and/or rewritten (reset, FIQ/IRQ entry, MSR to CPSR).
    type ALU_OPERATION is (ALU_NOP, ALU_NOT, ALU_ORR, ALU_AND, ALU_EOR, ALU_BIC, ALU_RWF, ALU_ADD, ALU_ADC, ALU_SUB, ALU_SBC, ALU_RSB, ALU_RSC);

    -- List of memory-related operations that can be performed in the memory pipeline stage
    type MEM_OPERATION is (NO_MEM_OP, LOAD_WORD, LOAD_BYTE, LOAD_BURST, STORE_WORD, STORE_BYTE);

end package;

-- The package declares no subprograms or deferred constants, so the body is empty.
package body arm_types is

end package body;
trunk/hdl/arm_types.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/cache.vhd =================================================================== --- trunk/hdl/cache.vhd (nonexistent) +++ trunk/hdl/cache.vhd (revision 2) @@ -0,0 +1,285 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- cache.vhd -- A cache with an Avalon master interface. Only for instruction and direct-mapped for now. +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
library IEEE;
use IEEE.std_logic_1164.all;
-- NOTE(review): std_logic_unsigned is a non-standard package; it appears to be
-- needed here for "r_burstoffset + 1" on a std_logic_vector.
use IEEE.std_logic_unsigned.all;
use IEEE.numeric_std.all;

library altera_mf;
use altera_mf.all;

library work;
use work.utils.all;

-- Direct-mapped, read-only cache with an Avalon master interface on the
-- memory side.  On a miss a whole block is fetched with a single burst read
-- of C_BLOCK_SIZE/4 words, written into the data SRAM beat by beat, and the
-- tag entry is marked valid when the last beat arrives.
entity cache is
    generic(
        INSTR_BADDR_BITWDTH : natural := 32; -- input coe_cpu_address width in bits
        BLOCK_BITWIDTH : natural := 5; -- byte address range of a block (hence C_BLOCK_SIZE = 2**BLOCK_BITWIDTH)
        CACHE_SIZE : natural := 4096 -- cache size in bytes, must be a multiple of C_BLOCK_SIZE (one way only; NOTE(review): presumably also a power of two, since log2() is applied to it -- confirm against work.utils.log2)
    );
    port(
        -- Globals
        clk : in std_logic;
        reset : in std_logic; -- asynchronous, active high

        -- CPU conduit extern
        coe_cpu_enabled : in std_logic; -- fetches a new instruction. If deactivated, the last read is kept on the output.
        coe_cpu_flush : in std_logic := '0'; -- flushes the cache line addressed by "coe_cpu_address" and cancels any pending read
        coe_cpu_address : in std_logic_vector(INSTR_BADDR_BITWDTH-1 downto 0); -- byte address
        coe_cpu_readdata : out std_logic_vector(31 downto 0);
        coe_cpu_miss : out std_logic; -- high while the registered request misses in the cache

        --Avalon Master Interface
        avm_waitrequest : in std_logic;
        avm_readdatavalid : in std_logic;
        avm_readdata : in std_logic_vector(31 downto 0);
        avm_read : out std_logic;
        avm_burstcount : out std_logic_vector(BLOCK_BITWIDTH-2 downto 0);
        avm_address : out std_logic_vector(31 downto 0)
    );
end cache;

architecture synth of cache is
-- derived geometry
constant C_BLOCK_SIZE : natural := 2**BLOCK_BITWIDTH; -- block size in bytes
constant C_SET_COUNT : natural := CACHE_SIZE / C_BLOCK_SIZE; -- number of cache lines
constant C_INDEX_BITWIDTH : natural := log2(C_SET_COUNT); -- address bits selecting a line
constant C_TAG_BITWIDTH : natural := INSTR_BADDR_BITWDTH - BLOCK_BITWIDTH - C_INDEX_BITWIDTH; -- remaining upper address bits
constant C_DATA_WADDR_BITWIDTH : natural := log2(CACHE_SIZE)-2; -- addressable words in the data sram

-- registered coe_cpu_address (without the 2 lsb)
signal r_address : std_logic_vector(INSTR_BADDR_BITWDTH-3 downto 0);
-- high when a lookup was requested on the previous cycle (or a miss is pending)
signal r_read : std_logic;
-- registered flush command
signal r_flush : std_logic;
-- the current offset in a burst
signal r_burstoffset : std_logic_vector(log2(C_BLOCK_SIZE)-3 downto 0);
-- the tag and valid bit (valid bit is the least significant bit)
signal s_vtag_in : std_logic_vector(C_TAG_BITWIDTH DOWNTO 0);
signal s_vtag_out : std_logic_vector(C_TAG_BITWIDTH DOWNTO 0);
-- signals to the tag and data srams
signal s_data_wren : std_logic; -- NOTE(review): declared but not driven or read in this architecture
signal s_data_rdaddr : std_logic_vector(C_DATA_WADDR_BITWIDTH-1 downto 0);
signal s_data_wraddr : std_logic_vector(C_DATA_WADDR_BITWIDTH-1 downto 0);
signal s_tag_wren : std_logic;
signal s_tag_rdaddr : std_logic_vector(C_INDEX_BITWIDTH-1 downto 0);
signal s_tag_wraddr : std_logic_vector(C_INDEX_BITWIDTH-1 downto 0);
signal s_addr_stall : std_logic;
signal s_miss : std_logic;

-- Refill state machine:
--   S_READY : normal lookups; a miss raises avm_read
--   S_WAIT  : the read request is held until avm_waitrequest drops
--   S_READ  : burst beats are being received
--   S_DELAY : one idle cycle after the last beat before returning to S_READY
type state_type is (S_READY, S_WAIT, S_READ, S_DELAY);
signal state, nextstate : state_type; -- NOTE(review): nextstate is never used

-- SRAM component declaration
component altsyncram
generic (
    address_reg_b : STRING;
    clock_enable_input_a : STRING;
    clock_enable_input_b : STRING;
    clock_enable_output_a : STRING;
    clock_enable_output_b : STRING;
    intended_device_family : STRING;
    lpm_type : STRING;
    numwords_a : NATURAL;
    numwords_b : NATURAL;
    operation_mode : STRING;
    outdata_aclr_b : STRING;
    outdata_reg_b : STRING;
    power_up_uninitialized : STRING;
    read_during_write_mode_mixed_ports : STRING;
    widthad_a : NATURAL;
    widthad_b : NATURAL;
    width_a : NATURAL;
    width_b : NATURAL;
    width_byteena_a : NATURAL
);
port (
    addressstall_b : IN STD_LOGIC ;
    wren_a : IN STD_LOGIC ;
    clock0 : IN STD_LOGIC ;
    clock1 : IN STD_LOGIC ;
    address_a : IN STD_LOGIC_VECTOR (widthad_a-1 DOWNTO 0);
    address_b : IN STD_LOGIC_VECTOR (widthad_b-1 DOWNTO 0);
    q_b : OUT STD_LOGIC_VECTOR (width_b-1 DOWNTO 0);
    data_a : IN STD_LOGIC_VECTOR (width_a-1 downto 0)
);
end component;

begin
coe_cpu_miss <= s_miss;
-- we do not have a coe_cpu_miss when flushing, or when the tag matches a valid entry
-- (s_vtag_in carries a '1' valid bit outside of a flush, so the comparison also checks validity)
s_miss <= '0' when r_read='0' or (coe_cpu_flush or r_flush)='1' or s_vtag_in=s_vtag_out else '1'; -- TODO: to be modified for multiple ways

-- the burstcount is fixed: one whole block, counted in 32-bit words
avm_burstcount <= std_logic_vector(to_unsigned(C_BLOCK_SIZE/4, avm_burstcount'length));
-- block-aligned byte address of the missing line, zero-extended to 32 bits
avm_address <= (31 downto INSTR_BADDR_BITWDTH =>'0') & r_address(INSTR_BADDR_BITWDTH-3 downto BLOCK_BITWIDTH-2) & (BLOCK_BITWIDTH-1 downto 0 => '0');

-- signals to the data and tag srams
-- the read address registers are frozen during a miss or when the CPU is not fetching
s_addr_stall <= s_miss or not coe_cpu_enabled;
-- reads are addressed straight from the CPU address (word index within the cache)
s_data_rdaddr <= coe_cpu_address(C_DATA_WADDR_BITWIDTH+1 downto 2);
-- writes go to the line of the registered address, at the current burst offset
s_data_wraddr <= r_address(C_DATA_WADDR_BITWIDTH-1 downto BLOCK_BITWIDTH-2) & r_burstoffset;
s_tag_rdaddr <= coe_cpu_address(C_DATA_WADDR_BITWIDTH+1 downto BLOCK_BITWIDTH);
s_tag_wraddr <= r_address(C_DATA_WADDR_BITWIDTH-1 downto BLOCK_BITWIDTH-2);
-- s_tag_wren and s_vtag_in
process(r_address, r_flush, r_burstoffset, avm_readdatavalid)
begin
    -- default: no tag write; candidate entry is the tag of r_address with the valid bit set
    s_tag_wren <= '0';
    s_vtag_in <= r_address(INSTR_BADDR_BITWDTH-3 downto INSTR_BADDR_BITWDTH-C_TAG_BITWIDTH-2) & '1';
    if (r_flush = '1') then
        -- flush: overwrite the entry with zeros, which clears its valid bit
        s_tag_wren <= '1';
        s_vtag_in <= (others => '0');
    elsif (r_burstoffset = (r_burstoffset'range => '1') and avm_readdatavalid='1') then
        -- last beat of the burst: the line is complete, record its tag as valid
        s_tag_wren <= '1';
    end if;
end process;

-- refill state machine and request registers
process(reset, clk)
begin
    if (reset = '1') then
        -- NOTE(review): r_address is not reset here
        r_burstoffset <= (others => '0');
        state <= S_READY;
        r_flush <= '0';
        r_read <= '0';
    elsif (rising_edge(clk)) then
        r_read <= coe_cpu_enabled or s_miss; -- in case of miss we fix r_read to 1.
        case state is
            when S_READY =>
                r_flush <= coe_cpu_flush;
                if (s_miss = '1') then
                    -- avm_read is asserted combinationally in this state; move on
                    -- according to whether the request was accepted this cycle
                    if (avm_waitrequest = '1') then
                        state <= S_WAIT;
                    else
                        state <= S_READ;
                    end if;
                else
                    -- in case of a coe_cpu_miss the coe_cpu_address is unchanged
                    if (coe_cpu_enabled = '1' or coe_cpu_flush='1') then
                        r_address <= coe_cpu_address(INSTR_BADDR_BITWDTH-1 downto 2);
                    end if;
                end if;
                -- cleared here; the increment at the end of the process takes
                -- precedence if a beat arrives in the same cycle
                r_burstoffset <= (others => '0');

            when S_WAIT =>
                if (avm_waitrequest = '0') then
                    state <= S_READ;
                end if;

            when S_READ =>
                -- leave once the last word of the block has been received
                if (r_burstoffset = (r_burstoffset'range => '1') and avm_readdatavalid='1') then
                    state <= S_DELAY;
                end if;

            when S_DELAY =>
                -- NOTE(review): presumably gives the SRAMs one cycle to present the
                -- freshly written line before lookups resume -- confirm
                state <= S_READY;

        end case;

        -- update r_burst_offset (placed after the case so it overrides the clear above)
        if (avm_readdatavalid='1') then
            r_burstoffset <= r_burstoffset + 1;
        end if;
    end if;
end process;

-- Avalon read strobe: raised on a miss and held while the request is stalled
process(state, s_miss)
begin
    case state is
        when S_READY =>
            avm_read <= s_miss;
        when S_WAIT =>
            avm_read <= '1';
        when others =>
            avm_read <= '0';
    end case;
end process;


-- Data SRAM: port A is written with every valid burst beat, port B is read by the CPU
g_data_sram : altsyncram
    GENERIC MAP (
        address_reg_b => "CLOCK1",
        clock_enable_input_a => "BYPASS",
        clock_enable_input_b => "BYPASS",
        clock_enable_output_a => "BYPASS",
        clock_enable_output_b => "BYPASS",
        intended_device_family => "Cyclone IV E",
        lpm_type => "altsyncram",
        numwords_a => CACHE_SIZE/4,
        numwords_b => CACHE_SIZE/4,
        operation_mode => "DUAL_PORT",
        outdata_aclr_b => "NONE",
        outdata_reg_b => "UNREGISTERED",
        power_up_uninitialized => "FALSE",
        read_during_write_mode_mixed_ports => "DONT_CARE",
        widthad_a => C_DATA_WADDR_BITWIDTH,
        widthad_b => C_DATA_WADDR_BITWIDTH,
        width_a => 32,
        width_b => 32,
        width_byteena_a => 1
    )
    PORT MAP (
        addressstall_b => s_addr_stall,
        wren_a => avm_readdatavalid,
        clock0 => clk,
        clock1 => clk,
        address_a => s_data_wraddr,
        address_b => s_data_rdaddr,
        data_a => avm_readdata,
        q_b => coe_cpu_readdata
    );


-- Tag SRAM: one entry (tag & valid bit) per cache line
g_tag_sram : altsyncram
    GENERIC MAP (
        address_reg_b => "CLOCK1",
        clock_enable_input_a => "BYPASS",
        clock_enable_input_b => "BYPASS",
        clock_enable_output_a => "BYPASS",
        clock_enable_output_b => "BYPASS",
        intended_device_family => "Cyclone IV E",
        lpm_type => "altsyncram",
        numwords_a => C_SET_COUNT,
        numwords_b => C_SET_COUNT,
        operation_mode => "DUAL_PORT",
        outdata_aclr_b => "NONE",
        outdata_reg_b => "UNREGISTERED",
        power_up_uninitialized => "FALSE",
        read_during_write_mode_mixed_ports => "DONT_CARE",
        widthad_a => C_INDEX_BITWIDTH,
        widthad_b => C_INDEX_BITWIDTH,
        width_a => C_TAG_BITWIDTH+1,
        width_b => C_TAG_BITWIDTH+1,
        width_byteena_a => 1
    )
    PORT MAP (
        addressstall_b => s_addr_stall,
        wren_a => s_tag_wren,
        clock0 => clk,
        clock1 => clk,
        address_a => s_tag_wraddr,
        address_b => s_tag_rdaddr,
        data_a => s_vtag_in,
        q_b => s_vtag_out
    );

end synth;
trunk/hdl/cache.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/decode.vhd =================================================================== --- trunk/hdl/decode.vhd (nonexistent) +++ trunk/hdl/decode.vhd (revision 2) @@ -0,0 +1,1268 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- decode.vhd -- Description of the decode pipeline stage +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
+ + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use work.arm_types.all; + +entity decode is +port( + clk : in std_logic; + reset_n : in std_logic; + + fiq : in std_logic; + irq : in std_logic; + flush : in std_logic; + low_flags : in std_logic_vector(5 downto 0); + decode_stage_valid : in std_logic; + inst_cache_miss : in std_logic; + dec_pc_plus_4 : in unsigned(31 downto 0); + dec_pc_plus_8 : in unsigned(31 downto 0); + + -- instruction being decoded + inst_data : in std_logic_vector(31 downto 0); + + -- regiser file adresses + rfile_A_adr : out std_logic_vector(4 downto 0); + rfile_B_adr : out std_logic_vector(4 downto 0); + rfile_C_adr : out std_logic_vector(4 downto 0); + + -- register operands + exe_A_adr : out std_logic_vector(5 downto 0); + exe_B_adr : out std_logic_vector(5 downto 0); + exe_C_adr : out std_logic_vector(5 downto 0); + exe_pc_plus_4 : out unsigned(31 downto 0); + exe_pc_plus_8 : out unsigned(31 downto 0); + + -- output of the latch + exe_stage_valid : out std_logic; + exe_barrelshift_operand : out std_logic; + exe_barrelshift_type : out std_logic_vector(1 downto 0); + exe_literal_shift_amnt : out std_logic_vector(4 downto 0); + exe_literal_data : out std_logic_vector(23 downto 0); + exe_opb_is_literal : out std_logic; + exe_opb_sel : out std_logic; + exe_alu_operation : out ALU_OPERATION; + exe_condition : out std_logic_vector(3 downto 0); + exe_affect_sflags : out std_logic; + exe_data_sel : out std_logic; + exe_rdest_wren : out std_logic; + exe_rdest_adr : out std_logic_vector(4 downto 0); + exe_branch_en : out std_logic; + exe_wb_sel : out std_logic; + exe_mem_ctrl : out MEM_OPERATION; + exe_mem_burstcount : out std_logic_vector(3 downto 0); + + -- 1 if the stage has completed an instruction for this cycle, 0 otherwise + decode_blocked_n : out std_logic; + -- enable signal for latch after the fetch stage + decode_latch_enable : in std_logic +); +end entity decode; + +architecture rtl of decode is + + signal 
state, next_state : DECODE_FSM; + + signal barrelshift_operand : std_logic; + signal barrelshift_type : std_logic_vector(1 downto 0); + signal literal_shift_amnt : std_logic_vector(4 downto 0); + signal literal_data : std_logic_vector(23 downto 0); + signal opb_is_literal : std_logic; + signal opb_sel : std_logic; + signal alu_operation : ALU_OPERATION; + signal condition : std_logic_vector(3 downto 0); + signal affect_sflags : std_logic; + signal data_sel : std_logic; + signal rdest_wren : std_logic; + signal rdest_adr : std_logic_vector(4 downto 0); + signal branch_en : std_logic; + signal wb_sel : std_logic; + signal mem_ctrl : MEM_OPERATION; + signal mem_burstcount : std_logic_vector(3 downto 0); + signal stage_active : std_logic; + + signal rfA_adr : std_logic_vector(5 downto 0); + signal rfB_adr : std_logic_vector(5 downto 0); + signal rfC_adr : std_logic_vector(5 downto 0); + + signal i, f, reset_l : std_logic; + signal mode : std_logic_vector(3 downto 0); + + signal current_inst, current_inst_l : std_logic_vector(31 downto 0); + + signal ldmstm_cur_bitmask : std_logic_vector(15 downto 0); + signal ldmstm_next_bitmask : std_logic_vector(15 downto 0); + signal ldmstm_current_reg : integer range 0 to 15; + + -- remap register adresses (4 bits) to actual registers (5 adress bits) + function address_remap(adr : std_logic_vector(3 downto 0); + mode : std_logic_vector(3 downto 0)) return std_logic_vector is + begin + -- source : http://www.heyrick.co.uk/armwiki/The_Status_register + -- highest bit of "mode" is always 1 and is not implemented + case mode is + when "0000" => + --user mode + case adr is + when "0000" => return r0; + when "0001" => return r1; + when "0010" => return r2; + when "0011" => return r3; + when "0100" => return r4; + when "0101" => return r5; + when "0110" => return r6; + when "0111" => return r7; + when "1000" => return r8; + when "1001" => return r9; + when "1010" => return r10; + when "1011" => return r11; + when "1100" => return r12; + 
when "1101" => return r13; + when "1110" => return r14; + when others => return "-----"; + end case; + + when "0001" => + -- FIQ mode + case adr is + when "0000" => return r0; + when "0001" => return r1; + when "0010" => return r2; + when "0011" => return r3; + when "0100" => return r4; + when "0101" => return r5; + when "0110" => return r6; + when "0111" => return r7; + when "1000" => return fiq_r8; + when "1001" => return fiq_r9; + when "1010" => return fiq_r10; + when "1011" => return fiq_r11; + when "1100" => return fiq_r12; + when "1101" => return fiq_r13; + when "1110" => return fiq_r14; + when others => return "-----"; + end case; + + when "0010" => + -- IRQ mode + case adr is + when "0000" => return r0; + when "0001" => return r1; + when "0010" => return r2; + when "0011" => return r3; + when "0100" => return r4; + when "0101" => return r5; + when "0110" => return r6; + when "0111" => return r7; + when "1000" => return r8; + when "1001" => return r9; + when "1010" => return r10; + when "1011" => return r11; + when "1100" => return r12; + when "1101" => return irq_r13; + when "1110" => return irq_r14; + when others => return "-----"; + end case; + + when "0011" => + -- Supervisor (software interrupt) mode + case adr is + when "0000" => return r0; + when "0001" => return r1; + when "0010" => return r2; + when "0011" => return r3; + when "0100" => return r4; + when "0101" => return r5; + when "0110" => return r6; + when "0111" => return r7; + when "1000" => return r8; + when "1001" => return r9; + when "1010" => return r10; + when "1011" => return r11; + when "1100" => return r12; + when "1101" => return sup_r13; + when "1110" => return sup_r14; + when others => return "-----"; + end case; + + when "1011" => + -- Undefined instruction mode + case adr is + when "0000" => return r0; + when "0001" => return r1; + when "0010" => return r2; + when "0011" => return r3; + when "0100" => return r4; + when "0101" => return r5; + when "0110" => return r6; + when "0111" 
=> return r7; + when "1000" => return r8; + when "1001" => return r9; + when "1010" => return r10; + when "1011" => return r11; + when "1100" => return r12; + when "1101" => return und_r13; + when "1110" => return und_r14; + when others => return "-----"; + end case; + + when others => return "-----"; + end case; + end; + + function address_remap_pc(adr : std_logic_vector(3 downto 0); + mode : std_logic_vector(3 downto 0)) return std_logic_vector is + begin + -- adr is PC + if adr = "1111" + then + return "1-----"; + else + return "0" & address_remap(adr, mode); + end if; + end; + + function address_remap_spsr(mode : std_logic_vector(3 downto 0)) return std_logic_vector is + begin + case mode is + -- FIQ mode + when "0001" => return fiq_spsr; + -- IRQ mode + when "0010" => return irq_spsr; + -- Supervisor (software interrupt) mode + when "0011" => return sup_spsr; + -- Undefined instruction mode + when "1011" => return und_spsr; + -- user mode and invalid mode - undefined implementation + when others => return "-----"; + end case; + end; + + function count_set_bits(word : std_logic_vector) return integer is + variable i, res : integer; + begin + res := 0; + for i in 0 to word'length - 1 loop + if word(i) = '1' + then + res := res + 1; + end if; + end loop; + return res; + end; + + function find_rightmost_bit(word : std_logic_vector(15 downto 0)) return integer is + variable i : integer; + begin + i := 0; + while i < 15 and word(i) = '0' loop + i := i + 1; + end loop; + return i; + end; + +begin + -- separate low flags bits + i <= low_flags(5); + f <= low_flags(4); + mode <= low_flags(3 downto 0); + + stage_active <= '1' when inst_cache_miss = '0' and decode_stage_valid = '1' and decode_latch_enable = '1' else '0'; + + -- instruction latch + process(clk) is + begin + if rising_edge(clk) + then + current_inst_l <= current_inst; + end if; + end process; + current_inst <= inst_data when stage_active = '1' and state = MAIN_STATE else current_inst_l; + + -- output latch 
-- Decode -> execute pipeline register.
-- Latches every decoded control signal when decode_latch_enable is set.
-- A flush clears the valid bit of the execute stage; because this test comes
-- last in the process, it takes priority over the value latched just above.
process(clk) is
begin
    if rising_edge(clk)
    then
        if decode_latch_enable = '1'
        then
            exe_A_adr <= rfA_adr;
            exe_B_adr <= rfB_adr;
            exe_C_adr <= rfC_adr;
            exe_pc_plus_8 <= dec_pc_plus_8;
            exe_pc_plus_4 <= dec_pc_plus_4;

            exe_stage_valid <= stage_active;
            exe_barrelshift_operand <= barrelshift_operand;
            exe_barrelshift_type <= barrelshift_type;
            exe_literal_shift_amnt <= literal_shift_amnt;
            exe_literal_data <= literal_data;
            exe_opb_is_literal <= opb_is_literal;
            exe_opb_sel <= opb_sel;
            exe_alu_operation <= alu_operation;
            exe_condition <= condition;
            exe_affect_sflags <= affect_sflags;
            exe_data_sel <= data_sel;
            exe_rdest_wren <= rdest_wren;
            exe_rdest_adr <= rdest_adr;
            exe_branch_en <= branch_en;
            exe_wb_sel <= wb_sel;
            exe_mem_ctrl <= mem_ctrl;
            exe_mem_burstcount <= mem_burstcount;

            ldmstm_cur_bitmask <= ldmstm_next_bitmask;
        end if;
        if flush = '1'
        then
            exe_stage_valid <= '0';
        end if;
    end if;
end process;

-- Decoder state register: asynchronously reset to MAIN_STATE, and only
-- advanced while the stage is active.
fsm : process(clk, reset_n) is
begin
    if reset_n = '0'
    then
        state <= MAIN_STATE;
    elsif rising_edge(clk)
    then
        if stage_active = '1'
        then
            state <= next_state;
        end if;
    end if;
end process fsm;

-- reset interrupt happens one cycle after reset_n is deasserted
-- (reset_l stays high until the first active cycle after reset)
resetl : process(clk, reset_n) is
begin
    if reset_n = '0'
    then
        reset_l <= '1';
    elsif rising_edge(clk)
    then
        if stage_active = '1'
        then
            reset_l <= '0';
        end if;
    end if;
end process resetl;

-- Lowest-numbered register of the LDM/STM list that has not been masked out yet
ldmstm_current_reg <= find_rightmost_bit(current_inst(15 downto 0) and ldmstm_cur_bitmask);

-- Decoding matrix : please consult the decode matrix spreadsheet for more information about how this works
--
-- Purely combinational process. Every output first receives a default value
-- (mostly don't care); the branch matching the current state and instruction
-- then overrides what it needs. Within the process the last assignment to a
-- signal wins.
decode : process(state, flush, current_inst, low_flags, reset_l, irq, fiq, mode, f, i, stage_active, ldmstm_cur_bitmask, ldmstm_current_reg, ldmstm_next_bitmask) is
begin
    -- default instruction condition
    condition <= current_inst(31 downto 28);

    -- behaviour in case of cache miss (the state should not be changed, and other values are don't care)
    next_state <= state;

    -- using PC as an "unused" register is good in the way it prevents forwarding to stall the processor without reason
    rfA_adr <= "1-----";
    rfB_adr <= "1-----";
    rfC_adr <= "1-----";

    rdest_adr <= (others=>'-');
    barrelshift_operand <= '-';
    barrelshift_type <= (others=>'-');
    literal_shift_amnt <= (others=>'-');
    literal_data <= (others=>'-');
    opb_is_literal <= '-';
    opb_sel <= '-';
    alu_operation <= ALU_NOP;
    affect_sflags <= '-';
    rdest_wren <= '-';
    data_sel <= '-';
    branch_en <= '-';
    wb_sel <= '-';
    mem_ctrl <= NO_MEM_OP;
    mem_burstcount <= "----";
    ldmstm_next_bitmask <= (others=>'1');

    if flush = '1'
    then
        next_state <= MAIN_STATE;
    elsif stage_active = '1'
    then
        if state = MAIN_STATE or state = RETURN_FROM_EXCEPTION or state = LDMSTM_TRANSFER
        then
            -- reset interrupt - used to move into user mode
            if reset_l = '1'
            then
                condition <= "1110";
                next_state <= RESET_CYCLE2;
                alu_operation <= ALU_RWF;
                affect_sflags <= '1';
                branch_en <= '0';
                rdest_wren <= '0';
                mem_ctrl <= NO_MEM_OP;

                -- Enter USR mode, clears IRQ and FIQ flags
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= (others => '0');
                literal_data <= (23 downto 8 => '0') & "00010000";
                opb_is_literal <= '1';
                opb_sel <= '0';

            -- See if a FIQ interrupt is pending
            elsif state = MAIN_STATE and f = '0' and fiq = '1'
            then
                condition <= "1110"; -- force ALWAYS condition, so that the FIQ is always executed !
                next_state <= FIQ_CYCLE2;
                -- read CPSR and save it in fiq_SPSR, and also write new value to CPSR at the same time
                alu_operation <= ALU_RWF;
                affect_sflags <= '1';
                data_sel <= '1';
                branch_en <= '0';
                rdest_wren <= '1';
                wb_sel <= '0';
                mem_ctrl <= NO_MEM_OP;
                rdest_adr <= fiq_SPSR;

                -- Enter FIQ mode, set FIQ and IRQ flags
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= (others => '0');
                literal_data <= (23 downto 8 => '0') & "11010001";
                opb_is_literal <= '1';
                opb_sel <= '0';

            -- See if an IRQ interrupt is pending
            elsif state = MAIN_STATE and i = '0' and irq = '1'
            then
                condition <= "1110"; -- force ALWAYS condition, so that the IRQ is always executed !
                next_state <= IRQ_CYCLE2;
                -- read CPSR and save it in irq_SPSR, and also write new value to CPSR at the same time
                alu_operation <= ALU_RWF;
                affect_sflags <= '1';
                data_sel <= '1';
                branch_en <= '0';
                rdest_wren <= '1';
                wb_sel <= '0';
                mem_ctrl <= NO_MEM_OP;
                rdest_adr <= irq_SPSR;

                -- Enter IRQ mode, set IRQ flag, clear FIQ flag
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= (others => '0');
                literal_data <= (23 downto 8 => '0') & "10010010";
                opb_is_literal <= '1';
                opb_sel <= '0';

            -- ** MUL/MLA instructions **
            elsif current_inst(27 downto 22) = "000000" and current_inst(7 downto 4) = "1001"
            then
                -- single cycle instruction
                next_state <= MAIN_STATE;

                -- ALU adds (bit 21 set) or does nothing (bit 21 clear)
                if current_inst(21) = '0'
                then
                    alu_operation <= ALU_NOP;
                else
                    alu_operation <= ALU_ADD;
                end if;

                -- use multiplier
                opb_sel <= '1';
                affect_sflags <= current_inst(20);
                data_sel <= '1';
                branch_en <= '0';
                rdest_wren <= '1';
                wb_sel <= '0';
                -- no memory access
                mem_ctrl <= NO_MEM_OP;

                rdest_adr <= address_remap(current_inst(19 downto 16), mode);
                rfA_adr <= address_remap_pc(current_inst(15 downto 12), mode);
                rfB_adr <= address_remap_pc(current_inst(3 downto 0), mode);
                rfC_adr <= address_remap_pc(current_inst(11 downto 8), mode);

            -- ** MSR instruction **
            elsif (current_inst(27 downto 26) = "00" and current_inst(24 downto 23) = "10"
                and current_inst(21 downto 20) = "10" and current_inst(15 downto 12) = "1111")
                and (current_inst(25) = '1' or current_inst(11 downto 4) = X"00")
            then
                if current_inst(22) = '0'
                then
                    -- ** write to CPSR **
                    alu_operation <= ALU_RWF;
                    -- wait an additional cycle, so that the decoding of next instruction is done in correct mode
                    next_state <= ONE_LATENCY_CYCLE;
                    -- don't write any register, write to flags
                    affect_sflags <= '1';
                    rdest_wren <= '0';
                else
                    -- ** write to SPSR **
                    alu_operation <= ALU_NOP;
                    -- no need for a latency cycle, don't write flags, but need for writeback
                    next_state <= MAIN_STATE;
                    affect_sflags <= '0';
                    rdest_wren <= '1';
                end if;
                data_sel <= '1';
                wb_sel <= '0';
                rdest_adr <= address_remap_spsr(mode);

                -- use barrel shifter
                opb_sel <= '0';
                branch_en <= '0';
                -- no memory access
                mem_ctrl <= NO_MEM_OP;

                -- rotate by literal value
                barrelshift_operand <= '0';
                if current_inst(25) = '1'
                then
                    -- rotated constant addressing
                    if current_inst(11 downto 8) = "0000" then
                        -- no rotation - LSL #0
                        barrelshift_type <= "00";
                        literal_shift_amnt <= (others => '0');
                    else
                        -- use ROR barrelshift to compute rotated constant
                        literal_shift_amnt <= current_inst(11 downto 8) & '0';
                        barrelshift_type <= "11";
                    end if;
                    opb_is_literal <= '1';
                    literal_data <= (23 downto 8 => '0') & current_inst(7 downto 0);
                else
                    -- register addressing, use LSL #0 (i.e. no modification of the register)
                    barrelshift_type <= "00";
                    literal_shift_amnt <= (others => '0');
                    opb_is_literal <= '0';
                end if;
                rfB_adr <= address_remap_pc(current_inst(3 downto 0), mode);

            -- ** MRS instruction **
            elsif current_inst(27 downto 23) = "00010" and current_inst(21 downto 16) = "001111" and current_inst(11 downto 0) = X"000"
            then
                next_state <= MAIN_STATE;

                if current_inst(22) = '0'
                then
                    -- ** Read CPSR **
                    alu_operation <= ALU_RWF;
                else
                    -- ** Read SPSR **, use LSL #0 (i.e. no modification of the register)
                    alu_operation <= ALU_NOP;
                end if;

                opb_sel <= '0';
                rfB_adr <= '0' & address_remap_spsr(mode);
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= (others => '0');
                opb_is_literal <= '0';

                -- don't affect sflags, always write back ALU data
                affect_sflags <= '0';
                data_sel <= '1';
                branch_en <= '0';
                rdest_wren <= '1';
                wb_sel <= '0';
                mem_ctrl <= NO_MEM_OP;
                rdest_adr <= address_remap(current_inst(15 downto 12), mode);

            -- ** data processing instructions **
            elsif current_inst(27 downto 26) = "00"
                and ((current_inst(4)='0' or (current_inst(4)='1' and current_inst(7)='0')) or current_inst(25)='1')
            then
                -- this is (normally) a single cycle instruction
                next_state <= MAIN_STATE;

                case current_inst(24 downto 21) is
                    -- ADC instruction
                    when "0101" => alu_operation <= ALU_ADC;
                    -- ADD instruction
                    when "0100" => alu_operation <= ALU_ADD;
                    -- AND instruction
                    when "0000" => alu_operation <= ALU_AND;
                    -- BIC instruction
                    when "1110" => alu_operation <= ALU_BIC;
                    -- CMN instruction
                    when "1011" => alu_operation <= ALU_ADD;
                    -- CMP instruction
                    when "1010" => alu_operation <= ALU_SUB;
                    -- EOR instruction
                    when "0001" => alu_operation <= ALU_EOR;
                    -- MVN instruction
                    when "1111" => alu_operation <= ALU_NOT;
                    -- ORR instruction
                    when "1100" => alu_operation <= ALU_ORR;
                    -- RSB instruction
                    when "0011" => alu_operation <= ALU_RSB;
                    -- RSC instruction
                    when "0111" => alu_operation <= ALU_RSC;
                    -- SBC instruction
                    when "0110" => alu_operation <= ALU_SBC;
                    -- SUB instruction
                    when "0010" => alu_operation <= ALU_SUB;
                    -- TEQ instruction
                    when "1001" => alu_operation <= ALU_EOR;
                    -- TST instruction
                    when "1000" => alu_operation <= ALU_AND;
                    -- MOV instruction
                    when others => alu_operation <= ALU_NOP;
                end case;

                -- use barrelshifter
                opb_sel <= '0';

                -- CMP, CMN, TEQ and TST instructions always affect sflags, never write enable
                if current_inst(24 downto 23) = "10"
                then
                    affect_sflags <= '1';
                    rdest_wren <= '0';
                else
                    -- other instructions
                    affect_sflags <= current_inst(20);
                    if current_inst(15 downto 12) = "1111"
                    then
                        rdest_wren <= '0';
                    else
                        rdest_wren <= '1';
                    end if;
                end if;

                -- use ALU result, writeback from ALU, no memory operation
                data_sel <= '1';
                wb_sel <= '0';
                mem_ctrl <= NO_MEM_OP;

                literal_data <= (23 downto 8 => '0') & current_inst(7 downto 0);
                opb_is_literal <= current_inst(25);

                rdest_adr <= address_remap(current_inst(15 downto 12), mode);
                rfA_adr <= address_remap_pc(current_inst(19 downto 16), mode);
                rfB_adr <= address_remap_pc(current_inst(3 downto 0), mode);
                rfC_adr <= address_remap_pc(current_inst(11 downto 8), mode);

                -- addressing mode decoding
                if current_inst(4)='1' and current_inst(25)='0'
                then
                    barrelshift_operand <= '1';
                else
                    barrelshift_operand <= '0';
                end if;
                if current_inst(25) = '0'
                then
                    -- rotated register addressing
                    barrelshift_type <= current_inst(6 downto 5);
                    literal_shift_amnt <= current_inst(11 downto 7);
                else
                    if current_inst(11 downto 8) = "0000"
                    then
                        -- no rotation - LSL #0
                        barrelshift_type <= "00";
                        literal_shift_amnt <= (others => '0');
                    else
                        -- use ROR barrelshift to compute rotated constant
                        literal_shift_amnt <= current_inst(11 downto 8) & '0';
                        barrelshift_type <= "11";
                    end if;
                end if;

                -- is rdest = r15 = PC (hidden jump instruction) ?
                if current_inst(15 downto 12) = "1111"
                then
                    if current_inst(20) = '0'
                    then
                        branch_en <= '1';
                    else
                        -- special 2 cycle return from exception instruction
                        if state = MAIN_STATE
                        then
                            -- 1st cycle : move the SPSR into CPSR
                            affect_sflags <= '1'; -- ALWAYS affect sflags

                            -- ** Read SPSR **, use LSL #0 (i.e. no modification of the register)
                            barrelshift_operand <= '0';
                            barrelshift_type <= "00";
                            literal_shift_amnt <= (others => '0');
                            opb_is_literal <= '0';
                            rfB_adr <= '0' & address_remap_spsr(mode);

                            -- write flags and execute the real instruction on next cycle
                            alu_operation <= ALU_RWF;
                            branch_en <= '0';
                            next_state <= RETURN_FROM_EXCEPTION;

                        else
                            -- 2nd cycle of a ***S R15, **** instruction
                            -- which is used to return from an exception
                            branch_en <= '1';
                            affect_sflags <= '0'; -- do NOT affect sflags, obviously
                        end if;
                    end if;
                else
                    branch_en <= '0';
                end if;

            -- ** branch instructions **
            elsif current_inst(27 downto 25) = "101"
            then
                next_state <= MAIN_STATE;
                alu_operation <= ALU_ADD;
                opb_sel <= '0';
                affect_sflags <= '0';
                branch_en <= '1';

                data_sel <= '0';
                if(current_inst(24) = '1')
                then
                    -- BL, write PC+4 into R14 of current mode
                    rdest_wren <= '1';
                else
                    -- normal branch
                    rdest_wren <= '0';
                end if;
                wb_sel <= '0';
                rdest_adr <= address_remap("1110", mode);

                rfA_adr <= "100000";

                -- multiply operand by 4 (LSL #2)
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= "00010";
                literal_data <= current_inst(23 downto 0);
                opb_is_literal <= '1';
                mem_ctrl <= NO_MEM_OP;

            -- ** LDR(B) instruction **
            elsif current_inst(27 downto 26) = "01" and current_inst(20) = '1'
                and ((current_inst(21) = '0') or (current_inst(21) = '1' and current_inst(24) = '1'))
            then
                -- if pre or post-indexed addressing with writeback enabled, a second state is needed
                if current_inst(24) = current_inst(21)
                then
                    next_state <= LOADSTORE_WRITEBACK;
                else
                    if current_inst(15 downto 12) = "1111"
                    then -- insert two bubbles if loading into R15
                        next_state <= TWO_LATENCY_CYCLES;
                    else
                        next_state <= MAIN_STATE;
                    end if;
                end if;

                -- add/sub address (common lines for all load/store instructions cycles)
                if current_inst(23) = '0'
                then
                    alu_operation <= ALU_SUB;
                else
                    alu_operation <= ALU_ADD;
                end if;
                affect_sflags <= '0';
                opb_sel <= '0';
                barrelshift_operand <= '0';
                data_sel <= '1';

                -- branch if loading to R15, else writeback
                if current_inst(15 downto 12) = "1111"
                then
                    branch_en <= '1';
                    rdest_wren <= '0';
                else
                    branch_en <= '0';
                    rdest_wren <= '1';
                end if;

                -- write back from data bus
                wb_sel <= '1';
                rdest_adr <= address_remap(current_inst(15 downto 12), mode);

                if current_inst(22) = '1'
                then
                    mem_ctrl <= LOAD_BYTE;
                else
                    mem_ctrl <= LOAD_WORD;
                end if;
                mem_burstcount <= "0001";

                rfA_adr <= address_remap_pc(current_inst(19 downto 16), mode);
                rfB_adr <= address_remap_pc(current_inst(3 downto 0), mode);

                -- offset addressing
                if current_inst(25) = '0'
                then
                    barrelshift_type <= "00"; -- LSL #0
                    literal_shift_amnt <= (others => '0');
                    opb_is_literal <= '1';
                else
                    -- register addressing
                    barrelshift_type <= current_inst(6 downto 5);
                    literal_shift_amnt <= current_inst(11 downto 7);
                    opb_is_literal <= '0';
                end if;
                literal_data <= (23 downto 12 => '0') & current_inst(11 downto 0);

                -- post-indexed : the access itself uses a zero offset
                if current_inst(24) = '0'
                then
                    literal_data <= (others => '0');
                    opb_is_literal <= '1';
                    barrelshift_type <= "00";
                end if;

            -- ** STR(B) instruction **
            elsif current_inst(27 downto 26) = "01" and current_inst(20) = '0'
                and ((current_inst(21) = '0') or (current_inst(21) = '1' and current_inst(24) = '1'))
            then
                if current_inst(24) = '0'
                then
                    next_state <= LOADSTORE_WRITEBACK;
                else
                    next_state <= MAIN_STATE;
                end if;

                -- pre-indexing : add/sub address
                if current_inst(23) = '0'
                then
                    alu_operation <= ALU_SUB;
                else
                    alu_operation <= ALU_ADD;
                end if;

                affect_sflags <= '0';
                opb_sel <= '0';
                barrelshift_operand <= '0';
                data_sel <= '1';

                branch_en <= '0';
                wb_sel <= '0';
                rdest_adr <= address_remap(current_inst(19 downto 16), mode);
                rdest_wren <= current_inst(21);

                if current_inst(22) = '1'
                then
                    mem_ctrl <= STORE_BYTE;
                else
                    mem_ctrl <= STORE_WORD;
                end if;
                mem_burstcount <= "0001";

                rfA_adr <= address_remap_pc(current_inst(19 downto 16), mode);
                rfB_adr <= address_remap_pc(current_inst(3 downto 0), mode);
                rfC_adr <= address_remap_pc(current_inst(15 downto 12), mode);

                -- offset addressing
                if current_inst(25) = '0'
                then
                    barrelshift_type <= "00"; -- LSL #0
                    literal_shift_amnt <= (others => '0');
                    opb_is_literal <= '1';
                else
                    -- register addressing
                    barrelshift_type <= current_inst(6 downto 5);
                    literal_shift_amnt <= current_inst(11 downto 7);
                    opb_is_literal <= '0';
                end if;
                literal_data <= (23 downto 12 => '0') & current_inst(11 downto 0);

                -- post-indexed : the access itself uses a zero offset
                if current_inst(24) = '0'
                then
                    literal_data <= (others => '0');
                    opb_is_literal <= '1';
                    barrelshift_type <= "00";
                end if;

            -- LDM/STM instruction, proceed to burst transfer start
            elsif current_inst(27 downto 25) = "100" and current_inst(15 downto 0) /= x"0000"
            then
                -- ldmstm_current_reg := 0;

                -- while current_inst(ldmstm_current_reg) = '0' and ldmstm_cur_bitmask(ldmstm_current_reg) = '0' loop
                --     ldmstm_current_reg := ldmstm_current_reg + 1;
                -- end loop;

                -- Compute mask for next cycle, excluding the current register
                for n in 0 to 15
                loop
                    if n > ldmstm_current_reg
                    then
                        ldmstm_next_bitmask(n) <= '1';
                    else
                        ldmstm_next_bitmask(n) <= '0';
                    end if;
                end loop;

                -- Don't branch by default
                branch_en <= '0';

                -- Check if we are done with the LDM/STM transfer
                if (ldmstm_next_bitmask and current_inst(15 downto 0)) = x"0000"
                then
                    -- if we are loading into R15, we should branch
                    if current_inst(20) = '1' and current_inst(15) = '1'
                    then
                        branch_en <= '1';
                        -- if the 'S' flag is set, we should move SPSR to CPSR, and do a potential writeback the following cycle
                        if current_inst(22) = '1'
                        then
                            next_state <= LDMSTM_RETURN_FROM_EXCEPTION;
                        -- 'S' flag is clear, directly do the writeback, then insert a bubble
                        elsif current_inst(21) = '1'
                        then
                            next_state <= LDMSTM_WRITEBACK;
                        -- Neither of those are true -> insert two bubbles
                        else
                            next_state <= TWO_LATENCY_CYCLES;
                        end if;

                    -- Is writeback enabled in something that is not STMDB ?
                    elsif current_inst(21) = '1' and (current_inst(24 downto 23) /= "10" or current_inst(20) /= '0')
                    then
                        -- Yes -> a writeback cycle should follow
                        next_state <= LDMSTM_WRITEBACK;
                    else
                        -- No -> continue code execution
                        next_state <= MAIN_STATE;
                    end if;
                else
                    next_state <= LDMSTM_TRANSFER;
                end if;

                -- DA and DB addressing use SUB, IA and IB addressing use ADD
                if current_inst(23) = '0'
                then
                    alu_operation <= ALU_SUB;
                else
                    alu_operation <= ALU_ADD;
                end if;

                -- Use barrelshifter in all cases
                opb_sel <= '0';
                affect_sflags <= '0';
                data_sel <= '1';

                -- If we are loading a register which is not R15, writeback is enabled
                if (current_inst(20) = '1' and ldmstm_current_reg /= 15)
                    -- If base writeback is enabled and we are in a STMDB instruction, writeback is enabled
                    or (current_inst(24 downto 23) = "10" and current_inst(21 downto 20) = "10" and state = MAIN_STATE)
                then
                    rdest_wren <= '1';
                else
                    rdest_wren <= '0';
                end if;

                if current_inst(20) = '1'
                then
                    -- Load from memory in the case of LDM
                    wb_sel <= '1';

                    -- if S bit is clear or r15 is in the list (ret. from interrupt), load into current mode registers
                    if current_inst(22) = '0' or current_inst(15) = '1'
                    then
                        rdest_adr <= address_remap(std_logic_vector(to_unsigned(ldmstm_current_reg, 4) ), mode);
                    else
                        -- S bit is set and r15 is not in the list, load into user mode registers
                        rdest_adr <= address_remap(std_logic_vector(to_unsigned(ldmstm_current_reg, 4) ), "0000");
                    end if;

                    -- Start a burst if this is the 1st cycle, else continue a burst
                    if state = MAIN_STATE
                    then
                        mem_ctrl <= LOAD_WORD;
                    else
                        mem_ctrl <= LOAD_BURST;
                    end if;
                else
                    -- Write back the address (STM, only actually used in STMDB, as it's the only case where the address is correct)
                    wb_sel <= '0';

                    rdest_adr <= address_remap(current_inst(19 downto 16), mode);
                    mem_ctrl <= STORE_WORD;
                end if;

                -- Send the number of transfers that should be done
                mem_burstcount <= std_logic_vector(to_unsigned(count_set_bits(current_inst(15 downto 0)) , 4));

                rfA_adr <= address_remap_pc(current_inst(19 downto 16), mode);

                if current_inst(22) = '0'
                then
                    rfC_adr <= address_remap_pc(std_logic_vector(to_unsigned(ldmstm_current_reg, 4) ), mode);
                else
                    -- S bit is set, store user mode registers instead of current mode
                    rfC_adr <= address_remap_pc(std_logic_vector(to_unsigned(ldmstm_current_reg, 4) ), "0000");
                end if;

                -- LSL #2 : the literal below is a word count, scaled to a byte offset
                barrelshift_type <= "00";
                barrelshift_operand <= '0';
                literal_shift_amnt <= "00010";

                case current_inst(24 downto 23) is
                    -- IA
                    when "01" => literal_data <= x"000000";
                    -- IB
                    when "11" => literal_data <= x"000001";
                    -- DA
                    when "00" => literal_data <= std_logic_vector(to_signed(count_set_bits(current_inst(15 downto 0)) - 1, 24));
                    -- DB
                    when others => literal_data <= std_logic_vector(to_signed(count_set_bits(current_inst(15 downto 0)), 24));
                end case;

                opb_is_literal <= '1';

            -- SWI (software interrupt) instruction
            elsif current_inst(27 downto 24) = "1111"
            then
                next_state <= SWI_CYCLE2;
                -- read CPSR and save it in sup_SPSR, and also write new value to CPSR at the same time
                alu_operation <= ALU_RWF;
                affect_sflags <= '1';
                data_sel <= '1';
                branch_en <= '0';
                rdest_wren <= '1';
                wb_sel <= '0';
                mem_ctrl <= NO_MEM_OP;
                rdest_adr <= sup_SPSR;

                -- Enter supervisor mode, clears FIQ and IRQ flags
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= (others => '0');
                literal_data <= (23 downto 8 => '0') & "00010011";
                opb_is_literal <= '1';
                opb_sel <= '0';

            -- ** Undefined instruction **
            else
                -- Could not decode the instruction, start an undefined instruction trap interrupt
                next_state <= UNDEF_CYCLE2;
                -- read CPSR and save it in und_SPSR, and also write new value to CPSR at the same time
                alu_operation <= ALU_RWF;
                affect_sflags <= '1';
                data_sel <= '1';
                branch_en <= '0';
                rdest_wren <= '1';
                wb_sel <= '0';
                mem_ctrl <= NO_MEM_OP;
                rdest_adr <= und_SPSR;

                -- Enter UNDEF mode, clears FIQ and IRQ flags
                barrelshift_operand <= '0';
                barrelshift_type <= "00";
                literal_shift_amnt <= (others => '0');
                literal_data <= (23 downto 8 => '0') & "00011011";
                opb_is_literal <= '1';
                opb_sel <= '0';
            end if;

        elsif state = TWO_LATENCY_CYCLES
        then
            -- first of 2 dummy latency cycles, which does nothing
            next_state <= ONE_LATENCY_CYCLE;
            affect_sflags <= '0';
            rdest_wren <= '0';
            branch_en <= '0';
            mem_ctrl <= NO_MEM_OP;

        elsif state = ONE_LATENCY_CYCLE
        then
            -- a dummy latency cycle which does nothing
            next_state <= MAIN_STATE;

            affect_sflags <= '0';
            rdest_wren <= '0';
            branch_en <= '0';
            mem_ctrl <= NO_MEM_OP;

        -- Rn writeback state after a load or a store
        elsif state = LOADSTORE_WRITEBACK
        then
            -- one more latency cycle needed if loading into PC
            if current_inst(20) = '1' and current_inst(15 downto 12) = "1111"
            then
                next_state <= ONE_LATENCY_CYCLE;
            else
                next_state <= MAIN_STATE;
            end if;

            -- add/sub address
            if current_inst(23) = '1'
            then
                alu_operation <= ALU_ADD;
            else
                alu_operation <= ALU_SUB;
            end if;
            affect_sflags <= '0';
            opb_sel <= '0';
            barrelshift_operand <= '0';
            data_sel <= '1';

            branch_en <= '0';
            rdest_wren <= '1';
            wb_sel <= '0';
            rdest_adr <= address_remap(current_inst(19 downto 16), mode);
            mem_ctrl <= NO_MEM_OP;

            rfA_adr <= address_remap_pc(current_inst(19 downto 16), mode);
            rfB_adr <= address_remap_pc(current_inst(3 downto 0), mode);

            -- offset addressing
            if current_inst(25) = '0'
            then
                barrelshift_type <= "00"; -- LSL #0
                literal_shift_amnt <= (others => '0');
                literal_data <= (23 downto 12 => '0') & current_inst(11 downto 0);
                opb_is_literal <= '1';
            else
                -- register addressing
                barrelshift_type <= current_inst(6 downto 5);
                literal_shift_amnt <= current_inst(11 downto 7);
                opb_is_literal <= '0';
            end if;

        elsif state = LDMSTM_WRITEBACK
        then
            -- one more latency cycle needed if loading into PC and S flag was clear
            if current_inst(20) = '1' and current_inst(15) = '1' and current_inst(22) = '0'
            then
                next_state <= ONE_LATENCY_CYCLE;
            else
                next_state <= MAIN_STATE;
            end if;

            -- add/sub address
            if current_inst(23) = '1'
            then
                alu_operation <= ALU_ADD;
            else
                alu_operation <= ALU_SUB;
            end if;

            branch_en <= '0';
            affect_sflags <= '0';
            opb_sel <= '0';
            barrelshift_operand <= '0';
            data_sel <= '1';
            rdest_wren <= '1';
            wb_sel <= '0';
            rdest_adr <= address_remap(current_inst(19 downto 16), mode);
            mem_ctrl <= NO_MEM_OP;
            rfA_adr <= address_remap_pc(current_inst(19 downto 16), mode);

            -- LSL #2 : the literal below is a word count, scaled to a byte offset
            barrelshift_type <= "00";
            literal_shift_amnt <= "00010";

            -- Add the # of registers transferred to Rn and write it back
            literal_data <= std_logic_vector(to_signed(count_set_bits(current_inst(15 downto 0)) , 24));
            opb_is_literal <= '1';

        -- Cycle that moves SPSR -> CPSR when LDM with R15 in the list and S flag set
        elsif state = LDMSTM_RETURN_FROM_EXCEPTION
        then
            affect_sflags <= '1'; -- ALWAYS affect sflags

            -- ** Read SPSR **, use LSL #0 (i.e. no modification of the register)
            barrelshift_operand <= '0';
            barrelshift_type <= "00";
            literal_shift_amnt <= (others => '0');
            opb_is_literal <= '0';
            rfB_adr <= '0' & address_remap_spsr(mode);

            -- write flags; no register is written and no branch is taken in this cycle
            alu_operation <= ALU_RWF;
            branch_en <= '0';
            rdest_wren <= '0';

            if current_inst(21) = '1'
            then
                next_state <= LDMSTM_WRITEBACK; -- if base writeback is enabled do it
            else
                next_state <= ONE_LATENCY_CYCLE; -- else insert simply a bubble in the pipeline
            end if;

        -- 2nd cycle of an interrupt, very similar to a BL instruction
        elsif state = RESET_CYCLE2 or state = UNDEF_CYCLE2 or state = SWI_CYCLE2
            or state = IRQ_CYCLE2 or state = FIQ_CYCLE2
        then
            next_state <= MAIN_STATE;
            alu_operation <= ALU_NOP;
            opb_sel <= '0';
            affect_sflags <= '0';
            branch_en <= '1';

            -- BL, write PC+4 into R14 of the mode being entered
            data_sel <= '0';
            rdest_wren <= '1';
            wb_sel <= '0';

            barrelshift_operand <= '0';
            barrelshift_type <= "00";
            literal_shift_amnt <= (others => '0');
            -- interrupt vectors
            case state is
                when RESET_CYCLE2 =>
                    literal_data <= x"000000";
                    rdest_adr <= r14;
                    -- prevents to fetch a useless opcode on next cycle
                    next_state <= ONE_LATENCY_CYCLE;
                    condition <= "1110"; -- always

                when UNDEF_CYCLE2 =>
                    literal_data <= x"000004";
                    rdest_adr <= und_r14;

                when SWI_CYCLE2 =>
                    literal_data <= x"000008";
                    -- The first SWI cycle saved CPSR into sup_SPSR and entered
                    -- supervisor mode, so the return address belongs in the
                    -- supervisor link register (this used to write und_r14).
                    -- NOTE(review): assumes sup_r14 is declared next to sup_SPSR
                    -- in the register-name package -- confirm.
                    rdest_adr <= sup_r14;

                when IRQ_CYCLE2 =>
                    literal_data <= x"000018";
                    rdest_adr <= irq_r14;
                    next_state <= ONE_LATENCY_CYCLE;
                    condition <= "1110";

                when others => -- FIQ cycle 2
                    literal_data <= x"00001c";
                    rdest_adr <= fiq_r14;
                    next_state <= ONE_LATENCY_CYCLE;
                    condition <= "1110";
            end case;
            opb_is_literal <= '1';
            mem_ctrl <= NO_MEM_OP;
        end if;
    end if;
end process decode;

-- send the 5 low bits (address bits) to the register file addresses
rfile_A_adr <= rfA_adr(4 downto 0);
rfile_B_adr <= rfB_adr(4 downto 0);
rfile_C_adr <= rfC_adr(4 downto 0);

--decode_blocked_n <= '0' when state /= MAIN_STATE or (decode_stage_valid = '1' and inst_cache_miss = '0') else '1';
-- Driven low whenever a multi-cycle sequence is still pending or the
-- instruction cache misses.
decode_blocked_n <= '0' when next_state /= MAIN_STATE or (inst_cache_miss = '1') else '1';

end architecture;
trunk/hdl/decode.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/register_file.vhd =================================================================== --- trunk/hdl/register_file.vhd (nonexistent) +++ trunk/hdl/register_file.vhd (revision 2) @@ -0,0 +1,81 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- register_file.vhd -- Describes the register file of the processor +-- Normally the synthesis tool should automatically infer +-- a 32x32 SRAM unit to store the registers +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- register_file : 32 x 32-bit register array with three synchronous read
-- ports (A, B, C) and one synchronous write port.
--
--   clk      : all reads and writes happen on the rising edge.
--   aa/ab/ac : read addresses for ports A/B/C.
--   aw       : write address.
--   wren     : when '1', wrdata is stored at address aw on the clock edge.
--   rd_clken : when '1' the read addresses aa/ab/ac are sampled; when '0'
--              the previously sampled addresses are re-used, so the outputs
--              keep following the same registers.
--   wrdata   : data to write.
--   a/b/c    : registered read data (available one clock after the address).
--
-- A read and a write of the same register on the same edge returns the OLD
-- content, because reg_array is a signal and is updated after the read.
entity register_file is
    port(
        clk      : in  std_logic;
        aa       : in  std_logic_vector( 4 downto 0);
        ab       : in  std_logic_vector( 4 downto 0);
        ac       : in  std_logic_vector( 4 downto 0);
        aw       : in  std_logic_vector( 4 downto 0);
        wren     : in  std_logic;
        rd_clken : in  std_logic := '1';
        wrdata   : in  std_logic_vector(31 downto 0);
        a        : out std_logic_vector(31 downto 0);
        b        : out std_logic_vector(31 downto 0);
        c        : out std_logic_vector(31 downto 0)
    );
end register_file;

architecture synth of register_file is
    type reg_type is array (0 to 31) of std_logic_vector(31 downto 0);

    -- Register storage, cleared at power-up.
    signal reg_array : reg_type := (others => (others => '0'));

    -- Last sampled read addresses, re-used while rd_clken = '0'.
    -- Initialised so that a first edge with rd_clken = '0' indexes register 0
    -- instead of converting an all-'U' vector.
    signal aal, abl, acl : std_logic_vector(4 downto 0) := (others => '0');
begin

    process(clk) is
        -- Effective read addresses for this clock edge.
        variable aav, abv, acv : std_logic_vector(4 downto 0);
    begin
        if rising_edge(clk) then
            -- Select fresh or held read addresses.
            if rd_clken = '1' then
                aav := aa;
                abv := ab;
                acv := ac;
            else
                aav := aal;
                abv := abl;
                acv := acl;
            end if;

            -- Synchronous reads (numeric_std replaces the non-standard
            -- std_logic_unsigned.conv_integer).
            a <= reg_array(to_integer(unsigned(aav)));
            b <= reg_array(to_integer(unsigned(abv)));
            c <= reg_array(to_integer(unsigned(acv)));

            -- Remember the addresses for a possible hold on the next edge.
            aal <= aav;
            abl <= abv;
            acl <= acv;

            -- Synchronous write.
            if wren = '1' then
                reg_array(to_integer(unsigned(aw))) <= wrdata;
            end if;
        end if;
    end process;

end synth;
trunk/hdl/register_file.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/barrelshift.vhd =================================================================== --- trunk/hdl/barrelshift.vhd (nonexistent) +++ trunk/hdl/barrelshift.vhd (revision 2) @@ -0,0 +1,411 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- barrelshift.vhd -- Describes the barrel shifter inside the execute pipeline stage +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.arm_types.all;

-- barrelshift : combinational ARM shifter operand unit.
--
--   c                       : current carry flag (passed through when the
--                             shift does not produce a carry, shifted in by RRX).
--   exe_barrelshift_operand : '1' = shift amount comes from op_c_data (register),
--                             '0' = shift amount is exe_literal_shift_amnt.
--   exe_barrelshift_type    : "00" LSL, "01" LSR, "10" ASR, "11" ROR / RRX.
--   exe_literal_shift_amnt  : 5-bit literal shift amount.
--   exe_literal_data        : 24-bit literal, sign-extended to 32 bits.
--   exe_opb_is_literal      : '1' = shift the literal, '0' = shift op_b_data.
--   op_b_data               : register value to shift.
--   op_c_data               : register holding the shift amount (bits 7..0 used).
--   barrelshift_c           : shifter carry out.
--   barrelshift_out         : shifted value.
--
-- Two architectures with the same intended behaviour are provided:
-- "rtl" (behavioural reference) and "optimized" (explicit shift stages).
entity barrelshift is
    port(
        c                       : in  std_logic;
        exe_barrelshift_operand : in  std_logic;
        exe_barrelshift_type    : in  std_logic_vector(1 downto 0);
        exe_literal_shift_amnt  : in  std_logic_vector(4 downto 0);
        exe_literal_data        : in  std_logic_vector(23 downto 0);
        exe_opb_is_literal      : in  std_logic;
        op_b_data               : in  unsigned(31 downto 0);
        op_c_data               : in  unsigned(31 downto 0);
        barrelshift_c           : out std_logic;
        barrelshift_out         : out unsigned(31 downto 0)
    );
end;

-- Note : This architecture synthetizes poorly
architecture rtl of barrelshift is
begin
    -- barrel shifter
    barrelshift : process(exe_barrelshift_operand, exe_barrelshift_type, op_b_data, op_c_data,
                          exe_opb_is_literal, exe_literal_shift_amnt, exe_literal_data, c) is
        variable shift_positions : integer range 0 to 31;
        variable shift_in        : unsigned(31 downto 0);
    begin
        -- shift amount modulo 32, from the register (opc) or from the literal
        if exe_barrelshift_operand = '1' then
            shift_positions := to_integer(op_c_data(4 downto 0));
        else
            shift_positions := to_integer(unsigned(exe_literal_shift_amnt));
        end if;

        -- value to shift: sign-extended literal or operand B
        if exe_opb_is_literal = '1' then
            shift_in := (31 downto 24 => exe_literal_data(23)) & unsigned(exe_literal_data);
        else
            shift_in := op_b_data;
        end if;

        case exe_barrelshift_type is
            -- LSR
            when "01" =>
                -- shift by register > 32 -> overflows, all bits are out
                if exe_barrelshift_operand = '1' and op_c_data(7 downto 0) > x"20" then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= '0';

                -- shift by register = 32, or literal 0 which encodes LSR #32
                elsif (exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"20")
                   or (exe_barrelshift_operand = '0' and exe_literal_shift_amnt = "00000") then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= shift_in(31);

                -- shift by register = 0, opb passes through and C is unaffected
                elsif exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"00" then
                    barrelshift_out <= shift_in;
                    barrelshift_c   <= c;

                -- shift by literal or register, range 1..31
                else
                    barrelshift_out <= shift_in srl shift_positions;
                    barrelshift_c   <= shift_in(shift_positions - 1);
                end if;

            -- ASR
            when "10" =>
                -- shift by register >= 32, or literal 0 which encodes ASR #32:
                -- every result bit and the carry are the sign bit
                if (exe_barrelshift_operand = '1' and op_c_data(7 downto 0) >= x"20")
                   or (exe_barrelshift_operand = '0' and exe_literal_shift_amnt = "00000") then
                    barrelshift_out <= (others => shift_in(31));
                    barrelshift_c   <= shift_in(31);

                -- shift by register = 0, opb passes through and C is unaffected
                elsif exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"00" then
                    barrelshift_out <= shift_in;
                    barrelshift_c   <= c;

                -- shift by literal or register, range 1..31
                else
                    barrelshift_out <= unsigned(shift_right(signed(shift_in), shift_positions));
                    barrelshift_c   <= shift_in(shift_positions - 1);
                end if;

            -- ROR / RRX
            when "11" =>
                -- RRX - 33 bit rotation with carry (literal amount 0)
                if exe_barrelshift_operand = '0' and exe_literal_shift_amnt = "00000" then
                    barrelshift_out <= c & shift_in(31 downto 1);
                    barrelshift_c   <= shift_in(0);

                -- ROR by register = 0, opb passes through and C is unaffected
                elsif exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"00" then
                    barrelshift_out <= shift_in;
                    barrelshift_c   <= c;

                -- ROR by register = 32, 64, etc.... opb passes through but C is affected
                elsif exe_barrelshift_operand = '1' and op_c_data(4 downto 0) = "00000" then
                    barrelshift_out <= shift_in;
                    barrelshift_c   <= shift_in(31);

                -- ROR by literal or register, range 1..31 (if ROR by register, 33 => 1, 34 => 2, etc....)
                else
                    barrelshift_out <= shift_in ror shift_positions;
                    barrelshift_c   <= shift_in(shift_positions - 1);
                end if;

            -- LSL
            when others =>  -- "00"
                -- shift by register > 32 -> overflows, all bits are out
                if exe_barrelshift_operand = '1' and op_c_data(7 downto 0) > x"20" then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= '0';

                -- shift by register = 32 positions
                elsif exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"20" then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= shift_in(0);

                -- shift by register = 0 or literal = 0, opb passes through and C is unaffected
                elsif shift_positions = 0 then
                    barrelshift_out <= shift_in;
                    barrelshift_c   <= c;

                -- shift by literal or register, range 1..31
                else
                    barrelshift_out <= shift_in sll shift_positions;
                    barrelshift_c   <= shift_in(32 - shift_positions);
                end if;
        end case;
    end process;
end;

-- optimized architecture expliciting all stages of the barrel shifter
-- (individual shifters by power of 2 bits in series)
-- synthetizes in something way better
architecture optimized of barrelshift is
    -- Five stages in series shifting by 1, 2, 4, 8 and 16 positions.
    constant STAGE_COUNT : positive := 5;

    type stage_bus_t is array (0 to STAGE_COUNT) of unsigned(31 downto 0);

    signal shift_in    : unsigned(31 downto 0);
    signal shift_amnt  : unsigned(4 downto 0);

    -- stage_data(i) / stage_carry(i) feed stage i; index STAGE_COUNT is the
    -- output of the last stage.
    signal stage_data  : stage_bus_t;
    signal stage_carry : std_logic_vector(0 to STAGE_COUNT);
begin
    stage_data(0)  <= shift_in;
    stage_carry(0) <= c;

    -- Stage i shifts by 2**i when shift_amnt(i) is set, otherwise it passes
    -- data and carry through. The carry is the last bit shifted out:
    -- bit (32 - distance) for LSL, bit (distance - 1) for LSR/ASR/ROR.
    -- Deriving both indices from the distance keeps every stage consistent
    -- (the hand-written LSL #16 stage used bit 15 instead of bit 16).
    stages : for i in 0 to STAGE_COUNT - 1 generate
        constant DIST : positive := 2 ** i;
    begin
        stage : process(stage_data, stage_carry, shift_amnt, exe_barrelshift_type) is
        begin
            if shift_amnt(i) = '1' then
                case exe_barrelshift_type is
                    when "00" =>    -- LSL #DIST
                        stage_data(i + 1)  <= shift_left(stage_data(i), DIST);
                        stage_carry(i + 1) <= stage_data(i)(32 - DIST);
                    when "01" =>    -- LSR #DIST
                        stage_data(i + 1)  <= shift_right(stage_data(i), DIST);
                        stage_carry(i + 1) <= stage_data(i)(DIST - 1);
                    when "10" =>    -- ASR #DIST
                        stage_data(i + 1)  <= unsigned(shift_right(signed(stage_data(i)), DIST));
                        stage_carry(i + 1) <= stage_data(i)(DIST - 1);
                    when others =>  -- ROR #DIST
                        stage_data(i + 1)  <= rotate_right(stage_data(i), DIST);
                        stage_carry(i + 1) <= stage_data(i)(DIST - 1);
                end case;
            else
                stage_data(i + 1)  <= stage_data(i);
                stage_carry(i + 1) <= stage_carry(i);
            end if;
        end process;
    end generate;

    -- Barelshifter control logic: selects the operands fed to the stages and
    -- overrides the staged result for the cases a 0..31 shift cannot express.
    barrelshift : process(exe_barrelshift_operand, exe_barrelshift_type, op_b_data, op_c_data, exe_opb_is_literal,
                          exe_literal_shift_amnt, exe_literal_data, c, shift_in, stage_data, stage_carry) is
    begin
        -- shift amount modulo 32, from the register (opc) or from the literal
        if exe_barrelshift_operand = '1' then
            shift_amnt <= op_c_data(4 downto 0);
        else
            shift_amnt <= unsigned(exe_literal_shift_amnt);
        end if;

        -- value to shift: sign-extended literal or operand B
        if exe_opb_is_literal = '1' then
            shift_in <= (31 downto 24 => exe_literal_data(23)) & unsigned(exe_literal_data);
        else
            shift_in <= op_b_data;
        end if;

        case exe_barrelshift_type is
            -- LSL
            when "00" =>
                -- shift by register > 32 -> overflows, all bits are out
                if exe_barrelshift_operand = '1' and op_c_data(7 downto 0) > x"20" then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= '0';

                -- shift by register = 32 positions
                elsif exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"20" then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= shift_in(0);

                -- shift by literal or register, range 0..31
                else
                    barrelshift_out <= stage_data(STAGE_COUNT);
                    barrelshift_c   <= stage_carry(STAGE_COUNT);
                end if;

            -- LSR
            when "01" =>
                -- shift by register > 32 -> overflows, all bits are out
                if exe_barrelshift_operand = '1' and op_c_data(7 downto 0) > x"20" then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= '0';

                -- shift by register = 32, or literal 0 which encodes LSR #32
                elsif (exe_barrelshift_operand = '1' and op_c_data(7 downto 0) = x"20")
                   or (exe_barrelshift_operand = '0' and exe_literal_shift_amnt = "00000") then
                    barrelshift_out <= (others => '0');
                    barrelshift_c   <= shift_in(31);

                -- shift by literal or register, range 0..31
                else
                    barrelshift_out <= stage_data(STAGE_COUNT);
                    barrelshift_c   <= stage_carry(STAGE_COUNT);
                end if;

            -- ASR
            when "10" =>
                -- shift by register >= 32, or literal 0 which encodes ASR #32:
                -- every result bit and the carry are the sign bit
                if (exe_barrelshift_operand = '1' and op_c_data(7 downto 0) >= x"20")
                   or (exe_barrelshift_operand = '0' and exe_literal_shift_amnt = "00000") then
                    barrelshift_out <= (others => shift_in(31));
                    barrelshift_c   <= shift_in(31);

                -- shift by literal or register, range 0..31
                else
                    barrelshift_out <= stage_data(STAGE_COUNT);
                    barrelshift_c   <= stage_carry(STAGE_COUNT);
                end if;

            -- ROR / RRX
            when others =>  -- "11"
                -- RRX - 33 bit rotation with carry. Result and carry are both
                -- taken from shift_in, as in the rtl architecture.
                if exe_barrelshift_operand = '0' and exe_literal_shift_amnt = "00000" then
                    barrelshift_out <= c & shift_in(31 downto 1);
                    barrelshift_c   <= shift_in(0);

                -- ROR by register = 32, 64, etc.... opb passes through but C is affected.
                -- Register amount 0 is excluded: it must leave C unchanged and is
                -- handled by the pass-through of the stages below.
                elsif exe_barrelshift_operand = '1'
                  and op_c_data(7 downto 0) /= x"00"
                  and op_c_data(4 downto 0) = "00000" then
                    barrelshift_out <= stage_data(STAGE_COUNT);
                    barrelshift_c   <= shift_in(31);

                -- ROR by literal or register, range 0..31 (if ROR by register, 33 => 1, 34 => 2, etc....)
                else
                    barrelshift_out <= stage_data(STAGE_COUNT);
                    barrelshift_c   <= stage_carry(STAGE_COUNT);
                end if;
        end case;
    end process;

end;
trunk/hdl/barrelshift.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/cpu.vhd =================================================================== --- trunk/hdl/cpu.vhd (nonexistent) +++ trunk/hdl/cpu.vhd (revision 2) @@ -0,0 +1,356 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- cpu.vhd -- The top level module of the CPU +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use work.arm_types.all;

-- cpu : top level of the processor. It only wires sub-blocks together:
-- instruction cache, fetch, decode, execute, memory and writeback stages,
-- and the register file. The only logic described here is the interrupt
-- split, the stall chain and the program counter write multiplexer.
entity cpu is
    generic(
        CACHE_BLOCK_BITWIDTH : natural := 5 -- byte address range of a block (hence C_BLOCK_SIZE = 2**BLOCK_BITWIDTH)
    );
    port(
        -- Globals
        clk   : in std_logic;
        reset : in std_logic;

        -- Avalon Master Interface for instructions
        avm_inst_waitrequest   : in  std_logic;
        avm_inst_readdatavalid : in  std_logic;
        avm_inst_readdata      : in  std_logic_vector(31 downto 0);
        avm_inst_read          : out std_logic;
        avm_inst_burstcount    : out std_logic_vector(CACHE_BLOCK_BITWIDTH-2 downto 0);
        avm_inst_address       : out std_logic_vector(31 downto 0);

        -- Avalon Master Interface for data
        avm_data_waitrequest   : in  std_logic;
        avm_data_readdatavalid : in  std_logic;
        avm_data_readdata      : in  std_logic_vector(31 downto 0);
        avm_data_read          : out std_logic;
        avm_data_writedata     : out std_logic_vector(31 downto 0);
        avm_data_write         : out std_logic;
        avm_data_byteen        : out std_logic_vector(3 downto 0);
        avm_data_burstcount    : out std_logic_vector(4 downto 0);
        avm_data_address       : out std_logic_vector(31 downto 0);

        -- Interrupt interface: bit 0 drives FIQ, bits 31..1 are ORed into IRQ
        inr_irq : in std_logic_vector(31 downto 0) := (others => '0')
    );
end entity;

architecture bench of cpu is

    -- ---- reset and interrupts ------------------------------------------
    signal n_reset : std_logic := '0';
    signal fiq     : std_logic;
    signal irq     : std_logic;

    -- ---- instruction cache / fetch -------------------------------------
    signal inst_cache_adr     : std_logic_vector(31 downto 0);
    signal inst_data          : std_logic_vector(31 downto 0);
    signal inst_cache_miss    : std_logic := '0';
    signal inst_cache_rd      : std_logic;
    signal pc_wr              : std_logic := '0';
    signal pc_wrdata          : unsigned(31 downto 0) := (others => 'Z');
    signal fetch_stage_en     : std_logic;
    signal fetch_latch_enable : std_logic;
    signal flush              : std_logic;

    -- ---- decode ----------------------------------------------------------
    signal decode_stage_valid  : std_logic;
    signal decode_blocked_n    : std_logic;
    signal decode_latch_enable : std_logic;
    signal low_flags           : std_logic_vector(5 downto 0);
    signal dec_pc_plus_8       : unsigned(31 downto 0);
    signal dec_pc_plus_4       : unsigned(31 downto 0);

    -- ---- register file read side ----------------------------------------
    signal rfile_A_adr  : std_logic_vector(4 downto 0);
    signal rfile_B_adr  : std_logic_vector(4 downto 0);
    signal rfile_C_adr  : std_logic_vector(4 downto 0);
    signal rfile_A_data : std_logic_vector(31 downto 0);
    signal rfile_B_data : std_logic_vector(31 downto 0);
    signal rfile_C_data : std_logic_vector(31 downto 0);

    -- ---- decode -> execute ----------------------------------------------
    signal exe_pc_plus_8           : unsigned(31 downto 0);
    signal exe_pc_plus_4           : unsigned(31 downto 0);
    signal exe_A_adr               : std_logic_vector(5 downto 0);
    signal exe_B_adr               : std_logic_vector(5 downto 0);
    signal exe_C_adr               : std_logic_vector(5 downto 0);
    signal exe_condition           : std_logic_vector(3 downto 0);
    signal exe_stage_valid         : std_logic;
    signal exe_barrelshift_operand : std_logic;
    signal exe_opb_is_literal      : std_logic;
    signal exe_opb_sel             : std_logic;
    signal exe_affect_sflags       : std_logic;
    signal exe_data_sel            : std_logic;
    signal exe_rdest_wren          : std_logic;
    signal exe_branch_en           : std_logic;
    signal exe_wb_sel              : std_logic;
    signal exe_latch_enable        : std_logic;
    signal exe_barrelshift_type    : std_logic_vector(1 downto 0);
    signal exe_literal_shift_amnt  : std_logic_vector(4 downto 0);
    signal exe_rdest_adr           : std_logic_vector(4 downto 0);
    signal exe_literal_data        : std_logic_vector(23 downto 0);
    signal exe_alu_operation       : ALU_OPERATION;
    signal exe_mem_ctrl            : MEM_OPERATION;
    signal exe_mem_burstcount      : std_logic_vector(3 downto 0);
    signal exe_PC_wrdata           : unsigned(31 downto 0);
    signal exe_pc_wr               : std_logic;
    signal exe_blocked_n           : std_logic;

    -- ---- execute -> memory ----------------------------------------------
    signal mem_stage_valid    : std_logic;
    signal mem_rdest_wren     : std_logic;
    signal mem_branch_en      : std_logic;
    signal mem_wb_sel         : std_logic;
    signal mem_rdest_adr      : std_logic_vector(4 downto 0);
    signal mem_exe_data       : std_logic_vector(31 downto 0);
    signal mem_wrdata         : std_logic_vector(31 downto 0);
    signal mem_mem_ctrl       : MEM_OPERATION;
    signal mem_mem_burstcount : std_logic_vector(3 downto 0);
    signal mem_blocked_n      : std_logic;
    signal mem_latch_enable   : std_logic;
    signal fwd_mem_enable     : std_logic;
    signal fwd_mem_address    : std_logic_vector(4 downto 0);
    signal fwd_mem_data       : std_logic_vector(31 downto 0);

    -- ---- memory -> writeback --------------------------------------------
    signal wb_stage_valid  : std_logic;
    signal wb_rdest_wren   : std_logic;
    signal wb_branch_en    : std_logic;
    signal wb_wb_sel       : std_logic;
    signal wb_rdest_adr    : std_logic_vector(4 downto 0);
    signal wb_exe_data     : std_logic_vector(31 downto 0);
    signal wb_mem_ctrl     : MEM_OPERATION;
    signal rfile_wr_enable : std_logic;
    signal wb_pc_wr        : std_logic;
    signal wb_blocked_n    : std_logic;
    signal rfile_address   : std_logic_vector(4 downto 0);
    signal wb_data         : std_logic_vector(31 downto 0);
    signal fwd_wb2_enable  : std_logic;
    signal fwd_wb2_address : std_logic_vector(4 downto 0);
    signal fwd_wb2_data    : std_logic_vector(31 downto 0);

begin

    -- ------------------------------------------------------------------
    -- Glue logic
    -- ------------------------------------------------------------------

    -- The pipeline stages take an active-low reset, the cache an active-high one.
    n_reset <= not reset;

    -- Interrupt split: line 0 is FIQ, any other active line raises IRQ.
    fiq <= inr_irq(0);
    irq <= '1' when inr_irq(31 downto 1) /= (31 downto 1 => '0') else '0';

    -- Stall chain, built from the writeback end towards fetch: a stage may
    -- latch only when the following stage may latch and is not blocked.
    mem_latch_enable    <= wb_blocked_n;
    exe_latch_enable    <= mem_latch_enable and mem_blocked_n;
    decode_latch_enable <= exe_latch_enable and exe_blocked_n;
    fetch_latch_enable  <= decode_latch_enable and decode_blocked_n;
    fetch_stage_en      <= fetch_latch_enable;

    -- Program counter write: requested by execute or writeback; the execute
    -- value is used whenever execute requests the write.
    pc_wr     <= exe_pc_wr or wb_pc_wr;
    pc_wrdata <= unsigned(wb_data) when exe_pc_wr /= '1' else exe_pc_wrdata;

    -- ------------------------------------------------------------------
    -- Instruction cache
    -- ------------------------------------------------------------------
    c : entity work.cache(synth)
        generic map(
            INSTR_BADDR_BITWDTH => 32,                      -- input coe_cpu_address width in bits
            BLOCK_BITWIDTH      => CACHE_BLOCK_BITWIDTH,    -- byte address range of a block (hence C_BLOCK_SIZE = 2**BLOCK_BITWIDTH)
            CACHE_WAYS          => 1,                       -- number of ways in the cache (power of 2), for now only direct-mapped
            CACHE_SIZE          => 4096                     -- cache size in bytes, must be a factor of C_BLOCK_SIZE * CACHE_WAYS
        )
        port map(
            -- Globals
            clk   => clk,
            reset => reset,

            -- CPU conduit extern
            coe_cpu_enabled  => inst_cache_rd,  -- fetches a new instruction. If deactivated, the last read is kept on the output.
            coe_cpu_address  => inst_cache_adr, -- byte address
            coe_cpu_readdata => inst_data,
            coe_cpu_miss     => inst_cache_miss,

            -- Avalon Master Interface
            avm_waitrequest   => avm_inst_waitrequest,
            avm_readdatavalid => avm_inst_readdatavalid,
            avm_readdata      => avm_inst_readdata,
            avm_read          => avm_inst_read,
            avm_burstcount    => avm_inst_burstcount,
            avm_address       => avm_inst_address
        );

    -- ------------------------------------------------------------------
    -- Fetch stage
    -- ------------------------------------------------------------------
    f : entity work.fetch(rtl)
        port map(
            clk     => clk,
            n_reset => n_reset,

            decode_stage_valid => decode_stage_valid,
            dec_pc_plus_8      => dec_pc_plus_8,
            dec_pc_plus_4      => dec_pc_plus_4,
            flush              => flush,
            inst_cache_adr     => inst_cache_adr,
            inst_cache_rd      => inst_cache_rd,
            pc_wr              => pc_wr,
            pc_wrdata          => pc_wrdata,
            fetch_stage_en     => fetch_stage_en,

            fetch_latch_enable => fetch_latch_enable
        );

    -- ------------------------------------------------------------------
    -- Decode stage
    -- ------------------------------------------------------------------
    d : entity work.decode(rtl)
        port map(
            clk     => clk,
            reset_n => n_reset,

            fiq                => fiq,
            irq                => irq,
            flush              => flush,
            low_flags          => low_flags,
            decode_stage_valid => decode_stage_valid,
            inst_cache_miss    => inst_cache_miss,
            dec_pc_plus_8      => dec_pc_plus_8,
            dec_pc_plus_4      => dec_pc_plus_4,

            inst_data        => inst_data,
            decode_blocked_n => decode_blocked_n,

            -- register file read addresses
            rfile_A_adr => rfile_A_adr,
            rfile_B_adr => rfile_B_adr,
            rfile_C_adr => rfile_C_adr,

            -- control bundle handed to the execute stage
            exe_A_adr     => exe_A_adr,
            exe_B_adr     => exe_B_adr,
            exe_C_adr     => exe_C_adr,
            exe_pc_plus_4 => exe_pc_plus_4,
            exe_pc_plus_8 => exe_pc_plus_8,

            exe_stage_valid         => exe_stage_valid,
            exe_barrelshift_operand => exe_barrelshift_operand,
            exe_barrelshift_type    => exe_barrelshift_type,
            exe_literal_shift_amnt  => exe_literal_shift_amnt,
            exe_literal_data        => exe_literal_data,
            exe_opb_is_literal      => exe_opb_is_literal,
            exe_opb_sel             => exe_opb_sel,
            exe_alu_operation       => exe_alu_operation,
            exe_condition           => exe_condition,
            exe_affect_sflags       => exe_affect_sflags,
            exe_data_sel            => exe_data_sel,
            exe_rdest_wren          => exe_rdest_wren,
            exe_rdest_adr           => exe_rdest_adr,
            exe_branch_en           => exe_branch_en,
            exe_wb_sel              => exe_wb_sel,
            exe_mem_ctrl            => exe_mem_ctrl,
            exe_mem_burstcount      => exe_mem_burstcount,

            decode_latch_enable => decode_latch_enable
        );

    -- ------------------------------------------------------------------
    -- Execute stage
    -- ------------------------------------------------------------------
    e : entity work.execute(rtl)
        port map(
            clk     => clk,
            n_reset => n_reset,

            -- control bundle coming from decode
            exe_A_adr               => exe_A_adr,
            exe_B_adr               => exe_B_adr,
            exe_C_adr               => exe_C_adr,
            exe_stage_valid         => exe_stage_valid,
            exe_barrelshift_operand => exe_barrelshift_operand,
            exe_barrelshift_type    => exe_barrelshift_type,
            exe_literal_shift_amnt  => exe_literal_shift_amnt,
            exe_literal_data        => exe_literal_data,
            exe_opb_is_literal      => exe_opb_is_literal,
            exe_opb_sel             => exe_opb_sel,
            exe_alu_operation       => exe_alu_operation,
            exe_condition           => exe_condition,
            exe_affect_sflags       => exe_affect_sflags,
            exe_data_sel            => exe_data_sel,
            exe_rdest_wren          => exe_rdest_wren,
            exe_rdest_adr           => exe_rdest_adr,
            exe_branch_en           => exe_branch_en,
            exe_wb_sel              => exe_wb_sel,
            exe_mem_ctrl            => exe_mem_ctrl,
            exe_mem_burstcount      => exe_mem_burstcount,

            exe_pc_plus_4 => exe_pc_plus_4,
            exe_pc_plus_8 => exe_pc_plus_8,

            -- register file read data
            rfile_A_data => rfile_A_data,
            rfile_B_data => rfile_B_data,
            rfile_C_data => rfile_C_data,

            -- forwarding inputs: the "wb1" path is fed by the register file
            -- write port signals, the "mem" path by the memory stage
            fwd_wb2_enable     => fwd_wb2_enable,
            fwd_wb2_address    => fwd_wb2_address,
            fwd_wb2_data       => fwd_wb2_data,
            fwd_wb1_enable     => rfile_wr_enable,
            fwd_wb1_address    => rfile_address,
            fwd_wb1_data       => wb_exe_data,
            fwd_wb1_is_invalid => wb_wb_sel,
            fwd_mem_enable     => fwd_mem_enable,
            fwd_mem_address    => fwd_mem_address,
            fwd_mem_data       => fwd_mem_data,
            fwd_mem_is_invalid => mem_wb_sel,

            -- bundle handed to the memory stage
            mem_stage_valid    => mem_stage_valid,
            mem_rdest_wren     => mem_rdest_wren,
            mem_rdest_adr      => mem_rdest_adr,
            mem_branch_en      => mem_branch_en,
            mem_wb_sel         => mem_wb_sel,
            mem_exe_data       => mem_exe_data,
            mem_wrdata         => mem_wrdata,
            mem_mem_ctrl       => mem_mem_ctrl,
            mem_mem_burstcount => mem_mem_burstcount,

            low_flags     => low_flags,
            exe_PC_wrdata => exe_PC_wrdata,
            exe_PC_wr     => exe_PC_wr,

            exe_blocked_n    => exe_blocked_n,
            exe_latch_enable => exe_latch_enable
        );

    -- ------------------------------------------------------------------
    -- Memory stage
    -- ------------------------------------------------------------------
    m : entity work.memory(rtl)
        port map(
            clk     => clk,
            reset_n => n_reset,

            -- bundle coming from execute
            mem_stage_valid    => mem_stage_valid,
            mem_rdest_wren     => mem_rdest_wren,
            mem_rdest_adr      => mem_rdest_adr,
            mem_branch_en      => mem_branch_en,
            mem_wb_sel         => mem_wb_sel,
            mem_exe_data       => mem_exe_data,
            mem_wrdata         => mem_wrdata,
            mem_mem_ctrl       => mem_mem_ctrl,
            mem_mem_burstcount => mem_mem_burstcount,

            -- bundle handed to writeback
            wb_stage_valid => wb_stage_valid,
            wb_rdest_wren  => wb_rdest_wren,
            wb_rdest_adr   => wb_rdest_adr,
            wb_branch_en   => wb_branch_en,
            wb_wb_sel      => wb_wb_sel,
            wb_exe_data    => wb_exe_data,
            wb_mem_ctrl    => wb_mem_ctrl,

            -- forwarding towards execute
            fwd_mem_enable  => fwd_mem_enable,
            fwd_mem_address => fwd_mem_address,
            fwd_mem_data    => fwd_mem_data,

            -- Avalon data master (request side)
            avm_data_waitrequest => avm_data_waitrequest,
            avm_data_read        => avm_data_read,
            avm_data_writedata   => avm_data_writedata,
            avm_data_write       => avm_data_write,
            avm_data_byteen      => avm_data_byteen,
            avm_data_burstcount  => avm_data_burstcount,
            avm_data_address     => avm_data_address,

            mem_blocked_n    => mem_blocked_n,
            mem_latch_enable => mem_latch_enable
        );

    -- ------------------------------------------------------------------
    -- Writeback stage
    -- ------------------------------------------------------------------
    w : entity work.writeback(rtl)
        port map(
            clk => clk,

            -- bundle coming from memory
            wb_stage_valid => wb_stage_valid,
            wb_rdest_wren  => wb_rdest_wren,
            wb_rdest_adr   => wb_rdest_adr,
            wb_branch_en   => wb_branch_en,
            wb_wb_sel      => wb_wb_sel,
            wb_exe_data    => wb_exe_data,
            wb_mem_ctrl    => wb_mem_ctrl,

            -- register file write port
            rfile_wr_enable => rfile_wr_enable,
            rfile_address   => rfile_address,
            wb_data         => wb_data,

            -- forwarding towards execute
            fwd_wb2_enable  => fwd_wb2_enable,
            fwd_wb2_address => fwd_wb2_address,
            fwd_wb2_data    => fwd_wb2_data,

            -- Avalon data master (response side)
            avm_data_readdatavalid => avm_data_readdatavalid,
            avm_data_readdata      => avm_data_readdata,

            wb_pc_wr     => wb_pc_wr,
            wb_blocked_n => wb_blocked_n
        );

    -- ------------------------------------------------------------------
    -- Register file: read addresses from decode, write port from writeback.
    -- Read addresses are only sampled while decode is allowed to latch.
    -- ------------------------------------------------------------------
    rf : entity work.register_file(synth)
        port map(
            clk      => clk,
            rd_clken => decode_latch_enable,

            aa => rfile_A_adr,
            ab => rfile_B_adr,
            ac => rfile_C_adr,
            a  => rfile_A_data,
            b  => rfile_B_data,
            c  => rfile_C_data,

            aw     => rfile_address,
            wren   => rfile_wr_enable,
            wrdata => wb_data
        );

end architecture bench;
trunk/hdl/cpu.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/memory.vhd =================================================================== --- trunk/hdl/memory.vhd (nonexistent) +++ trunk/hdl/memory.vhd (revision 2) @@ -0,0 +1,198 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- memory.vhd -- Describes the memory pipeline stage +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+use work.arm_types.all; -- presumably declares MEM_OPERATION and its literals (package not shown here)
+-- Memory pipeline stage: registers the mem_* inputs into the wb_* outputs (gated by mem_latch_enable), exposes a forwarding path and drives the avm_data_* master signals according to mem_mem_ctrl.
+entity memory is
+    port(
+        clk : in std_logic;
+        reset_n : in std_logic; -- active low; asynchronously clears wb_stage_valid
+        -- inputs of this stage
+        mem_stage_valid : in std_logic; -- gates the bus strobes, the forwarding enable and wb_stage_valid
+        mem_rdest_wren : in std_logic;
+        mem_rdest_adr : in std_logic_vector(4 downto 0);
+        mem_branch_en : in std_logic;
+        mem_wb_sel : in std_logic;
+        mem_exe_data : in std_logic_vector(31 downto 0); -- drives avm_data_address and fwd_mem_data
+        mem_wrdata : in std_logic_vector(31 downto 0); -- data written on STORE_WORD / STORE_BYTE
+        mem_mem_ctrl : in MEM_OPERATION; -- selects the bus operation
+        mem_mem_burstcount : in std_logic_vector(3 downto 0); -- "0000" is sent on the bus as 16
+        -- outputs registered from the mem_* inputs when mem_latch_enable = '1'
+        wb_stage_valid : out std_logic;
+        wb_rdest_wren : out std_logic;
+        wb_rdest_adr : out std_logic_vector(4 downto 0);
+        wb_branch_en : out std_logic;
+        wb_wb_sel : out std_logic;
+        wb_exe_data : out std_logic_vector(31 downto 0);
+        wb_mem_ctrl : out MEM_OPERATION;
+        -- forwarding path (combinational, taken directly from the mem_* inputs)
+        fwd_mem_enable : out std_logic;
+        fwd_mem_address : out std_logic_vector(4 downto 0);
+        fwd_mem_data : out std_logic_vector(31 downto 0);
+        -- avalon master signals
+        avm_data_waitrequest : in std_logic;
+        avm_data_read : out std_logic;
+        avm_data_writedata : out std_logic_vector(31 downto 0);
+        avm_data_write : out std_logic;
+        avm_data_byteen : out std_logic_vector(3 downto 0);
+        avm_data_burstcount : out std_logic_vector(4 downto 0);
+        avm_data_address : out std_logic_vector(31 downto 0);
+        -- stall handshake
+        mem_blocked_n : out std_logic; -- '0' while a valid load/store sees avm_data_waitrequest high and avalon_acknowledge is not yet set
+        mem_latch_enable : in std_logic -- enables the output registers of this stage
+    );
+end entity;
+
+architecture rtl of memory is
+    -- set once the bus was seen with waitrequest low while the output latch is disabled (see last process)
+    signal avalon_acknowledge : std_logic;
+    -- returns a one-hot 4-bit byte enable selected by the two low address bits
+    function get_byteen(adr: std_logic_vector) return std_logic_vector is
+    begin
+        -- Assuming little endian memory
+        case adr(1 downto 0) is
+            when "00" => return "0001";
+            when "01" => return "0010";
+            when "10" => return "0100";
+            when others => return "1000";
+        end case;
+    end;
+
+begin
+    process(clk, reset_n) is -- wb_stage_valid register with asynchronous active-low reset
+    begin
+        if reset_n = '0'
+        then
+            wb_stage_valid <= '0';
+        elsif rising_edge(clk)
+        then
+            if mem_latch_enable = '1'
+            then
+                if mem_mem_ctrl = NO_MEM_OP or mem_mem_ctrl = LOAD_BURST
+                then
+                    wb_stage_valid <= mem_stage_valid; -- no bus handshake is taken into account for these two operations
+                else
+                    wb_stage_valid <= mem_stage_valid and (not avm_data_waitrequest or avalon_acknowledge); -- valid only if waitrequest is low now or avalon_acknowledge was already set
+                end if;
+            end if;
+        end if;
+    end process;
+
+    -- output latch: copies the control/data inputs to the wb_* outputs when mem_latch_enable = '1' (no reset)
+    process(clk) is
+    begin
+        if rising_edge(clk)
+        then
+            if mem_latch_enable = '1'
+            then
+                wb_rdest_wren <= mem_rdest_wren;
+                wb_rdest_adr <= mem_rdest_adr;
+                wb_branch_en <= mem_branch_en;
+                wb_wb_sel <= mem_wb_sel;
+                wb_exe_data <= mem_exe_data;
+                wb_mem_ctrl <= mem_mem_ctrl;
+            end if;
+        end if;
+    end process;
+
+    -- forwarding
+    fwd_mem_enable <= mem_rdest_wren and mem_stage_valid;
+    fwd_mem_address <= mem_rdest_adr;
+    fwd_mem_data <= mem_exe_data;
+
+    -- avalon master
+    avm_data_address <= mem_exe_data;
+    process(mem_mem_ctrl, mem_mem_burstcount, mem_wrdata, mem_stage_valid, mem_exe_data, avm_data_waitrequest, avalon_acknowledge) is -- combinational: bus strobes, byte enables, write data and mem_blocked_n
+    begin
+        avm_data_read <= '0'; -- defaults; they stay in force unless a branch below overrides them
+        avm_data_write <= '0';
+        avm_data_writedata <= (others => '-');
+        avm_data_byteen <= (others => '-');
+        mem_blocked_n <= '1';
+
+        -- 0 actually means a burst count of 16 (NOTE(review): the original comment said "16 bytes"; confirm the unit of avm_data_burstcount)
+        if mem_mem_burstcount = "0000"
+        then
+            avm_data_burstcount <= "10000";
+        else
+            avm_data_burstcount <= '0' & mem_mem_burstcount;
+        end if;
+
+        if avalon_acknowledge = '0' -- once avalon_acknowledge is set, only the defaults apply (no strobe, mem_blocked_n = '1')
+        then
+            case mem_mem_ctrl is
+                when NO_MEM_OP => null;
+
+                when LOAD_WORD =>
+                    avm_data_read <= mem_stage_valid;
+                    avm_data_write <= '0';
+                    avm_data_byteen <= "1111";
+
+                    mem_blocked_n <= not avm_data_waitrequest or not mem_stage_valid;
+
+                when LOAD_BYTE =>
+                    avm_data_read <= mem_stage_valid;
+                    avm_data_write <= '0';
+                    avm_data_byteen <= get_byteen(mem_exe_data(1 downto 0));
+                    mem_blocked_n <= not avm_data_waitrequest or not mem_stage_valid;
+
+                when LOAD_BURST => null; -- NOTE(review): no strobe is driven here for LOAD_BURST; confirm where burst reads are issued
+
+                when STORE_WORD =>
+                    avm_data_read <= '0';
+                    avm_data_write <= mem_stage_valid;
+                    avm_data_writedata <= mem_wrdata;
+                    avm_data_byteen <= "1111";
+
+                    mem_blocked_n <= not avm_data_waitrequest or not mem_stage_valid;
+
+                when STORE_BYTE =>
+                    avm_data_read <= '0';
+                    avm_data_write <= mem_stage_valid;
+                    -- Byte enable signals
+                    avm_data_byteen <= get_byteen(mem_exe_data(1 downto 0));
+                    -- Byte repetition: the low byte is copied to all four lanes, the byte enable selects the lane
+                    avm_data_writedata <= mem_wrdata(7 downto 0) & mem_wrdata(7 downto 0) & mem_wrdata(7 downto 0) & mem_wrdata(7 downto 0);
+                    mem_blocked_n <= not avm_data_waitrequest or not mem_stage_valid;
+            end case;
+        end if;
+    end process;
+
+    -- Is necessary to prevent multiple reads/writes to avalon bus when the WB stage is blocked (mem latch disabled)
+    process(clk) is
+    begin
+        if rising_edge(clk)
+        then
+            if mem_latch_enable = '1'
+            then
+                avalon_acknowledge <= '0'; -- cleared whenever the output latch is enabled
+            else
+                avalon_acknowledge <= not avm_data_waitrequest or avalon_acknowledge; -- sticky: set when waitrequest is low while the latch is disabled, held until the latch is enabled again
+            end if;
+        end if;
+    end process;
+
+end architecture;
\ No newline at end of file
trunk/hdl/memory.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/fetch.vhd =================================================================== --- trunk/hdl/fetch.vhd (nonexistent) +++ trunk/hdl/fetch.vhd (revision 2) @@ -0,0 +1,109 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- fetch.vhd -- Description of the fetch pipeline stage +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
+
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+-- Fetch pipeline stage: holds the program counter, drives the instruction cache address/read strobe and produces the flush and valid signals for the following stage.
+entity fetch is
+    port(
+        clk : in std_logic;
+        n_reset : in std_logic; -- active low, asynchronous
+
+        -- port to write to the program counter (both inputs default to "no write")
+        pc_wr : in std_logic := '0';
+        pc_wrdata : in unsigned(31 downto 0) := (others => '0');
+
+        -- enable the fetch stage
+        fetch_stage_en : in std_logic;
+
+        -- flush output for following pipeline stages
+        -- (activated on PC writes)
+        flush : out std_logic;
+
+        -- enable the next stage (out)
+        decode_stage_valid : out std_logic;
+        dec_pc_plus_8 : out unsigned(31 downto 0);
+        dec_pc_plus_4 : out unsigned(31 downto 0);
+
+        -- memory bus
+        inst_cache_adr : out std_logic_vector(31 downto 0);
+        inst_cache_rd : out std_logic;
+
+        -- enable signal for latch after the fetch stage
+        fetch_latch_enable : in std_logic
+    );
+end entity;
+
+architecture rtl of fetch is
+    signal pc : unsigned(31 downto 0); -- program counter register
+    signal pc4 : unsigned(31 downto 0); -- cur_pc + 4
+    signal cur_pc : unsigned(31 downto 0); -- address presented to the instruction cache this cycle
+    signal flush_r, flush_s : std_logic; -- flush_r: registered pending flush; flush_s: combinational flush
+begin
+    flush <= flush_s;
+    -- flush the pipeline on PC writes; flush_r keeps the flush asserted while the output latch is disabled (original note: includes reset and cases when a flush occurs during a miss)
+    flush_s <= '1' when pc_wr = '1' or flush_r = '1' else '0';
+    -- a PC write bypasses the register so the new address is fetched in the same cycle
+    cur_pc <= pc_wrdata when pc_wr = '1' else pc;
+    inst_cache_adr <= std_logic_vector(cur_pc);
+
+    -- handles the reading of the instruction cache memory
+    inst_cache_rd <= fetch_stage_en;
+
+    -- computation of next PC value (async)
+    pc4 <= cur_pc + 4;
+    -- after an enabled fetch of address A the register pc holds A+4, so with pc_wr low pc is A+4 and pc4 is A+8
+    dec_pc_plus_8 <= pc4;
+    dec_pc_plus_4 <= pc;
+
+    -- handles resets and fetch latch at output of the stage
+    fetchlatch:
+    process(n_reset, clk) is
+    begin
+        if n_reset='0'
+        then
+            pc <= (others => '0'); -- reset address is 0x000000
+            decode_stage_valid <= '0';
+            flush_r <= '0';
+        elsif rising_edge(clk)
+        then
+            if fetch_stage_en = '1'
+            then
+                pc <= pc4; -- advance to the next instruction
+            else
+                pc <= cur_pc; -- stage disabled: keep the current address (this also captures a PC write)
+            end if;
+
+            if fetch_latch_enable = '1'
+            then
+                flush_r <= '0'; -- a pending flush is dropped once the latch is enabled
+                decode_stage_valid <= fetch_stage_en;
+            else
+                flush_r <= flush_s; -- remember the flush while the latch is disabled
+            end if;
+        end if;
+    end process;
+
+end architecture;
\ No newline at end of file
trunk/hdl/fetch.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/hdl/writeback.vhd =================================================================== --- trunk/hdl/writeback.vhd (nonexistent) +++ trunk/hdl/writeback.vhd (revision 2) @@ -0,0 +1,110 @@ +-- This file is part of ARM4U CPU +-- +-- This is a creation of the Laboratory of Processor Architecture +-- of Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) +-- +-- writeback.vhd -- Description of the writeback pipeline stage +-- +-- Written By - Jonathan Masur and Xavier Jimenez (2013) +-- +-- This program is free software; you can redistribute it and/or modify it +-- under the terms of the GNU General Public License as published by the +-- Free Software Foundation; either version 2, or (at your option) any +-- later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- In other words, you are welcome to use, share and improve this program. +-- You are forbidden to forbid anyone else to use, share and improve +-- what you give them. Help stamp out software-hoarding! 
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+use work.arm_types.all; -- presumably declares MEM_OPERATION and its literals (package not shown here)
+-- Writeback pipeline stage: selects between execute data and avalon read data, drives the register file write port, the PC write strobe, the stall output and a registered forwarding path.
+entity writeback is
+    port(
+        clk : in std_logic;
+        -- inputs of this stage
+        wb_stage_valid : in std_logic;
+        wb_rdest_wren : in std_logic;
+        wb_rdest_adr : in std_logic_vector(4 downto 0);
+        wb_branch_en : in std_logic;
+        wb_wb_sel : in std_logic; -- '0' selects wb_exe_data, otherwise the avalon read data
+        wb_exe_data : in std_logic_vector(31 downto 0); -- its two low bits select the byte lane on LOAD_BYTE
+        wb_mem_ctrl : in MEM_OPERATION;
+        -- register file write port
+        rfile_wr_enable : out std_logic;
+        rfile_address : out std_logic_vector(4 downto 0);
+        wb_data : out std_logic_vector(31 downto 0);
+        -- registered forwarding path
+        fwd_wb2_enable : out std_logic;
+        fwd_wb2_address : out std_logic_vector(4 downto 0);
+        fwd_wb2_data : out std_logic_vector(31 downto 0);
+        -- avalon read data channel
+        avm_data_readdatavalid : in std_logic;
+        avm_data_readdata : in std_logic_vector(31 downto 0);
+        -- PC write strobe and stall output
+        wb_pc_wr : out std_logic;
+        wb_blocked_n : out std_logic -- '0' while a valid load has no valid read data yet
+    );
+end entity;
+
+architecture rtl of writeback is
+    signal outdata : std_logic_vector(31 downto 0); -- value selected for write back
+    signal avalon_data : std_logic_vector(31 downto 0); -- read data after the optional byte extraction
+    signal rd_ok : std_logic;
+
+begin
+    -- 0 if the stage should stall because read data is not valid
+    rd_ok <= avm_data_readdatavalid when wb_mem_ctrl = LOAD_WORD or wb_mem_ctrl = LOAD_BYTE or wb_mem_ctrl = LOAD_BURST else '1';
+    wb_blocked_n <= rd_ok or not wb_stage_valid;
+
+    -- write to PC on branches from avalon data
+    wb_pc_wr <= wb_branch_en and wb_wb_sel and wb_stage_valid and rd_ok;
+
+    -- output MUX between avalon data and execute data
+    outdata <= wb_exe_data when wb_wb_sel = '0' else avalon_data;
+
+    -- register file signals (also writeback 1 forwarding path)
+    rfile_wr_enable <= wb_rdest_wren and wb_stage_valid; -- NOTE(review): unlike wb_pc_wr this is not gated by rd_ok; confirm writes while read data is not yet valid are harmless
+    rfile_address <= wb_rdest_adr;
+    wb_data <= outdata;
+    -- combinational formatting of the avalon read data
+    avm : process(wb_exe_data, avm_data_readdata, wb_mem_ctrl) is
+    begin
+        -- convert byte->word if a load byte command (zero-extended, lane chosen by the two low bits of wb_exe_data)
+        if wb_mem_ctrl = LOAD_BYTE
+        then
+            case wb_exe_data(1 downto 0) is
+                when "00" =>
+                    avalon_data <= (31 downto 8 => '0') & avm_data_readdata(7 downto 0);
+                when "01" =>
+                    avalon_data <= (31 downto 8 => '0') & avm_data_readdata(15 downto 8);
+                when "10" =>
+                    avalon_data <= (31 downto 8 => '0') & avm_data_readdata(23 downto 16);
+                when others =>
+                    avalon_data <= (31 downto 8 => '0') & avm_data_readdata(31 downto 24);
+            end case;
+        else
+            -- else data just goes through
+            avalon_data <= avm_data_readdata;
+        end if;
+    end process;
+
+    -- register for writeback2 forwarding path (updated every clock edge, no enable and no reset)
+    process(clk) is
+    begin
+        if rising_edge(clk)
+        then
+            fwd_wb2_enable <= wb_rdest_wren and wb_stage_valid;
+            fwd_wb2_address <= wb_rdest_adr;
+            fwd_wb2_data <= outdata;
+        end if;
+    end process;
+
+end architecture;
\ No newline at end of file
trunk/hdl/writeback.vhd Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/readme.txt =================================================================== --- trunk/readme.txt (nonexistent) +++ trunk/readme.txt (revision 2) @@ -0,0 +1,97 @@ + ============================================ +=== ARM4U Documentation === += By Jonathan Masur, 2014 == += Made in spring 2014 for OpenCores release == + ============================================ + + **************** +** Introduction ** + **************** + +ARM4U is a "softcore" processor that was created in the context of a university project in the processor architecture laboratory at Ecole Polytechnique Fédérale de Lausanne ( http://lap.epfl.ch ) + +We decided, one year after the completion of the project, to release the processor on the site OpenCores ( http://www.opencores.org ) for free under the GPL licence in order to make the source code and documentation available to the general public. It comes as-is with ABSOLUTELY NO WARRANTY. + +The ARM4U processor clones early ARM processors in functionality, it implements almost the full ARMv3 instruction set, and can be targeted by the GCC toolchain. It is free for anyone to use and distribute. However, if someone ever makes cool use of this processor, I would of course be very happy to know about it. +This documentation doesn't cover the ARM by itself; for most information about the inner workings of the processor (instruction set, etc.) please consult the documentation of the ARM processors. This documentation instead covers how to use the softcore and what the differences are between it and a genuine ARM. + + ************************************** +** Internal workings of the processor ** + ************************************* + +The processor works with a classical 5-stage RISC pipeline (Fetch, Decode, Execute, Memory, Writeback). +Since a drawing is worth a thousand words, schematics of the processor are included. 
PLEASE CONSULT THE SCHEMATICS FOR UNDERSTANDING THE INNER WORKING OF THE PROCESSOR. + +The processor was not built for extreme performance, nor for extreme minimization of FPGA resources. Instead it was built with 3 goals: simplicity, pedagogy, and a fully working and usable result. + +The CPU communicates with the external world (memory, I/O, etc.) through the Altera Avalon bus. The CPU can be used as a QSys component, just like the NIOS II processor provided by Altera. However, it should be relatively straightforward to adapt it to another bus. We managed to synthesize a 50 MHz version using a Cyclone IV FPGA. The resource usage was only slightly larger than a NIOS II/s (standard), but the frequency was lower. However, the ARM instructions are more dense and efficient overall, and we can expect comparable performance between both CPUs. No benchmarks were run to prove that. + +The instruction cache allows instructions to be fetched while reading/writing to memory, and a new instruction to be fetched each cycle (hopefully) even if the memory has a read/write latency (DRAM). +There is no cache coherency : an attempt to write self-modifying code will not work unless some additional circuitry is added. + + ************************************* +** Differences with an authentic ARM ** + ************************************* + +The ARM4U behaves identically to an ARM implementing the ARMv3 instruction set (ARM6 generation) except for the following differences : + +- Abort mode and the abort interrupt don't exist +- There is no support for coprocessors and related instructions +- There is no 26-bit (ARMv2) compatibility mode +- The 'msr' instruction always affects all status flags (you can't limit it to a part of the flags, leaving other flags unaffected) +- When an interrupt occurs, the status flags take hard-coded values. 
For conditional flags, this shouldn't be a problem; the only major difference is that the 'F' flag is cleared when an IRQ triggers. In other words, FIQs are enabled whenever an IRQ happens +- R15 (PC+8) can be used as an input for every instruction, and will always produce correct results, even when doing so is forbidden on an authentic ARM +- 'mul' and 'mla' instructions can be used for all operands and will always produce correct results, even when doing so is forbidden on an authentic ARM +- 'mlas' instruction will affect the overflow and carry flags based on the addition operation +- 'swp' and 'swpb' (swap) instructions are absent + + ************** +** Interrupts ** + ************** + +The following interrupts are supported + +- Reset +- IRQ +- FIQ ("fast IRQ") +- Software interrupt ('swi' instruction) +- Undefined instruction trap (any instruction not implemented) + +The vectors, register bank switching, PSW and PC saving work exactly the same as on an authentic ARM. Other kinds of interrupts (namely, "abort") aren't supported. + + ******************* +** Compiling notes ** + ******************* + +1) With GCC +VERY IMPORTANT : Always use command line options --fix-v4bx and -march=armv3 when compiling code for the ARM4U with GCC ! + +When compiling C code, use -Xassembler --fix-v4bx instead of plain --fix-v4bx + +According to our tests and experience, the difference between this processor and a genuine ARMv3 instruction set is normally too subtle to make compiled C code fail, but the CPU comes with *absolutely no warranty*. + +2) With other compiler/assembler +Consult your compiler's documentation and make sure that no "new" instructions from more recent instruction sets than ARMv3 are ever used. It's possible to simulate them with the undefined instruction trap, too. + +3) A note about endianness : +This CPU has been made "little endian", in the sense that individual byte accesses to memory are made in that order on the bus. 
That would be trivial to change by modifying the get_byteen function in the "memory.vhd" file (lines 77-80). + +However, because of conversion issues of .hex files between 32-bit .hex files and 8-bit .hex files inside the Altera Quartus program, we had to use the -EB option as well, in order to make the generated binary code appear in big endian in the hex file. The processor itself is not big endian. As far as we know, the -EB option in GCC has only 2 effects : + +1) The generated file (either binary, hex, or object file) is written in the corresponding order +2) A bit in the object file's header is set so that it prevents linking big and little endian object files together +The -EB option doesn't affect the compiled code itself in any way, as far as we know. + + ***************** +** Test program ** + ***************** + +A test program using all ARM instructions is included as an example; it was used to debug and prove correct operation of the processor. + +Unfortunately the processor doesn't come with any debugger, so FPGA usage is a bit painful, as the whole hardware has to be re-downloaded for each change in the program, and the only way to debug a program is using output LEDs or anything similar. + + *********** +** Contact ** + *********** + +Contact me at jmasur [at] bluewin [dot] ch if needed. \ No newline at end of file
trunk/readme.txt Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.