OpenCores
URL https://opencores.org/ocsvn/lxp32/lxp32/trunk

Subversion Repositories lxp32

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /lxp32/trunk
    from Rev 6 to Rev 2
    Reverse comparison

Rev 6 → Rev 2

/tools/src/lxp32asm/assembler.h
41,7 → 41,6
std::vector<std::string> _currentLabels;
std::string _currentFileName;
std::vector<std::string> _includeSearchDirs;
std::vector<std::string> _exportedSymbols;
public:
void processFile(const std::string &filename);
81,7 → 80,6
void encodeJmp(const TokenList &list);
void encodeIret(const TokenList &list);
void encodeLc(const TokenList &list);
void encodeLcs(const TokenList &list);
void encodeLsb(const TokenList &list);
void encodeLub(const TokenList &list);
void encodeLw(const TokenList &list);
89,7 → 87,6
void encodeModu(const TokenList &list);
void encodeMov(const TokenList &list);
void encodeMul(const TokenList &list);
void encodeNeg(const TokenList &list);
void encodeNop(const TokenList &list);
void encodeNot(const TokenList &list);
void encodeOr(const TokenList &list);
/tools/src/lxp32asm/outputwriter.h
19,15 → 19,11
*/
 
class OutputWriter {
std::size_t _size=0;
public:
virtual ~OutputWriter() {}
virtual void write(const char *data,std::size_t n);
virtual void write(const char *data,std::size_t n)=0;
virtual void abort() {}
void pad(std::size_t size);
std::size_t size() const;
protected:
virtual void writeData(const char *data,std::size_t n)=0;
};
 
/*
39,9 → 35,8
std::ofstream _os;
public:
BinaryOutputWriter(const std::string &filename);
virtual void write(const char *data,std::size_t n) override;
virtual void abort() override;
protected:
virtual void writeData(const char *data,std::size_t n) override;
};
 
/*
59,9 → 54,8
public:
TextOutputWriter(const std::string &filename,Format f);
~TextOutputWriter();
virtual void write(const char *data,std::size_t n) override;
virtual void abort() override;
protected:
virtual void writeData(const char *data,std::size_t n) override;
};
 
#endif
/tools/src/lxp32asm/linker.cpp
9,7 → 9,6
#include "linker.h"
 
#include "linkableobject.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
17,7 → 16,6
#include <map>
#include <stdexcept>
#include <cassert>
#include <algorithm>
 
void Linker::addObject(LinkableObject &obj) {
_objects.push_back(&obj);
31,8 → 29,14
// Determine entry point
if(_objects.size()==1) _entryObject=_objects[0];
else if(_entryObject==nullptr)
throw std::runtime_error("Entry point not defined: cannot find \"entry\" or \"Entry\" symbol");
else {
auto const it=_globalSymbolTable.find("entry");
if(it==_globalSymbolTable.end())
throw std::runtime_error("Entry point not defined: cannot find \"entry\" symbol");
if(it->second.rva!=0)
throw std::runtime_error(it->second.obj->name()+": Entry point must refer to the start of the object");
_entryObject=it->second.obj;
}
// Assign virtual addresses
placeObjects();
42,7 → 46,6
// Write binary data
writeObjects(writer);
_bytesWritten=writer.size();
}
 
void Linker::setBase(LinkableObject::Word base) {
57,41 → 60,6
_imageSize=size;
}
 
void Linker::generateMap(std::ostream &s) {
// Calculate the maximum length of a symbol name
std::size_t len=0;
for(auto const &obj: _objects) {
for(auto const &sym: obj->symbols()) {
if(sym.second.type!=LinkableObject::Imported)
len=std::max(len,sym.first.size());
}
}
len=std::max(len+3,std::size_t(8)); // width of the first column
s<<"Image base address: "<<Utils::hex(_base)<<std::endl;
s<<"Object alignment: "<<_align<<std::endl;
s<<"Image size: "<<(_bytesWritten/4)<<" words"<<std::endl;
s<<"Number of objects: "<<_objects.size()<<std::endl;
s<<std::endl;
for(auto const &obj: _objects) {
s<<"Object \""<<obj->name()<<"\" at address "<<Utils::hex(obj->virtualAddress())<<std::endl;
s<<std::endl;
std::multimap<LinkableObject::Word,std::pair<std::string,LinkableObject::SymbolData> > sorted;
for(auto const &sym: obj->symbols()) sorted.emplace(sym.second.rva,sym);
for(auto const &sym: sorted) {
if(sym.second.second.type==LinkableObject::Imported) continue;
s<<sym.second.first;
s<<std::string(len-sym.second.first.size(),' ');
s<<Utils::hex(obj->virtualAddress()+sym.second.second.rva);
if(sym.second.second.type==LinkableObject::Local) s<<" Local";
else s<<" Exported";
s<<std::endl;
}
s<<std::endl;
}
}
 
/*
* Private members
*/
99,31 → 67,14
void Linker::buildSymbolTable() {
_globalSymbolTable.clear();
// Build a table of exported symbols from all modules
for(auto const &obj: _objects) {
auto const &table=obj->symbols();
for(auto const &item: table) {
if((item.first=="entry"||item.first=="Entry")&&item.second.type!=LinkableObject::Imported) {
if(_entryObject) {
std::ostringstream msg;
msg<<obj->name()<<": Duplicate definition of the entry symbol ";
msg<<"(previously defined in "<<_entryObject->name()<<")";
throw std::runtime_error(msg.str());
}
if(item.second.rva!=0) {
std::ostringstream msg;
msg<<obj->name()<<": ";
msg<<"Entry point must refer to the start of the object";
throw std::runtime_error(msg.str());
}
_entryObject=obj;
}
if(item.second.type==LinkableObject::Local) continue;
// Insert item to the global symbol table if it doesn't exist yet
auto it=_globalSymbolTable.emplace(item.first,GlobalSymbolData()).first;
 
// Check that the symbol has not been already defined in another object
if(item.second.type==LinkableObject::Exported) {
// If the symbol is local, check that it has not been already defined in another object
if(item.second.type==LinkableObject::Local) {
if(it->second.obj) {
std::ostringstream msg;
msg<<obj->name()<<": Duplicate definition of \""<<item.first;
133,28 → 84,12
it->second.obj=obj;
it->second.rva=item.second.rva;
}
if(!item.second.refs.empty()) it->second.refs.insert(obj);
 
// Merge reference tables
for(auto const &ref: item.second.refs) it->second.refs.emplace(obj,ref.rva);
}
}
// Check that local symbols don't shadow the public ones
for(auto const &obj: _objects) {
auto const &table=obj->symbols();
for(auto const &item: table) {
if(item.second.type!=LinkableObject::Local) continue;
auto it=_globalSymbolTable.find(item.first);
if(it==_globalSymbolTable.end()) continue;
if(!it->second.obj) continue;
if(item.first==it->first) {
std::ostringstream msg;
msg<<obj->name()<<": Local symbol \""<<item.first<<"\" shadows the public one ";
msg<<"(defined in "<<it->second.obj->name()<<")";
throw std::runtime_error(msg.str());
}
}
}
// Check that no undefined symbols remain
for(auto const &item: _globalSymbolTable) {
if(item.second.obj==nullptr&&!item.second.refs.empty()) {
161,7 → 96,7
std::ostringstream msg;
msg<<"Undefined symbol: \""<<item.first<<"\"";
auto const it=item.second.refs.begin();
msg<<" (referenced from "<<(*it)->name()<<")";
msg<<" (referenced from "<<it->first->name()<<")";
throw std::runtime_error(msg.str());
}
}
174,31 → 109,12
if(_objects.size()>1) {
for(auto it=_objects.begin();it!=_objects.end();++it) {
if(*it==_entryObject) {
_objects.erase(it);
std::swap(*it,_objects[0]);
break;
}
}
_objects.insert(_objects.begin(),_entryObject);
}
// Remove unreferenced objects
if(_objects.size()>1) {
std::set<const LinkableObject*> used;
markAsUsed(_objects[0],used);
for(auto it=_objects.begin();it!=_objects.end();) {
if(used.find(*it)==used.end()) {
std::cerr<<"Linker warning: skipping an unreferenced object \"";
std::cerr<<(*it)->name()<<"\""<<std::endl;
for(auto sym=_globalSymbolTable.begin();sym!=_globalSymbolTable.end();) {
if(sym->second.obj==*it) sym=_globalSymbolTable.erase(sym);
else ++sym;
}
it=_objects.erase(it);
}
else ++it;
}
}
// Set base addresses
for(auto it=_objects.begin();it!=_objects.end();++it) {
(*it)->setVirtualAddress(currentBase);
210,33 → 126,14
 
void Linker::relocateObject(LinkableObject *obj) {
for(auto const &sym: obj->symbols()) {
LinkableObject::Word addr;
if(sym.second.refs.empty()) continue;
if(sym.second.type==LinkableObject::Local) addr=obj->virtualAddress()+sym.second.rva;
else {
auto it=_globalSymbolTable.find(sym.first);
assert(it!=_globalSymbolTable.end());
assert(it->second.obj);
addr=it->second.obj->virtualAddress()+it->second.rva;
}
auto it=_globalSymbolTable.find(sym.first);
assert(it!=_globalSymbolTable.end());
if(it->second.refs.empty()) continue;
assert(it->second.obj);
auto addr=it->second.obj->virtualAddress()+it->second.rva;
for(auto const &ref: sym.second.refs) {
if(ref.type==LinkableObject::Regular) obj->replaceWord(ref.rva,addr+ref.offset);
else {
auto target=static_cast<LinkableObject::Word>(addr+ref.offset);
if(target>0xFFFFF&&target<0xFFF00000) {
std::ostringstream msg;
msg<<"Address 0x"<<Utils::hex(target)<<" is out of the range for a short reference";
msg<<" (referenced from "<<ref.source<<":"<<ref.line<<")";
throw std::runtime_error(msg.str());
}
target&=0x1FFFFF;
auto w=obj->getWord(ref.rva);
w|=(target&0xFFFF);
w|=((target<<8)&0x1F000000);
obj->replaceWord(ref.rva,w);
}
auto offset=obj->getWord(ref.rva);
obj->replaceWord(ref.rva,addr+offset);
}
}
}
260,13 → 157,3
else if(currentSize<_imageSize) writer.pad(_imageSize-currentSize);
}
}
 
void Linker::markAsUsed(const LinkableObject *obj,std::set<const LinkableObject*> &used) {
if(used.find(obj)!=used.end()) return; // already processed
used.insert(obj);
for(auto const &sym: _globalSymbolTable) {
for(auto const &ref: sym.second.refs) {
if(ref==obj) markAsUsed(sym.second.obj,used);
}
}
}
/tools/src/lxp32asm/main.cpp
8,7 → 8,6
 
#include "assembler.h"
#include "linker.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
26,7 → 25,6
bool compileOnly=false;
std::string outputFileName;
std::string mapFileName;
std::vector<std::string> includeSearchDirs;
LinkableObject::Word base=0;
std::size_t align=4;
40,7 → 38,7
os<<" "<<program<<" [ option(s) | input file(s) ]"<<std::endl<<std::endl;
os<<"Options:"<<std::endl;
os<<" -a <align> Object alignment (default: 4)"<<std::endl;
os<<" -a <align> Section alignment (default: 4)"<<std::endl;
os<<" -b <addr> Base address (default: 0)"<<std::endl;
os<<" -c Compile only (don't link)"<<std::endl;
os<<" -f <fmt> Output file format (see below)"<<std::endl;
47,14 → 45,12
os<<" -h, --help Display a short help message"<<std::endl;
os<<" -i <dir> Add directory to the list of directories used to search"<<std::endl;
os<<" for included files (multiple directories can be specified)"<<std::endl;
os<<" -m <file> Generate map file"<<std::endl;
os<<" -o <file> Output file name"<<std::endl;
os<<" -s <size> Output image size"<<std::endl;
os<<" -- Do not interpret subsequent arguments as options"<<std::endl;
os<<std::endl;
os<<"Object alignment must be a power of two and can't be less than 4."<<std::endl;
os<<"Base address must be a multiple of object alignment."<<std::endl;
os<<"Image size must be a multiple of 4."<<std::endl;
os<<"Section alignment and image size must be multiples of 4."<<std::endl;
os<<"Base address must be a multiple of section alignment."<<std::endl;
os<<std::endl;
os<<"Output file formats:"<<std::endl;
71,8 → 67,6
std::ifstream in(filename,std::ios_base::in);
if(!in) return false;
if(in.tellg()==static_cast<std::ifstream::pos_type>(-1))
return false; // the stream is not seekable
std::vector<char> buf(idSize);
in.read(buf.data(),idSize);
90,7 → 84,7
bool noMoreOptions=false;
std::cout<<"LXP32 Platform Assembler and Linker"<<std::endl;
std::cout<<"Copyright (c) 2016-2019 by Alex I. Kuznetsov"<<std::endl;
std::cout<<"Copyright (c) 2016 by Alex I. Kuznetsov"<<std::endl;
if(argc<=1) {
displayUsage(std::cout,argv[0]);
107,12 → 101,11
}
try {
options.align=std::stoul(argv[i],nullptr,0);
if(!Utils::isPowerOf2(options.align)) throw std::exception();
if(options.align<4) throw std::exception();
if(options.align%4!=0||options.align==0) throw std::exception();
alignmentSpecified=true;
}
catch(std::exception &) {
throw std::runtime_error("Invalid object alignment");
throw std::runtime_error("Invalid section alignment");
}
}
else if(!strcmp(argv[i],"-b")) {
122,6 → 115,7
}
try {
options.base=std::stoul(argv[i],nullptr,0);
//if(options.base%4!=0) throw std::exception();
baseSpecified=true;
}
catch(std::exception &) {
154,13 → 148,6
}
options.includeSearchDirs.push_back(argv[i]);
}
else if(!strcmp(argv[i],"-m")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.mapFileName=argv[i];
}
else if(!strcmp(argv[i],"-o")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
185,11 → 172,11
}
if(options.base%options.align!=0)
throw std::runtime_error("Base address must be a multiple of object alignment");
throw std::runtime_error("Base address must be a multiple of section alignment");
if(options.compileOnly) {
if(alignmentSpecified)
std::cerr<<"Warning: Object alignment is ignored in compile-only mode"<<std::endl;
std::cerr<<"Warning: Section alignment is ignored in compile-only mode"<<std::endl;
if(baseSpecified)
std::cerr<<"Warning: Base address is ignored in compile-only mode"<<std::endl;
if(formatSpecified)
196,8 → 183,6
std::cerr<<"Warning: Output format is ignored in compile-only mode"<<std::endl;
if(options.imageSize>0)
std::cerr<<"Warning: Image size is ignored in compile-only mode"<<std::endl;
if(!options.mapFileName.empty())
std::cerr<<"Warning: Map file is not generated in compile-only mode"<<std::endl;
}
if(inputFiles.empty())
293,14 → 278,6
std::cerr<<"Linker error: "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
std::cout<<writer->size()/4<<" words written"<<std::endl;
if(!options.mapFileName.empty()) {
std::ofstream out(options.mapFileName);
if(!out) throw std::runtime_error("Cannot open file \""+options.mapFileName+"\" for writing");
linker.generateMap(out);
}
}
catch(std::exception &ex) {
std::cerr<<"Error: "<<ex.what()<<std::endl;
/tools/src/lxp32asm/utils.h
52,11 → 52,6
bool ishexdigit(char ch);
bool isoctdigit(char ch);
/*
 * Returns true if x is a positive power of two (1, 2, 4, ...).
 *
 * Zero and negative values yield false. They are rejected before the bit
 * test so that "x-1" is never evaluated for the minimum value of a signed
 * type, where it would overflow.
 */
template <typename T> bool isPowerOf2(const T &x) {
	static_assert(std::is_integral<T>::value,"Argument must be of integral type");
	// A power of two has exactly one bit set, so clearing the lowest set
	// bit with x&(x-1) must leave zero
	return (x>0)&&((x&(x-1))==0);
}
}
 
#endif
/tools/src/lxp32asm/linkableobject.cpp
96,7 → 96,7
_code[rva++]=static_cast<Byte>(value>>24);
}
 
void LinkableObject::addSymbol(const std::string &name,Word rva) {
void LinkableObject::addLocalSymbol(const std::string &name,Word rva) {
auto &data=symbol(name);
if(data.type!=Unknown) throw std::runtime_error("Symbol \""+name+"\" is already defined");
data.type=Local;
103,23 → 103,15
data.rva=rva;
}
 
void LinkableObject::addImportedSymbol(const std::string &name) {
void LinkableObject::addExternalSymbol(const std::string &name) {
auto &data=symbol(name);
if(data.type!=Unknown) throw std::runtime_error("Symbol \""+name+"\" is already defined");
data.type=Imported;
data.type=External;
}
 
/*
 * Changes the type of an already defined symbol to Exported.
 *
 * Throws std::runtime_error if the symbol is not in the table or its type
 * is still Unknown, if it is Imported, or if it is Exported already.
 */
void LinkableObject::exportSymbol(const std::string &name) {
	auto const found=_symbols.find(name);
	if(found==_symbols.end()) throw std::runtime_error("Undefined symbol \""+name+"\"");

	auto &data=found->second;
	switch(data.type) {
	case Unknown:
		throw std::runtime_error("Undefined symbol \""+name+"\"");
	case Imported:
		throw std::runtime_error("Symbol \""+name+"\" can't be both imported and exported at the same time");
	case Exported:
		throw std::runtime_error("Symbol \""+name+"\" has been already exported");
	default:
		break;
	}
	data.type=Exported;
}
 
void LinkableObject::addReference(const std::string &symbolName,const Reference &ref) {
void LinkableObject::addReference(const std::string &symbolName,const std::string &source,int line,Word rva) {
auto &data=symbol(symbolName);
data.refs.push_back(ref);
data.refs.push_back({source,line,rva});
}
 
LinkableObject::SymbolData &LinkableObject::symbol(const std::string &name) {
128,7 → 120,7
 
const LinkableObject::SymbolData &LinkableObject::symbol(const std::string &name) const {
auto const it=_symbols.find(name);
if(it==_symbols.end()) throw std::runtime_error("Undefined symbol \""+name+"\"");
if(it==_symbols.end()) throw std::runtime_error("Undefined symbol");
return it->second;
}
 
159,18 → 151,10
out<<std::endl;
out<<"Start Symbol"<<std::endl;
out<<"\tName "<<Utils::urlEncode(sym.first)<<std::endl;
if(sym.second.type==Local) out<<"\tType Local"<<std::endl;
else if(sym.second.type==Exported) out<<"\tType Exported"<<std::endl;
else out<<"\tType Imported"<<std::endl;
if(sym.second.type!=Imported) out<<"\tRVA 0x"<<Utils::hex(sym.second.rva)<<std::endl;
if(sym.second.type==Local) out<<"\tRVA 0x"<<Utils::hex(sym.second.rva)<<std::endl;
else out<<"\tExternal"<<std::endl;
for(auto const &ref: sym.second.refs) {
out<<"\tRef ";
out<<Utils::urlEncode(ref.source)<<" ";
out<<ref.line<<" ";
out<<"0x"<<Utils::hex(ref.rva)<<" ";
out<<ref.offset<<" ";
if(ref.type==Regular) out<<"Regular"<<std::endl;
else if(ref.type==Short) out<<"Short"<<std::endl;
out<<"\tRef "<<Utils::urlEncode(ref.source)<<" "<<ref.line<<" 0x"<<Utils::hex(ref.rva)<<std::endl;
}
out<<"End Symbol"<<std::endl;
}
247,15 → 231,10
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
name=Utils::urlDecode(tokens[1]);
}
else if(tokens[0]=="Type") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Local") data.type=Local;
else if(tokens[1]=="Exported") data.type=Exported;
else if(tokens[1]=="Imported") data.type=Imported;
else throw std::runtime_error("Bad symbol type");
}
else if(tokens[0]=="External") data.type=External;
else if(tokens[0]=="RVA") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
data.type=Local;
data.rva=std::strtoul(tokens[1].c_str(),NULL,0);
}
else if(tokens[0]=="Ref") {
264,10 → 243,6
ref.source=Utils::urlDecode(tokens[1]);
ref.line=std::strtoul(tokens[2].c_str(),NULL,0);
ref.rva=std::strtoul(tokens[3].c_str(),NULL,0);
ref.offset=std::strtoll(tokens[4].c_str(),NULL,0);
if(tokens[5]=="Regular") ref.type=Regular;
else if(tokens[5]=="Short") ref.type=Short;
else throw std::runtime_error("Invalid reference type: \""+tokens[5]+"\"");
data.refs.push_back(std::move(ref));
}
}
/tools/src/lxp32asm/linker.h
17,17 → 17,16
#include <map>
#include <vector>
#include <string>
#include <set>
 
class Linker {
struct GlobalSymbolData {
LinkableObject *obj=nullptr;
LinkableObject::Word rva=0;
std::set<const LinkableObject*> refs;
std::multimap<const LinkableObject*,LinkableObject::Word> refs;
};
std::vector<LinkableObject*> _objects;
LinkableObject *_entryObject=nullptr;
LinkableObject *_entryObject;
std::map<std::string,GlobalSymbolData> _globalSymbolTable;
// Various output options
34,7 → 33,6
LinkableObject::Word _base=0;
std::size_t _align=4;
std::size_t _imageSize=0;
std::size_t _bytesWritten=0;
public:
void addObject(LinkableObject &obj);
void link(OutputWriter &writer);
41,13 → 39,11
void setBase(LinkableObject::Word base);
void setAlignment(std::size_t align);
void setImageSize(std::size_t size);
void generateMap(std::ostream &s);
private:
void buildSymbolTable();
void placeObjects();
void relocateObject(LinkableObject *obj);
void writeObjects(OutputWriter &writer);
void markAsUsed(const LinkableObject *obj,std::set<const LinkableObject*> &used);
};
 
#endif
/tools/src/lxp32asm/assembler.cpp
30,7 → 30,6
_state=Initial;
_currentFileName=filename;
processFileRecursive(filename);
 
// Examine symbol table
for(auto const &sym: _obj.symbols()) {
if(sym.second.type==LinkableObject::Unknown&&!sym.second.refs.empty()) {
41,8 → 40,6
throw std::runtime_error(msg.str());
}
}
for(auto const &sym: _exportedSymbols) _obj.exportSymbol(sym);
}
 
void Assembler::processFileRecursive(const std::string &filename) {
66,14 → 63,16
_line++;
}
if(_state!=Initial) throw std::runtime_error("Unexpected end of file");
_line=savedLine;
_state=savedState;
_currentFileName=savedFileName;
if(!_currentLabels.empty())
throw std::runtime_error("Symbol definition must be followed by an instruction or data definition statement");
for(auto const &label: _currentLabels) {
_obj.addLocalSymbol(label,
static_cast<LinkableObject::Word>(_obj.codeSize()));
}
_currentLabels.clear();
}
 
void Assembler::addIncludeSearchDir(const std::string &dir) {
128,7 → 127,7
else throw std::runtime_error(std::string("Unexpected character: \"")+ch+"\"");
break;
case Word:
if(std::isalnum(ch)||ch=='_'||ch=='@'||ch=='+'||ch=='-') word+=ch;
if(std::isalnum(ch)||ch=='_'||ch=='@') word+=ch;
else {
i--;
_state=Initial;
207,12 → 206,7
// Perform macro substitution
for(auto &token: list) {
auto it=_macros.find(token);
// Note: we don't expand a macro identifier in the #define statement
// since that would lead to counter-intuitive results
if(it==_macros.end()||
(newlist.size()==1&&newlist[0]=="#define")||
(newlist.size()==3&&newlist[1]==":"&&newlist[2]=="#define"))
newlist.push_back(std::move(token));
if(it==_macros.end()) newlist.push_back(std::move(token));
else for(auto const &replace: it->second) newlist.push_back(replace);
}
list=std::move(newlist);
239,7 → 233,7
else rva=elaborateInstruction(list);
for(auto const &label: _currentLabels) {
_obj.addSymbol(label,rva);
_obj.addLocalSymbol(label,rva);
}
_currentLabels.clear();
}
249,24 → 243,15
assert(!list.empty());
if(list[0]=="#define") {
if(list.size()<3)
throw std::runtime_error("Wrong number of tokens in the directive");
if(_macros.find(list[1])!=_macros.end())
throw std::runtime_error("Macro \""+list[1]+"\" has been already defined");
if(!validateIdentifier(list[1]))
throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
if(list.size()<3) throw std::runtime_error("Wrong number of tokens in the directive");
if(!validateIdentifier(list[1])) throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
_macros.emplace(list[1],TokenList(list.begin()+2,list.end()));
}
else if(list[0]=="#export") {
else if(list[0]=="#extern") {
if(list.size()!=2) std::runtime_error("Wrong number of tokens in the directive");
if(!validateIdentifier(list[1])) throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
_exportedSymbols.push_back(list[1]);
_obj.addExternalSymbol(list[1]);
}
else if(list[0]=="#import") {
if(list.size()!=2) std::runtime_error("Wrong number of tokens in the directive");
if(!validateIdentifier(list[1])) throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
_obj.addImportedSymbol(list[1]);
}
else if(list[0]=="#include") {
if(list.size()!=2) std::runtime_error("Wrong number of tokens in the directive");
auto filename=Utils::dequoteString(list[1]);
300,8 → 285,6
if(list.size()>2) throw std::runtime_error("Unexpected token: \""+list[2]+"\"");
std::size_t align=4;
if(list.size()>1) align=static_cast<std::size_t>(numericLiteral(list[1]));
if(!Utils::isPowerOf2(align)) throw std::runtime_error("Alignment must be a power of 2");
if(align<4) throw std::runtime_error("Alignment must be at least 4");
rva=_obj.addPadding(align);
}
else if(list[0]==".reserve") {
368,7 → 351,6
else if(list[0]=="jmp") encodeJmp(list);
else if(list[0]=="iret") encodeIret(list);
else if(list[0]=="lc") encodeLc(list);
else if(list[0]=="lcs") encodeLcs(list);
else if(list[0]=="lsb") encodeLsb(list);
else if(list[0]=="lub") encodeLub(list);
else if(list[0]=="lw") encodeLw(list);
376,7 → 358,6
else if(list[0]=="modu") encodeModu(list);
else if(list[0]=="mov") encodeMov(list);
else if(list[0]=="mul") encodeMul(list);
else if(list[0]=="neg") encodeNeg(list);
else if(list[0]=="nop") encodeNop(list);
else if(list[0]=="not") encodeNot(list);
else if(list[0]=="or") encodeOr(list);
475,7 → 456,7
arglist.push_back(std::move(a));
}
else if(list[i].size()==3&&list[i].substr(0,2)=="iv"&&
list[i][2]>='0'&&list[i][2]<='7') // interrupt vector
std::isdigit(list[i][2])) // interrupt vector
{
a.type=Operand::Register;
a.reg=240+(list[i][2]-'0');
617,6 → 598,11
void Assembler::encodeDivs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("divs instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&args[2].i==0) {
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Division by zero"<<std::endl;
}
LinkableObject::Word w=0x54000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
627,6 → 613,11
void Assembler::encodeDivu(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("divu instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&args[2].i==0) {
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Division by zero"<<std::endl;
}
LinkableObject::Word w=0x50000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
665,13 → 656,8
_obj.addWord(w);
if(args[1].type==Operand::Identifier) {
LinkableObject::Reference ref;
ref.source=currentFileName();
ref.line=line();
ref.rva=_obj.addWord(0);
ref.offset=args[1].i;
ref.type=LinkableObject::Regular;
_obj.addReference(args[1].str,ref);
auto symRva=_obj.addWord(static_cast<LinkableObject::Word>(args[1].i));
_obj.addReference(args[1].str,currentFileName(),line(),symRva);
}
else if(args[1].type==Operand::NumericLiteral) {
_obj.addWord(static_cast<LinkableObject::Word>(args[1].i));
679,33 → 665,6
else throw std::runtime_error("\""+args[1].str+"\": bad argument");
}
 
/*
 * Encodes the "lcs dst, value" instruction (base word 0xA0000000), which
 * carries a 21-bit constant inside the instruction word: bits 0-15 of the
 * constant are placed in bits 0-15 of the word and bits 16-20 in bits 24-28.
 *
 * A numeric literal is encoded right away; it must lie either in
 * [-1048576, 1048575] or in [0xFFF00000, 0xFFFFFFFF]. For an identifier the
 * word is emitted without the constant and a Short reference is recorded
 * for it.
 *
 * Throws std::runtime_error on a wrong operand count, an out-of-range
 * literal, or a second operand of any other kind.
 */
void Assembler::encodeLcs(const TokenList &list) {
	auto args=getOperands(list);
	if(args.size()!=2) throw std::runtime_error("lcs instruction requires 2 operands");

	LinkableObject::Word w=0xA0000000;
	encodeDstOperand(w,args[0]);

	auto &src=args[1];

	if(src.type==Operand::Identifier) {
		// The constant is not known yet: emit the bare instruction word
		// and remember where it is
		LinkableObject::Reference ref;
		ref.source=currentFileName();
		ref.line=line();
		ref.rva=_obj.addWord(w);
		ref.offset=src.i;
		ref.type=LinkableObject::Short;
		_obj.addReference(src.str,ref);
		return;
	}

	if(src.type!=Operand::NumericLiteral)
		throw std::runtime_error("\""+src.str+"\": bad argument");

	// Accept the value either as a signed 21-bit number or as the
	// equivalent 32-bit unsigned pattern
	const bool fitsSigned=(src.i>=-1048576&&src.i<=1048575);
	const bool fitsUnsigned=(src.i>=0xFFF00000&&src.i<=0xFFFFFFFF);
	if(!fitsSigned&&!fitsUnsigned)
		throw std::runtime_error("\""+src.str+"\": out of range");

	auto c=static_cast<LinkableObject::Word>(src.i)&0x1FFFFF;
	w|=(c&0xFFFF);
	w|=((c<<8)&0x1F000000);
	_obj.addWord(w);
}
 
void Assembler::encodeLsb(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lsb instruction requires 2 operands");
776,16 → 735,6
_obj.addWord(w);
}
 
void Assembler::encodeNeg(const TokenList &list) {
// Note: "neg" is not a real instruction, but an alias for "sub dst, 0, src"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("neg instruction requires 2 operands");
LinkableObject::Word w=0x44000000;
encodeDstOperand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeNop(const TokenList &list) {
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("nop instruction doesn't take operands");
/tools/src/lxp32asm/outputwriter.cpp
22,11 → 22,6
* OutputWriter members
*/
 
void OutputWriter::write(const char *data,std::size_t n) {
writeData(data,n);
_size+=n;
}
 
void OutputWriter::pad(std::size_t size) {
static char zeros[256]; // static objects are zero-initialized
while(size>0) {
36,10 → 31,6
}
}
 
std::size_t OutputWriter::size() const {
return _size;
}
 
/*
* BinaryOutputWriter members
*/
51,7 → 42,7
if(!_os) throw std::runtime_error("Cannot open \""+filename+"\" for writing");
}
 
void BinaryOutputWriter::writeData(const char *data,std::size_t n) {
void BinaryOutputWriter::write(const char *data,std::size_t n) {
_os.write(data,n);
}
 
79,7 → 70,7
}
}
 
void TextOutputWriter::writeData(const char *data,std::size_t n) {
void TextOutputWriter::write(const char *data,std::size_t n) {
while(n>0) {
assert(_buf.size()<4);
auto count=std::min(4-_buf.size(),n);
/tools/src/lxp32asm/linkableobject.h
20,17 → 20,12
public:
typedef unsigned char Byte;
typedef std::uint32_t Word;
typedef std::int_least64_t Integer;
enum SymbolType {Unknown,Local,Exported,Imported};
enum RefType {Regular,Short};
enum SymbolType {Unknown,Local,External};
struct Reference {
std::string source;
int line;
Word rva;
Integer offset;
RefType type;
};
struct SymbolData {
SymbolType type=Unknown;
37,7 → 32,6
Word rva;
std::vector<Reference> refs;
};
typedef std::map<std::string,SymbolData> SymbolTable;
private:
67,10 → 61,9
Word getWord(Word rva) const;
void replaceWord(Word rva,Word value);
void addSymbol(const std::string &name,Word rva);
void addImportedSymbol(const std::string &name);
void exportSymbol(const std::string &name);
void addReference(const std::string &symbolName,const Reference &ref);
void addLocalSymbol(const std::string &name,Word rva);
void addExternalSymbol(const std::string &name);
void addReference(const std::string &symbolName,const std::string &source,int line,Word rva);
SymbolData &symbol(const std::string &name);
const SymbolData &symbol(const std::string &name) const;
/tools/src/lxp32dump/disassembler.h
27,14 → 27,15
int _value;
public:
Operand(Type t,int value);
Type type() const;
int value() const;
std::string str() const;
};
std::istream &_is;
std::ostream &_os;
Format _fmt;
bool _preferAliases;
int _lineNumber;
Word _pos;
public:
41,7 → 42,6
Disassembler(std::istream &is,std::ostream &os);
void setFormat(Format fmt);
void setBase(Word base);
void setPreferAliases(bool b);
void dump();
template <typename T> static std::string hex(const T &w) {
58,12 → 58,11
}
private:
bool getWord(Word &w);
std::string str(const Operand &op);
static Operand decodeRd1Operand(Word w);
static Operand decodeRd2Operand(Word w);
static Operand decodeDstOperand(Word w);
static std::string decodeSimpleInstruction(const std::string &op,Word w);
std::string decodeSimpleInstruction(const std::string &op,Word w);
std::string decodeAdd(Word w);
std::string decodeAnd(Word w);
std::string decodeCall(Word w);
73,7 → 72,6
std::string decodeHlt(Word w);
std::string decodeJmp(Word w);
std::string decodeLc(Word w,bool &valid,Word &operand);
std::string decodeLcs(Word w);
std::string decodeLsb(Word w);
std::string decodeLub(Word w);
std::string decodeLw(Word w);
/tools/src/lxp32dump/main.cpp
30,7 → 30,6
os<<" -b <addr> Base address (for comments only)"<<std::endl;
os<<" -f <fmt> Input format (bin, textio, dec, hex), default: autodetect"<<std::endl;
os<<" -h, --help Display a short help message"<<std::endl;
os<<" -na Do not use instruction and register aliases"<<std::endl;
os<<" -o <file> Output file name, default: standard output"<<std::endl;
os<<" -- Do not interpret subsequent arguments as options"<<std::endl;
}
65,13 → 64,12
std::string inputFileName,outputFileName;
std::cerr<<"LXP32 Platform Disassembler"<<std::endl;
std::cerr<<"Copyright (c) 2016-2019 by Alex I. Kuznetsov"<<std::endl;
std::cerr<<"Copyright (c) 2016 by Alex I. Kuznetsov"<<std::endl;
Disassembler::Format fmt=Disassembler::Bin;
bool noMoreOptions=false;
bool formatSpecified=false;
Disassembler::Word base=0;
bool noAliases=false;
if(argc<=1) {
displayUsage(std::cout,argv[0]);
113,9 → 111,6
displayUsage(std::cout,argv[0]);
return 0;
}
else if(!strcmp(argv[i],"-na")) {
noAliases=true;
}
else if(!strcmp(argv[i],"-o")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
180,7 → 175,6
Disassembler disasm(in,*os);
disasm.setFormat(fmt);
disasm.setBase(base);
disasm.setPreferAliases(!noAliases);
try {
disasm.dump();
/tools/src/lxp32dump/disassembler.cpp
26,12 → 26,24
return _value;
}
 
/*
 * Returns the textual form of the operand.
 *
 * Registers 240-247 are printed as iv0-iv7 and registers 252-255 as cr,
 * irp, rp and sp; any other register is printed as "r" followed by its
 * number. A non-register operand is printed as its decimal value.
 */
std::string Disassembler::Operand::str() const {
	if(_type!=Register) return std::to_string(_value);

	switch(_value) {
	case 252: return "cr";
	case 253: return "irp";
	case 254: return "rp";
	case 255: return "sp";
	default: break;
	}

	const bool isInterruptVector=(_value>=240&&_value<=247);
	if(isInterruptVector) return "iv"+std::to_string(_value-240);
	return "r"+std::to_string(_value);
}
 
/*
* Disassembler class members
*/
 
Disassembler::Disassembler(std::istream &is,std::ostream &os):
_is(is),_os(os),_fmt(Bin),_preferAliases(true),_lineNumber(0),_pos(0) {}
_is(is),_os(os),_fmt(Bin),_lineNumber(0),_pos(0) {}
 
void Disassembler::setFormat(Format fmt) {
_fmt=fmt;
41,10 → 53,6
_pos=base;
}
 
void Disassembler::setPreferAliases(bool b) {
_preferAliases=b;
}
 
void Disassembler::dump() {
Word word;
130,7 → 138,6
break;
default:
if((opcode>>4)==0x03) instruction=decodeCjmpxx(word);
else if((opcode>>3)==0x05) instruction=decodeLcs(word);
else instruction=decodeWord(word);
}
176,19 → 183,6
return true;
}
 
/*
 * Returns the textual form of an operand.
 *
 * A non-register operand is printed as its decimal value. A register is
 * printed as "r" followed by its number, unless aliases are preferred and
 * the register has one: 240-247 become iv0-iv7, 252-255 become cr, irp, rp
 * and sp.
 */
std::string Disassembler::str(const Operand &op) {
	const int value=op.value();
	if(op.type()!=Operand::Register) return std::to_string(value);

	if(_preferAliases) {
		if(value>=240&&value<=247) return "iv"+std::to_string(value-240);
		switch(value) {
		case 252: return "cr";
		case 253: return "irp";
		case 254: return "rp";
		case 255: return "sp";
		default: break;
		}
	}
	return "r"+std::to_string(value);
}
 
Disassembler::Operand Disassembler::decodeRd1Operand(Word w) {
int value=(w>>8)&0xFF;
if(w&0x02000000) return Operand(Operand::Register,value);
217,7 → 211,7
auto dst=decodeDstOperand(w);
auto rd1=decodeRd1Operand(w);
auto rd2=decodeRd2Operand(w);
oss<<op<<' '<<str(dst)<<", "<<str(rd1)<<", "<<str(rd2);
oss<<op<<' '<<dst.str()<<", "<<rd1.str()<<", "<<rd2.str();
return oss.str();
}
 
228,10 → 222,10
auto rd1=decodeRd1Operand(w);
auto rd2=decodeRd2Operand(w);
if(rd2.type()==Operand::Direct&&rd2.value()==0&&_preferAliases)
oss<<"mov "<<str(dst)<<", "<<str(rd1);
if(rd2.type()==Operand::Direct&&rd2.value()==0)
oss<<"mov "<<dst.str()<<", "<<rd1.str();
else
oss<<"add "<<str(dst)<<", "<<str(rd1)<<", "<<str(rd2);
oss<<"add "<<dst.str()<<", "<<rd1.str()<<", "<<rd2.str();
return oss.str();
}
249,7 → 243,7
if(rd1.type()!=Operand::Register) return decodeWord(w);
if(rd2.type()!=Operand::Direct||rd2.value()!=0) return decodeWord(w);
return "call "+str(rd1);
return "call "+rd1.str();
}
 
std::string Disassembler::decodeCjmpxx(Word w) {
308,9 → 302,9
if(rd1.type()!=Operand::Register) return decodeWord(w);
if(rd2.type()!=Operand::Direct||rd2.value()!=0) return decodeWord(w);
if(rd1.value()==253&&_preferAliases) return "iret";
if(rd1.value()==254&&_preferAliases) return "ret";
return "jmp "+str(rd1);
if(rd1.value()==253) return "iret";
if(rd1.value()==254) return "ret";
return "jmp "+rd1.str();
}
 
std::string Disassembler::decodeLc(Word w,bool &valid,Word &operand) {
327,17 → 321,9
if(!b) return decodeWord(w);
valid=true;
return "lc "+str(dst)+", 0x"+hex(operand);
return "lc "+dst.str()+", 0x"+hex(operand);
}
 
// Decodes an "lcs" instruction word into text of the form
// "lcs <dst>, 0x<immediate>".
// The immediate is assembled from two fields of the word: bits 15..0 are used
// as is, and bits 28..24 supply bits 20..16. If bit 20 of the assembled value
// is set, bits 31..21 are filled with ones before the value is printed.
std::string Disassembler::decodeLcs(Word w) {
	const auto dst=decodeDstOperand(w);

	// Low 16 bits combined with the five bits taken from the upper part of the word
	auto imm=(w&0xFFFF)|((w>>8)&0x001F0000);

	// Extend the top bit of the 21-bit field through the upper bits
	const bool topBitSet=(imm&0x00100000)!=0;
	if(topBitSet) imm|=0xFFE00000;

	return "lcs "+str(dst)+", 0x"+hex(imm);
}
 
std::string Disassembler::decodeLsb(Word w) {
std::ostringstream oss;
348,7 → 334,7
if(rd1.type()!=Operand::Register) return decodeWord(w);
if(rd2.type()!=Operand::Direct||rd2.value()!=0) return decodeWord(w);
return "lsb "+str(dst)+", "+str(rd1);
return "lsb "+dst.str()+", "+rd1.str();
}
 
std::string Disassembler::decodeLub(Word w) {
361,7 → 347,7
if(rd1.type()!=Operand::Register) return decodeWord(w);
if(rd2.type()!=Operand::Direct||rd2.value()!=0) return decodeWord(w);
return "lub "+str(dst)+", "+str(rd1);
return "lub "+dst.str()+", "+rd1.str();
}
 
std::string Disassembler::decodeLw(Word w) {
374,7 → 360,7
if(rd1.type()!=Operand::Register) return decodeWord(w);
if(rd2.type()!=Operand::Direct||rd2.value()!=0) return decodeWord(w);
return "lw "+str(dst)+", "+str(rd1);
return "lw "+dst.str()+", "+rd1.str();
}
 
std::string Disassembler::decodeMods(Word w) {
412,7 → 398,7
if(dst.value()!=0) return decodeWord(w);
if(rd1.type()!=Operand::Register) return decodeWord(w);
return "sb "+str(rd1)+", "+str(rd2);
return "sb "+rd1.str()+", "+rd2.str();
}
 
std::string Disassembler::decodeSl(Word w) {
434,18 → 420,7
}
 
std::string Disassembler::decodeSub(Word w) {
std::ostringstream oss;
auto dst=decodeDstOperand(w);
auto rd1=decodeRd1Operand(w);
auto rd2=decodeRd2Operand(w);
if(rd1.type()==Operand::Direct&&rd1.value()==0&&_preferAliases)
oss<<"neg "<<str(dst)<<", "<<str(rd2);
else
oss<<"sub "<<str(dst)<<", "<<str(rd1)<<", "<<str(rd2);
return oss.str();
return decodeSimpleInstruction("sub",w);
}
 
std::string Disassembler::decodeSw(Word w) {
458,7 → 433,7
if(dst.value()!=0) return decodeWord(w);
if(rd1.type()!=Operand::Register) return decodeWord(w);
return "sw "+str(rd1)+", "+str(rd2);
return "sw "+rd1.str()+", "+rd2.str();
}
 
std::string Disassembler::decodeXor(Word w) {
468,10 → 443,10
auto rd1=decodeRd1Operand(w);
auto rd2=decodeRd2Operand(w);
if(rd2.type()==Operand::Direct&&rd2.value()==-1&&_preferAliases)
oss<<"not "<<str(dst)<<", "<<str(rd1);
if(rd2.type()==Operand::Direct&&rd2.value()==-1)
oss<<"not "<<dst.str()<<", "<<rd1.str();
else
oss<<"xor "<<str(dst)<<", "<<str(rd1)<<", "<<str(rd2);
oss<<"xor "<<dst.str()<<", "<<rd1.str()<<", "<<rd2.str();
return oss.str();
}
/rtl/lxp32_ram256x32.vhd
17,12 → 17,12
 
entity lxp32_ram256x32 is
port(
clk_i: in std_logic;
wclk_i: in std_logic;
we_i: in std_logic;
waddr_i: in std_logic_vector(7 downto 0);
wdata_i: in std_logic_vector(31 downto 0);
rclk_i: in std_logic;
re_i: in std_logic;
raddr_i: in std_logic_vector(7 downto 0);
rdata_o: out std_logic_vector(31 downto 0)
35,7 → 35,7
signal ram: ram_type:=(others=>(others=>'0')); -- zero-initialize for SRAM-based FPGAs
 
attribute syn_ramstyle: string;
attribute syn_ramstyle of ram: signal is "no_rw_check";
attribute syn_ramstyle of ram: signal is "block_ram,no_rw_check";
attribute ram_style: string; -- for Xilinx
attribute ram_style of ram: signal is "block";
 
43,9 → 43,9
 
-- Write port
 
process (clk_i) is
process (wclk_i) is
begin
if rising_edge(clk_i) then
if rising_edge(wclk_i) then
if we_i='1' then
ram(to_integer(unsigned(waddr_i)))<=wdata_i;
end if;
54,15 → 54,11
 
-- Read port
 
process (clk_i) is
process (rclk_i) is
begin
if rising_edge(clk_i) then
if rising_edge(rclk_i) then
if re_i='1' then
if is_x(raddr_i) then -- to avoid numeric_std warnings during simulation
rdata_o<=(others=>'X');
else
rdata_o<=ram(to_integer(unsigned(raddr_i)));
end if;
rdata_o<=ram(to_integer(to_01(unsigned(raddr_i))));
end if;
end if;
end process;
/rtl/lxp32_scratchpad.vhd
42,12 → 42,12
 
ram_inst1: entity work.lxp32_ram256x32(rtl)
port map(
clk_i=>clk_i,
wclk_i=>clk_i,
we_i=>we_i,
waddr_i=>waddr_i,
wdata_i=>wdata_i,
rclk_i=>clk_i,
re_i=>'1',
raddr_i=>raddr1_i,
rdata_o=>ram1_rdata
57,12 → 57,12
 
ram_inst2: entity work.lxp32_ram256x32(rtl)
port map(
clk_i=>clk_i,
wclk_i=>clk_i,
we_i=>we_i,
waddr_i=>waddr_i,
wdata_i=>wdata_i,
rclk_i=>clk_i,
re_i=>'1',
raddr_i=>raddr2_i,
rdata_o=>ram2_rdata
/rtl/lxp32_icache.vhd
111,12 → 111,12
 
ram_inst: entity work.lxp32_ram256x32(rtl)
port map(
clk_i=>clk_i,
wclk_i=>clk_i,
we_i=>ram_we,
waddr_i=>ram_waddr,
wdata_i=>wbm_dat_i,
rclk_i=>clk_i,
re_i=>ram_re,
raddr_i=>ram_raddr,
rdata_o=>lli_dat_o
202,22 → 202,6
burst_cnt<=0;
wb_stb<='0';
wrap_cnt<=0;
wb_cti<=(others=>'-');
burst1<='-';
current_offset<=(others=>'-');
start_offset<=(others=>'-');
current_base<=(others=>'-');
next_base<=(others=>'-');
prev_base<=(others=>'-');
-- To suppress numeric_std warnings
-- synthesis translate_off
current_offset<=(others=>'0');
start_offset<=(others=>'0');
current_base<=(others=>'0');
next_base<=(others=>'0');
prev_base<=(others=>'0');
-- synthesis translate_on
else
if burst_cnt=0 and init='1' then
if miss='1' and near_miss='0' then
/rtl/lxp32_mul_opt.vhd
65,7 → 65,6
 
signal cnt: integer range 0 to 4:=0;
 
signal result: std_logic_vector(result_o'range);
signal ceo: std_logic:='0';
 
begin
110,17 → 109,8
if rst_i='1' then
ceo<='0';
cnt<=0;
reg1<=(others=>'-');
reg2<=(others=>'-');
acc_sum<=(others=>'-');
acc_carry<=(others=>'-');
else
if cnt=1 then
ceo<='1';
else
ceo<='0';
end if;
ceo<='0';
if ce_i='1' then
cnt<=4;
reg1<=unsigned(op1_i);
127,13 → 117,14
reg2<=unsigned(op2_i);
acc_sum<=(others=>'0');
acc_carry<=(others=>'0');
else
elsif cnt>0 then
acc_sum<=pp_sum(7);
acc_carry<=pp_carry(7)(acc_carry'range);
reg1<=reg1(reg1'high-8 downto 0)&X"00";
reg2<=X"00"&reg2(reg2'high downto 8);
if cnt>0 then
cnt<=cnt-1;
cnt<=cnt-1;
if cnt=1 then
ceo<='1';
end if;
end if;
end if;
140,29 → 131,7
end if;
end process;
 
result<=std_logic_vector(acc_sum+acc_carry);
 
result_o<=result;
result_o<=std_logic_vector(acc_sum+acc_carry);
ce_o<=ceo;
 
-- A simulation-time multiplication check
 
-- synthesis translate_off
 
-- Self-check for the multiplier: remembers the expected product when an
-- operation is started and compares it with the produced result when the
-- completion strobe is seen.
process (clk_i) is
-- Full-width reference product of the two operands
variable p: unsigned(op1_i'length+op2_i'length-1 downto 0);
begin
if rising_edge(clk_i) then
if ce_i='1' then
-- New operation started: compute the reference product (operands treated as unsigned)
p:=unsigned(op1_i)*unsigned(op2_i);
elsif ceo='1' then
-- Completion strobe: only the bits of p within result'range are compared
assert result=std_logic_vector(p(result'range))
report "Incorrect multiplication result"
severity failure;
end if;
end if;
end process;
 
-- synthesis translate_on
 
end architecture;
/rtl/lxp32_alu.vhd
31,6 → 31,7
cmd_cmp_i: in std_logic;
cmd_negate_op2_i: in std_logic;
cmd_and_i: in std_logic;
cmd_or_i: in std_logic;
cmd_xor_i: in std_logic;
cmd_shift_i: in std_logic;
cmd_shift_right_i: in std_logic;
61,16 → 62,22
signal cmp_s1: std_logic;
signal cmp_s2: std_logic;
 
signal logic_result: std_logic_vector(31 downto 0);
signal logic_we: std_logic;
signal and_result: std_logic_vector(31 downto 0);
signal and_we: std_logic;
signal or_result: std_logic_vector(31 downto 0);
signal or_we: std_logic;
signal xor_result: std_logic_vector(31 downto 0);
signal xor_we: std_logic;
 
signal mul_result: std_logic_vector(31 downto 0);
signal mul_ce: std_logic;
signal mul_we: std_logic;
 
signal div_result: std_logic_vector(31 downto 0);
signal div_quotient: std_logic_vector(31 downto 0);
signal div_remainder: std_logic_vector(31 downto 0);
signal div_ce: std_logic;
signal div_we: std_logic;
signal div_select_remainder: std_logic;
 
signal shift_result: std_logic_vector(31 downto 0);
signal shift_ce: std_logic;
90,11 → 97,7
-- Add/subtract
 
addend1<=unsigned(op1_i);
 
addend2_gen: for i in addend2'range generate
addend2(i)<=op2_i(i) xor cmd_negate_op2_i;
end generate;
 
addend2<=unsigned(op2_i) when cmd_negate_op2_i='0' else not unsigned(op2_i);
adder_result<=("0"&addend1)+("0"&addend2)+(to_unsigned(0,adder_result'length-1)&cmd_negate_op2_i);
adder_we<=cmd_addsub_i and valid_i;
 
123,16 → 126,15
(not cmp_s1 and not cmp_s2 and cmp_carry) or
(not cmp_s1 and cmp_s2)) and not cmp_eq;
 
-- Bitwise operations (and, or, xor)
-- Note: (a or b) = (a and b) or (a xor b)
-- Logical functions
 
logic_result_gen: for i in logic_result'range generate
logic_result(i)<=((op1_i(i) and op2_i(i)) and cmd_and_i) or
((op1_i(i) xor op2_i(i)) and cmd_xor_i);
end generate;
and_result<=op1_i and op2_i;
and_we<=cmd_and_i and valid_i;
or_result<=op1_i or op2_i;
or_we<=cmd_or_i and valid_i;
xor_result<=op1_i xor op2_i;
xor_we<=cmd_xor_i and valid_i;
 
logic_we<=(cmd_and_i or cmd_xor_i) and valid_i;
 
-- Multiplier
 
mul_ce<=cmd_mul_i and valid_i;
189,17 → 191,27
op1_i=>op1_i,
op2_i=>op2_i,
signed_i=>cmd_signed_i,
rem_i=>cmd_div_mod_i,
ce_o=>div_we,
result_o=>div_result
quotient_o=>div_quotient,
remainder_o=>div_remainder
);
end generate;
 
gen_no_divider: if not DIVIDER_EN generate
div_we<=div_ce;
div_result<=(others=>'0');
div_quotient<=(others=>'0');
div_remainder<=(others=>'0');
end generate;
 
-- Registers cmd_div_mod_i whenever div_ce is asserted, so that the value is
-- still available in div_select_remainder after the command inputs change.
-- The result multiplexer uses div_select_remainder to choose between
-- div_quotient and div_remainder.
process (clk_i) is
begin
if rising_edge(clk_i) then
if div_ce='1' then
-- Capture the quotient/remainder selection for the operation being started
div_select_remainder<=cmd_div_mod_i;
end if;
end if;
end process;
 
-- Shifter
 
shift_ce<=cmd_shift_i and valid_i;
221,15 → 233,18
 
result_mux_gen: for i in result_mux'range generate
result_mux(i)<=(adder_result(i) and adder_we) or
(logic_result(i) and logic_we) or
(and_result(i) and and_we) or
(or_result(i) and or_we) or
(xor_result(i) and xor_we) or
(mul_result(i) and mul_we) or
(div_result(i) and div_we) or
(div_quotient(i) and div_we and not div_select_remainder) or
(div_remainder(i) and div_we and div_select_remainder) or
(shift_result(i) and shift_we);
end generate;
 
result_o<=result_mux;
 
result_we<=adder_we or logic_we or mul_we or div_we or shift_we;
result_we<=adder_we or and_we or or_we or xor_we or mul_we or div_we or shift_we;
we_o<=result_we;
 
-- Pipeline control
237,10 → 252,15
process (clk_i) is
begin
if rising_edge(clk_i) then
if rst_i='1' or result_we='1' then
if rst_i='1' then
busy<='0';
elsif shift_ce='1' or mul_ce='1' or div_ce='1' then
busy<='1';
else
if shift_ce='1' or mul_ce='1' or div_ce='1' then
busy<='1';
end if;
if result_we='1' then
busy<='0';
end if;
end if;
end if;
end process;
/rtl/lxp32_decode.vhd
19,7 → 19,6
word_i: in std_logic_vector(31 downto 0);
next_ip_i: in std_logic_vector(29 downto 0);
current_ip_i: in std_logic_vector(29 downto 0);
valid_i: in std_logic;
jump_valid_i: in std_logic;
ready_o: out std_logic;
49,6 → 48,7
cmd_jump_o: out std_logic;
cmd_negate_op2_o: out std_logic;
cmd_and_o: out std_logic;
cmd_or_o: out std_logic;
cmd_xor_o: out std_logic;
cmd_shift_o: out std_logic;
cmd_shift_right_o: out std_logic;
78,6 → 78,8
signal rd1: std_logic_vector(7 downto 0);
signal rd2: std_logic_vector(7 downto 0);
 
signal current_ip: unsigned(next_ip_i'range);
 
-- Signals related to pipeline control
 
signal downstream_busy: std_logic;
117,6 → 119,8
downstream_busy<=valid_out and not ready_i;
busy<=downstream_busy or self_busy;
 
current_ip<=unsigned(next_ip_i)-1;
 
process (clk_i) is
begin
if rising_edge(clk_i) then
125,29 → 129,6
self_busy<='0';
state<=Regular;
interrupt_ready<='0';
cmd_loadop3_o<='-';
cmd_signed_o<='-';
cmd_dbus_o<='-';
cmd_dbus_store_o<='-';
cmd_dbus_byte_o<='-';
cmd_addsub_o<='-';
cmd_negate_op2_o<='-';
cmd_mul_o<='-';
cmd_div_o<='-';
cmd_div_mod_o<='-';
cmd_cmp_o<='-';
cmd_jump_o<='-';
cmd_and_o<='-';
cmd_xor_o<='-';
cmd_shift_o<='-';
cmd_shift_right_o<='-';
rd1_select<='-';
rd1_direct<=(others=>'-');
rd2_select<='-';
rd2_direct<=(others=>'-');
op3_o<=(others=>'-');
jump_type_o<=(others=>'-');
dst_out<=(others=>'-');
else
interrupt_ready<='0';
if jump_valid_i='1' then
155,36 → 136,34
self_busy<='0';
state<=Regular;
elsif downstream_busy='0' then
op3_o<=(others=>'-');
rd1_direct<=std_logic_vector(resize(signed(rd1),rd1_direct'length));
rd2_direct<=std_logic_vector(resize(signed(rd2),rd2_direct'length));
cmd_signed_o<=opcode(0);
cmd_div_mod_o<=opcode(1);
cmd_shift_right_o<=opcode(1);
cmd_dbus_byte_o<=opcode(1);
cmd_dbus_store_o<=opcode(2);
case state is
when Regular =>
cmd_loadop3_o<='0';
cmd_signed_o<='0';
cmd_dbus_o<='0';
cmd_dbus_store_o<='0';
cmd_dbus_byte_o<='0';
cmd_addsub_o<='0';
cmd_negate_op2_o<='0';
cmd_mul_o<='0';
cmd_div_o<='0';
cmd_div_mod_o<='0';
cmd_cmp_o<='0';
cmd_jump_o<='0';
cmd_and_o<='0';
cmd_or_o<='0';
cmd_xor_o<='0';
cmd_shift_o<='0';
cmd_shift_right_o<='0';
op3_o<=(others=>'-');
jump_type_o<=opcode(3 downto 0);
if interrupt_valid_i='1' and valid_i='1' then
cmd_jump_o<='1';
cmd_loadop3_o<='1';
op3_o<=current_ip_i&"01"; -- LSB indicates interrupt return
op3_o<=std_logic_vector(current_ip)&"01"; -- LSB indicates interrupt return
dst_out<=X"FD"; -- interrupt return pointer
rd1_select<='1';
rd2_select<='0';
193,17 → 172,19
self_busy<='1';
state<=ContinueInterrupt;
else
if opcode(5 downto 3)="101" or opcode="000001" then -- lc or lcs
if opcode="000001" then
cmd_loadop3_o<='1';
-- Setting op3_o here only affects the lcs instruction
op3_o<=std_logic_vector(resize(signed(opcode(2 downto 0)&
t1&t2&rd1&rd2),op3_o'length));
end if;
cmd_signed_o<=opcode(0);
if opcode(5 downto 3)="001" then
cmd_dbus_o<='1';
end if;
cmd_dbus_store_o<=opcode(2);
cmd_dbus_byte_o<=opcode(1);
if opcode(5 downto 1)="01000" then
cmd_addsub_o<='1';
end if;
218,20 → 199,27
cmd_div_o<='1';
end if;
if opcode(5 downto 3)="100" then -- jump or call
cmd_div_mod_o<=opcode(1);
if opcode="100000" then
cmd_jump_o<='1';
cmd_loadop3_o<=opcode(0);
-- Setting op3_o here only affects the call instruction
end if;
if opcode="100001" then
cmd_jump_o<='1';
cmd_loadop3_o<='1';
op3_o<=next_ip_i&"00";
end if;
-- Note: (a or b) = (a and b) or (a xor b)
if opcode(5 downto 1)="01100" then
if opcode="011000" then
cmd_and_o<='1';
end if;
if opcode="011010" or opcode="011001" then
if opcode="011001" then
cmd_or_o<='1';
end if;
if opcode="011010" then
cmd_xor_o<='1';
end if;
239,6 → 227,8
cmd_shift_o<='1';
end if;
cmd_shift_right_o<=opcode(1);
if opcode(5 downto 4)="11" then
cmd_cmp_o<='1';
cmd_negate_op2_o<='1';
245,7 → 235,9
end if;
rd1_select<=t1;
rd1_direct<=std_logic_vector(resize(signed(rd1),rd1_direct'length));
rd2_select<=t2;
rd2_direct<=std_logic_vector(resize(signed(rd2),rd2_direct'length));
dst_out<=destination;
/rtl/lxp32_interrupt_mux.vhd
20,14 → 20,13
irq_i: in std_logic_vector(7 downto 0);
interrupts_enabled_i: in std_logic_vector(7 downto 0);
interrupts_blocked_i: in std_logic_vector(7 downto 0);
interrupt_valid_o: out std_logic;
interrupt_vector_o: out std_logic_vector(2 downto 0);
interrupt_ready_i: in std_logic;
interrupt_return_i: in std_logic;
sp_waddr_i: in std_logic_vector(7 downto 0);
sp_we_i: in std_logic;
sp_wdata_i: in std_logic_vector(31 downto 0)
interrupt_return_i: in std_logic
);
end entity;
 
42,9 → 41,6
 
signal interrupt_valid: std_logic:='0';
 
signal interrupts_enabled: std_logic_vector(7 downto 0):=(others=>'0');
signal interrupts_blocked: std_logic_vector(7 downto 0):=(others=>'0');
 
begin
 
-- Note: "disabled" interrupts (i.e. for which interrupts_enabled_i(i)='0')
61,18 → 57,17
pending_interrupts<=(others=>'0');
state<=Ready;
interrupt_valid<='0';
interrupt_vector_o<=(others=>'-');
else
irq_reg<=irq_i;
pending_interrupts<=(pending_interrupts or
(irq_i and not irq_reg)) and
interrupts_enabled;
interrupts_enabled_i;
case state is
when Ready =>
for i in pending_interrupts'reverse_range loop -- lower interrupts have priority
if pending_interrupts(i)='1' and interrupts_blocked(i)='0' then
if pending_interrupts(i)='1' and interrupts_blocked_i(i)='0' then
pending_interrupts(i)<='0';
interrupt_valid<='1';
interrupt_vector_o<=std_logic_vector(to_unsigned(i,3));
96,17 → 91,4
 
interrupt_valid_o<=interrupt_valid;
 
-- Local copy of the interrupt control bits. The process watches the
-- sp_* write interface (presumably the scratchpad write port -- confirm
-- against the instantiating entity) and captures writes to address X"FC".
process (clk_i) is
begin
if rising_edge(clk_i) then
if rst_i='1' then
-- Synchronous reset: no interrupts enabled, none blocked
interrupts_enabled<=(others=>'0');
interrupts_blocked<=(others=>'0');
elsif sp_we_i='1' and sp_waddr_i=X"FC" then
-- Bits 7..0 of the written word are the enable mask,
-- bits 15..8 are the blocked mask
interrupts_enabled<=sp_wdata_i(7 downto 0);
interrupts_blocked<=sp_wdata_i(15 downto 8);
end if;
end if;
end process;
 
end architecture;
/rtl/lxp32_divider.vhd
5,8 → 5,8
--
-- Copyright (c) 2016 by Alex I. Kuznetsov
--
-- Based on the NRD (Non Restoring Division) algorithm. Takes
-- 36 cycles to calculate quotient (37 for remainder).
-- Based on the NRD (Non Restoring Division) algorithm. One division
-- takes 37 cycles.
---------------------------------------------------------------------
 
library ieee;
21,9 → 21,9
op1_i: in std_logic_vector(31 downto 0);
op2_i: in std_logic_vector(31 downto 0);
signed_i: in std_logic;
rem_i: in std_logic;
ce_o: out std_logic;
result_o: out std_logic_vector(31 downto 0)
quotient_o: out std_logic_vector(31 downto 0);
remainder_o: out std_logic_vector(31 downto 0)
);
end entity;
 
31,11 → 31,15
 
-- Complementor signals
 
signal compl_inv: std_logic;
signal compl_mux: std_logic_vector(31 downto 0);
signal compl_out: std_logic_vector(31 downto 0);
signal compl1_inv: std_logic;
signal compl2_inv: std_logic;
signal compl1_mux: std_logic_vector(31 downto 0);
signal compl2_mux: std_logic_vector(31 downto 0);
signal compl1_out: std_logic_vector(31 downto 0);
signal compl2_out: std_logic_vector(31 downto 0);
 
signal inv_res: std_logic;
signal inv_q: std_logic;
signal inv_r: std_logic;
 
-- Divider FSM signals
 
43,7 → 47,6
 
signal dividend: unsigned(31 downto 0);
signal divisor: unsigned(32 downto 0);
signal want_remainder: std_logic;
 
signal partial_remainder: unsigned(32 downto 0);
signal addend: unsigned(32 downto 0);
58,39 → 61,43
-- Output restoration signals
 
signal remainder_corrector: unsigned(31 downto 0);
signal remainder_corrector_1: std_logic;
signal remainder_pos: unsigned(31 downto 0);
signal result_pos: unsigned(31 downto 0);
signal remainder_res: unsigned(31 downto 0);
signal quotient_res: unsigned(31 downto 0);
 
begin
 
compl_inv<=op1_i(31) and signed_i when ce_i='1' else inv_res;
compl_mux<=op1_i when ce_i='1' else std_logic_vector(result_pos);
compl1_inv<=op1_i(31) and signed_i when ce_i='1' else inv_q;
compl2_inv<=op2_i(31) and signed_i when ce_i='1' else inv_r;
 
compl1_mux<=op1_i when ce_i='1' else std_logic_vector(quotient_res);
compl2_mux<=op2_i when ce_i='1' else std_logic_vector(remainder_res);
 
compl_op1_inst: entity work.lxp32_compl(rtl)
port map(
clk_i=>clk_i,
compl_i=>compl_inv,
d_i=>compl_mux,
d_o=>compl_out
compl_i=>compl1_inv,
d_i=>compl1_mux,
d_o=>compl1_out
);
 
compl_op2_inst: entity work.lxp32_compl(rtl)
port map(
clk_i=>clk_i,
compl_i=>compl2_inv,
d_i=>compl2_mux,
d_o=>compl2_out
);
 
process (clk_i) is
begin
if rising_edge(clk_i) then
if rst_i='1' then
fsm_ce<='0';
want_remainder<='-';
inv_res<='-';
else
fsm_ce<=ce_i;
if ce_i='1' then
want_remainder<=rem_i;
if rem_i='1' then
inv_res<=op1_i(31) and signed_i;
else
inv_res<=(op1_i(31) xor op2_i(31)) and signed_i;
end if;
inv_q<=(op1_i(31) xor op2_i(31)) and signed_i;
inv_r<=op1_i(31) and signed_i;
end if;
end if;
end if;
105,7 → 112,7
sum<=partial_remainder+addend+(to_unsigned(0,32)&sum_subtract);
sum_positive<=not sum(32);
 
-- Divider state machine
-- Divisor state machine
 
process (clk_i) is
begin
113,38 → 120,26
if rst_i='1' then
cnt<=0;
ceo<='0';
divisor<=(others=>'-');
dividend<=(others=>'-');
partial_remainder<=(others=>'-');
sum_subtract<='-';
else
if cnt=1 then
ceo<='1';
else
ceo<='0';
end if;
if ce_i='1' then
divisor(31 downto 0)<=unsigned(op2_i);
divisor(32)<=op2_i(31) and signed_i;
end if;
ceo<='0';
if fsm_ce='1' then
dividend<=unsigned(compl_out(30 downto 0)&"0");
partial_remainder<=to_unsigned(0,32)&compl_out(31);
sum_subtract<=not divisor(32);
if want_remainder='1' then
cnt<=34;
else
cnt<=33;
end if;
else
dividend<=unsigned(compl1_out(30 downto 0)&"0");
divisor<=unsigned("0"&compl2_out);
partial_remainder<=to_unsigned(0,32)&compl1_out(31);
sum_subtract<='1';
cnt<=34;
elsif cnt>0 then
partial_remainder<=sum(31 downto 0)&dividend(31);
sum_subtract<=sum_positive xor divisor(32);
sum_subtract<=sum_positive;
dividend<=dividend(30 downto 0)&sum_positive;
if cnt>0 then
cnt<=cnt-1;
if cnt=1 then
ceo<='1';
end if;
cnt<=cnt-1;
else
dividend<=(others=>'-');
divisor<=(others=>'-');
partial_remainder<=(others=>'-');
end if;
end if;
end if;
156,17 → 151,15
begin
if rising_edge(clk_i) then
for i in remainder_corrector'range loop
remainder_corrector(i)<=(divisor(i) xor divisor(32)) and not sum_positive;
remainder_corrector(i)<=divisor(i) and not sum_positive;
end loop;
remainder_corrector_1<=divisor(32) and not sum_positive;
remainder_pos<=partial_remainder(32 downto 1)+remainder_corrector+
(to_unsigned(0,31)&remainder_corrector_1);
quotient_res<=dividend;
remainder_res<=partial_remainder(32 downto 1)+remainder_corrector;
end if;
end process;
 
result_pos<=remainder_pos when want_remainder='1' else dividend;
 
result_o<=compl_out;
quotient_o<=compl1_out;
remainder_o<=compl2_out;
ce_o<=ceo;
 
end architecture;
/rtl/lxp32u_top.vhd
27,7 → 27,7
DBUS_RMW: boolean:=false;
DIVIDER_EN: boolean:=true;
MUL_ARCH: string:="dsp";
START_ADDR: std_logic_vector(31 downto 0):=(others=>'0')
START_ADDR: std_logic_vector(29 downto 0):=(others=>'0')
);
port(
clk_i: in std_logic;
/rtl/lxp32_cpu.vhd
14,7 → 14,7
DBUS_RMW: boolean;
DIVIDER_EN: boolean;
MUL_ARCH: string;
START_ADDR: std_logic_vector(31 downto 0)
START_ADDR: std_logic_vector(29 downto 0)
);
port(
clk_i: in std_logic;
42,7 → 42,6
 
signal fetch_word: std_logic_vector(31 downto 0);
signal fetch_next_ip: std_logic_vector(29 downto 0);
signal fetch_current_ip: std_logic_vector(29 downto 0);
signal fetch_valid: std_logic;
signal fetch_jump_ready: std_logic;
 
62,6 → 61,7
signal decode_cmd_jump: std_logic;
signal decode_cmd_negate_op2: std_logic;
signal decode_cmd_and: std_logic;
signal decode_cmd_or: std_logic;
signal decode_cmd_xor: std_logic;
signal decode_cmd_shift: std_logic;
signal decode_cmd_shift_right: std_logic;
89,6 → 89,8
signal interrupt_vector: std_logic_vector(2 downto 0);
signal interrupt_ready: std_logic;
signal interrupt_return: std_logic;
signal interrupts_enabled: std_logic_vector(7 downto 0);
signal interrupts_blocked: std_logic_vector(7 downto 0);
 
begin
 
107,7 → 109,6
word_o=>fetch_word,
next_ip_o=>fetch_next_ip,
current_ip_o=>fetch_current_ip,
valid_o=>fetch_valid,
ready_i=>decode_ready,
123,7 → 124,6
word_i=>fetch_word,
next_ip_i=>fetch_next_ip,
current_ip_i=>fetch_current_ip,
valid_i=>fetch_valid,
jump_valid_i=>execute_jump_valid,
ready_o=>decode_ready,
153,6 → 153,7
cmd_jump_o=>decode_cmd_jump,
cmd_negate_op2_o=>decode_cmd_negate_op2,
cmd_and_o=>decode_cmd_and,
cmd_or_o=>decode_cmd_or,
cmd_xor_o=>decode_cmd_xor,
cmd_shift_o=>decode_cmd_shift,
cmd_shift_right_o=>decode_cmd_shift_right,
188,6 → 189,7
cmd_jump_i=>decode_cmd_jump,
cmd_negate_op2_i=>decode_cmd_negate_op2,
cmd_and_i=>decode_cmd_and,
cmd_or_i=>decode_cmd_or,
cmd_xor_i=>decode_cmd_xor,
cmd_shift_i=>decode_cmd_shift,
cmd_shift_right_i=>decode_cmd_shift_right,
219,7 → 221,9
jump_dst_o=>execute_jump_dst,
jump_ready_i=>fetch_jump_ready,
interrupt_return_o=>interrupt_return
interrupt_return_o=>interrupt_return,
interrupts_enabled_o=>interrupts_enabled,
interrupts_blocked_o=>interrupts_blocked
);
 
scratchpad_inst: entity work.lxp32_scratchpad(rtl)
243,14 → 247,13
irq_i=>irq_i,
interrupts_enabled_i=>interrupts_enabled,
interrupts_blocked_i=>interrupts_blocked,
interrupt_valid_o=>interrupt_valid,
interrupt_vector_o=>interrupt_vector,
interrupt_ready_i=>interrupt_ready,
interrupt_return_i=>interrupt_return,
sp_waddr_i=>sp_waddr,
sp_we_i=>sp_we,
sp_wdata_i=>sp_wdata
interrupt_return_i=>interrupt_return
);
 
end architecture;
/rtl/lxp32_fetch.vhd
14,7 → 14,7
 
entity lxp32_fetch is
generic(
START_ADDR: std_logic_vector(31 downto 0)
START_ADDR: std_logic_vector(29 downto 0)
);
port(
clk_i: in std_logic;
26,7 → 26,6
lli_busy_i: in std_logic;
word_o: out std_logic_vector(31 downto 0);
current_ip_o: out std_logic_vector(29 downto 0);
next_ip_o: out std_logic_vector(29 downto 0);
valid_o: out std_logic;
ready_i: in std_logic;
42,7 → 41,7
signal init: std_logic:='1';
signal init_cnt: unsigned(7 downto 0):=(others=>'0');
 
signal fetch_addr: std_logic_vector(29 downto 0):=START_ADDR(31 downto 2);
signal fetch_addr: std_logic_vector(29 downto 0):=START_ADDR;
 
signal next_word: std_logic;
signal suppress_re: std_logic:='0';
51,17 → 50,14
 
signal fifo_rst: std_logic;
signal fifo_we: std_logic;
signal fifo_din: std_logic_vector(31 downto 0);
signal fifo_din: std_logic_vector(61 downto 0);
signal fifo_re: std_logic;
signal fifo_dout: std_logic_vector(31 downto 0);
signal fifo_dout: std_logic_vector(61 downto 0);
signal fifo_empty: std_logic;
signal fifo_full: std_logic;
 
signal jr: std_logic:='0';
 
signal next_ip: std_logic_vector(fetch_addr'range);
signal current_ip: std_logic_vector(fetch_addr'range);
 
begin
 
-- INIT state machine (to initialize all registers)
100,11 → 96,10
begin
if rising_edge(clk_i) then
if rst_i='1' then
fetch_addr<=START_ADDR(31 downto 2);
fetch_addr<=START_ADDR;
requested<='0';
jr<='0';
suppress_re<='0';
next_ip<=(others=>'-');
else
jr<='0';
-- Suppress LLI request if jump signal is active but will not be processed
117,13 → 112,6
requested<=re and not (jump_valid_i and not jr);
end if;
if next_word='1' then
-- It's not immediately obvious why, but current_ip and next_ip will contain
-- the addresses of the current instruction and the next instruction to be
-- fetched, respectively, by the time the instruction is passed to the decode
-- stage. Basically, this is because when either the decoder or the IBUS
-- stalls, the fetch_addr counter will also stop incrementing.
next_ip<=fetch_addr;
current_ip<=next_ip;
if jump_valid_i='1' and jr='0' then
fetch_addr<=jump_dst_i;
jr<='1';
146,12 → 134,12
 
fifo_rst<=rst_i or (jump_valid_i and not jr);
fifo_we<=requested and not lli_busy_i;
fifo_din<=lli_dat_i;
fifo_din<=fetch_addr&lli_dat_i;
fifo_re<=ready_i and not fifo_empty;
 
ubuf_inst: entity work.lxp32_ubuf(rtl)
generic map(
DATA_WIDTH=>32
DATA_WIDTH=>62
)
port map(
clk_i=>clk_i,
166,61 → 154,9
full_o=>fifo_full
);
 
next_ip_o<=next_ip;
current_ip_o<=current_ip;
word_o<=fifo_dout when init='1' else X"40"&std_logic_vector(init_cnt)&X"0000";
next_ip_o<=fifo_dout(61 downto 32);
 
word_o<=fifo_dout(31 downto 0) when init='1' else X"40"&std_logic_vector(init_cnt)&X"0000";
valid_o<=not fifo_empty or not init;
 
-- Note: the following code contains a few simulation-only assertions
-- to check that current_ip and next_ip signals, used in procedure calls
-- and interrupts, are correct.
-- This code should be ignored by a synthesizer since it doesn't drive
-- any signals, but we also surround it by metacomments, just in case.
 
-- synthesis translate_off
 
-- Checker process: keeps its own list of (address, data) pairs for the
-- words written into the FIFO and compares the oldest pair with the
-- fetch outputs (fifo_dout, current_ip, next_ip) whenever they are valid.
-- It only reads signals and drives none.
process (clk_i) is
-- One fetched word together with the address it was requested from
type Pair is record
addr: std_logic_vector(fetch_addr'range);
data: std_logic_vector(31 downto 0);
end record;
type Pairs is array (7 downto 0) of Pair;
-- Model of the words currently buffered; index 0 is the oldest entry
variable buf: Pairs;
-- Number of valid entries in buf
variable count: integer range buf'range:=0;
-- Pair under construction: addr is filled in when a request is issued,
-- data when the word is written into the FIFO
variable current_pair: Pair;
begin
if rising_edge(clk_i) then
if fifo_rst='1' then -- jump
-- The FIFO is being reset, so the model is emptied as well
count:=0;
elsif fifo_we='1' then -- LLI returned data
-- Append the word being written to the FIFO to the model
current_pair.data:=fifo_din;
buf(count):=current_pair;
count:=count+1;
end if;
if re='1' and lli_busy_i='0' then -- data requested
-- Remember the address of the request; its data is recorded later
current_pair.addr:=fetch_addr;
end if;
if fifo_empty='0' and fifo_rst='0' then -- fetch output is valid
-- The model must hold at least one entry when the FIFO is not empty
assert count>0
report "Fetch: buffer should be empty"
severity failure;
-- The oldest modelled word must match the FIFO output
assert buf(0).data=fifo_dout
report "Fetch: incorrect data"
severity failure;
-- current_ip must be the address the oldest word was requested from
assert buf(0).addr=current_ip
report "Fetch: incorrect current_ip"
severity failure;
-- next_ip must be that address plus one
assert std_logic_vector(unsigned(buf(0).addr)+1)=next_ip
report "Fetch: incorrect next_ip"
severity failure;
if ready_i='1' then
-- The word is consumed: drop the oldest entry by shifting the model down
buf(buf'high-1 downto 0):=buf(buf'high downto 1); -- we don't care about the highest item
count:=count-1;
end if;
end if;
end if;
end process;
 
-- synthesis translate_on
 
end architecture;
/rtl/lxp32_dbus.vhd
54,7 → 54,6
signal sig: std_logic;
signal rmw_mode: std_logic;
 
signal dbus_rdata: std_logic_vector(31 downto 0);
signal selected_byte: std_logic_vector(7 downto 0);
 
begin
65,13 → 64,6
if rst_i='1' then
we_out<='0';
strobe<='0';
sig<='-';
byte_mode<='-';
sel<=(others=>'-');
we<='-';
rmw_mode<='-';
dbus_adr_o<=(others=>'-');
dbus_dat_o<=(others=>'-');
else
we_out<='0';
if strobe='0' then
80,10 → 72,10
sig<=cmd_signed_i;
dbus_adr_o<=addr_i(31 downto 2);
dbus_dat_o<=wdata_i;
if cmd_dbus_byte_i='0' then
byte_mode<='0';
dbus_dat_o<=wdata_i;
sel<="1111";
-- synthesis translate_off
93,14 → 85,11
-- synthesis translate_on
else
byte_mode<='1';
dbus_dat_o<=wdata_i(7 downto 0)&wdata_i(7 downto 0)&
wdata_i(7 downto 0)&wdata_i(7 downto 0);
case addr_i(1 downto 0) is
when "00" => sel<="0001";
when "01" => sel<="0010";
when "10" => sel<="0100";
when "11" => sel<="1000";
when "00" => sel<="0001"; dbus_dat_o(7 downto 0)<=wdata_i(7 downto 0);
when "01" => sel<="0010"; dbus_dat_o(15 downto 8)<=wdata_i(7 downto 0);
when "10" => sel<="0100"; dbus_dat_o(23 downto 16)<=wdata_i(7 downto 0);
when "11" => sel<="1000"; dbus_dat_o(31 downto 24)<=wdata_i(7 downto 0);
when others =>
end case;
end if;
147,24 → 136,27
dbus_sel_o<=(others=>'1');
end generate;
 
selected_byte_gen: for i in selected_byte'range generate
selected_byte(i)<=(dbus_dat_i(i) and sel(0)) or
(dbus_dat_i(i+8) and sel(1)) or
(dbus_dat_i(i+16) and sel(2)) or
(dbus_dat_i(i+24) and sel(3));
end generate;
 
process (clk_i) is
begin
if rising_edge(clk_i) then
dbus_rdata<=dbus_dat_i;
if byte_mode='0' then
rdata_o<=dbus_dat_i;
else
rdata_o(7 downto 0)<=selected_byte;
for i in rdata_o'high downto 8 loop
rdata_o(i)<=selected_byte(selected_byte'high) and sig;
end loop;
end if;
end if;
end process;
 
selected_byte_gen: for i in selected_byte'range generate
selected_byte(i)<=(dbus_rdata(i) and sel(0)) or
(dbus_rdata(i+8) and sel(1)) or
(dbus_rdata(i+16) and sel(2)) or
(dbus_rdata(i+24) and sel(3));
end generate;
 
rdata_o<=dbus_rdata when byte_mode='0' else
X"000000"&selected_byte when selected_byte(selected_byte'high)='0' or sig='0' else
X"FFFFFF"&selected_byte;
 
we_o<=we_out;
busy_o<=strobe or we_out;
 
/rtl/lxp32_ubuf.vhd
42,6 → 42,8
signal regs: regs_type;
signal regs_mux: regs_type;
 
signal wpointer: std_logic_vector(2 downto 0):="001";
 
begin
 
we<=we_i and not full;
51,23 → 53,23
begin
if rising_edge(clk_i) then
if rst_i='1' then
wpointer<="001";
empty<='1';
full<='0';
regs<=(others=>(others=>'-'));
else
if re='0' then
regs(0)<=regs_mux(0);
regs<=regs_mux;
else
regs(0)<=regs_mux(1);
end if;
regs(1)<=regs_mux(1);
if we='1' and re='0' then
wpointer<=wpointer(1 downto 0)&"0";
empty<='0';
full<=not empty;
full<=wpointer(1);
elsif we='0' and re='1' then
empty<=not full;
wpointer<="0"&wpointer(2 downto 1);
empty<=wpointer(1);
full<='0';
end if;
end if;
74,8 → 76,9
end if;
end process;
 
regs_mux(0)<=regs(0) when we='0' or empty='0' else d_i;
regs_mux(1)<=regs(1) when we='0' or empty='1' else d_i;
mux: for i in regs_mux'range generate
regs_mux(i)<=regs(i) when we='0' or wpointer(i)='0' else d_i;
end generate;
 
d_o<=regs(0);
empty_o<=empty;
/rtl/lxp32_execute.vhd
34,6 → 34,7
cmd_jump_i: in std_logic;
cmd_negate_op2_i: in std_logic;
cmd_and_i: in std_logic;
cmd_or_i: in std_logic;
cmd_xor_i: in std_logic;
cmd_shift_i: in std_logic;
cmd_shift_right_i: in std_logic;
65,7 → 66,9
jump_dst_o: out std_logic_vector(29 downto 0);
jump_ready_i: in std_logic;
interrupt_return_o: out std_logic
interrupt_return_o: out std_logic;
interrupts_enabled_o: out std_logic_vector(7 downto 0);
interrupts_blocked_o: out std_logic_vector(7 downto 0)
);
end entity;
 
113,6 → 116,8
-- Signals related to interrupt handling
 
signal interrupt_return: std_logic:='0';
signal interrupts_enabled: std_logic_vector(7 downto 0):=(others=>'0');
signal interrupts_blocked: std_logic_vector(7 downto 0):=(others=>'0');
 
begin
 
143,6 → 148,7
cmd_cmp_i=>cmd_cmp_i,
cmd_negate_op2_i=>cmd_negate_op2_i,
cmd_and_i=>cmd_and_i,
cmd_or_i=>cmd_or_i,
cmd_xor_i=>cmd_xor_i,
cmd_shift_i=>cmd_shift_i,
cmd_shift_right_i=>cmd_shift_right_i,
176,12 → 182,11
if rst_i='1' then
jump_valid<='0';
interrupt_return<='0';
jump_dst<=(others=>'-');
else
if jump_valid='0' then
jump_dst<=op1_i(31 downto 2);
if can_execute='1' and cmd_jump_i='1' and jump_condition='1' then
jump_valid<='1';
jump_dst<=op1_i(31 downto 2);
interrupt_return<=op1_i(0);
end if;
elsif jump_ready_i='1' then
257,4 → 262,22
sp_waddr_o<=result_regaddr;
sp_wdata_o<=result_mux;
 
process (clk_i) is
begin
if rising_edge(clk_i) then
if rst_i='1' then
interrupts_enabled<=(others=>'0');
interrupts_blocked<=(others=>'0');
else
if result_valid='1' and result_regaddr=X"FC" then
interrupts_enabled<=result_mux(7 downto 0);
interrupts_blocked<=result_mux(15 downto 8);
end if;
end if;
end if;
end process;
 
interrupts_enabled_o<=interrupts_enabled;
interrupts_blocked_o<=interrupts_blocked;
 
end architecture;
/rtl/lxp32_mul_dsp.vhd
28,9 → 28,9
 
architecture rtl of lxp32_mul_dsp is
 
signal pp00: std_logic_vector(31 downto 0);
signal pp01: std_logic_vector(31 downto 0);
signal pp10: std_logic_vector(31 downto 0);
signal pp00: unsigned(31 downto 0);
signal pp01: unsigned(31 downto 0);
signal pp10: unsigned(31 downto 0);
 
signal product: unsigned(31 downto 0);
 
43,7 → 43,7
clk_i=>clk_i,
a_i=>op1_i(15 downto 0),
b_i=>op2_i(15 downto 0),
p_o=>pp00
unsigned(p_o)=>pp00
);
 
mul01_inst: entity work.lxp32_mul16x16
51,7 → 51,7
clk_i=>clk_i,
a_i=>op1_i(15 downto 0),
b_i=>op2_i(31 downto 16),
p_o=>pp01
unsigned(p_o)=>pp01
);
 
mul10_inst: entity work.lxp32_mul16x16
59,11 → 59,11
clk_i=>clk_i,
a_i=>op1_i(31 downto 16),
b_i=>op2_i(15 downto 0),
p_o=>pp10
unsigned(p_o)=>pp10
);
 
product(31 downto 16)<=unsigned(pp00(31 downto 16))+unsigned(pp01(15 downto 0))+unsigned(pp10(15 downto 0));
product(15 downto 0)<=unsigned(pp00(15 downto 0));
product(31 downto 16)<=pp00(31 downto 16)+pp01(15 downto 0)+pp10(15 downto 0);
product(15 downto 0)<=pp00(15 downto 0);
result_o<=std_logic_vector(product);
 
process (clk_i) is
/rtl/lxp32_shifter.vhd
68,10 → 68,6
if rising_edge(clk_i) then
if rst_i='1' then
ceo<='0';
stage2_data<=(others=>'-');
stage2_s<=(others=>'-');
stage2_fill<='-';
stage2_right<='-';
else
ceo<=ce_i;
stage2_data<=cascades(2);
/rtl/lxp32_mul_seq.vhd
44,27 → 44,20
if rst_i='1' then
ceo<='0';
cnt<=0;
reg1<=(others=>'-');
reg2<=(others=>'-');
acc_sum<=(others=>'-');
else
if cnt=1 then
ceo<='1';
else
ceo<='0';
end if;
ceo<='0';
if ce_i='1' then
cnt<=32;
reg1<=unsigned(op1_i);
reg2<=unsigned(op2_i);
acc_sum<=(others=>'0');
else
elsif cnt>0 then
acc_sum<=acc_sum+pp;
reg1<=reg1(reg1'high-1 downto 0)&"0";
reg2<="0"&reg2(reg2'high downto 1);
if cnt>0 then
cnt<=cnt-1;
cnt<=cnt-1;
if cnt=1 then
ceo<='1';
end if;
end if;
end if;
/rtl/lxp32c_top.vhd
32,7 → 32,7
IBUS_BURST_SIZE: integer:=16;
IBUS_PREFETCH_SIZE: integer:=32;
MUL_ARCH: string:="dsp";
START_ADDR: std_logic_vector(31 downto 0):=(others=>'0')
START_ADDR: std_logic_vector(29 downto 0):=(others=>'0')
);
port(
clk_i: in std_logic;
/LICENSE.md
1,4 → 1,4
Copyright (c) 2016-2019 by Alex I. Kuznetsov
Copyright (c) 2016 by Alex I. Kuznetsov
 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 
/verify/lxp32/run/xsim/Makefile
39,8 → 39,8
# Normal targets
########################
 
compile.stamp: $(LXP32_RTL) $(COMMON_SRC) $(PLATFORM_RTL) $(TB_SRC) $(FIRMWARE)
xvhdl$(BAT) $(LXP32_RTL) $(COMMON_SRC) $(PLATFORM_RTL) $(TB_SRC)
compile.stamp: $(LXP32_RTL) $(PLATFORM_RTL) $(TB_SRC) $(FIRMWARE)
xvhdl$(BAT) $(LXP32_RTL) $(PLATFORM_RTL) $(TB_SRC)
xelab$(BAT) work.tb -s tb_sim -debug typical
echo > compile.stamp
 
/verify/lxp32/run/ghdl/Makefile
13,7 → 13,7
 
.PHONY: all compile batch gui clean
 
.PRECIOUS: $(WAVE_OUT) $(WAVE_VCD)
.PRECIOUS: $(WAVE_OUT)
 
compile: compile.stamp $(FIRMWARE)
 
42,8 → 42,8
$(WAVE_VCD): compile.stamp $(FIRMWARE)
ghdl -r $(GHDL_FLAGS) $(TB_MOD) --vcd=$(WAVE_VCD)
 
compile.stamp: $(LXP32_RTL) $(COMMON_SRC) $(PLATFORM_RTL) $(TB_SRC)
ghdl -a $(GHDL_FLAGS) $(LXP32_RTL) $(COMMON_SRC) $(PLATFORM_RTL) $(TB_SRC)
compile.stamp: $(LXP32_RTL) $(PLATFORM_RTL) $(TB_SRC)
ghdl -a $(GHDL_FLAGS) $(LXP32_RTL) $(PLATFORM_RTL) $(TB_SRC)
ghdl -e $(GHDL_FLAGS) $(TB_MOD)
echo > compile.stamp
 
/verify/lxp32/run/vsim/Makefile
31,8 → 31,8
# Normal targets
########################
 
compile.stamp: $(LXP32_RTL) $(COMMON_SRC) $(PLATFORM_RTL) $(TB_SRC) $(FIRMWARE) | work
vcom $(VCOMFLAGS) $(LXP32_RTL) $(COMMON_SRC) $(PLATFORM_RTL) $(TB_SRC)
compile.stamp: $(LXP32_RTL) $(PLATFORM_RTL) $(TB_SRC) $(FIRMWARE) | work
vcom $(VCOMFLAGS) $(LXP32_RTL) $(PLATFORM_RTL) $(TB_SRC)
echo > compile.stamp
 
work:
/verify/lxp32/src/platform/generic_dpram.vhd
49,7 → 49,7
signal ram: ram_type;
 
attribute syn_ramstyle: string;
attribute syn_ramstyle of ram: signal is "no_rw_check";
attribute syn_ramstyle of ram: signal is "block_ram,no_rw_check";
attribute ram_style: string; -- for Xilinx
attribute ram_style of ram: signal is "block";
 
76,11 → 76,7
ram(to_integer(unsigned(addra_i)))<=da_i;
da_o<=(others=>'-');
else
if is_x(addra_i) then
da_o<=(others=>'X');
else
da_o<=ram(to_integer(unsigned(addra_i)));
end if;
da_o<=ram(to_integer(to_01(unsigned(addra_i))));
end if;
end if;
end if;
96,11 → 92,7
ram(to_integer(unsigned(addra_i)))<=da_i;
da_o<=da_i;
else
if is_x(addra_i) then
da_o<=(others=>'X');
else
da_o<=ram(to_integer(unsigned(addra_i)));
end if;
da_o<=ram(to_integer(to_01(unsigned(addra_i))));
end if;
end if;
end if;
115,11 → 107,7
if wea_i='1' then
ram(to_integer(unsigned(addra_i)))<=da_i;
end if;
if is_x(addra_i) then
da_o<=(others=>'X');
else
da_o<=ram(to_integer(unsigned(addra_i)));
end if;
da_o<=ram(to_integer(to_01(unsigned(addra_i))));
end if;
end if;
end process;
133,11 → 121,7
if wea_i='1' then
ram(to_integer(unsigned(addra_i)))<=da_i;
else
if is_x(addra_i) then
da_o<=(others=>'X');
else
da_o<=ram(to_integer(unsigned(addra_i)));
end if;
da_o<=ram(to_integer(to_01(unsigned(addra_i))));
end if;
end if;
end if;
150,11 → 134,7
begin
if rising_edge(clkb_i) then
if ceb_i='1' then
if is_x(addrb_i) then
db_o<=(others=>'X');
else
db_o<=ram(to_integer(unsigned(addrb_i)));
end if;
db_o<=ram(to_integer(to_01(unsigned(addrb_i))));
end if;
end if;
end process;
/verify/lxp32/src/platform/dbus_monitor.vhd
47,9 → 47,6
signal prbs: std_logic;
signal cycle: std_logic:='0';
 
signal cyc_ff: std_logic:='0';
signal ack_ff: std_logic:='0';
 
begin
 
-- Manage throttling
91,27 → 88,8
wbm_adr_o<=wbs_adr_i;
wbm_dat_o<=wbs_dat_i;
 
-- Check handshake correctness
assert not rising_edge(clk_i) or wbm_ack_i='0' or (wbs_cyc_i and (not prbs or cycle))='1'
report "DBUS error: ACK asserted without CYC"
severity failure;
 
process (clk_i) is
begin
if rising_edge(clk_i) then
if rst_i='1' then
cyc_ff<='0';
ack_ff<='0';
else
cyc_ff<=wbs_cyc_i;
ack_ff<=wbm_ack_i;
assert wbm_ack_i='0' or (wbs_cyc_i and (not prbs or cycle))='1'
report "DBUS error: ACK asserted without CYC"
severity failure;
assert not (wbs_cyc_i='0' and cyc_ff='1' and ack_ff/='1')
report "DBUS error: cycle terminated prematurely"
severity failure;
end if;
end if;
end process;
 
end architecture;
/verify/lxp32/src/platform/platform.vhd
17,8 → 17,6
 
entity platform is
generic(
CPU_DBUS_RMW: boolean;
CPU_MUL_ARCH: string;
MODEL_LXP32C: boolean;
THROTTLE_DBUS: boolean;
THROTTLE_IBUS: boolean
180,9 → 178,9
gen_lxp32u: if not MODEL_LXP32C generate
lxp32u_top_inst: entity work.lxp32u_top(rtl)
generic map(
DBUS_RMW=>CPU_DBUS_RMW,
DBUS_RMW=>false,
DIVIDER_EN=>true,
MUL_ARCH=>CPU_MUL_ARCH,
MUL_ARCH=>"dsp",
START_ADDR=>(others=>'0')
)
port map(
210,11 → 208,11
gen_lxp32c: if MODEL_LXP32C generate
lxp32c_top_inst: entity work.lxp32c_top(rtl)
generic map(
DBUS_RMW=>CPU_DBUS_RMW,
DBUS_RMW=>false,
DIVIDER_EN=>true,
IBUS_BURST_SIZE=>16,
IBUS_PREFETCH_SIZE=>32,
MUL_ARCH=>CPU_MUL_ARCH,
MUL_ARCH=>"dsp",
START_ADDR=>(others=>'0')
)
port map(
/verify/lxp32/src/platform/program_ram.vhd
16,8 → 16,6
library ieee;
use ieee.std_logic_1164.all;
 
use work.common_pkg.all;
 
entity program_ram is
generic(
THROTTLE: boolean
93,7 → 91,7
process (clk_i) is
begin
if rising_edge(clk_i) then
ack_read<=wbs_cyc_i and wbs_stb_i and not wbs_we_i and not ack_read;
ack_read<=wbs_cyc_i and wbs_stb_i and not wbs_we_i;
end if;
end process;
 
104,16 → 102,6
 
-- Low Latency Interface (with optional pseudo-random throttling)
 
process (clk_i) is
begin
if rising_edge(clk_i) then
assert lli_re_i='0' or lli_adr_i(lli_adr_i'high downto 14)=X"0000"
report "Attempted to fetch instruction from a non-existent address 0x"&
hex_string(lli_adr_i&"00")
severity failure;
end if;
end process;
 
gen_throttling: if THROTTLE generate
throttle_inst: entity work.scrambler(rtl)
generic map(TAP1=>9,TAP2=>11)
/verify/lxp32/src/tb/tb.vhd
8,8 → 8,6
-- Simulates LXP32 test platform, verifies results.
--
-- Parameters:
-- CPU_DBUS_RMW: DBUS_RMW CPU generic
-- CPU_MUL_ARCH: MUL_ARCH CPU generic
-- MODEL_LXP32C: when true, simulates LXP32C variant (with
-- instruction cache), otherwise LXP32U
-- TEST_CASE: If non-empty, selects a test case to run.
31,8 → 29,6
 
entity tb is
generic(
CPU_DBUS_RMW: boolean:=false;
CPU_MUL_ARCH: string:="dsp";
MODEL_LXP32C: boolean:=true;
TEST_CASE: string:="";
THROTTLE_DBUS: boolean:=true;
59,8 → 55,6
 
dut: entity work.platform(rtl)
generic map(
CPU_DBUS_RMW=>CPU_DBUS_RMW,
CPU_MUL_ARCH=>CPU_MUL_ARCH,
MODEL_LXP32C=>MODEL_LXP32C,
THROTTLE_DBUS=>THROTTLE_DBUS,
THROTTLE_IBUS=>THROTTLE_IBUS
131,10 → 125,6
run_test("test014.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
run_test("test015.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
run_test("test016.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
run_test("test017.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
run_test("test018.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
run_test("test019.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
run_test("test020.ram",clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
else
run_test(TEST_CASE,clk,globals,soc_wbs_in,soc_wbs_out,monitor_out);
end if;
/verify/lxp32/src/firmware/test001.asm
6,267 → 6,13
lc r100, 0x10000000 // test result output pointer
lc r101, halt
lc r102, bad_jump
lc r103, reg_is_nonzero
// Check that all registers are zero-initialized after reset
// Ignore r100-r103 which are already used
cjmpne r103, r0, 0
cjmpne r103, r1, 0
cjmpne r103, r2, 0
cjmpne r103, r3, 0
cjmpne r103, r4, 0
cjmpne r103, r5, 0
cjmpne r103, r6, 0
cjmpne r103, r7, 0
cjmpne r103, r8, 0
cjmpne r103, r9, 0
cjmpne r103, r10, 0
cjmpne r103, r11, 0
cjmpne r103, r12, 0
cjmpne r103, r13, 0
cjmpne r103, r14, 0
cjmpne r103, r15, 0
cjmpne r103, r16, 0
cjmpne r103, r17, 0
cjmpne r103, r18, 0
cjmpne r103, r19, 0
cjmpne r103, r20, 0
cjmpne r103, r21, 0
cjmpne r103, r22, 0
cjmpne r103, r23, 0
cjmpne r103, r24, 0
cjmpne r103, r25, 0
cjmpne r103, r26, 0
cjmpne r103, r27, 0
cjmpne r103, r28, 0
cjmpne r103, r29, 0
cjmpne r103, r30, 0
cjmpne r103, r31, 0
cjmpne r103, r32, 0
cjmpne r103, r33, 0
cjmpne r103, r34, 0
cjmpne r103, r35, 0
cjmpne r103, r36, 0
cjmpne r103, r37, 0
cjmpne r103, r38, 0
cjmpne r103, r39, 0
cjmpne r103, r40, 0
cjmpne r103, r41, 0
cjmpne r103, r42, 0
cjmpne r103, r43, 0
cjmpne r103, r44, 0
cjmpne r103, r45, 0
cjmpne r103, r46, 0
cjmpne r103, r47, 0
cjmpne r103, r48, 0
cjmpne r103, r49, 0
cjmpne r103, r50, 0
cjmpne r103, r51, 0
cjmpne r103, r52, 0
cjmpne r103, r53, 0
cjmpne r103, r54, 0
cjmpne r103, r55, 0
cjmpne r103, r56, 0
cjmpne r103, r57, 0
cjmpne r103, r58, 0
cjmpne r103, r59, 0
cjmpne r103, r60, 0
cjmpne r103, r61, 0
cjmpne r103, r62, 0
cjmpne r103, r63, 0
cjmpne r103, r64, 0
cjmpne r103, r65, 0
cjmpne r103, r66, 0
cjmpne r103, r67, 0
cjmpne r103, r68, 0
cjmpne r103, r69, 0
cjmpne r103, r70, 0
cjmpne r103, r71, 0
cjmpne r103, r72, 0
cjmpne r103, r73, 0
cjmpne r103, r74, 0
cjmpne r103, r75, 0
cjmpne r103, r76, 0
cjmpne r103, r77, 0
cjmpne r103, r78, 0
cjmpne r103, r79, 0
cjmpne r103, r80, 0
cjmpne r103, r81, 0
cjmpne r103, r82, 0
cjmpne r103, r83, 0
cjmpne r103, r84, 0
cjmpne r103, r85, 0
cjmpne r103, r86, 0
cjmpne r103, r87, 0
cjmpne r103, r88, 0
cjmpne r103, r89, 0
cjmpne r103, r90, 0
cjmpne r103, r91, 0
cjmpne r103, r92, 0
cjmpne r103, r93, 0
cjmpne r103, r94, 0
cjmpne r103, r95, 0
cjmpne r103, r96, 0
cjmpne r103, r97, 0
cjmpne r103, r98, 0
cjmpne r103, r99, 0
cjmpne r103, r104, 0
cjmpne r103, r105, 0
cjmpne r103, r106, 0
cjmpne r103, r107, 0
cjmpne r103, r108, 0
cjmpne r103, r109, 0
cjmpne r103, r110, 0
cjmpne r103, r111, 0
cjmpne r103, r112, 0
cjmpne r103, r113, 0
cjmpne r103, r114, 0
cjmpne r103, r115, 0
cjmpne r103, r116, 0
cjmpne r103, r117, 0
cjmpne r103, r118, 0
cjmpne r103, r119, 0
cjmpne r103, r120, 0
cjmpne r103, r121, 0
cjmpne r103, r122, 0
cjmpne r103, r123, 0
cjmpne r103, r124, 0
cjmpne r103, r125, 0
cjmpne r103, r126, 0
cjmpne r103, r127, 0
cjmpne r103, r128, 0
cjmpne r103, r129, 0
cjmpne r103, r130, 0
cjmpne r103, r131, 0
cjmpne r103, r132, 0
cjmpne r103, r133, 0
cjmpne r103, r134, 0
cjmpne r103, r135, 0
cjmpne r103, r136, 0
cjmpne r103, r137, 0
cjmpne r103, r138, 0
cjmpne r103, r139, 0
cjmpne r103, r140, 0
cjmpne r103, r141, 0
cjmpne r103, r142, 0
cjmpne r103, r143, 0
cjmpne r103, r144, 0
cjmpne r103, r145, 0
cjmpne r103, r146, 0
cjmpne r103, r147, 0
cjmpne r103, r148, 0
cjmpne r103, r149, 0
cjmpne r103, r150, 0
cjmpne r103, r151, 0
cjmpne r103, r152, 0
cjmpne r103, r153, 0
cjmpne r103, r154, 0
cjmpne r103, r155, 0
cjmpne r103, r156, 0
cjmpne r103, r157, 0
cjmpne r103, r158, 0
cjmpne r103, r159, 0
cjmpne r103, r160, 0
cjmpne r103, r161, 0
cjmpne r103, r162, 0
cjmpne r103, r163, 0
cjmpne r103, r164, 0
cjmpne r103, r165, 0
cjmpne r103, r166, 0
cjmpne r103, r167, 0
cjmpne r103, r168, 0
cjmpne r103, r169, 0
cjmpne r103, r170, 0
cjmpne r103, r171, 0
cjmpne r103, r172, 0
cjmpne r103, r173, 0
cjmpne r103, r174, 0
cjmpne r103, r175, 0
cjmpne r103, r176, 0
cjmpne r103, r177, 0
cjmpne r103, r178, 0
cjmpne r103, r179, 0
cjmpne r103, r180, 0
cjmpne r103, r181, 0
cjmpne r103, r182, 0
cjmpne r103, r183, 0
cjmpne r103, r184, 0
cjmpne r103, r185, 0
cjmpne r103, r186, 0
cjmpne r103, r187, 0
cjmpne r103, r188, 0
cjmpne r103, r189, 0
cjmpne r103, r190, 0
cjmpne r103, r191, 0
cjmpne r103, r192, 0
cjmpne r103, r193, 0
cjmpne r103, r194, 0
cjmpne r103, r195, 0
cjmpne r103, r196, 0
cjmpne r103, r197, 0
cjmpne r103, r198, 0
cjmpne r103, r199, 0
cjmpne r103, r200, 0
cjmpne r103, r201, 0
cjmpne r103, r202, 0
cjmpne r103, r203, 0
cjmpne r103, r204, 0
cjmpne r103, r205, 0
cjmpne r103, r206, 0
cjmpne r103, r207, 0
cjmpne r103, r208, 0
cjmpne r103, r209, 0
cjmpne r103, r210, 0
cjmpne r103, r211, 0
cjmpne r103, r212, 0
cjmpne r103, r213, 0
cjmpne r103, r214, 0
cjmpne r103, r215, 0
cjmpne r103, r216, 0
cjmpne r103, r217, 0
cjmpne r103, r218, 0
cjmpne r103, r219, 0
cjmpne r103, r220, 0
cjmpne r103, r221, 0
cjmpne r103, r222, 0
cjmpne r103, r223, 0
cjmpne r103, r224, 0
cjmpne r103, r225, 0
cjmpne r103, r226, 0
cjmpne r103, r227, 0
cjmpne r103, r228, 0
cjmpne r103, r229, 0
cjmpne r103, r230, 0
cjmpne r103, r231, 0
cjmpne r103, r232, 0
cjmpne r103, r233, 0
cjmpne r103, r234, 0
cjmpne r103, r235, 0
cjmpne r103, r236, 0
cjmpne r103, r237, 0
cjmpne r103, r238, 0
cjmpne r103, r239, 0
cjmpne r103, r240, 0
cjmpne r103, r241, 0
cjmpne r103, r242, 0
cjmpne r103, r243, 0
cjmpne r103, r244, 0
cjmpne r103, r245, 0
cjmpne r103, r246, 0
cjmpne r103, r247, 0
cjmpne r103, r248, 0
cjmpne r103, r249, 0
cjmpne r103, r250, 0
cjmpne r103, r251, 0
cjmpne r103, r252, 0
cjmpne r103, r253, 0
cjmpne r103, r254, 0
cjmpne r103, r255, 0
// All registers should be zero-initialized after reset
lc r0, jump0
jmp r0
add r1, r1, 1
cjmpe r0, r1, 1
reg_is_nonzero:
sw r100, 2 // failure: register is not initialized
sw r100, 2 // failure: r1 not initialized
jmp r101
// Test different jump conditions
428,7 → 174,7
// Copy itself to another portion of memory
mov r0, 0 // source pointer
lc r1, 0x00008000 // destination pointer
lc r2, halt@2 // size of block to copy, in bytes
lc r2, end // size of block to copy, in bytes
lc r32, copy_loop
copy_loop:
480,3 → 226,5
halt:
hlt
jmp r101
 
end:
/verify/lxp32/src/make/sources.make
22,11 → 22,6
$(LXP32_DIR)/lxp32_icache.vhd\
$(LXP32_DIR)/lxp32c_top.vhd
 
# Common package
 
COMMON_PKG_DIR=../../../common_pkg
COMMON_SRC=$(COMMON_PKG_DIR)/common_pkg.vhd $(COMMON_PKG_DIR)/common_pkg_body.vhd
 
# Platform RTL
 
PLATFORM_DIR=../../src/platform
44,7 → 39,9
 
COMMON_PKG_DIR=../../../common_pkg
TB_DIR=../../src/tb
TB_SRC=$(TB_DIR)/tb_pkg.vhd\
TB_SRC=$(COMMON_PKG_DIR)/common_pkg.vhd\
$(COMMON_PKG_DIR)/common_pkg_body.vhd\
$(TB_DIR)/tb_pkg.vhd\
$(TB_DIR)/tb_pkg_body.vhd\
$(TB_DIR)/monitor.vhd\
$(TB_DIR)/tb.vhd
69,11 → 66,7
test013.ram\
test014.ram\
test015.ram\
test016.ram\
test017.ram\
test018.ram\
test019.ram\
test020.ram
test016.ram
 
# LXP32 assembler executable
 
/verify/common_pkg/common_pkg_body.vhd
6,20 → 6,26
-- Copyright (c) 2016 by Alex I. Kuznetsov
---------------------------------------------------------------------
 
use std.textio.all;
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
 
package body common_pkg is
procedure rand(variable st: inout rng_state_type; a,b: integer; variable x: out integer) is
variable r: real;
impure function rand return integer is
variable r: unsigned(63 downto 0);
begin
r:=rand_state*to_unsigned(1103515245,32)+12345;
rand_state:=r(rand_state'range);
return to_integer(rand_state(30 downto 16));
end function;
impure function rand(a: integer; b: integer) return integer is
begin
assert a<=b report "Invalid range" severity failure;
uniform(st.seed1,st.seed2,r);
r:=r*real(b-a+1);
x:=a+integer(floor(r));
end procedure;
return (rand mod (b-a+1))+a;
end function;
function hex_string(x: std_logic_vector) return string is
variable xx: std_logic_vector(x'length-1 downto 0);
/verify/common_pkg/common_pkg.vhd
4,6 → 4,10
-- Part of the LXP32 verification environment
--
-- Copyright (c) 2016 by Alex I. Kuznetsov
--
-- Note: the "rand" function declared in this package implements
-- a linear congruential pseudo-random number generator as defined in
-- the ISO/IEC 9899:1999 standard.
---------------------------------------------------------------------
 
library ieee;
11,16 → 15,10
use ieee.numeric_std.all;
 
package common_pkg is
type rng_state_type is record
seed1: positive;
seed2: positive;
end record;
 
-- Generate a pseudo-random value of integer type from [a;b] range
-- Output is stored in x
procedure rand(variable st: inout rng_state_type; a,b: integer; variable x: out integer);
shared variable rand_state: unsigned(31 downto 0):=to_unsigned(1,32);
-- Convert std_logic_vector to a hexadecimal string (similar to
-- the "to_hstring" function from VHDL-2008)
impure function rand return integer;
impure function rand(a: integer; b: integer) return integer;
function hex_string(x: std_logic_vector) return string;
end package;
/verify/icache/run/xsim/Makefile
38,7 → 38,7
# Normal targets
########################
 
compile.stamp: $(LXP32_RTL) $(COMMON_SRC) $(TB_SRC)
xvhdl$(BAT) $(LXP32_RTL) $(COMMON_SRC) $(TB_SRC)
compile.stamp: $(LXP32_RTL) $(TB_SRC)
xvhdl$(BAT) $(LXP32_RTL) $(TB_SRC)
xelab$(BAT) work.tb -s tb_sim -debug typical
echo > compile.stamp
/verify/icache/run/ghdl/Makefile
41,7 → 41,7
$(WAVE_VCD): compile.stamp
ghdl -r $(GHDL_FLAGS) $(TB_MOD) --vcd=$(WAVE_VCD)
 
compile.stamp: $(LXP32_RTL) $(COMMON_SRC) $(TB_SRC)
ghdl -a $(GHDL_FLAGS) $(LXP32_RTL) $(COMMON_SRC) $(TB_SRC)
compile.stamp: $(LXP32_RTL) $(TB_SRC)
ghdl -a $(GHDL_FLAGS) $(LXP32_RTL) $(TB_SRC)
ghdl -e $(GHDL_FLAGS) $(TB_MOD)
echo > compile.stamp
/verify/icache/run/vsim/Makefile
30,8 → 30,8
# Normal targets
########################
 
compile.stamp: $(LXP32_RTL) $(COMMON_SRC) $(TB_SRC) | work
vcom $(VCOMFLAGS) $(LXP32_RTL) $(COMMON_SRC) $(TB_SRC)
compile.stamp: $(LXP32_RTL) $(TB_SRC) | work
vcom $(VCOMFLAGS) $(LXP32_RTL) $(TB_SRC)
echo > compile.stamp
 
work:
/verify/icache/src/tb/ram_model.vhd
51,7 → 51,6
end process;
 
process is
variable rng_state: rng_state_type;
variable delay: integer;
begin
wait until rising_edge(clk_i) and wbm_cyc_i='1' and wbm_stb_i='1';
59,7 → 58,7
-- Random delay before the first beat
if cycle='0' then
rand(rng_state,0,3,delay);
delay:=rand(0,3);
if delay>0 then
for i in 1 to delay loop
wait until rising_edge(clk_i) and wbm_cyc_i='1' and wbm_stb_i='1';
/verify/icache/src/tb/cpu_model.vhd
45,10 → 45,11
 
signal finish: std_logic:='0';
 
signal current_latency: integer:=1;
signal max_latency: integer:=-1;
signal total_latency: integer:=0;
signal spurious_misses: integer:=0;
shared variable current_latency: integer:=1;
shared variable max_latency: integer:=-1;
shared variable total_latency: integer:=0;
shared variable total_requests: integer:=0;
shared variable spurious_misses: integer:=0;
 
begin
 
58,21 → 59,16
variable size: integer;
variable addr: integer:=0;
variable delay: integer;
variable rng_state: rng_state_type;
variable r: integer;
variable total_requests: integer:=0;
begin
while b<=BLOCKS loop
rand(rng_state,1,10,r);
if r=1 then -- insert large block occasionally
rand(rng_state,1,400,size);
if rand(1,10)=1 then -- insert large block occasionally
size:=rand(1,400);
else -- small block
rand(rng_state,1,32,size);
size:=rand(1,32);
end if;
rand(rng_state,0,1,r);
if r=0 then -- long jump
rand(rng_state,0,1024,start);
if rand(0,1)=0 then -- long jump
start:=rand(0,1024);
addr:=start;
if VERBOSE then
report "Fetching block #"&integer'image(b)&" at address "&integer'image(addr)&
79,8 → 75,7
" of size "&integer'image(size);
end if;
else -- short jump
rand(rng_state,-10,10,r);
start:=addr+r;
start:=addr+rand(0,20)-10;
if start<0 then
start:=0;
end if;
98,7 → 93,7
wait until rising_edge(clk_i) and lli_busy_i='0';
re<='0';
addr:=addr+1;
rand(rng_state,0,4,delay);
delay:=rand(0,4);
if delay>0 then
for i in 1 to delay loop
wait until rising_edge(clk_i);
152,28 → 147,23
if rising_edge(clk_i) then
if lli_busy_i='0' then
if request='1' then
total_latency<=total_latency+current_latency;
total_latency:=total_latency+current_latency;
if current_latency>max_latency then
max_latency<=current_latency;
max_latency:=current_latency;
end if;
end if;
current_latency<=1;
current_latency:=1;
else
if lli_dat_i=(("00"&request_addr) xor xor_constant) and current_latency=1 then
spurious_misses<=spurious_misses+1;
spurious_misses:=spurious_misses+1;
end if;
current_latency<=current_latency+1;
current_latency:=current_latency+1;
end if;
end if;
end process;
 
process (clk_i) is
begin
if rising_edge(clk_i) then
assert lli_busy_i='0' or request='1'
report "LLI busy signal asserted without a request"
severity failure;
end if;
end process;
assert not rising_edge(clk_i) or lli_busy_i='0' or request='1'
report "LLI busy signal asserted without a request"
severity failure;
 
end architecture;
/verify/icache/src/make/sources.make
4,15 → 4,13
LXP32_RTL=$(LXP32_DIR)/lxp32_ram256x32.vhd\
$(LXP32_DIR)/lxp32_icache.vhd
 
# Common package
# Testbench sources
 
COMMON_PKG_DIR=../../../common_pkg
COMMON_SRC=$(COMMON_PKG_DIR)/common_pkg.vhd $(COMMON_PKG_DIR)/common_pkg_body.vhd
 
# Testbench sources
 
TB_DIR=../../src/tb
TB_SRC=$(TB_DIR)/tb_pkg.vhd\
TB_SRC=$(COMMON_PKG_DIR)/common_pkg.vhd\
$(COMMON_PKG_DIR)/common_pkg_body.vhd\
$(TB_DIR)/tb_pkg.vhd\
$(TB_DIR)/cpu_model.vhd\
$(TB_DIR)/ram_model.vhd\
$(TB_DIR)/tb.vhd
/doc/lxp32-trm.pdf Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream
/doc/src/trm/frontmatter.tex
15,7 → 15,7
\Large a lightweight open source 32-bit CPU core\par
\LARGE \textbf{Technical Reference Manual}\par
\vspace{1.2\onelineskip}
\large Version 1.1\par
\large Version 1.0\par
\vspace*{4\onelineskip}
\end{center}
\vspace*{\fill}
34,7 → 34,7
 
\vspace*{\fill}
 
Copyright \textcopyright{} 2016--2019 by Alex I. Kuznetsov.
Copyright \textcopyright{} 2016 by Alex I. Kuznetsov.
 
The entire \lxp{} IP core package, including the synthesizable RTL description, verification environment, documentation and software tools, is distributed under the terms of the MIT license reproduced below:
 
46,6 → 46,8
 
\vspace{4\baselineskip}
 
Altera and Cyclone are trademarks of Altera Corporation and registered in the U.S. Patent and Trademark Office and in other countries.
 
Mentor Graphics and ModelSim are trademarks of Mentor Graphics Corporation.
 
Microsemi and IGLOO are trademarks of Microsemi Corporation.
/doc/src/trm/preamble.tex
1,5 → 1,4
\usepackage{microtype}
\usepackage{graphicx}
\usepackage{alltt}
\usepackage{amsmath}
\usepackage[charter]{mathdesign}
/doc/src/trm/lxp32-trm.tex
16,18 → 16,26
\lxp{} (\emph{Lightweight eXecution Pipeline}) is a small 32-bit CPU IP core optimized for FPGA implementation. Its key features include:
 
\begin{itemize}
\item portability (described in behavioral VHDL-93, not tied to any particular vendor);
\item 3-stage hazard-free pipeline;
\item described in portable VHDL-93, not tied to any particular vendor;
\item 3-stage pipeline;
\item 256 registers implemented as a RAM block;
\item a simple instruction set with only 30 distinct opcodes;
\item simple instruction set with fewer than 30 distinct opcodes;
\item separate instruction and data buses, optional instruction cache;
\item WISHBONE compatibility;
\item WISHBONE compatible;
\item 8 interrupts with hardwired priorities;
\item optional divider.
\end{itemize}
 
As a lightweight CPU core, \lxp{} lacks some features of more advanced processors, such as nested interrupt handling, debugging support, floating-point and memory management units. \lxp{} is based on an original ISA (Instruction Set Architecture) which does not currently have a C compiler. It can be programmed in the assembly language covered by Appendix \ref{app:assemblylanguage}.
Being a lightweight IP core, \lxp{} also has certain limitations:
 
\begin{itemize}
\item no branch prediction;
\item no floating-point unit;
\item no memory management unit;
\item no nested interrupt handling;
\item no debugging facilities.
\end{itemize}
 
Two major hardware versions of the CPU are provided: \lxp{}U which does not include an instruction cache and uses the Low Latency Interface (Section \ref{sec:lli}) to fetch instructions, and \lxp{}C which fetches instructions over a cached WISHBONE bus protocol. These versions are otherwise identical and have the same instruction set architecture.
 
\section{Implementation estimates}
50,23 → 58,33
\toprule
Resource & Compact & Full \\
\midrule
\multicolumn{3}{c}{Altera\textregistered{} Cyclone\textregistered{} V 5CEBA2F23C8} \\
\midrule
Logic Array Blocks (LABs) & 79 & 119 \\
\hspace*{1em}ALMs & 630 & 972 \\
\hspace*{2em}ALUTs & 982 & 1531 \\
\hspace*{2em}Flip-flops & 537 & 942 \\
DSP blocks & 3 & 3 \\
RAM blocks (M10K) & 2 & 3 \\
Clock frequency & 103.9 MHz & 98.8 MHz \\
\midrule
\multicolumn{3}{c}{Microsemi\textregistered{} IGLOO\textregistered{}2 M2GL005-FG484} \\
\midrule
Logic elements (LUT+DFF) & 1457 & 2086 \\
\hspace*{1em}LUTs & 1421 & 1999 \\
\hspace*{1em}Flip-flops & 706 & 1110 \\
Logic elements (LUT+DFF) & 1529 & 2226 \\
\hspace*{1em}LUTs & 1471 & 2157 \\
\hspace*{1em}Flip-flops & 718 & 1181 \\
Mathblocks (MACC) & 3 & 3 \\
RAM blocks (RAM1K18) & 2 & 3 \\
Clock frequency & 107.7 MHz & 109.2 MHz \\
Clock frequency & 111.7 MHz & 107.8 MHz \\
\midrule
\multicolumn{3}{c}{Xilinx\textregistered{} Artix\textregistered{}-7 xc7a15tfgg484-1} \\
\midrule
Slices & 235 & 365 \\
\hspace*{1em}LUTs & 666 & 1011 \\
\hspace*{1em}Flip-flops & 528 & 883 \\
Slices & 264 & 381 \\
\hspace*{1em}LUTs & 809 & 1151 \\
\hspace*{1em}Flip-flops & 527 & 923 \\
DSP blocks (DSP48E1) & 4 & 4 \\
RAM blocks (RAMB18E1) & 2 & 3 \\
Clock frequency & 111.9 MHz & 120.2 MHz \\
Clock frequency & 113.6 MHz & 109.3 MHz \\
\bottomrule
\end{tabularx}
\end{table}
75,9 → 93,9
 
General description of the \lxp{} operation from a software developer's point of view can be found in Chapter \ref{ch:isa}, \styledtitleref{ch:isa}. Future versions of the \lxp{} CPU are intended to be at least backwards compatible with this architecture.
 
Topics related to hardware, such as synthesis, implementation and interfacing other IP cores, are covered in Chapter \ref{ch:integration}, \styledtitleref{ch:integration}. A brief description of the \lxp{} pipelined architecture is provided in Chapter \ref{ch:pipeline}, \styledtitleref{ch:pipeline}. The \lxp{} IP core package includes a verification environment (self-checking testbench) which can be used to simulate the design as described in Chapter \ref{ch:simulation}, \styledtitleref{ch:simulation}.
Topics related to hardware, such as synthesis, implementation and interfacing other IP cores, are covered in Chapter \ref{ch:integration}, \styledtitleref{ch:integration}. The \lxp{} IP core package also includes testbenches which can be used to simulate the design as described in Chapter \ref{ch:simulation}, \styledtitleref{ch:simulation}.
 
Documentation for tools shipped with the \lxp{} IP core package (assembler/linker, disassembler and interconnect generator) is provided in Chapter \ref{ch:developmenttools}, \styledtitleref{ch:developmenttools}.
Tools shipped as parts of the \lxp{} IP core package (assembler/linker, disassembler and interconnect generator) are documented in Chapter \ref{ch:developmenttools}, \styledtitleref{ch:developmenttools}.
 
Appendices include a detailed description of the \lxp{} instruction set, instruction cycle counts and \lxp{} assembly language definition. WISHBONE datasheet required by the WISHBONE specification is also provided.
 
189,16 → 207,15
\section{Calling procedures}
\label{sec:callingprocedures}
 
\lxp{} provides a \instr{call} instruction which saves the address of the next instruction in the \code{rp} register and transfers execution to the address stored in the register operand. Return from a procedure is performed by the \code{\instr{jmp} rp} instruction which also has a \instr{ret} alias.
\lxp{} provides a \instr{call} instruction which stores the address of the next instruction in the \code{rp} register and transfers execution to the procedure pointed to by the \instr{call} operand. Return from a procedure is performed by the \code{\instr{jmp} rp} instruction which also has a \instr{ret} alias.
 
If a procedure must in turn call a nested procedure itself, the return address in the \code{rp} register will be overwritten by the \instr{call} instruction. Hence, unless it is a tail call (see below), the procedure must save the \code{rp} value somewhere; the most general solution is to use the stack:
If a procedure must in turn call some procedure itself, the return pointer in the \code{rp} register will be overwritten by the \instr{call} instruction. Hence the procedure must save its value somewhere; the most general solution is to use the stack:
 
\begin{codepar}
\instr{sub} sp, sp, 4
\instr{sw} sp, rp
...
\instr{lc} r0, Nested_proc
\instr{call} r0
\instr{call} r1
...
\instr{lw} rp, sp
\instr{add} sp, sp, 4
205,25 → 222,16
\instr{ret}
\end{codepar}
 
Procedures that don't use the \instr{call} instruction (sometimes called \emph{leaf procedures}) don't need to save the \code{rp} value.
Procedures that don't use the \instr{call} instruction (sometimes called \emph{leaf} procedures) don't need to save the \code{rp} value.
 
Since \instr{ret} is just an alias for \code{\instr{jmp} rp}, one can also use \instrname{Compare and Jump} instructions (\instr{cjmp\emph{xxx}}) to perform a conditional procedure return. For example, consider the following procedure which calculates the absolute value of \code{r1}:
Since \instr{ret} is just an alias for \code{\instr{jmp} rp}, one can also use \instrname{Compare and Jump} instructions (\instr{cjmp\emph{xxx}}) to perform a conditional procedure return.
 
\begin{codepar}
Abs_proc:
\instr{cjmpsge} rp, r1, 0 \emph{// return immediately if r1>=0}
\instr{neg} r1, r1 \emph{// otherwise, negate r1}
\instr{ret} \emph{// jmp rp}
\end{codepar}
 
A \emph{tail call} is a special type of procedure call where the calling procedure calls a nested procedure as the last action before return. In such cases the \instr{call} instruction can be replaced with \instr{jmp}, so that when the nested procedure executes \instr{ret}, it returns directly to the caller's parent procedure.
 
Although the \lxp{} architecture doesn't mandate any particular calling convention, some general recommendations are presented below:
 
\begin{enumerate}
\item Pass arguments and return values through the \code{r1}--\code{r31} registers (a procedure can have multiple return values).
\item If necessary, the \code{r0} register can be used to load the procedure address.
\item Designate \code{r0}--\code{r31} registers as \emph{caller-saved}, that is, they are not guaranteed to be preserved during procedure calls and must be saved by the caller if needed. The procedure can use them for any purpose, regardless of whether they are used to pass arguments and/or return values.
\item Pass arguments through the \code{r1}--\code{r31} registers.
\item Return value through the \code{r0} register.
\item Designate \code{r0}--\code{r31} registers as \emph{caller-saved}, that is, they are not guaranteed to be preserved during procedure calls and must be saved by the caller if needed. The procedure can use them for any purpose, regardless of whether they are used to pass arguments and/or return values. For obvious reasons, this rule does not apply to interrupt handlers.
\end{enumerate}
 
\section{Interrupt handling}
261,24 → 269,21
 
Interrupt handlers are invoked by the CPU similarly to procedures (Section \ref{sec:callingprocedures}), the difference being that in this case the return address is stored in the \code{irp} register (as opposed to \code{rp}), and the least significant bit of the register (\code{IRF} -- \emph{Interrupt Return Flag}) is set.
 
An interrupt handler returns using the \code{\instr{jmp} irp} instruction which also has an \instr{iret} alias. Until the interrupt handler returns, the CPU will defer further interrupt processing (although incoming interrupt requests will still be registered). This also means that the \code{irp} register value will not be unexpectedly overwritten. When executing the \code{\instr{jmp} irp} instruction, the CPU will recognize the \code{IRF} flag and resume interrupt processing as usual. It is also possible to perform a conditional return from the interrupt handler, similarly to the technique described in Section \ref{sec:callingprocedures} for conditional procedure returns.
An interrupt handler returns using the \code{\instr{jmp} irp} instruction which also has an \instr{iret} alias. Until the interrupt handler returns, the CPU will defer further interrupt processing (although incoming interrupt requests will still be registered). This also means that the \code{irp} register value will not be unexpectedly overwritten. When executing the \code{\instr{jmp} irp} instruction, the CPU will recognize the \code{IRF} flag and resume interrupt processing as usual. This behavior can be exploited to perform a conditional return from the interrupt handler, similarly to the technique described in Section \ref{sec:callingprocedures} for conditional procedure returns.
 
\subsection{Non-returnable interrupts}
Another technique can be useful when waiting for a single event, such as a coprocessor finishing its job: the interrupt handler can be set up to return to a designated address instead of the address stored in the \code{irp} register. This designated address must have the \code{IRF} flag set, otherwise all further interrupt processing will be disabled:
 
If an interrupt vector has the least significant bit (\code{IRF}) set, the CPU will resume interrupt processing immediately. One should not try to invoke \instr{iret} from such a handler since the \code{irp} register could have been overwritten by another interrupt. This technique can be useful when the CPU's only task is to process external events:
\begin{codepar}
\instr{lc} r0, continue@1 \emph{// IRF flag}
\instr{lc} iv0, handler
... \emph{// issue coprocessor command}
\instr{hlt} \emph{// wait for an interrupt}
continue:
... \emph{// the execution will continue here}
handler:
\instr{jmp} r0
\end{codepar}
 
\begin{codeparbreakable}
\emph{// Set the IRF to mark the interrupt as non-returnable}
\instr{lc} iv0, main\_loop@1
\instr{mov} cr, 1 \emph{// enable the interrupt}
\instr{hlt} \emph{// wait for an interrupt request}
main\_loop:
\emph{// Process the event...}
\instr{hlt} \emph{// wait for the next interrupt request}
\end{codeparbreakable}
 
Note that \instr{iret} is never called in this example.
 
\chapter{Integration}
\label{ch:integration}
 
309,13 → 314,13
\label{fig:symbols}
\end{figure}
 
\lxp{}U uses the Low Latency Interface (LLI) described in Section \ref{sec:lli} to fetch instructions. This interface is designed to interact with low latency on-chip peripherals such as RAM blocks. It works best with slaves that can return the instruction on the next cycle after its address has been set, although the slave can still introduce wait states if needed. The Low Latency Interface can also be connected to a custom (external) instruction cache.
\lxp{}U uses the Low Latency Interface (Section \ref{sec:lli}) to fetch instructions. This interface is designed to interact with low latency on-chip peripherals such as RAM blocks or similar devices that are generally expected to return a data word one cycle after the instruction address has been set. It can also be connected to a custom (external) instruction cache.
 
To achieve the least possible latency, some LLI outputs are not registered. For this reason the LLI is not suitable for interaction with off-chip peripherals.
To achieve the least possible latency, some LLI signals are not registered. For this reason the LLI is not suitable for interaction with off-chip peripherals.
 
\lxp{}C is designed to work with high latency memory controllers and uses a simple instruction cache based on a ring buffer. The instructions are fetched over the WISHBONE instruction bus. To maximize throughput, the CPU makes use of the WISHBONE registered feedback signals [CTI\_O()] and [BTE\_O()]. All outputs on this bus are registered. This version is also recommended for use in situations where LLI combinatorial delays are unacceptable.
\lxp{}C fetches instructions over the WISHBONE instruction bus. To maximize throughput, it supports the WISHBONE registered feedback signals [CTI\_O()] and [BTE\_O()]. All outputs on this bus are registered. This version is recommended for use with high latency memory devices such as SDRAM chips, as well as for situations where LLI combinatorial delays are unacceptable.
 
Both \lxp{}U and \lxp{}C use the WISHBONE protocol for the data bus.
Both \lxp{}U and \lxp{}C use the WISHBONE protocol for the data bus.
 
\section{Ports}
 
371,11 → 376,11
 
By default, \lxp{} uses the \signal{dbus\_sel\_o} (byte enable) port to perform byte-granular write transactions initiated by the \instr{sb} (\instrname{Store Byte}) instruction. If this option is set to \code{true}, \signal{dbus\_sel\_o} is always tied to \code{"1111"}, and byte-granular write access is performed using the RMW (read-modify-write) cycle. The latter method is slower, but can work with slaves that do not have the [SEL\_I()] port.
 
This feature is designed with the assumption that read and write transactions do not cause side effects, thus it can be unsuitable for some slaves.
This feature requires data bus transactions to be idempotent, that is, repeating a transaction must not alter the slave state. Care should be taken with non-memory slaves to ensure that this condition is satisfied.
 
\subsection{DIVIDER\_EN}
 
\lxp{} includes a divider unit which has quite a low performance but occupies a considerable amount of resources. It can be disabled by setting this option to \code{false}.
\lxp{} includes a divider unit which occupies a considerable amount of resources. It can be excluded by setting this option to \code{false}.
 
\subsection{IBUS\_BURST\_SIZE}
 
401,7 → 406,7
 
\subsection{START\_ADDR}
 
Address of the first instruction to be executed after CPU reset. Default value is \code{0}. The two least significant bits are ignored as instructions are always word-aligned.
Address of the first instruction to be executed after CPU reset. Default value is \code{0}. Note that it is a 30-bit value as it is used to address 32-bit words, not bytes.
 
\section{Clock and reset}
\label{sec:clockreset}
424,8 → 429,10
\section{Low Latency Interface}
\label{sec:lli}
 
Low Latency Interface (LLI) is a simple pipelined synchronous protocol with a typical latency of 1 cycle used by \lxp{}U to fetch instructions. It was designed to allow simple connection of the CPU to on-chip program RAM or cache. The timing diagram of the LLI is shown on Figure \ref{fig:llitiming}.
Low Latency Interface is a simple pipelined synchronous protocol with a typical latency of 1 cycle used by \lxp{}U to fetch instructions. Its timing diagram is shown on Figure \ref{fig:llitiming}. The request is considered valid when \signal{lli\_re\_o} is high and \signal{lli\_busy\_i} is low on the same clock cycle. On the next cycle after the request is valid the slave must either produce data on \signal{lli\_dat\_i} or assert \signal{lli\_busy\_i} to indicate that data are not ready. Note that the values of \signal{lli\_re\_o} and \signal{lli\_adr\_o} are not guaranteed to be preserved by the CPU while the slave is busy.
 
The simplest, ``always ready'' slaves such as on-chip RAM blocks can be trivially connected to the LLI by connecting address, data and read enable ports and tying the \signal{lli\_busy\_i} signal to a logical \code{0}. Slaves are also allowed to introduce wait states, which makes it possible to implement external caching.
 
\begin{figure}[htbp]
\centering
\includegraphics[scale=1]{images/llitiming.pdf}
433,23 → 440,15
\label{fig:llitiming}
\end{figure}
 
To request a word, the master produces its address on \signal{lli\_adr\_o} and asserts \signal{lli\_re\_o}. The request is considered valid when \signal{lli\_re\_o} is high and \signal{lli\_busy\_i} is low on the same clock cycle. On the next cycle after a valid request, the slave must either produce data on \signal{lli\_dat\_i} or assert \signal{lli\_busy\_i} to indicate that data are not ready. \signal{lli\_busy\_i} must be held high until the valid data are present on the \signal{lli\_dat\_i} port.
 
The data provided by the slave are only required to be valid on the next cycle after a valid request (if \signal{lli\_busy\_i} is not asserted) or on the cycle when \signal{lli\_busy\_i} is deasserted after being held high. Otherwise \signal{lli\_dat\_i} is undefined.
 
The values of \signal{lli\_re\_o} and \signal{lli\_adr\_o} are not guaranteed to be preserved by the master while the slave is busy.
 
The simplest slaves such as on-chip RAM blocks which are never busy can be trivially connected to the LLI by connecting address, data and read enable ports and tying the \signal{lli\_busy\_i} signal to a logical \code{0} (you can even ignore \signal{lli\_re\_o} in this case, although doing so can theoretically increase power consumption).
 
Note that the \signal{lli\_adr\_o} signal has a width of 30 bits since it addresses words, not bytes (instructions are always word-aligned).
 
Since the \signal{lli\_re\_o} output signal is not registered, this interface is not suitable for interaction with off-chip peripherals. Also, care should be taken to avoid introducing too much additional combinatorial delay on its outputs.
Since this interface is not registered, it is not suitable for interaction with off-chip peripherals. Also, care should be taken to avoid introducing too much additional combinatorial delay on its outputs.
 
\section{WISHBONE instruction bus}
 
The \lxp{}C CPU fetches instructions over the WISHBONE bus. Its parameters are defined in the WISHBONE datasheet (Appendix \ref{app:wishbonedatasheet}). For a detailed description of the bus protocol refer to the WISHBONE specification, revision B3.
 
With the classic WISHBONE handshake, decent throughput can only be achieved when the slave is able to terminate cycles asynchronously. This is usually possible only for the simplest slaves which should probably be using the Low Latency Interface instead. To maximize throughput for complex, high latency slaves, the \lxp{}C instruction bus uses optional WISHBONE address tags [CTI\_O()] (Cycle Type Identifier) and [BTE\_O()] (Burst Type Extension). These signals are hints allowing the slave to predict the address that will be set by the master in the next cycle and prepare data in advance. The slave can ignore these hints, processing requests as classic WISHBONE cycles, although performance would almost certainly suffer in this case.
With the classic WISHBONE handshake, decent throughput can only be achieved when the slave is able to terminate cycles asynchronously. This is usually possible only for the simplest slaves, which should probably be using the Low Latency Interface in the first place. To maximize throughput for complex, high latency slaves, the \lxp{}C instruction bus uses optional WISHBONE address tags [CTI\_O()] (Cycle Type Identifier) and [BTE\_O()] (Burst Type Extension). These signals are hints allowing the slave to predict the address that will be set by the master in the next cycle and prepare data in advance. The slave can ignore these hints, processing requests as classic WISHBONE cycles, although performance would almost certainly suffer in this case.
 
A typical \lxp{}C instruction bus burst timing diagram is shown on Figure \ref{fig:ibustiming}.
 
495,8 → 494,6
 
These design units contain behavioral description of respective hardware that is recognizable by FPGA synthesis tools. Usually no adjustments are needed as the synthesizer will automatically infer an appropriate primitive from its behavioral description. If automatic inference produces unsatisfactory results, these design units can be replaced with library element wrappers. The same is true for ASIC logic synthesis software which is unlikely to infer complex primitives.
 
\lxp{} implements its own bypass logic dealing with situations when RAM read and write addresses collide. It does not depend on the read/write conflict resolution behavior of the underlying primitive.
 
\subsection{General optimization guidelines}
 
This subsection contains general advice on achieving satisfactory synthesis results regardless of the optimization goal. Some of these suggestions are also mentioned in other parts of this manual.
507,14 → 504,12
\item Ensure that the instruction bus has adequate throughput. For \lxp{}C, check that the slave supports the WISHBONE registered feedback signals [CTI\_I()] and [BTE\_I()].
\item Multiplexing instruction and data buses, or connecting them to the same interconnect that allows only one master at a time to be active (i.e. \emph{shared bus} interconnect topology) is not recommended. If you absolutely must do so, assign a higher priority level to the data bus, otherwise instruction prefetches will massively slow down data transactions.
\item For small programs, consider mapping code and data memory to the beginning or end of the address space (i.e. \code{0x00000000}--\code{0x000FFFFF} or \code{0xFFF00000}--\code{0xFFFFFFFF}) to be able to load pointers with the \instr{lcs} instruction which saves both memory and CPU cycles as compared to \instr{lc}.
\end{enumerate}
 
\subsection{Optimizing for timing}
 
\begin{enumerate}
\item Set up reasonable timing constraints. Do not overconstrain the design by more than 10--15~\%.
\item Set up reasonable timing constraints. Do not overconstrain the design by more than 10--15~\%.
\item Analyze the worst path. The natural \lxp{} timing bottleneck usually goes from the scratchpad (register file) output through the ALU (in the Execute stage) to the scratchpad input. If timing analysis lists other critical paths, the problem can lie elsewhere. If the \signal{rst\_i} signal becomes a bottleneck, promote it to a global network or, with SRAM-based FPGAs, consider operating without reset (see Section \ref{sec:clockreset}). Critical paths affecting the WISHBONE state machines could indicate problems with interconnect performance.
526,7 → 521,7
\subsection{Optimizing for area}
 
\begin{enumerate}
\item Consider disabling the divider if not using it (see Section \ref{sec:generics}).
\item Consider excluding the divider if not using it (see Section \ref{sec:generics}).
\item Relaxing timing constraints can sometimes allow the synthesizer to produce a more area-efficient circuit.
533,64 → 528,6
\item Increase the fanout limit in the synthesizer settings to reduce buffer replication.
\end{enumerate}
 
\chapter{Hardware architecture}
\label{ch:pipeline}
 
The \lxp{} CPU is based on a 3-stage hazard-free pipelined architecture and uses a large RAM-based register file (scratchpad) with two read ports and one write port. The pipeline includes the following stages:
 
\begin{itemize}
\item\emph{Fetch} -- fetches instructions from the program memory.
\item\emph{Decode} -- decodes instructions and reads register operand values from the scratchpad.
\item\emph{Execute} -- executes instructions and writes the results (if any) to the scratchpad.
\end{itemize}
 
\lxp{} instructions are encoded in such a way that operand register numbers can be known without decoding the instruction (Section \ref{sec:instructionformat}). When the \emph{Fetch} stage produces an instruction, scratchpad input addresses are set immediately, before the instruction itself is decoded. If the instruction does not use one or both of the register operands, the corresponding data read from the scratchpad are discarded. Collision bypass logic in the scratchpad detects situations where the \emph{Decode} stage tries to read a register which is currently being written by the \emph{Execute} stage and forwards its value, bypassing the RAM block and avoiding Read After Write (RAW) pipeline hazards. Other types of data hazards are also impossible with this architecture.
 
As an example, consider the following simple code chunk:
 
\begin{codepar}
\instr{mov} r0, 10 \emph{// alias for add r0, 10, 0}
\instr{mov} r1, 20 \emph{// alias for add r1, 20, 0}
\instr{add} r2, r0, r1
\end{codepar}
 
Table \ref{tab:examplepipeline} illustrates how this chunk is processed by the \lxp{} pipeline. Note that on the fourth cycle the \emph{Decode} stage requests the \code{r1} register value while the \emph{Execute} stage writes to the same register. Collision bypass logic in the scratchpad ensures that the \emph{Decode} stage reads the correct (new) value of \code{r1} without stalling the pipeline.
 
\begin{table}[htbp]
\caption{Example of the \lxp{} pipeline operation}
\small
\label{tab:examplepipeline}
\begin{tabularx}{\textwidth}{lllL}
\toprule
Cycle & Fetch & Decode & Execute \\
\midrule
1 & \code{\instr{add} r0, 10, 0} & & \\
\midrule
2 & \code{\instr{add} r1, 20, 0} & \code{\instr{add} r0, 10, 0} & \\
& & Request \code{r10} (discarded) & \\
& & Request \code{r0} (discarded) & \\
& & Pass 10 and 0 as operands & \\
\midrule
3 & \code{\instr{add} r2, r0, r1} & \code{\instr{add} r1, 20, 0} & Perform the addition \\
& & Request \code{r20} (discarded) & Write 10 to \code{r0} \\
& & Request \code{r0} (discarded) & \\
& & Pass 20 and 0 as operands & \\
\midrule
4 & & \code{\instr{add} r2, r0, r1} & Perform the addition \\
& & Request \code{r0} & Write 20 to \code{r1} \\
& & Request \code{r1} (bypass) & \\
& & Pass 10 and 20 as operands & \\
\midrule
5 & & & Perform the addition \\
& & & Write 30 to \code{r2} \\
\bottomrule
\end{tabularx}
\end{table}
 
When an instruction takes more than one cycle to execute, the \emph{Execute} stage simply stalls the pipeline.
 
Branch hazards are impossible in \lxp{} as well since the pipeline is flushed whenever an execution transfer occurs.
 
\chapter{Simulation}
\label{ch:simulation}
 
653,7 → 590,6
Produced \shellcmd{*.ram} files must be placed in the simulator's working directory.
\item Compile the \lxp{} RTL description (\shellcmd{rtl} directory).
\item Compile the common package (\shellcmd{verify/common\_pkg}).
\item Compile the test platform (\shellcmd{verify/lxp32/src/platform} directory).
\item Compile the testbench itself (\shellcmd{verify/lxp32/src/tb} directory).
\item Simulate the \shellcmd{tb} design unit defined in the \shellcmd{tb.vhd} file.
664,8 → 600,6
Simulation parameters can be configured by overriding generics defined by the \shellcmd{tb} design unit:
 
\begin{itemize}
\item \code{CPU\_DBUS\_RMW} -- \code{DBUS\_RMW} CPU generic value (see Section \ref{sec:generics}).
\item \code{CPU\_MUL\_ARCH} -- \code{MUL\_ARCH} CPU generic value (see Section \ref{sec:generics}).
\item \code{MODEL\_LXP32C} -- simulate the \lxp{}C version. By default, this option is set to \code{true}. If set to \code{false}, \lxp{}U is simulated instead.
\item \code{TEST\_CASE} -- if set to a non-empty string, specifies the file name of a test case to run. If set to an empty string (default), all tests are executed.
\item \code{THROTTLE\_DBUS} -- perform pseudo-random data bus throttling. By default, this option is set to \code{true}.
696,50 → 630,39
In the simplest case there is only one input source file which doesn't contain external symbol references. If there are multiple input files, one of them must define the \code{entry} symbol at the beginning of the code.
 
\subsection{Command line syntax}
\label{subsec:assemblercmdline}
 
\begin{codepar}
lxp32asm [ \emph{options} | \emph{input files} ]
\end{codepar}
 
\subsubsection{General options}
Options supported by \shellcmd{lxp32asm} are listed below:
 
\begin{itemize}
\item \shellcmd{-a \emph{align}} -- section alignment. Must be a multiple of 4, default value is 4. Ignored in compile-only mode.
\item \shellcmd{-b \emph{addr}} -- Base address, that is, address in memory where the executable image will be located. Must be a multiple of section alignment. Default value is 0. Ignored in compile-only mode.
\item \shellcmd{-c} -- compile only (skip the Link stage).
\item \shellcmd{-f \emph{fmt}} -- select executable image format (see below for the list of supported formats). Ignored in compile-only mode.
\item \shellcmd{-h}, \shellcmd{--help} -- display a short help message and exit.
\item \shellcmd{-o \emph{file}} -- output file name.
\item \shellcmd{--} -- do not interpret the subsequent command line arguments as options. Can be used if there are input file names starting with a dash.
\end{itemize}
 
\subsubsection{Compiler options}
 
\begin{itemize}
\item \shellcmd{-i \emph{dir}} -- add \emph{dir} to the list of directories used to search for included files. Multiple directories can be specified with multiple \shellcmd{-i} arguments.
\end{itemize}
 
\subsubsection{Linker options (ignored in compile-only mode)}
 
\begin{itemize}
\item \shellcmd{-a \emph{align}} -- object alignment. Must be a power of 2 and can't be less than 4. Default value is 4.
\item \shellcmd{-b \emph{addr}} -- base address, that is, the address in memory where the executable image will be located. Must be a multiple of object alignment. Default value is 0.
\item \shellcmd{-o \emph{file}} -- output file name.
\item \shellcmd{-f \emph{fmt}} -- executable image format. See below for the list of supported formats.
\item \shellcmd{-s \emph{size}} -- size of the executable image. Must be a multiple of 4. If total code size is less than the specified value, the executable image is padded with zeros. By default, image is not padded. This option is ignored in compile-only mode.
\item \shellcmd{-m \emph{file}} -- generate a map file. A map file is a human-readable list of all object and symbol addresses in the executable image.
\item \shellcmd{-s \emph{size}} -- size of the executable image. Must be a multiple of 4. If total code size is less than the specified value, the executable image is padded with zeros. By default, the image is not padded.
\item \shellcmd{--} -- do not interpret subsequent command line arguments as options. Can be used if there are input file names starting with a dash.
\end{itemize}
 
\subsection{Output formats}
 
Output formats that can be specified with the \shellcmd{-f} command line option are listed below.
The following output formats are supported by \shellcmd{lxp32asm}:
 
\begin{itemize}
\item \shellcmd{bin} -- raw binary image (little-endian). This is the default format.
\item \shellcmd{bin} -- raw binary image. This is the default format.
\item \shellcmd{textio} -- text format representing binary data as a sequence of zeros and ones. This format can be directly read from VHDL (using the \code{std.textio} package) or Verilog\textregistered{} (using the \code{\$readmemb} function).
\item \shellcmd{dec} -- text format representing each word as a decimal number.
\item \shellcmd{hex} -- text format representing each word as a hexadecimal number.
764,8 → 687,6
\item \shellcmd{-h}, \shellcmd{--help} -- display a short help message and exit.
\item \shellcmd{-na} -- do not use instruction aliases (such as \instr{mov}, \instr{ret}, \instr{not}) and register aliases (such as \code{sp}, \code{rp}).
\item \shellcmd{-o \emph{file}} -- output file name. By default, the standard output stream is used.
\item \shellcmd{--} -- do not interpret subsequent command line arguments as options.
773,7 → 694,7
 
\section{\shellcmd{wigen} -- Interconnect generator}
 
\shellcmd{wigen} is a small tool that generates VHDL description of a simple WISHBONE interconnect based on shared bus topology. It supports any number of masters and slaves. The interconnect can then be used to create a SoC based on \lxp{}.
\shellcmd{wigen} is a small tool that generates VHDL description of a simple WISHBONE interconnect based on shared bus topology. It supports any number of masters and slaves.
 
For interconnects with multiple masters a priority-based arbitration circuit is inserted with lower-numbered masters taking precedence. However, when a bus cycle is in progress ([CYC\_O] is asserted by the active master), the arbiter will not interrupt it even if a master with a higher priority level requests bus ownership.
 
824,7 → 745,7
 
\subsection{Build procedure}
 
This software uses CMake as a build system generator. Building it involves two steps: first, the \shellcmd{cmake} program is invoked to generate a native build environment (a set of Makefiles or an IDE project); second, the generated environment is used to build the software. More details can be found in the CMake documentation.
This software uses CMake as a build system generator. Building it involves two steps: first, the \shellcmd{cmake} program is invoked to generate a native build environment (a set of Makefiles or an IDE project); second, the generated environment is used to build the software.
 
\subsubsection{Examples}
 
870,6 → 791,8
make install
\end{codepar}
 
More details can be found in the CMake documentation.
 
\appendix
 
\chapter{Instruction set reference}
887,7 → 810,6
\midrule
\hyperref[subsec:instr:mov]{\instr{mov}} & Move & alias for \code{\instr{add} dst, src, 0} \\
\hyperref[subsec:instr:lc]{\instr{lc}} & Load Constant & \code{000001} \\
\hyperref[subsec:instr:lcs]{\instr{lcs}} & Load Constant Short & \code{101xxx} \\
\hyperref[subsec:instr:lw]{\instr{lw}} & Load Word & \code{001000} \\
\hyperref[subsec:instr:lub]{\instr{lub}} & Load Unsigned Byte & \code{001010} \\
\hyperref[subsec:instr:lsb]{\instr{lsb}} & Load Signed Byte & \code{001011} \\
898,7 → 820,6
\midrule
\hyperref[subsec:instr:add]{\instr{add}} & Add & \code{010000} \\
\hyperref[subsec:instr:sub]{\instr{sub}} & Subtract & \code{010001} \\
\hyperref[subsec:instr:neg]{\instr{neg}} & Negate & alias for \code{\instr{sub} dst, 0, src} \\
\hyperref[subsec:instr:mul]{\instr{mul}} & Multiply & \code{010010} \\
\hyperref[subsec:instr:divu]{\instr{divu}} & Divide Unsigned & \code{010100} \\
\hyperref[subsec:instr:divs]{\instr{divs}} & Divide Signed & \code{010101} \\
1038,7 → 959,7
\instr{cjmpuge} & \code{111010} \\
\end{tabularx}
 
\instr{cjmpsl}, \instr{cjmpsle}, \instr{cjmpul}, \instr{cjmpule} instructions are aliases for \instr{cjmpsg}, \instr{cjmpsge}, \instr{cjmpug}, \instr{cjmpuge}, respectively, with RD1 and RD2 operands swapped.
\instr{cjmpsl}, \instr{cjmpsle}, \instr{cjmpul}, \instr{cjmpule} are aliases for \instr{cjmpsg}, \instr{cjmpsge}, \instr{cjmpug}, \instr{cjmpuge}, respectively, with RD1 and RD2 operands swapped.
 
Example: \code{\instr{cjmpuge} r2, r1, 5} $\rightarrow$ \code{0xEA020105}
 
1140,7 → 1061,7
\subsection{\instr{lc} -- Load Constant}
\label{subsec:instr:lc}
 
Load a 32-bit word to the specified register. Note that values from the [-1048576; 1048575] range can be loaded more efficiently using the \instr{lcs} instruction.
Load a 32-bit word to the specified register. Note that values in the [-128; 127] range can be loaded more efficiently using the \instr{mov} instruction alias.
 
\subsubsection{Syntax}
 
1158,25 → 1079,6
 
\code{DST := WORD32}
 
\subsection{\instr{lcs} -- Load Constant Short}
\label{subsec:instr:lcs}
 
Load a signed value from the [-1048576; 1048575] range (a sign extended 21-bit value) to the specified register. Unlike the \instr{lc} instruction, this instruction is encoded as a single word.
 
\subsubsection{Syntax}
 
\code{\instr{lcs} DST, VAL}
 
\subsubsection{Encoding}
 
\code{101 VAL[20:16] DST VAL[15:0]}
 
Example: \code{\instr{lcs} r1, -1000000} $\rightarrow$ \code{0xB001BDC0}
 
\subsubsection{Operation}
 
\code{DST := (\emph{signed}) VAL}
 
\subsection{\instr{lsb} -- Load Signed Byte}
\label{subsec:instr:lsb}
 
1318,15 → 1220,6
 
Since the product width is the same as the operand width, the result of a multiplication does not depend on operand signedness.
 
\subsection{\instr{neg} -- Negate}
\label{subsec:instr:neg}
 
\subsubsection{Syntax}
 
\code{\instr{neg} DST, RD2}
 
Alias for \code{\instr{sub} DST, 0, RD2}
 
\subsection{\instr{nop} -- No Operation}
\label{subsec:instr:nop}
 
1522,7 → 1415,7
 
\chapter{Instruction cycle counts}
 
Cycle counts for \lxp{} instructions are listed in Table \ref{tab:cycles}, based on an assumption that no pipeline stalls are caused by the instruction bus latency or cache misses. These data are provided for reference purposes; the software should not depend on them as they can change in future hardware revisions.
Cycle counts for \lxp{} instructions are listed in Table \ref{tab:cycles}. These values can change in future hardware revisions.
 
\begin{table}[htbp]
\centering
1530,19 → 1423,18
\label{tab:cycles}
\begin{tabularx}{0.8\textwidth}{LLLL}
\toprule
Instruction & Cycles & Instruction & Cycles \\
Instruction & Cycle count & Instruction & Cycle count \\
\midrule
\instr{add} & 1 & \instr{modu} & 37 \\
\instr{and} & 1 & \instr{mov} & 1 \\
\instr{call} & 4 & \instr{mul} & 2, 6 or 34\footnotemark[3] \\
\instr{cjmp\emph{xxx}} & 5 or 2\footnotemark[1] & \instr{neg} & 1 \\
\instr{divs} & 36 & \instr{nop} & 1 \\
\instr{divu} & 36 & \instr{not} & 1 \\
\instr{hlt} & N/A & \instr{or} & 1 \\
\instr{jmp} & 4 & \instr{ret} & 4 \\
\instr{iret} & 4 & \instr{sb} & $\ge$ 2\footnotemark[2] \\
\instr{lc} & 2 & \instr{sl} & 2 \\
\instr{lcs} & 1 & \instr{srs} & 2 \\
\instr{call} & $\ge$ 4\footnotemark[1] & \instr{mul} & 2, 6 or 34\footnotemark[3] \\
\instr{cjmp\emph{xxx}} & $\ge$ 5\footnotemark[1] & \instr{nop} & 1 \\
\instr{divs} & 37 & \instr{not} & 1 \\
\instr{divu} & 37 & \instr{or} & 1 \\
\instr{hlt} & N/A & \instr{ret} & $\ge$ 4\footnotemark[1] \\
\instr{jmp} & $\ge$ 4\footnotemark[1] & \instr{sb} & $\ge$ 2\footnotemark[2] \\
\instr{iret} & $\ge$ 4\footnotemark[1] & \instr{sl} & 2 \\
\instr{lc} & 2 & \instr{srs} & 2 \\
\instr{lsb} & $\ge$ 3\footnotemark[2] & \instr{sru} & 2 \\
\instr{lub} & $\ge$ 3\footnotemark[2] & \instr{sub} & 1 \\
\instr{lw} & $\ge$ 3\footnotemark[2] & \instr{sw} & $\ge$ 2\footnotemark[2] \\
1551,9 → 1443,9
\end{tabularx}
\end{table}
 
\footnotetext[1]{Depends on whether the jump is taken or not.}
\footnotetext[2]{Depends on the data bus latency.}
\footnotetext[3]{Depends on the multiplier architecture. See Section \ref{sec:generics}.}
\footnotetext[1]{Depends on instruction bus latency. Includes pipeline flushing overhead.}
\footnotetext[2]{Depends on data bus latency.}
\footnotetext[3]{Depends on multiplier architecture set with the \code{MUL\_ARCH} generic. See Section \ref{sec:generics}.}
 
\chapter{LXP32 assembly language}
\label{app:assemblylanguage}
1580,7 → 1472,7
 
Numeric literals can take the form of decimal, hexadecimal or octal numbers. Literals prefixed with \code{0x} are interpreted as hexadecimal, literals prefixed with \code{0} are interpreted as octal, other literals are interpreted as decimal. A numeric literal can also start with a unary plus or minus sign, which is also considered a part of the literal.
 
String literals must be enclosed in double quotes. The most common escape sequences used in C are supported (Table \ref{tab:stringescape}). Note that strings are not null-terminated in the LXP32 assembly language; when required, a terminating null character must be inserted explicitly.
String literals must be enclosed in double quotes. The most common escape sequences used in C are supported (Table \ref{tab:stringescape}).
 
\begin{table}[htbp]
\caption{Escape sequences used in string literals}
1604,18 → 1496,16
\section{Symbols}
\label{sec:symbols}
 
Symbols (labels) are used to refer to data or code locations. \lxp{} assembly language does not have distinct code and data labels: symbols are used in both these contexts.
Symbols are used to refer to data or code locations. \lxp{} assembly language does not have distinct code labels and variable declarations: symbols are used in both these contexts.
 
Symbol names must be valid identifiers. A valid identifier must start with an alphabetic character or an underscore, and may contain alphanumeric characters and underscores.
 
A symbol definition must be the first token in a source code line followed by a colon. A symbol definition can occupy a separate line (in which case it refers to the following statement). Alternatively, a statement can follow the symbol definition on the same line.
 
Symbols can be used as operands to the \instr{lc} and \instr{lcs} instruction statements. A symbol reference can end with a \code{@\emph{n}} sequence, where \code{\emph{n}} is a numeric literal; in this case it is interpreted as an offset (in bytes) relative to the symbol definition. For the \instr{lcs} instruction, the resulting address must still fit into the sign extended 21-bit value range (\code{0x00000000}--\code{0x000FFFFF} or \code{0xFFF00000}--\code{0xFFFFFFFF}), otherwise the linker will report an error.
A special \code{entry} symbol is used to inform the linker about the program entry point if there are multiple input files. If defined, this symbol must precede the first instruction or data definition statement in the module.
 
By default all symbols are local, that is, they can be only referenced from the module where they were defined. To make a symbol accessible from other modules, use the \instr{\#export} directive. To reference a symbol defined in another module use the \instr{\#import} directive.
Symbols can be used as operands to the \instr{lc} instruction statement. A symbol reference can end with a \code{@\emph{n}} sequence, where \code{\emph{n}} is a numeric literal; in this case it is interpreted as an offset (in bytes) relative to the symbol definition. To refer to symbols defined in other modules, they must first be declared external using the \instr{\#extern} directive.
 
A symbol named \code{entry} or \code{Entry} has a special meaning: it is used to inform the linker about the program entry point if there are multiple input files. It does not have to be exported. If defined, this symbol must precede the first instruction or data definition statement in the module. Only one module in the program can define the entry symbol.
 
\begin{codeparbreakable}
\instr{lc} r10, jump\_label
\instr{lc} r11, data\_word
1652,18 → 1542,12
Defines a macro that will be substituted with one or more tokens. The \code{\emph{identifier}} must satisfy the requirements listed in Section \ref{sec:symbols}. Tokens can be anything, including keywords, identifiers, literals and separators (i.e. comma and colon characters).
 
\begin{codepar}
\instr{\#export} \emph{identifier}
\instr{\#extern} \emph{identifier}
\end{codepar}
 
Declares \code{\emph{identifier}} as an exported symbol. Exported symbols can be referenced by other modules.
Declares \code{\emph{identifier}} as an external symbol. Used to refer to symbols defined in other modules.
 
\begin{codepar}
\instr{\#import} \emph{identifier}
\end{codepar}
 
Declares \code{\emph{identifier}} as an imported symbol. Used to refer to symbols exported by other modules.
 
\begin{codepar}
\instr{\#include} \emph{filename}
\end{codepar}
 
1683,10 → 1567,8
\instr{.align} [ \emph{alignment} ]
\end{codepar}
 
Ensures that code generated by the next data definition or instruction statement is aligned to a multiple of \code{\emph{alignment}} bytes, inserting padding zeros if needed. \code{\emph{alignment}} must be a power of 2 and can't be less than 4. Default \code{\emph{alignment}} is 4. Instructions and words are always at least word-aligned; the \instr{.align} statement can be used to align them to a larger boundary, or to align byte data (see below).
Ensures that code generated by the next data definition or instruction statement is aligned to a multiple of \code{\emph{alignment}} bytes, inserting padding zeros if needed. Default \code{\emph{alignment}} is 4. Instructions and words are always at least word-aligned; the \instr{.align} statement can be used to align them to a larger boundary, or to align byte data (see below).
 
The \instr{.align} statement is not guaranteed to work if the requested alignment is greater than the section alignment specified for the linker (see Subsection \ref{subsec:assemblercmdline}).
 
\begin{codepar}
\instr{.byte} \emph{token} [, \emph{token} ... ]
\end{codepar}
1693,8 → 1575,6
 
Inserts one or more bytes to the output code. Each \code{\emph{token}} can be either a numeric literal with a valid range of [-128; 255] or a string literal. By default, bytes are not aligned.
 
To define a null-terminated string, the terminating null character must be inserted explicitly.
 
\begin{codepar}
\instr{.reserve} \emph{n}
\end{codepar}
1794,24 → 1674,4
\bottomrule
\end{ctabular}
 
\chapter{List of changes}
 
\section*{Version 1.1 (2019-01-11)}
 
This release introduces a minor but technically breaking hardware change: the START\_ADDR generic, which used to be 30-bit, has been extended to a full 32-bit word for convenience; the two least significant bits are ignored.
 
The other breaking change affects the assembly language syntax. Previously all symbols used to be public, and multiple modules could not define symbols with the same name. As of now only symbols explicitly exported using the \instr{\#export} directive are public. The \instr{\#extern} directive has been replaced by \instr{\#import}.
 
Other notable changes include:
 
\begin{itemize}
\item A new instruction, \instr{lcs} (\instrname{Load Constant Short}), has been added, which loads a 21-bit sign extended constant to a register. Unlike \instr{lc}, it is encoded as a single word and takes one cycle to execute.
\item Optimizations in the divider unit. Division instructions (\instr{divs} and \instr{divu}) now take one fewer cycle to execute (modulo instructions are unaffected).
\item LXP32 assembly language now supports a new instruction alias, \instr{neg} (\instrname{Negate}), which is equivalent to \code{\instr{sub} dst, 0, src}.
\end{itemize}
 
\section*{Version 1.0 (2016-02-20)}
 
Initial public release.
 
\end{document}
/.gitattributes
1,5 → 1,2
# Convert line endings for text files on Windows
* text=auto
 
# Prevent the GitHub parser from ignoring the "tools" directory contents
/tools/* linguist-vendored=false
/misc/highlight/notepad++/LXP32Assembly.xml
1,5 → 1,5
<NotepadPlus>
<UserLang name="LXP32 Assembly" ext="asm inc" udlVersion="2.1">
<UserLang name="LXP32 Assembly" ext="asm" udlVersion="2.1">
<Settings>
<Global caseIgnored="no" allowFoldOfComments="yes" foldCompact="no" forcePureLC="0" decimalSeparator="0" />
<Prefix Keywords1="no" Keywords2="no" Keywords3="no" Keywords4="no" Keywords5="no" Keywords6="no" Keywords7="no" Keywords8="no" />
24,9 → 24,9
<Keywords name="Folders in comment, open"></Keywords>
<Keywords name="Folders in comment, middle"></Keywords>
<Keywords name="Folders in comment, close"></Keywords>
<Keywords name="Keywords1">add and call cjmpe cjmpne cjmpsg cjmpsge cjmpsl cjmpsle cjmpug cjmpuge cjmpul cjmpule divs divu hlt jmp iret lc lcs lsb lub lw mods modu mov mul neg nop not or ret sb sl srs sru sub sw xor</Keywords>
<Keywords name="Keywords1">add and call cjmpe cjmpne cjmpsg cjmpsge cjmpsl cjmpsle cjmpug cjmpuge cjmpul cjmpule divs divu hlt jmp iret lc lsb lub lw mods modu mov mul nop not or ret sb sl srs sru sub sw xor</Keywords>
<Keywords name="Keywords2">cr irp iv0 iv1 iv2 iv3 iv4 iv5 iv6 iv7 r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 r32 r33 r34 r35 r36 r37 r38 r39 r40 r41 r42 r43 r44 r45 r46 r47 r48 r49 r50 r51 r52 r53 r54 r55 r56 r57 r58 r59 r60 r61 r62 r63 r64 r65 r66 r67 r68 r69 r70 r71 r72 r73 r74 r75 r76 r77 r78 r79 r80 r81 r82 r83 r84 r85 r86 r87 r88 r89 r90 r91 r92 r93 r94 r95 r96 r97 r98 r99 r100 r101 r102 r103 r104 r105 r106 r107 r108 r109 r110 r111 r112 r113 r114 r115 r116 r117 r118 r119 r120 r121 r122 r123 r124 r125 r126 r127 r128 r129 r130 r131 r132 r133 r134 r135 r136 r137 r138 r139 r140 r141 r142 r143 r144 r145 r146 r147 r148 r149 r150 r151 r152 r153 r154 r155 r156 r157 r158 r159 r160 r161 r162 r163 r164 r165 r166 r167 r168 r169 r170 r171 r172 r173 r174 r175 r176 r177 r178 r179 r180 r181 r182 r183 r184 r185 r186 r187 r188 r189 r190 r191 r192 r193 r194 r195 r196 r197 r198 r199 r200 r201 r202 r203 r204 r205 r206 r207 r208 r209 r210 r211 r212 r213 r214 r215 r216 r217 r218 r219 r220 r221 r222 r223 r224 r225 r226 r227 r228 r229 r230 r231 r232 r233 r234 r235 r236 r237 r238 r239 r240 r241 r242 r243 r244 r245 r246 r247 r248 r249 r250 r251 r252 r253 r254 r255 rp sp</Keywords>
<Keywords name="Keywords3">#define #export #import #include #message</Keywords>
<Keywords name="Keywords3">#define #extern #include #message</Keywords>
<Keywords name="Keywords4">.align .byte .reserve .word</Keywords>
<Keywords name="Keywords5"></Keywords>
<Keywords name="Keywords6"></Keywords>
/misc/highlight/akelpad/asm.coder Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.