OpenCores
URL https://opencores.org/ocsvn/lxp32/lxp32/trunk

Subversion Repositories lxp32

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /lxp32/trunk/tools/src/lxp32asm
    from Rev 6 to Rev 9
    Reverse comparison

Rev 6 → Rev 9

/CMakeLists.txt
1,12 → 1,12
# Build script for the lxp32asm executable.
cmake_minimum_required(VERSION 3.3.0)
 
# The tool is a single executable built from the sources listed below
add_executable(lxp32asm assembler.cpp linkableobject.cpp linker.cpp main.cpp outputwriter.cpp utils.cpp)
 
if(MSVC)
# Make the program expand wildcard command-line arguments
set_target_properties(lxp32asm PROPERTIES LINK_FLAGS "setargv.obj")
endif()
 
# Install
 
# "." is a relative destination, i.e. the executable goes directly into the install prefix
install(TARGETS lxp32asm DESTINATION .)
# Build script for the lxp32asm executable.
cmake_minimum_required(VERSION 3.3.0)
 
# The tool is a single executable built from the sources listed below
add_executable(lxp32asm assembler.cpp linkableobject.cpp linker.cpp main.cpp outputwriter.cpp utils.cpp)
 
if(MSVC)
# Make the program expand wildcard command-line arguments
set_target_properties(lxp32asm PROPERTIES LINK_FLAGS "setargv.obj")
endif()
 
# Install
 
# "." is a relative destination, i.e. the executable goes directly into the install prefix
install(TARGETS lxp32asm DESTINATION .)
/assembler.cpp
1,919 → 1,971
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the Assembler class.
*/
 
#include "assembler.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <limits>
#include <type_traits>
#include <cctype>
#include <cassert>
#include <cstdlib>
 
void Assembler::processFile(const std::string &filename) {
auto nativePath=Utils::normalizeSeparators(filename);
auto pos=nativePath.find_last_of('/');
if(pos!=std::string::npos) nativePath=filename.substr(pos+1);
_obj.setName(nativePath);
_line=0;
_state=Initial;
_currentFileName=filename;
processFileRecursive(filename);
 
// Examine symbol table
for(auto const &sym: _obj.symbols()) {
if(sym.second.type==LinkableObject::Unknown&&!sym.second.refs.empty()) {
std::ostringstream msg;
msg<<"Undefined symbol \""+sym.first+"\"";
msg<<" (referenced from "<<sym.second.refs[0].source;
msg<<":"<<sym.second.refs[0].line<<")";
throw std::runtime_error(msg.str());
}
}
for(auto const &sym: _exportedSymbols) _obj.exportSymbol(sym);
}
 
void Assembler::processFileRecursive(const std::string &filename) {
std::ifstream in(filename,std::ios_base::in);
if(!in) throw std::runtime_error("Cannot open file \""+filename+"\"");
// Process input file line-by-line
auto savedLine=_line;
auto savedState=_state;
auto savedFileName=_currentFileName;
_line=1;
_state=Initial;
_currentFileName=filename;
std::string line;
while(std::getline(in,line)) {
auto tokens=tokenize(line);
expand(tokens);
elaborate(tokens);
_line++;
}
if(_state!=Initial) throw std::runtime_error("Unexpected end of file");
_line=savedLine;
_state=savedState;
_currentFileName=savedFileName;
if(!_currentLabels.empty())
throw std::runtime_error("Symbol definition must be followed by an instruction or data definition statement");
}
 
/*
 * Appends "dir" to the list of directories searched by #include.
 * The path is stored with normalized separators and, unless it is empty,
 * with a trailing '/' so that a file name can be appended directly.
 */
void Assembler::addIncludeSearchDir(const std::string &dir) {
    auto normalized=Utils::normalizeSeparators(dir);
    const bool needsSlash=!normalized.empty()&&normalized.back()!='/';
    if(needsSlash) normalized+='/';
    _includeSearchDirs.push_back(std::move(normalized));
}
 
// Returns the current value of the line counter (_line).
int Assembler::line() const {
return _line;
}
 
// Returns a copy of the name of the file currently being processed (_currentFileName).
std::string Assembler::currentFileName() const {
return _currentFileName;
}
 
// Returns a mutable reference to the linkable object being built (_obj).
LinkableObject &Assembler::object() {
return _obj;
}
 
// Returns a read-only reference to the linkable object being built (_obj).
const LinkableObject &Assembler::object() const {
return _obj;
}
 
/*
 * Splits one source line into tokens.
 *
 * Tokens are: the separators "," and ":", words (identifiers, numbers,
 * directives, data definition keywords) and string literals. A string
 * literal token keeps its surrounding double quotes, with escape sequences
 * already replaced by the characters they denote. Comments ("//" to the end
 * of the line and C-style block comments) produce no tokens.
 *
 * The tokenizer state is kept in _state. It is reset to Initial at the end
 * of every line except inside a block comment, which may span several lines.
 *
 * Throws std::runtime_error on an unexpected character, an unterminated
 * string literal, or an ill-formed / unknown escape sequence.
 */
Assembler::TokenList Assembler::tokenize(const std::string &str) {
    TokenList tokenList;
    std::string word;
    std::size_t i;
    for(i=0;i<str.size();i++) {
        char ch=str[i];
        switch(_state) {
        case Initial:
            if(ch==' '||ch=='\t'||ch=='\n'||ch=='\r') continue; // skip whitespace
            else if(ch==','||ch==':') { // separator
                tokenList.push_back(std::string(1,ch));
            }
            // <cctype> functions require a value representable as unsigned char
            else if(std::isalnum(static_cast<unsigned char>(ch))||ch=='.'||ch=='#'||ch=='_'||ch=='-'||ch=='+') {
                word=std::string(1,ch);
                _state=Word;
            }
            else if(ch=='\"') {
                word="\"";
                _state=StringLiteral;
            }
            else if(ch=='/') {
                if(++i>=str.size()) throw std::runtime_error("Unexpected end of line");
                ch=str[i];
                if(ch=='/') i=str.size(); // skip the rest of the line
                else if(ch=='*') _state=BlockComment;
                else throw std::runtime_error(std::string("Unexpected character: \"")+ch+"\"");
            }
            else throw std::runtime_error(std::string("Unexpected character: \"")+ch+"\"");
            break;
        case Word:
            if(std::isalnum(static_cast<unsigned char>(ch))||ch=='_'||ch=='@'||ch=='+'||ch=='-') word+=ch;
            else {
                // Not part of the word: re-examine this character in the Initial state
                i--;
                _state=Initial;
                tokenList.push_back(std::move(word));
            }
            break;
        case StringLiteral:
            if(ch=='\\') {
                if(++i>=str.size()) throw std::runtime_error("Unexpected end of line");
                ch=str[i];
                if(ch=='\\') word.push_back('\\');
                else if(ch=='\"') word.push_back('\"');
                else if(ch=='\'') word.push_back('\'');
                else if(ch=='t') word.push_back('\t');
                else if(ch=='n') word.push_back('\n');
                else if(ch=='r') word.push_back('\r');
                else if(ch=='x') { // hexadecimal sequence can be 1-2 digit long
                    std::string seq;
                    if(i+1<str.size()&&Utils::ishexdigit(str[i+1])) {
                        seq+=str[i+1];
                        // A second digit only counts when it directly follows the first one
                        if(i+2<str.size()&&Utils::ishexdigit(str[i+2])) seq+=str[i+2];
                    }
                    if(seq.empty()) throw std::runtime_error("Ill-formed escape sequence");
                    try {
                        word.push_back(static_cast<char>(std::stoul(seq,nullptr,16)));
                    }
                    catch(std::exception &) {
                        throw std::runtime_error("Ill-formed escape sequence");
                    }
                    i+=seq.size(); // skip the consumed digits
                }
                else if(Utils::isoctdigit(ch)) { // octal sequence can be 1-3 digit long
                    std::string seq(1,ch);
                    if(i+1<str.size()&&Utils::isoctdigit(str[i+1])) {
                        seq+=str[i+1];
                        // A third digit only counts when it directly follows the second one
                        if(i+2<str.size()&&Utils::isoctdigit(str[i+2])) seq+=str[i+2];
                    }
                    unsigned long value;
                    try {
                        value=std::stoul(seq,nullptr,8);
                    }
                    catch(std::exception &) {
                        throw std::runtime_error("Ill-formed escape sequence");
                    }
                    if(value>255) throw std::runtime_error("Octal value is out of range");
                    word.push_back(static_cast<char>(value));
                    i+=seq.size()-1; // the first digit has been consumed already
                }
                else throw std::runtime_error(std::string("Unknown escape sequence: \"\\")+ch+"\"");
            }
            else if(ch=='\"') {
                word.push_back('\"');
                tokenList.push_back(std::move(word));
                _state=Initial;
            }
            else word.push_back(ch);
            break;
        case BlockComment:
            if(ch=='*') {
                if(++i>=str.size()) break;
                ch=str[i];
                if(ch=='/') _state=Initial;
                else i--; // could be the '*' of "**/": look at this character again
            }
            break;
        }
    }
    if(_state==StringLiteral) throw std::runtime_error("Unexpected end of line");
    if(_state==Word) tokenList.push_back(std::move(word)); // store last word
    if(_state!=BlockComment) _state=Initial; // reset state if not in block comment
    return tokenList;
}
 
/*
 * Performs macro substitution on a token list in place: every token that
 * names a known macro is replaced by the macro's replacement tokens.
 *
 * The identifier that directly follows "#define" (optionally preceded by a
 * "label :" pair) is never expanded, since that would lead to
 * counter-intuitive results.
 */
void Assembler::expand(TokenList &list) {
    TokenList expanded;
    for(auto &tok: list) {
        // True when tok sits in the "macro name" position of a #define statement
        const bool isDefinedName=
            (expanded.size()==1&&expanded[0]=="#define")||
            (expanded.size()==3&&expanded[1]==":"&&expanded[2]=="#define");
        const auto macro=_macros.find(tok);
        if(isDefinedName||macro==_macros.end()) {
            expanded.push_back(std::move(tok));
            continue;
        }
        for(auto const &replacement: macro->second) expanded.push_back(replacement);
    }
    list=std::move(expanded);
}
 
/*
 * Elaborates one tokenized (and macro-expanded) statement.
 *
 * An optional leading "label :" pair is stripped and the label is queued in
 * _currentLabels. A statement starting with '#' is handed to
 * elaborateDirective(); one starting with '.' to elaborateDataDefinition();
 * anything else to elaborateInstruction(). For the latter two, every queued
 * label is added to the object as a symbol at the returned address and the
 * queue is cleared.
 *
 * Throws std::runtime_error if the label is not a valid identifier.
 */
void Assembler::elaborate(TokenList &list) {
    if(list.empty()) return;

    // Process label (if present)
    const bool hasLabel=list.size()>=2&&list[1]==":";
    if(hasLabel) {
        if(!validateIdentifier(list[0]))
            throw std::runtime_error("Ill-formed identifier: \""+list[0]+"\"");
        _currentLabels.push_back(std::move(list[0]));
        list.erase(list.begin(),list.begin()+2);
        if(list.empty()) return; // label alone on the line
    }

    // Process statement itself
    const char lead=list[0][0];
    if(lead=='#') {
        elaborateDirective(list);
        return;
    }
    const LinkableObject::Word rva=
        (lead=='.')?elaborateDataDefinition(list):elaborateInstruction(list);
    for(auto const &name: _currentLabels) _obj.addSymbol(name,rva);
    _currentLabels.clear();
}
 
/*
 * Executes a preprocessor-style directive; list[0] is the directive name.
 *
 *   #define NAME tokens...  registers a macro (at least one replacement token)
 *   #export NAME            queues a symbol for export
 *   #import NAME            declares an imported symbol
 *   #include "file"         processes another file in place
 *   #message "text"         prints "file:line: text" to standard output
 *
 * An #include with a relative name is looked up relative to the current file
 * first and then in the include search directories, in the order they were
 * added.
 *
 * Throws std::runtime_error on a wrong number of tokens, an ill-formed
 * identifier, a macro redefinition, an include file that cannot be located,
 * or an unrecognized directive.
 */
void Assembler::elaborateDirective(TokenList &list) {
    assert(!list.empty());
    if(list[0]=="#define") {
        if(list.size()<3)
            throw std::runtime_error("Wrong number of tokens in the directive");
        if(_macros.find(list[1])!=_macros.end())
            throw std::runtime_error("Macro \""+list[1]+"\" has been already defined");
        if(!validateIdentifier(list[1]))
            throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
        _macros.emplace(list[1],TokenList(list.begin()+2,list.end()));
    }
    else if(list[0]=="#export") {
        // The check must throw: list[1] is accessed right below
        if(list.size()!=2) throw std::runtime_error("Wrong number of tokens in the directive");
        if(!validateIdentifier(list[1])) throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
        _exportedSymbols.push_back(list[1]);
    }
    else if(list[0]=="#import") {
        if(list.size()!=2) throw std::runtime_error("Wrong number of tokens in the directive");
        if(!validateIdentifier(list[1])) throw std::runtime_error("Ill-formed identifier: \""+list[1]+"\"");
        _obj.addImportedSymbol(list[1]);
    }
    else if(list[0]=="#include") {
        if(list.size()!=2) throw std::runtime_error("Wrong number of tokens in the directive");
        auto filename=Utils::dequoteString(list[1]);
        if(Utils::isAbsolutePath(filename)) return processFileRecursive(filename);
        else {
            // Relative name: try next to the current file, then the search directories
            auto path=Utils::relativePath(currentFileName(),filename);
            if(Utils::fileExists(path)) return processFileRecursive(path);
            else {
                for(auto const &dir: _includeSearchDirs) {
                    path=Utils::nativeSeparators(dir+filename);
                    if(Utils::fileExists(path)) return processFileRecursive(path);
                }
            }
        }
        throw std::runtime_error("Cannot locate include file \""+filename+"\"");
    }
    else if(list[0]=="#message") {
        if(list.size()!=2) throw std::runtime_error("Wrong number of tokens in the directive");
        auto msg=Utils::dequoteString(list[1]);
        std::cout<<currentFileName()<<":"<<line()<<": "<<msg<<std::endl;
    }
    else throw std::runtime_error("Unrecognized directive: \""+list[0]+"\"");
}
 
/*
 * Elaborates a data definition statement; list[0] is the keyword.
 *
 *   .align [n]        pads the object to an n-byte boundary (default 4;
 *                     n must be a power of 2 and at least 4)
 *   .reserve n        appends n zero bytes
 *   .word a, b, ...   appends comma-separated words
 *   .byte a, "s", ... appends comma-separated bytes and/or string literals
 *
 * Returns the address reported by the object for the first item emitted by
 * the statement (the value returned by the corresponding add* call).
 *
 * Throws std::runtime_error on a malformed statement or an out-of-range
 * value, including negative sizes and alignments.
 */
LinkableObject::Word Assembler::elaborateDataDefinition(TokenList &list) {
    assert(!list.empty());
    LinkableObject::Word rva=0;
    if(list[0]==".align") {
        if(list.size()>2) throw std::runtime_error("Unexpected token: \""+list[2]+"\"");
        std::size_t align=4;
        if(list.size()>1) {
            const auto value=numericLiteral(list[1]);
            // Reject negative values before they wrap around in the unsigned conversion
            if(value<0) throw std::runtime_error("Alignment must be a power of 2");
            align=static_cast<std::size_t>(value);
        }
        if(!Utils::isPowerOf2(align)) throw std::runtime_error("Alignment must be a power of 2");
        if(align<4) throw std::runtime_error("Alignment must be at least 4");
        rva=_obj.addPadding(align);
    }
    else if(list[0]==".reserve") {
        if(list.size()<2) throw std::runtime_error("Unexpected end of statement");
        else if(list.size()>2) throw std::runtime_error("Unexpected token: \""+list[2]+"\"");
        const auto value=numericLiteral(list[1]);
        // A negative size would turn into a huge std::size_t
        if(value<0) throw std::runtime_error("\""+list[1]+"\": out of range");
        rva=_obj.addZeros(static_cast<std::size_t>(value));
    }
    else if(list[0]==".word") {
        if(list.size()<2) throw std::runtime_error("Unexpected end of statement");
        for(std::size_t i=1;i<list.size();i++) {
            if(i%2!=0) { // odd positions hold values
                auto w=static_cast<LinkableObject::Word>(numericLiteral(list[i]));
                auto r=_obj.addWord(w);
                if(i==1) rva=r;
            }
            else { // even positions hold separators
                if(list[i]!=",") throw std::runtime_error("Comma expected");
                if(i+1==list.size()) throw std::runtime_error("Unexpected end of statement");
            }
        }
    }
    else if(list[0]==".byte") {
        if(list.size()<2) throw std::runtime_error("Unexpected end of statement");
        for(std::size_t i=1;i<list.size();i++) {
            if(i%2!=0) {
                if(list[i].at(0)=='\"') { // string literal
                    auto bytes=Utils::dequoteString(list[i]);
                    auto r=_obj.addBytes(reinterpret_cast<const LinkableObject::Byte*>
                        (bytes.c_str()),bytes.size());
                    if(i==1) rva=r;
                }
                else {
                    auto n=numericLiteral(list[i]);
                    // Both signed (-128..127) and unsigned (0..255) byte values are accepted
                    if(n>255||n<-128) throw std::runtime_error("\""+list[i]+"\": out of range");
                    auto b=static_cast<LinkableObject::Byte>(n);
                    auto r=_obj.addByte(b);
                    if(i==1) rva=r;
                }
            }
            else {
                if(list[i]!=",") throw std::runtime_error("Comma expected");
                if(i+1==list.size()) throw std::runtime_error("Unexpected end of statement");
            }
        }
    }
    else throw std::runtime_error("Unrecognized statement: \""+list[0]+"\"");
    return rva;
}
 
/*
 * Elaborates an instruction statement; list[0] is the mnemonic.
 *
 * The object is padded first (addPadding() with its default argument) and
 * the resulting address is returned. The statement is then dispatched to
 * the matching encodeXxx() member; every mnemonic starting with "cjmp" goes
 * to encodeCjmpxx().
 *
 * Throws std::runtime_error for an unrecognized mnemonic.
 */
LinkableObject::Word Assembler::elaborateInstruction(TokenList &list) {
    assert(!list.empty());

    typedef void (Assembler::*Encoder)(const TokenList &);
    static const std::pair<const char*,Encoder> encoders[]={
        {"add",&Assembler::encodeAdd},
        {"and",&Assembler::encodeAnd},
        {"call",&Assembler::encodeCall},
        {"divs",&Assembler::encodeDivs},
        {"divu",&Assembler::encodeDivu},
        {"hlt",&Assembler::encodeHlt},
        {"jmp",&Assembler::encodeJmp},
        {"iret",&Assembler::encodeIret},
        {"lc",&Assembler::encodeLc},
        {"lcs",&Assembler::encodeLcs},
        {"lsb",&Assembler::encodeLsb},
        {"lub",&Assembler::encodeLub},
        {"lw",&Assembler::encodeLw},
        {"mods",&Assembler::encodeMods},
        {"modu",&Assembler::encodeModu},
        {"mov",&Assembler::encodeMov},
        {"mul",&Assembler::encodeMul},
        {"neg",&Assembler::encodeNeg},
        {"nop",&Assembler::encodeNop},
        {"not",&Assembler::encodeNot},
        {"or",&Assembler::encodeOr},
        {"ret",&Assembler::encodeRet},
        {"sb",&Assembler::encodeSb},
        {"sl",&Assembler::encodeSl},
        {"srs",&Assembler::encodeSrs},
        {"sru",&Assembler::encodeSru},
        {"sub",&Assembler::encodeSub},
        {"sw",&Assembler::encodeSw},
        {"xor",&Assembler::encodeXor}
    };

    const auto rva=_obj.addPadding();
    const std::string &mnemonic=list[0];

    // The whole conditional jump family shares one encoder
    if(mnemonic.compare(0,4,"cjmp")==0) {
        encodeCjmpxx(list);
        return rva;
    }
    for(auto const &entry: encoders) {
        if(mnemonic==entry.first) {
            (this->*entry.second)(list);
            return rva;
        }
    }
    throw std::runtime_error("Unrecognized instruction: \""+list[0]+"\"");
}
 
/*
 * Checks whether "str" is a valid identifier.
 *
 * Valid identifier must satisfy the following requirements:
 * 1. Must not be empty
 * 2. The first character must be either alphabetic or an underscore
 * 3. Subsequent characters must be either alphanumeric or underscores
 *
 * Characters are converted to unsigned char before being classified, since
 * passing a negative char value to the <cctype> functions is undefined.
 */
bool Assembler::validateIdentifier(const std::string &str) {
    if(str.empty()) return false;
    const auto first=static_cast<unsigned char>(str[0]);
    if(!std::isalpha(first)&&first!='_') return false;
    for(std::size_t i=1;i<str.size();i++) {
        const auto ch=static_cast<unsigned char>(str[i]);
        if(!std::isalnum(ch)&&ch!='_') return false;
    }
    return true;
}
 
/*
 * Converts a numeric literal to an integer.
 *
 * The base is detected by std::stoll (base argument 0) and the whole string
 * must be consumed. The accepted range spans from the smallest value of the
 * signed counterpart of LinkableObject::Word up to the largest value of
 * LinkableObject::Word itself.
 *
 * Throws std::runtime_error for an ill-formed or out-of-range literal.
 */
Assembler::Integer Assembler::numericLiteral(const std::string &str) {
    typedef std::make_signed<LinkableObject::Word>::type SignedWord;

    std::size_t consumed;
    Integer value;
    try {
        value=std::stoll(str,&consumed,0);
    }
    catch(std::exception &) {
        throw std::runtime_error("Ill-formed numeric literal: \""+str+"\"");
    }
    // Trailing garbage after the number is an error
    if(consumed!=str.size()) throw std::runtime_error("Ill-formed numeric literal: \""+str+"\"");

    const auto upper=static_cast<Integer>(std::numeric_limits<LinkableObject::Word>::max());
    const auto lower=static_cast<Integer>(std::numeric_limits<SignedWord>::min());
    if(value<lower||value>upper)
        throw std::runtime_error("\""+str+"\": out of range");
    return value;
}
 
/*
 * Parses the operands of an instruction statement.
 *
 * list[0] is the mnemonic; operands sit at odd positions and must be
 * separated by commas at even positions. Each operand is classified as:
 *   - a register: "rN" with N a decimal number in 0..255, or one of the
 *     aliases sp (255), rp (254), irp (253), cr (252), iv0..iv7 (240..247);
 *   - an identifier, optionally followed by "@offset";
 *   - a numeric literal.
 *
 * A register number must consist of decimal digits only, so a bare "r" is
 * an ordinary identifier and "r+5" is not a register.
 *
 * Throws std::runtime_error on a missing comma, a trailing comma, or an
 * operand that fits none of the categories.
 */
std::vector<Assembler::Operand> Assembler::getOperands(const TokenList &list) {
    std::vector<Operand> arglist;
    for(std::size_t i=1;i<list.size();i++) {
        const std::string &tok=list[i];
        if(i%2==0) {
            // Separator position
            if(tok!=",") throw std::runtime_error("Comma expected");
            if(i+1==list.size()) throw std::runtime_error("Unexpected end of line");
            continue;
        }

        Operand a;
        a.str=tok;

        // Is argument a register?
        if(tok.size()>1&&tok[0]=='r'&&
            tok.find_first_not_of("0123456789",1)==std::string::npos)
        {
            // Digits only, so the result is non-negative (LONG_MAX on overflow)
            const auto reg=std::strtol(tok.c_str()+1,nullptr,10);
            if(reg<=255) {
                a.type=Operand::Register;
                a.reg=static_cast<std::uint8_t>(reg);
                arglist.push_back(std::move(a));
                continue;
            }
        }

        // Try alternative register names
        if(tok=="sp") { // stack pointer
            a.type=Operand::Register;
            a.reg=255;
        }
        else if(tok=="rp") { // return pointer
            a.type=Operand::Register;
            a.reg=254;
        }
        else if(tok=="irp") { // interrupt return pointer
            a.type=Operand::Register;
            a.reg=253;
        }
        else if(tok=="cr") { // control register
            a.type=Operand::Register;
            a.reg=252;
        }
        else if(tok.size()==3&&tok.compare(0,2,"iv")==0&&
            tok[2]>='0'&&tok[2]<='7') // interrupt vector
        {
            a.type=Operand::Register;
            a.reg=240+(tok[2]-'0');
        }
        else if(validateIdentifier(tok)) {
            // Plain identifier: set the offset explicitly so that it never
            // depends on how Operand initializes its members
            a.type=Operand::Identifier;
            a.i=0;
        }
        else {
            const auto atpos=tok.find_first_of('@');
            if(atpos!=std::string::npos) {
                // Identifier with an offset?
                a.type=Operand::Identifier;
                a.str=tok.substr(0,atpos);
                if(!validateIdentifier(a.str)) throw std::runtime_error("Ill-formed identifier");
                a.i=numericLiteral(tok.substr(atpos+1));
            }
            else {
                // Numeric literal?
                a.type=Operand::NumericLiteral;
                a.i=numericLiteral(tok);
            }
        }
        arglist.push_back(std::move(a));
    }
    return arglist;
}
 
/*
* Member functions to encode LXP32 instructions
*/
 
/*
 * Writes the destination register number into bits 16 and up of the
 * instruction word. Throws std::runtime_error unless "arg" is a register.
 */
void Assembler::encodeDstOperand(LinkableObject::Word &word,const Operand &arg) {
    const bool isRegister=(arg.type==Operand::Register);
    if(!isRegister) throw std::runtime_error("\""+arg.str+"\": must be a register");
    word|=arg.reg<<16;
}
 
/*
 * Encodes the first source operand into bits 8 and up of the instruction word.
 *
 * A register sets the flag 0x02000000 and stores the register number. A
 * numeric literal is stored as a single byte; it must lie in -128..127 or in
 * 0xFFFFFF80..0xFFFFFFFF. Any other operand type is rejected.
 *
 * Throws std::runtime_error for an out-of-range literal or a bad operand type.
 */
void Assembler::encodeRd1Operand(LinkableObject::Word &word,const Operand &arg) {
    switch(arg.type) {
    case Operand::Register:
        word|=0x02000000;
        word|=arg.reg<<8;
        break;
    case Operand::NumericLiteral: {
        const bool fitsSigned=arg.i>=-128&&arg.i<=127;
        const bool fitsWrapped=arg.i>=0xFFFFFF80&&arg.i<=0xFFFFFFFF;
        if(!fitsSigned&&!fitsWrapped)
            throw std::runtime_error("\""+arg.str+"\": out of range");
        const auto imm=static_cast<LinkableObject::Byte>(arg.i);
        word|=imm<<8;
        break;
    }
    default:
        throw std::runtime_error("\""+arg.str+"\": bad argument");
    }
}
 
/*
 * Encodes the second source operand into the lowest bits of the instruction word.
 *
 * A register sets the flag 0x01000000 and stores the register number. A
 * numeric literal is stored as a single byte; it must lie in -128..127 or in
 * 0xFFFFFF80..0xFFFFFFFF. Any other operand type is rejected.
 *
 * Throws std::runtime_error for an out-of-range literal or a bad operand type.
 */
void Assembler::encodeRd2Operand(LinkableObject::Word &word,const Operand &arg) {
    switch(arg.type) {
    case Operand::Register:
        word|=0x01000000;
        word|=arg.reg;
        break;
    case Operand::NumericLiteral: {
        const bool fitsSigned=arg.i>=-128&&arg.i<=127;
        const bool fitsWrapped=arg.i>=0xFFFFFF80&&arg.i<=0xFFFFFFFF;
        if(!fitsSigned&&!fitsWrapped)
            throw std::runtime_error("\""+arg.str+"\": out of range");
        const auto imm=static_cast<LinkableObject::Byte>(arg.i);
        word|=imm;
        break;
    }
    default:
        throw std::runtime_error("\""+arg.str+"\": bad argument");
    }
}
 
void Assembler::encodeAdd(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("add instruction requires 3 operands");
LinkableObject::Word w=0x40000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeAnd(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("and instruction requires 3 operands");
LinkableObject::Word w=0x60000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeCall(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=1) throw std::runtime_error("call instruction requires 1 operand");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
LinkableObject::Word w=0x86FE0000;
encodeRd1Operand(w,args[0]);
_obj.addWord(w);
}
 
void Assembler::encodeCjmpxx(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("cjmpxx instruction requires 3 operands");
LinkableObject::Word w;
bool reverse=false;
/*
* Note: cjmpul, cjmpule, cjmpsl and cjmpsle don't have distinct opcodes;
* instead, they are aliases for respective "g" or "ge" instructions
* with reversed operand order.
*/
if(list[0]=="cjmpe") w=0xE0000000;
else if(list[0]=="cjmpne") w=0xD0000000;
else if(list[0]=="cjmpug"||list[0]=="cjmpul") w=0xC8000000;
else if(list[0]=="cjmpuge"||list[0]=="cjmpule") w=0xE8000000;
else if(list[0]=="cjmpsg"||list[0]=="cjmpsl") w=0xC4000000;
else if(list[0]=="cjmpsge"||list[0]=="cjmpsle") w=0xE4000000;
else throw std::runtime_error("Unrecognized instruction: \""+list[0]+"\"");
if(list[0]=="cjmpul"||list[0]=="cjmpule"||
list[0]=="cjmpsl"||list[0]=="cjmpsle") reverse=true;
encodeDstOperand(w,args[0]);
if(!reverse) {
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
}
else {
encodeRd1Operand(w,args[2]);
encodeRd2Operand(w,args[1]);
}
_obj.addWord(w);
}
 
void Assembler::encodeDivs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("divs instruction requires 3 operands");
LinkableObject::Word w=0x54000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeDivu(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("divu instruction requires 3 operands");
LinkableObject::Word w=0x50000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeHlt(const TokenList &list) {
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("hlt instruction doesn't take operands");
_obj.addWord(0x08000000);
}
 
void Assembler::encodeJmp(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=1) throw std::runtime_error("jmp instruction requires 1 operand");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
LinkableObject::Word w=0x82000000;
encodeRd1Operand(w,args[0]);
_obj.addWord(w);
}
 
void Assembler::encodeIret(const TokenList &list) {
// Note: "iret" is not a real instruction, but an alias for "jmp irp"
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("iret instruction doesn't take operands");
_obj.addWord(0x8200FD00);
}
 
void Assembler::encodeLc(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lc instruction requires 2 operands");
LinkableObject::Word w=0x04000000;
encodeDstOperand(w,args[0]);
_obj.addWord(w);
if(args[1].type==Operand::Identifier) {
LinkableObject::Reference ref;
ref.source=currentFileName();
ref.line=line();
ref.rva=_obj.addWord(0);
ref.offset=args[1].i;
ref.type=LinkableObject::Regular;
_obj.addReference(args[1].str,ref);
}
else if(args[1].type==Operand::NumericLiteral) {
_obj.addWord(static_cast<LinkableObject::Word>(args[1].i));
}
else throw std::runtime_error("\""+args[1].str+"\": bad argument");
}
 
void Assembler::encodeLcs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lcs instruction requires 2 operands");
LinkableObject::Word w=0xA0000000;
encodeDstOperand(w,args[0]);
if(args[1].type==Operand::NumericLiteral) {
if((args[1].i<-1048576||args[1].i>1048575)&&(args[1].i<0xFFF00000||args[1].i>0xFFFFFFFF))
throw std::runtime_error("\""+args[1].str+"\": out of range");
auto c=static_cast<LinkableObject::Word>(args[1].i)&0x1FFFFF;
w|=(c&0xFFFF);
w|=((c<<8)&0x1F000000);
_obj.addWord(w);
}
else if(args[1].type==Operand::Identifier) {
LinkableObject::Reference ref;
ref.source=currentFileName();
ref.line=line();
ref.rva=_obj.addWord(w);
ref.offset=args[1].i;
ref.type=LinkableObject::Short;
_obj.addReference(args[1].str,ref);
}
else throw std::runtime_error("\""+args[1].str+"\": bad argument");
}
 
void Assembler::encodeLsb(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lsb instruction requires 2 operands");
if(args[1].type!=Operand::Register) throw std::runtime_error("\""+args[1].str+"\": must be a register");
LinkableObject::Word w=0x2E000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeLub(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lub instruction requires 2 operands");
if(args[1].type!=Operand::Register) throw std::runtime_error("\""+args[1].str+"\": must be a register");
LinkableObject::Word w=0x2A000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeLw(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lw instruction requires 2 operands");
if(args[1].type!=Operand::Register) throw std::runtime_error("\""+args[1].str+"\": must be a register");
LinkableObject::Word w=0x22000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeMods(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("mods instruction requires 3 operands");
LinkableObject::Word w=0x5C000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeModu(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("modu instruction requires 3 operands");
LinkableObject::Word w=0x58000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeMov(const TokenList &list) {
// Note: "mov" is not a real instruction, but an alias for "add dst, src, 0"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("mov instruction requires 2 operands");
LinkableObject::Word w=0x40000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeMul(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("mul instruction requires 3 operands");
LinkableObject::Word w=0x48000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeNeg(const TokenList &list) {
// Note: "neg" is not a real instruction, but an alias for "sub dst, 0, src"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("neg instruction requires 2 operands");
LinkableObject::Word w=0x44000000;
encodeDstOperand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeNop(const TokenList &list) {
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("nop instruction doesn't take operands");
_obj.addWord(0);
}
 
void Assembler::encodeNot(const TokenList &list) {
// Note: "not" is not a real instruction, but an alias for "xor dst, src, -1"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("not instruction requires 2 operands");
LinkableObject::Word w=0x680000FF;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeOr(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("or instruction requires 3 operands");
LinkableObject::Word w=0x64000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeRet(const TokenList &list) {
// Note: "ret" is not a real instruction, but an alias for "jmp rp"
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("ret instruction doesn't take operands");
_obj.addWord(0x8200FE00);
}
 
void Assembler::encodeSb(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("sb instruction requires 2 operands");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
if(args[1].type==Operand::NumericLiteral) {
// If numeric literal value is between 128 and 255 (inclusive), convert
// it to a signed byte to avoid exception in encodeRd2Operand()
if(args[1].i>=128&&args[1].i<=255) args[1].i-=256;
}
LinkableObject::Word w=0x3A000000;
encodeRd1Operand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeSl(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("sl instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&
(args[2].i<0||args[2].i>=static_cast<Integer>(8*sizeof(LinkableObject::Word))))
{
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Bitwise shift result is undefined when "
"the second operand is negative or greater than 31"<<std::endl;
}
LinkableObject::Word w=0x70000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSrs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("srs instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&
(args[2].i<0||args[2].i>=static_cast<Integer>(8*sizeof(LinkableObject::Word))))
{
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Bitwise shift result is undefined when "
"the second operand is negative or greater than 31"<<std::endl;
}
LinkableObject::Word w=0x7C000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSru(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("sru instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&
(args[2].i<0||args[2].i>=static_cast<Integer>(8*sizeof(LinkableObject::Word))))
{
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Bitwise shift result is undefined when "
"the second operand is negative or greater than 31"<<std::endl;
}
LinkableObject::Word w=0x78000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSub(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("sub instruction requires 3 operands");
LinkableObject::Word w=0x44000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSw(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("sw instruction requires 2 operands");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
LinkableObject::Word w=0x32000000;
encodeRd1Operand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeXor(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("xor instruction requires 3 operands");
LinkableObject::Word w=0x68000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the Assembler class.
*/
 
#include "assembler.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <limits>
#include <type_traits>
#include <cctype>
#include <cassert>
#include <cstdlib>
 
void Assembler::processFile(const std::string &filename) {
auto nativePath=Utils::normalizeSeparators(filename);
auto pos=nativePath.find_last_of('/');
if(pos!=std::string::npos) nativePath=filename.substr(pos+1);
_obj.setName(nativePath);
_line=0;
_state=Initial;
_currentFileName=filename;
processFileRecursive(filename);
if(!_currentLabels.empty())
throw std::runtime_error("Symbol definition must be followed by an instruction or data definition statement");
if(!_sectionEnabled.empty())
throw std::runtime_error("#endif expected");
 
// Examine symbol table
for(auto const &sym: _obj.symbols()) {
if(sym.second.type==LinkableObject::Unknown&&!sym.second.refs.empty()) {
std::ostringstream msg;
msg<<"Undefined symbol \""+sym.first+"\"";
msg<<" (referenced from "<<sym.second.refs[0].source;
msg<<":"<<sym.second.refs[0].line<<")";
throw std::runtime_error(msg.str());
}
}
for(auto const &sym: _exportedSymbols) _obj.exportSymbol(sym);
}
 
void Assembler::processFileRecursive(const std::string &filename) {
std::ifstream in(filename,std::ios_base::in);
if(!in) throw std::runtime_error("Cannot open file \""+filename+"\"");
// Process input file line-by-line
auto savedLine=_line;
auto savedState=_state;
auto savedFileName=_currentFileName;
_line=1;
_state=Initial;
_currentFileName=filename;
std::string line;
while(std::getline(in,line)) {
auto tokens=tokenize(line);
expand(tokens);
elaborate(tokens);
_line++;
}
if(_state!=Initial) throw std::runtime_error("Unexpected end of file");
_line=savedLine;
_state=savedState;
_currentFileName=savedFileName;
}
 
/*
 * Appends "dir" to the list of directories searched by #include.
 * The path is stored with normalized separators and, unless it is empty,
 * with a trailing '/' so that a file name can be appended directly.
 */
void Assembler::addIncludeSearchDir(const std::string &dir) {
    auto normalized=Utils::normalizeSeparators(dir);
    const bool needsSlash=!normalized.empty()&&normalized.back()!='/';
    if(needsSlash) normalized+='/';
    _includeSearchDirs.push_back(std::move(normalized));
}
 
// Returns the current value of the line counter (_line).
int Assembler::line() const {
return _line;
}
 
// Returns a copy of the name of the file currently being processed (_currentFileName).
std::string Assembler::currentFileName() const {
return _currentFileName;
}
 
// Returns a mutable reference to the linkable object being built (_obj).
LinkableObject &Assembler::object() {
return _obj;
}
 
// Returns a read-only reference to the linkable object being built (_obj).
const LinkableObject &Assembler::object() const {
return _obj;
}
 
/*
 * Splits one source line into tokens.
 *
 * Tokens are the separators "," and ":", words (mnemonics, identifiers,
 * numbers, directives and data definition keywords) and string literals.
 * String literals are stored together with their surrounding quotes and
 * with escape sequences already decoded. Line comments and block comments
 * are skipped; a block comment that is not closed on this line leaves the
 * lexer in the BlockComment state so that it continues on the next line.
 *
 * Throws std::runtime_error on an unexpected character, a malformed
 * escape sequence or a string literal that is not closed on the same line.
 */
Assembler::TokenList Assembler::tokenize(const std::string &str) {
	TokenList tokenList;
	std::string word;
	std::size_t i;
	for(i=0;i<str.size();i++) {
		char ch=str[i];
		// <cctype> functions require a value representable as unsigned char
		const auto uch=static_cast<unsigned char>(ch);
		switch(_state) {
		case Initial:
			if(ch==' '||ch=='\t'||ch=='\n'||ch=='\r') continue; // skip whitespace
			else if(ch==','||ch==':') { // separator
				tokenList.push_back(std::string(1,ch));
			}
			else if(std::isalnum(uch)||ch=='.'||ch=='#'||ch=='_'||ch=='-'||ch=='+') {
				word=std::string(1,ch);
				_state=Word;
			}
			else if(ch=='\"') {
				word="\"";
				_state=StringLiteral;
			}
			else if(ch=='/') {
				if(++i>=str.size()) throw std::runtime_error("Unexpected end of line");
				ch=str[i];
				if(ch=='/') i=str.size(); // skip the rest of the line
				else if(ch=='*') _state=BlockComment;
				else throw std::runtime_error(std::string("Unexpected character: \"")+ch+"\"");
			}
			else throw std::runtime_error(std::string("Unexpected character: \"")+ch+"\"");
			break;
		case Word:
			if(std::isalnum(uch)||ch=='_'||ch=='@'||ch=='+'||ch=='-') word+=ch;
			else {
				// Not part of the word: re-examine this character in the Initial state
				i--;
				_state=Initial;
				tokenList.push_back(std::move(word));
			}
			break;
		case StringLiteral:
			if(ch=='\\') {
				if(++i>=str.size()) throw std::runtime_error("Unexpected end of line");
				ch=str[i];
				if(ch=='\\') word.push_back('\\');
				else if(ch=='\"') word.push_back('\"');
				else if(ch=='\'') word.push_back('\'');
				else if(ch=='t') word.push_back('\t');
				else if(ch=='n') word.push_back('\n');
				else if(ch=='r') word.push_back('\r');
				else if(ch=='x') { // hexadecimal sequence can be 1-2 digit long
					std::string seq;
					// The second digit counts only if it directly follows the first
					if(i+1<str.size()&&Utils::ishexdigit(str[i+1])) {
						seq+=str[i+1];
						if(i+2<str.size()&&Utils::ishexdigit(str[i+2])) seq+=str[i+2];
					}
					if(seq.empty()) throw std::runtime_error("Ill-formed escape sequence");
					try {
						word.push_back(static_cast<char>(std::stoul(seq,nullptr,16)));
					}
					catch(std::exception &) {
						throw std::runtime_error("Ill-formed escape sequence");
					}
					i+=seq.size();
				}
				else if(Utils::isoctdigit(ch)) { // octal sequence can be 1-3 digit long
					std::string seq(1,ch);
					// Digits must be contiguous: stop at the first non-octal character
					if(i+1<str.size()&&Utils::isoctdigit(str[i+1])) {
						seq+=str[i+1];
						if(i+2<str.size()&&Utils::isoctdigit(str[i+2])) seq+=str[i+2];
					}
					unsigned long value;
					try {
						value=std::stoul(seq,nullptr,8);
					}
					catch(std::exception &) {
						throw std::runtime_error("Ill-formed escape sequence");
					}
					if(value>255) throw std::runtime_error("Octal value is out of range");
					word.push_back(static_cast<char>(value));
					i+=seq.size()-1;
				}
				else throw std::runtime_error(std::string("Unknown escape sequence: \"\\")+ch+"\"");
			}
			else if(ch=='\"') {
				word.push_back('\"');
				tokenList.push_back(std::move(word));
				_state=Initial;
			}
			else word.push_back(ch);
			break;
		case BlockComment:
			if(ch=='*') {
				if(++i>=str.size()) break;
				ch=str[i];
				if(ch=='/') _state=Initial;
				else i--; // could be the '*' of a following "*/"
			}
			break;
		}
	}
	if(_state==StringLiteral) throw std::runtime_error("Unexpected end of line");
	if(_state==Word) tokenList.push_back(std::move(word)); // store last word
	if(_state!=BlockComment) _state=Initial; // reset state if not in block comment
	return tokenList;
}
 
/*
 * Performs macro substitution on a token list in place. Every token
 * that names a defined macro is replaced by the macro's token sequence,
 * except for the token that immediately follows #define, #ifdef or
 * #ifndef (optionally preceded by a "label :" pair), which must stay
 * a literal macro name.
 */
void Assembler::expand(TokenList &list) {
	// Directives whose first argument is a macro name
	const auto takesMacroName=[](const std::string &directive) {
		return directive=="#define"||directive=="#ifdef"||directive=="#ifndef";
	};

	TokenList expanded;
	for(auto &tok: list) {
		const auto macro=_macros.find(tok);
		if(macro==_macros.end()) {
			expanded.push_back(std::move(tok));
			continue;
		}

		// Positions are judged on the output produced so far
		bool keepLiteral=false;
		if(expanded.size()==1) {
			keepLiteral=takesMacroName(expanded[0]);
		}
		else if(expanded.size()==3&&expanded[1]==":") {
			keepLiteral=takesMacroName(expanded[2]);
		}

		if(keepLiteral) {
			expanded.push_back(std::move(tok));
		}
		else {
			expanded.insert(expanded.end(),macro->second.begin(),macro->second.end());
		}
	}
	list=std::move(expanded);
}
 
/*
 * Processes one statement given as a (macro-expanded) token list.
 *
 * A leading "label :" pair is stripped and, if the current conditional
 * section is enabled, the label is queued until the next instruction or
 * data definition provides its address. Inside a disabled section only
 * the conditional directives are acted upon.
 */
void Assembler::elaborate(TokenList &list) {
	if(list.empty()) return;

	// Optional label
	const bool hasLabel=list.size()>=2&&list[1]==":";
	if(hasLabel) {
		if(!validateIdentifier(list[0])) {
			throw std::runtime_error("Ill-formed identifier: \""+list[0]+"\"");
		}
		if(isSectionEnabled()) {
			_currentLabels.push_back(std::move(list[0]));
		}
		list.erase(list.begin(),list.begin()+2);
		if(list.empty()) return;
	}

	const std::string &head=list[0];

	// A disabled section still has to track #ifdef, #ifndef, #else and #endif
	const bool conditional=head=="#ifdef"||head=="#ifndef"||
		head=="#else"||head=="#endif";
	if(!conditional&&!isSectionEnabled()) return;

	if(head[0]=='#') {
		elaborateDirective(list);
		return;
	}

	// Instruction or data definition: bind all pending labels to its address
	const LinkableObject::Word rva=
		(head[0]=='.')?elaborateDataDefinition(list):elaborateInstruction(list);
	for(auto const &pending: _currentLabels) {
		_obj.addSymbol(pending,rva);
	}
	_currentLabels.clear();
}
 
/*
 * Executes a preprocessor-style directive (a statement whose first
 * token starts with '#'). Supported directives: #define, #export,
 * #import, #include, #message, #error, #ifdef, #ifndef, #else, #endif.
 *
 * Throws std::runtime_error for malformed or unknown directives, for an
 * include file that cannot be located, and unconditionally for #error.
 */
void Assembler::elaborateDirective(TokenList &list) {
	assert(!list.empty());

	const std::string &name=list[0];

	// Shared argument checks
	const auto requireSize=[&list](std::size_t count) {
		if(list.size()!=count) {
			throw std::runtime_error("Wrong number of tokens in the directive");
		}
	};
	const auto requireIdentifier=[](const std::string &id) {
		if(!validateIdentifier(id)) {
			throw std::runtime_error("Ill-formed identifier: \""+id+"\"");
		}
	};

	if(name=="#define") {
		if(list.size()<2) {
			throw std::runtime_error("Wrong number of tokens in the directive");
		}
		if(_macros.find(list[1])!=_macros.end()) {
			throw std::runtime_error("Macro \""+list[1]+"\" has been already defined");
		}
		requireIdentifier(list[1]);
		// Everything after the name is the (possibly empty) replacement list
		_macros.emplace(list[1],TokenList(list.begin()+2,list.end()));
		return;
	}

	if(name=="#export") {
		requireSize(2);
		requireIdentifier(list[1]);
		_exportedSymbols.push_back(list[1]);
		return;
	}

	if(name=="#import") {
		requireSize(2);
		requireIdentifier(list[1]);
		_obj.addImportedSymbol(list[1]);
		return;
	}

	if(name=="#include") {
		requireSize(2);
		auto filename=Utils::dequoteString(list[1]);
		if(Utils::isAbsolutePath(filename)) {
			processFileRecursive(filename);
			return;
		}
		// First try relative to the including file, then the search directories
		auto path=Utils::relativePath(currentFileName(),filename);
		if(Utils::fileExists(path)) {
			processFileRecursive(path);
			return;
		}
		for(auto const &dir: _includeSearchDirs) {
			path=Utils::nativeSeparators(dir+filename);
			if(Utils::fileExists(path)) {
				processFileRecursive(path);
				return;
			}
		}
		throw std::runtime_error("Cannot locate include file \""+filename+"\"");
	}

	if(name=="#message") {
		requireSize(2);
		auto msg=Utils::dequoteString(list[1]);
		std::cout<<currentFileName()<<":"<<line()<<": "<<msg<<std::endl;
		return;
	}

	if(name=="#error") {
		if(list.size()<2) throw std::runtime_error("#error directive encountered");
		auto msg=Utils::dequoteString(list[1]);
		throw std::runtime_error(msg);
	}

	if(name=="#ifdef") {
		requireSize(2);
		const bool defined=_macros.find(list[1])!=_macros.end();
		_sectionEnabled.push_back(defined);
		return;
	}

	if(name=="#ifndef") {
		requireSize(2);
		const bool defined=_macros.find(list[1])!=_macros.end();
		_sectionEnabled.push_back(!defined);
		return;
	}

	if(name=="#else") {
		requireSize(1);
		if(_sectionEnabled.empty()) throw std::runtime_error("Unexpected #else");
		// Invert the innermost condition
		const bool current=_sectionEnabled.back();
		_sectionEnabled.back()=!current;
		return;
	}

	if(name=="#endif") {
		requireSize(1);
		if(_sectionEnabled.empty()) throw std::runtime_error("Unexpected #endif");
		_sectionEnabled.pop_back();
		return;
	}

	throw std::runtime_error("Unrecognized directive: \""+name+"\"");
}
 
/*
 * Processes a data definition statement (.align, .reserve, .word or
 * .byte) and appends the corresponding data to the object.
 *
 * Returns the address reported by the object for the first item that
 * was added; the caller binds pending labels to it.
 *
 * Throws std::runtime_error on syntax errors, out-of-range values and
 * unknown statements. Negative sizes and alignments are rejected
 * explicitly: converting them to std::size_t would silently turn them
 * into huge positive values.
 */
LinkableObject::Word Assembler::elaborateDataDefinition(TokenList &list) {
	assert(!list.empty());
	LinkableObject::Word rva=0;
	if(list[0]==".align") {
		if(list.size()>2) throw std::runtime_error("Unexpected token: \""+list[2]+"\"");
		std::size_t align=4; // default alignment when no argument is given
		if(list.size()>1) {
			auto value=numericLiteral(list[1]);
			// A negative value is never a power of 2; don't let it wrap around
			if(value<0) throw std::runtime_error("Alignment must be a power of 2");
			align=static_cast<std::size_t>(value);
		}
		if(!Utils::isPowerOf2(align)) throw std::runtime_error("Alignment must be a power of 2");
		if(align<4) throw std::runtime_error("Alignment must be at least 4");
		rva=_obj.addPadding(align);
	}
	else if(list[0]==".reserve") {
		if(list.size()<2) throw std::runtime_error("Unexpected end of statement");
		else if(list.size()>2) throw std::runtime_error("Unexpected token: \""+list[2]+"\"");
		auto value=numericLiteral(list[1]);
		// A negative byte count would wrap to an enormous std::size_t
		if(value<0) throw std::runtime_error("\""+list[1]+"\": out of range");
		auto n=static_cast<std::size_t>(value);
		rva=_obj.addZeros(n);
	}
	else if(list[0]==".word") {
		if(list.size()<2) throw std::runtime_error("Unexpected end of statement");
		// Odd positions hold values, even positions hold the separating commas
		for(std::size_t i=1;i<list.size();i++) {
			if(i%2!=0) {
				auto w=static_cast<LinkableObject::Word>(numericLiteral(list[i]));
				auto r=_obj.addWord(w);
				if(i==1) rva=r;
			}
			else {
				if(list[i]!=",") throw std::runtime_error("Comma expected");
				if(i+1==list.size()) throw std::runtime_error("Unexpected end of statement");
			}
		}
	}
	else if(list[0]==".byte") {
		if(list.size()<2) throw std::runtime_error("Unexpected end of statement");
		// Odd positions hold values, even positions hold the separating commas
		for(std::size_t i=1;i<list.size();i++) {
			if(i%2!=0) {
				if(list[i].at(0)=='\"') { // string literal
					auto bytes=Utils::dequoteString(list[i]);
					auto r=_obj.addBytes(reinterpret_cast<const LinkableObject::Byte*>
						(bytes.c_str()),bytes.size());
					if(i==1) rva=r;
				}
				else {
					// Both signed (-128..127) and unsigned (0..255) byte values are accepted
					auto n=numericLiteral(list[i]);
					if(n>255||n<-128) throw std::runtime_error("\""+list[i]+"\": out of range");
					auto b=static_cast<LinkableObject::Byte>(n);
					auto r=_obj.addByte(b);
					if(i==1) rva=r;
				}
			}
			else {
				if(list[i]!=",") throw std::runtime_error("Comma expected");
				if(i+1==list.size()) throw std::runtime_error("Unexpected end of statement");
			}
		}
	}
	else throw std::runtime_error("Unrecognized statement: \""+list[0]+"\"");
	return rva;
}
 
/*
 * Processes an instruction statement: pads the object via addPadding(),
 * then dispatches on the mnemonic to the matching encodeXxx() member.
 * Any mnemonic starting with "cjmp" is handled by encodeCjmpxx().
 *
 * Returns the address reported by addPadding(). Throws
 * std::runtime_error for an unknown mnemonic.
 */
LinkableObject::Word Assembler::elaborateInstruction(TokenList &list) {
	assert(!list.empty());

	typedef void (Assembler::*Encoder)(const TokenList &);
	struct Entry {
		const char *mnemonic;
		Encoder encoder;
	};
	static const Entry table[]={
		{"add",&Assembler::encodeAdd},
		{"and",&Assembler::encodeAnd},
		{"call",&Assembler::encodeCall},
		{"divs",&Assembler::encodeDivs},
		{"divu",&Assembler::encodeDivu},
		{"hlt",&Assembler::encodeHlt},
		{"jmp",&Assembler::encodeJmp},
		{"iret",&Assembler::encodeIret},
		{"lc",&Assembler::encodeLc},
		{"lcs",&Assembler::encodeLcs},
		{"lsb",&Assembler::encodeLsb},
		{"lub",&Assembler::encodeLub},
		{"lw",&Assembler::encodeLw},
		{"mods",&Assembler::encodeMods},
		{"modu",&Assembler::encodeModu},
		{"mov",&Assembler::encodeMov},
		{"mul",&Assembler::encodeMul},
		{"neg",&Assembler::encodeNeg},
		{"nop",&Assembler::encodeNop},
		{"not",&Assembler::encodeNot},
		{"or",&Assembler::encodeOr},
		{"ret",&Assembler::encodeRet},
		{"sb",&Assembler::encodeSb},
		{"sl",&Assembler::encodeSl},
		{"srs",&Assembler::encodeSrs},
		{"sru",&Assembler::encodeSru},
		{"sub",&Assembler::encodeSub},
		{"sw",&Assembler::encodeSw},
		{"xor",&Assembler::encodeXor}
	};

	const auto rva=_obj.addPadding();
	const std::string &mnemonic=list[0];

	// The whole conditional jump family shares one encoder
	if(mnemonic.compare(0,4,"cjmp")==0) {
		encodeCjmpxx(list);
		return rva;
	}

	for(const auto &entry: table) {
		if(mnemonic==entry.mnemonic) {
			(this->*entry.encoder)(list);
			return rva;
		}
	}

	throw std::runtime_error("Unrecognized instruction: \""+mnemonic+"\"");
}
 
/*
 * Tells whether statements are currently being assembled: true when
 * there is no open conditional block or when every open block's
 * condition holds.
 */
bool Assembler::isSectionEnabled() const {
	for(const bool flag: _sectionEnabled) {
		if(!flag) return false;
	}
	return true;
}
 
/*
 * Checks whether a string is a valid identifier:
 * 1. It must not be empty
 * 2. The first character must be either alphabetic or an underscore
 * 3. Subsequent characters must be either alphanumeric or underscores
 *
 * Characters are converted to unsigned char before being classified,
 * because passing a negative char to std::isalpha()/std::isalnum()
 * is undefined behaviour.
 */
bool Assembler::validateIdentifier(const std::string &str) {
	if(str.empty()) return false;
	for(std::size_t i=0;i<str.size();i++) {
		const auto ch=static_cast<unsigned char>(str[i]);
		if(ch=='_') continue; // allowed in any position
		if(i==0) {
			if(!std::isalpha(ch)) return false;
		}
		else {
			if(!std::isalnum(ch)) return false;
		}
	}
	return true;
}
 
/*
 * Converts a token to an integer. The base is detected automatically
 * by std::stoll (base argument 0) and the whole token must be consumed.
 * The accepted range spans from the smallest signed value up to the
 * largest unsigned value of LinkableObject::Word.
 *
 * Throws std::runtime_error if the token is not a well-formed number
 * or is outside of that range.
 */
Assembler::Integer Assembler::numericLiteral(const std::string &str) {
	const auto illFormed=[&str]() {
		return std::runtime_error("Ill-formed numeric literal: \""+str+"\"");
	};

	std::size_t consumed=0;
	Integer value=0;
	try {
		value=std::stoll(str,&consumed,0);
	}
	catch(std::exception &) {
		throw illFormed();
	}
	// Trailing garbage after the number is not allowed
	if(consumed!=str.size()) throw illFormed();

	using SignedWord=std::make_signed<LinkableObject::Word>::type;
	const auto upper=static_cast<Integer>(std::numeric_limits<LinkableObject::Word>::max());
	const auto lower=static_cast<Integer>(std::numeric_limits<SignedWord>::min());
	const bool inRange=value>=lower&&value<=upper;
	if(!inRange) {
		throw std::runtime_error("\""+str+"\": out of range");
	}
	return value;
}
 
/*
 * Parses the operands of a statement. list[0] is the mnemonic; operands
 * are expected at odd positions, separated by commas at even positions.
 *
 * Each operand is classified as
 * - a register: "r<N>" with N in 0..255, or one of the aliases
 *   sp (255), rp (254), irp (253), cr (252), iv0..iv7 (240..247);
 * - an identifier, optionally followed by "@<offset>";
 * - a numeric literal.
 *
 * A register number must start with a decimal digit directly after the
 * 'r'. Without this check std::strtol() would turn a lone "r" into
 * register 0 and would accept signed forms such as "r+5".
 *
 * Throws std::runtime_error on a missing comma, a trailing comma or
 * an operand that matches none of the forms above.
 */
std::vector<Assembler::Operand> Assembler::getOperands(const TokenList &list) {
	std::vector<Operand> arglist;
	for(std::size_t i=1;i<list.size();i++) {
		const std::string &token=list[i];
		if(i%2==0) {
			// Separator position
			if(token!=",") throw std::runtime_error("Comma expected");
			if(i+1==list.size()) throw std::runtime_error("Unexpected end of line");
			continue;
		}

		Operand a;
		a.str=token;

		if(token.size()>1&&token[0]=='r'&&
			std::isdigit(static_cast<unsigned char>(token[1])))
		{
			// Is argument a register?
			char *endptr;
			auto regstr=token.substr(1);
			auto reg=std::strtol(regstr.c_str(),&endptr,10);
			if(!*endptr&&reg>=0&&reg<=255) {
				a.type=Operand::Register;
				a.reg=static_cast<std::uint8_t>(reg);
				arglist.push_back(std::move(a));
				continue;
			}
		}

		// Try alternative register names
		if(token=="sp") { // stack pointer
			a.type=Operand::Register;
			a.reg=255;
		}
		else if(token=="rp") { // return pointer
			a.type=Operand::Register;
			a.reg=254;
		}
		else if(token=="irp") { // interrupt return pointer
			a.type=Operand::Register;
			a.reg=253;
		}
		else if(token=="cr") { // control register
			a.type=Operand::Register;
			a.reg=252;
		}
		else if(token.size()==3&&token.substr(0,2)=="iv"&&
			token[2]>='0'&&token[2]<='7') // interrupt vector
		{
			a.type=Operand::Register;
			a.reg=static_cast<std::uint8_t>(240+(token[2]-'0'));
		}
		else if(validateIdentifier(token)) {
			// Is argument an identifier?
			a.type=Operand::Identifier;
		}
		else {
			auto atpos=token.find_first_of('@');
			if(atpos!=std::string::npos) {
				// Identifier with an offset?
				a.type=Operand::Identifier;
				a.str=token.substr(0,atpos);
				if(!validateIdentifier(a.str)) throw std::runtime_error("Ill-formed identifier");
				a.i=numericLiteral(token.substr(atpos+1));
			}
			else {
				// Numeric literal?
				a.type=Operand::NumericLiteral;
				a.i=numericLiteral(token);
			}
		}
		arglist.push_back(std::move(a));
	}
	return arglist;
}
 
/*
* Member functions to encode LXP32 instructions
*/
 
/*
 * Stores the destination register number in bits 16 and up of the
 * instruction word. Throws std::runtime_error unless the operand is
 * a register.
 */
void Assembler::encodeDstOperand(LinkableObject::Word &word,const Operand &arg) {
	const bool isRegister=(arg.type==Operand::Register);
	if(!isRegister) {
		throw std::runtime_error("\""+arg.str+"\": must be a register");
	}
	word|=arg.reg<<16;
}
 
/*
 * Encodes the first source operand into bits 8 and up of the
 * instruction word. A register additionally sets the flag 0x02000000;
 * a numeric literal is stored as a single byte and must fit either
 * -128..127 or the equivalent 0xFFFFFF80..0xFFFFFFFF form.
 *
 * Throws std::runtime_error for out-of-range literals and for any
 * other operand type.
 */
void Assembler::encodeRd1Operand(LinkableObject::Word &word,const Operand &arg) {
	switch(arg.type) {
	case Operand::Register:
		word|=0x02000000;
		word|=arg.reg<<8;
		break;
	case Operand::NumericLiteral: {
		const bool fitsSigned=arg.i>=-128&&arg.i<=127;
		const bool fitsWrapped=arg.i>=0xFFFFFF80&&arg.i<=0xFFFFFFFF;
		if(!fitsSigned&&!fitsWrapped) {
			throw std::runtime_error("\""+arg.str+"\": out of range");
		}
		const auto b=static_cast<LinkableObject::Byte>(arg.i);
		word|=b<<8;
		break;
	}
	default:
		throw std::runtime_error("\""+arg.str+"\": bad argument");
	}
}
 
/*
 * Encodes the second source operand into the lowest bits of the
 * instruction word. A register additionally sets the flag 0x01000000;
 * a numeric literal is stored as a single byte and must fit either
 * -128..127 or the equivalent 0xFFFFFF80..0xFFFFFFFF form.
 *
 * Throws std::runtime_error for out-of-range literals and for any
 * other operand type.
 */
void Assembler::encodeRd2Operand(LinkableObject::Word &word,const Operand &arg) {
	switch(arg.type) {
	case Operand::Register:
		word|=0x01000000;
		word|=arg.reg;
		break;
	case Operand::NumericLiteral: {
		const bool fitsSigned=arg.i>=-128&&arg.i<=127;
		const bool fitsWrapped=arg.i>=0xFFFFFF80&&arg.i<=0xFFFFFFFF;
		if(!fitsSigned&&!fitsWrapped) {
			throw std::runtime_error("\""+arg.str+"\": out of range");
		}
		const auto b=static_cast<LinkableObject::Byte>(arg.i);
		word|=b;
		break;
	}
	default:
		throw std::runtime_error("\""+arg.str+"\": bad argument");
	}
}
 
void Assembler::encodeAdd(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("add instruction requires 3 operands");
LinkableObject::Word w=0x40000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeAnd(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("and instruction requires 3 operands");
LinkableObject::Word w=0x60000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeCall(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=1) throw std::runtime_error("call instruction requires 1 operand");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
LinkableObject::Word w=0x86FE0000;
encodeRd1Operand(w,args[0]);
_obj.addWord(w);
}
 
void Assembler::encodeCjmpxx(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("cjmpxx instruction requires 3 operands");
LinkableObject::Word w;
bool reverse=false;
/*
* Note: cjmpul, cjmpule, cjmpsl and cjmpsle don't have distinct opcodes;
* instead, they are aliases for respective "g" or "ge" instructions
* with reversed operand order.
*/
if(list[0]=="cjmpe") w=0xE0000000;
else if(list[0]=="cjmpne") w=0xD0000000;
else if(list[0]=="cjmpug"||list[0]=="cjmpul") w=0xC8000000;
else if(list[0]=="cjmpuge"||list[0]=="cjmpule") w=0xE8000000;
else if(list[0]=="cjmpsg"||list[0]=="cjmpsl") w=0xC4000000;
else if(list[0]=="cjmpsge"||list[0]=="cjmpsle") w=0xE4000000;
else throw std::runtime_error("Unrecognized instruction: \""+list[0]+"\"");
if(list[0]=="cjmpul"||list[0]=="cjmpule"||
list[0]=="cjmpsl"||list[0]=="cjmpsle") reverse=true;
encodeDstOperand(w,args[0]);
if(!reverse) {
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
}
else {
encodeRd1Operand(w,args[2]);
encodeRd2Operand(w,args[1]);
}
_obj.addWord(w);
}
 
void Assembler::encodeDivs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("divs instruction requires 3 operands");
LinkableObject::Word w=0x54000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeDivu(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("divu instruction requires 3 operands");
LinkableObject::Word w=0x50000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeHlt(const TokenList &list) {
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("hlt instruction doesn't take operands");
_obj.addWord(0x08000000);
}
 
void Assembler::encodeJmp(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=1) throw std::runtime_error("jmp instruction requires 1 operand");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
LinkableObject::Word w=0x82000000;
encodeRd1Operand(w,args[0]);
_obj.addWord(w);
}
 
void Assembler::encodeIret(const TokenList &list) {
// Note: "iret" is not a real instruction, but an alias for "jmp irp"
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("iret instruction doesn't take operands");
_obj.addWord(0x8200FD00);
}
 
void Assembler::encodeLc(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lc instruction requires 2 operands");
LinkableObject::Word w=0x04000000;
encodeDstOperand(w,args[0]);
_obj.addWord(w);
if(args[1].type==Operand::Identifier) {
LinkableObject::Reference ref;
ref.source=currentFileName();
ref.line=line();
ref.rva=_obj.addWord(0);
ref.offset=args[1].i;
ref.type=LinkableObject::Regular;
_obj.addReference(args[1].str,ref);
}
else if(args[1].type==Operand::NumericLiteral) {
_obj.addWord(static_cast<LinkableObject::Word>(args[1].i));
}
else throw std::runtime_error("\""+args[1].str+"\": bad argument");
}
 
void Assembler::encodeLcs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lcs instruction requires 2 operands");
LinkableObject::Word w=0xA0000000;
encodeDstOperand(w,args[0]);
if(args[1].type==Operand::NumericLiteral) {
if((args[1].i<-1048576||args[1].i>1048575)&&(args[1].i<0xFFF00000||args[1].i>0xFFFFFFFF))
throw std::runtime_error("\""+args[1].str+"\": out of range");
auto c=static_cast<LinkableObject::Word>(args[1].i)&0x1FFFFF;
w|=(c&0xFFFF);
w|=((c<<8)&0x1F000000);
_obj.addWord(w);
}
else if(args[1].type==Operand::Identifier) {
LinkableObject::Reference ref;
ref.source=currentFileName();
ref.line=line();
ref.rva=_obj.addWord(w);
ref.offset=args[1].i;
ref.type=LinkableObject::Short;
_obj.addReference(args[1].str,ref);
}
else throw std::runtime_error("\""+args[1].str+"\": bad argument");
}
 
void Assembler::encodeLsb(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lsb instruction requires 2 operands");
if(args[1].type!=Operand::Register) throw std::runtime_error("\""+args[1].str+"\": must be a register");
LinkableObject::Word w=0x2E000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeLub(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lub instruction requires 2 operands");
if(args[1].type!=Operand::Register) throw std::runtime_error("\""+args[1].str+"\": must be a register");
LinkableObject::Word w=0x2A000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeLw(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("lw instruction requires 2 operands");
if(args[1].type!=Operand::Register) throw std::runtime_error("\""+args[1].str+"\": must be a register");
LinkableObject::Word w=0x22000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeMods(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("mods instruction requires 3 operands");
LinkableObject::Word w=0x5C000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeModu(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("modu instruction requires 3 operands");
LinkableObject::Word w=0x58000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeMov(const TokenList &list) {
// Note: "mov" is not a real instruction, but an alias for "add dst, src, 0"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("mov instruction requires 2 operands");
LinkableObject::Word w=0x40000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeMul(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("mul instruction requires 3 operands");
LinkableObject::Word w=0x48000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeNeg(const TokenList &list) {
// Note: "neg" is not a real instruction, but an alias for "sub dst, 0, src"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("neg instruction requires 2 operands");
LinkableObject::Word w=0x44000000;
encodeDstOperand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeNop(const TokenList &list) {
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("nop instruction doesn't take operands");
_obj.addWord(0);
}
 
void Assembler::encodeNot(const TokenList &list) {
// Note: "not" is not a real instruction, but an alias for "xor dst, src, -1"
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("not instruction requires 2 operands");
LinkableObject::Word w=0x680000FF;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeOr(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("or instruction requires 3 operands");
LinkableObject::Word w=0x64000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeRet(const TokenList &list) {
// Note: "ret" is not a real instruction, but an alias for "jmp rp"
auto args=getOperands(list);
if(!args.empty()) throw std::runtime_error("ret instruction doesn't take operands");
_obj.addWord(0x8200FE00);
}
 
void Assembler::encodeSb(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("sb instruction requires 2 operands");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
if(args[1].type==Operand::NumericLiteral) {
// If numeric literal value is between 128 and 255 (inclusive), convert
// it to a signed byte to avoid exception in encodeRd2Operand()
if(args[1].i>=128&&args[1].i<=255) args[1].i-=256;
}
LinkableObject::Word w=0x3A000000;
encodeRd1Operand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeSl(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("sl instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&
(args[2].i<0||args[2].i>=static_cast<Integer>(8*sizeof(LinkableObject::Word))))
{
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Bitwise shift result is undefined when "
"the second operand is negative or greater than 31"<<std::endl;
}
LinkableObject::Word w=0x70000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSrs(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("srs instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&
(args[2].i<0||args[2].i>=static_cast<Integer>(8*sizeof(LinkableObject::Word))))
{
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Bitwise shift result is undefined when "
"the second operand is negative or greater than 31"<<std::endl;
}
LinkableObject::Word w=0x7C000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSru(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("sru instruction requires 3 operands");
if(args[2].type==Operand::NumericLiteral&&
(args[2].i<0||args[2].i>=static_cast<Integer>(8*sizeof(LinkableObject::Word))))
{
std::cerr<<currentFileName()<<":"<<line()<<": ";
std::cerr<<"Warning: Bitwise shift result is undefined when "
"the second operand is negative or greater than 31"<<std::endl;
}
LinkableObject::Word w=0x78000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSub(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("sub instruction requires 3 operands");
LinkableObject::Word w=0x44000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
 
void Assembler::encodeSw(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=2) throw std::runtime_error("sw instruction requires 2 operands");
if(args[0].type!=Operand::Register) throw std::runtime_error("\""+args[0].str+"\": must be a register");
LinkableObject::Word w=0x32000000;
encodeRd1Operand(w,args[0]);
encodeRd2Operand(w,args[1]);
_obj.addWord(w);
}
 
void Assembler::encodeXor(const TokenList &list) {
auto args=getOperands(list);
if(args.size()!=3) throw std::runtime_error("xor instruction requires 3 operands");
LinkableObject::Word w=0x68000000;
encodeDstOperand(w,args[0]);
encodeRd1Operand(w,args[1]);
encodeRd2Operand(w,args[2]);
_obj.addWord(w);
}
/assembler.h
1,106 → 1,108
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the Assembler class which performs
* compilation of LXP32 assembly source files.
*/
 
#ifndef ASSEMBLER_H_INCLUDED
#define ASSEMBLER_H_INCLUDED
 
#include "linkableobject.h"
 
#include <vector>
#include <map>
#include <string>
#include <cstdint>
 
/*
 * Assembler compiles LXP32 assembly source files into a LinkableObject.
 *
 * Typical use: optionally register include directories with
 * addIncludeSearchDir(), call processFile(), then retrieve the result
 * through object(). line() and currentFileName() report the position
 * being processed.
 */
class Assembler {
	typedef std::vector<std::string> TokenList;
	typedef std::int_least64_t Integer;
	// States of the line tokenizer
	enum LexerState {
		Initial,
		Word,
		StringLiteral,
		BlockComment
	};
	// A parsed instruction operand
	struct Operand {
		enum Type {Null,Register,Identifier,NumericLiteral};
		Type type=Null;
		std::string str; // operand text
		Integer i=0; // numeric value
		std::uint8_t reg=0; // register number
	};
	LinkableObject _obj; // receives the assembled output
	std::map<std::string,TokenList> _macros;
	// Initialized here so that line() is well defined before processFile() runs
	LexerState _state=Initial;
	int _line=0;
	std::vector<std::string> _currentLabels;
	std::string _currentFileName;
	std::vector<std::string> _includeSearchDirs;
	std::vector<std::string> _exportedSymbols;
public:
	void processFile(const std::string &filename);
	void addIncludeSearchDir(const std::string &dir);
	int line() const;
	std::string currentFileName() const;
	LinkableObject &object();
	const LinkableObject &object() const;
private:
	void processFileRecursive(const std::string &filename);
	TokenList tokenize(const std::string &str);
	void expand(TokenList &list);
	void elaborate(TokenList &list);
	void elaborateDirective(TokenList &list);
	LinkableObject::Word elaborateDataDefinition(TokenList &list);
	LinkableObject::Word elaborateInstruction(TokenList &list);
	static bool validateIdentifier(const std::string &str);
	static Integer numericLiteral(const std::string &str);
	static std::vector<Operand> getOperands(const TokenList &list);
	// LXP32 instructions
	void encodeDstOperand(LinkableObject::Word &word,const Operand &arg);
	void encodeRd1Operand(LinkableObject::Word &word,const Operand &arg);
	void encodeRd2Operand(LinkableObject::Word &word,const Operand &arg);
	void encodeAdd(const TokenList &list);
	void encodeAnd(const TokenList &list);
	void encodeCall(const TokenList &list);
	void encodeCjmpxx(const TokenList &list);
	void encodeDivs(const TokenList &list);
	void encodeDivu(const TokenList &list);
	void encodeHlt(const TokenList &list);
	void encodeJmp(const TokenList &list);
	void encodeIret(const TokenList &list);
	void encodeLc(const TokenList &list);
	void encodeLcs(const TokenList &list);
	void encodeLsb(const TokenList &list);
	void encodeLub(const TokenList &list);
	void encodeLw(const TokenList &list);
	void encodeMods(const TokenList &list);
	void encodeModu(const TokenList &list);
	void encodeMov(const TokenList &list);
	void encodeMul(const TokenList &list);
	void encodeNeg(const TokenList &list);
	void encodeNop(const TokenList &list);
	void encodeNot(const TokenList &list);
	void encodeOr(const TokenList &list);
	void encodeRet(const TokenList &list);
	void encodeSb(const TokenList &list);
	void encodeSl(const TokenList &list);
	void encodeSrs(const TokenList &list);
	void encodeSru(const TokenList &list);
	void encodeSub(const TokenList &list);
	void encodeSw(const TokenList &list);
	void encodeXor(const TokenList &list);
};
 
#endif
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the Assembler class which performs
* compilation of LXP32 assembly source files.
*/
 
#ifndef ASSEMBLER_H_INCLUDED
#define ASSEMBLER_H_INCLUDED
 
#include "linkableobject.h"
 
#include <vector>
#include <map>
#include <string>
#include <cstdint>
 
/*
 * Assembler compiles LXP32 assembly source files into a LinkableObject.
 *
 * Typical use: optionally register include directories with
 * addIncludeSearchDir(), call processFile(), then retrieve the result
 * through object(). line() and currentFileName() report the position
 * being processed.
 */
class Assembler {
	typedef std::vector<std::string> TokenList;
	typedef std::int_least64_t Integer;
	// States of the line tokenizer
	enum LexerState {
		Initial,
		Word,
		StringLiteral,
		BlockComment
	};
	// A parsed instruction operand
	struct Operand {
		enum Type {Null,Register,Identifier,NumericLiteral};
		Type type=Null;
		std::string str; // operand text
		Integer i=0; // numeric value
		std::uint8_t reg=0; // register number
	};
	LinkableObject _obj; // receives the assembled output
	std::map<std::string,TokenList> _macros;
	// Initialized here so that line() is well defined before processFile() runs
	LexerState _state=Initial;
	int _line=0;
	std::vector<std::string> _currentLabels;
	std::string _currentFileName;
	std::vector<std::string> _includeSearchDirs;
	std::vector<std::string> _exportedSymbols;
	std::vector<bool> _sectionEnabled; // one entry per open conditional block
public:
	void processFile(const std::string &filename);
	void addIncludeSearchDir(const std::string &dir);
	int line() const;
	std::string currentFileName() const;
	LinkableObject &object();
	const LinkableObject &object() const;
private:
	void processFileRecursive(const std::string &filename);
	TokenList tokenize(const std::string &str);
	void expand(TokenList &list);
	void elaborate(TokenList &list);
	void elaborateDirective(TokenList &list);
	LinkableObject::Word elaborateDataDefinition(TokenList &list);
	LinkableObject::Word elaborateInstruction(TokenList &list);
	bool isSectionEnabled() const;
	static bool validateIdentifier(const std::string &str);
	static Integer numericLiteral(const std::string &str);
	static std::vector<Operand> getOperands(const TokenList &list);
	// LXP32 instructions
	void encodeDstOperand(LinkableObject::Word &word,const Operand &arg);
	void encodeRd1Operand(LinkableObject::Word &word,const Operand &arg);
	void encodeRd2Operand(LinkableObject::Word &word,const Operand &arg);
	void encodeAdd(const TokenList &list);
	void encodeAnd(const TokenList &list);
	void encodeCall(const TokenList &list);
	void encodeCjmpxx(const TokenList &list);
	void encodeDivs(const TokenList &list);
	void encodeDivu(const TokenList &list);
	void encodeHlt(const TokenList &list);
	void encodeJmp(const TokenList &list);
	void encodeIret(const TokenList &list);
	void encodeLc(const TokenList &list);
	void encodeLcs(const TokenList &list);
	void encodeLsb(const TokenList &list);
	void encodeLub(const TokenList &list);
	void encodeLw(const TokenList &list);
	void encodeMods(const TokenList &list);
	void encodeModu(const TokenList &list);
	void encodeMov(const TokenList &list);
	void encodeMul(const TokenList &list);
	void encodeNeg(const TokenList &list);
	void encodeNop(const TokenList &list);
	void encodeNot(const TokenList &list);
	void encodeOr(const TokenList &list);
	void encodeRet(const TokenList &list);
	void encodeSb(const TokenList &list);
	void encodeSl(const TokenList &list);
	void encodeSrs(const TokenList &list);
	void encodeSru(const TokenList &list);
	void encodeSub(const TokenList &list);
	void encodeSw(const TokenList &list);
	void encodeXor(const TokenList &list);
};
 
#endif
/linkableobject.cpp
1,290 → 1,290
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the LinkableObject class.
*/
 
#include "linkableobject.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <cstdlib>
 
std::string LinkableObject::name() const {
return _name;
}
 
void LinkableObject::setName(const std::string &str) {
_name=str;
}
 
LinkableObject::Word LinkableObject::virtualAddress() const {
return _virtualAddress;
}
 
void LinkableObject::setVirtualAddress(Word addr) {
_virtualAddress=addr;
}
 
LinkableObject::Byte *LinkableObject::code() {
return _code.data();
}
 
const LinkableObject::Byte *LinkableObject::code() const {
return _code.data();
}
 
std::size_t LinkableObject::codeSize() const {
return _code.size();
}
 
// Appends a 32-bit word to the code buffer after aligning the buffer to a
// word boundary. Returns the RVA at which the word was stored.
LinkableObject::Word LinkableObject::addWord(Word w) {
	const Word wordRva=addPadding(sizeof(Word));
	// Bytes are emitted least significant first, whatever the host endianness
	for(unsigned int shift=0;shift<32;shift+=8) {
		_code.push_back(static_cast<Byte>(w>>shift));
	}
	return wordRva;
}
 
// Appends a single byte (no alignment) and returns its RVA.
LinkableObject::Word LinkableObject::addByte(Byte b) {
	const std::size_t offset=_code.size();
	_code.push_back(b);
	return static_cast<Word>(offset);
}

// Appends n bytes starting at p (no alignment) and returns the RVA of the first one.
LinkableObject::Word LinkableObject::addBytes(const Byte *p,std::size_t n) {
	const std::size_t offset=_code.size();
	_code.insert(_code.end(),p,p+n);
	return static_cast<Word>(offset);
}

// Appends n zero bytes (no alignment) and returns the RVA of the first one.
LinkableObject::Word LinkableObject::addZeros(std::size_t n) {
	const std::size_t offset=_code.size();
	_code.insert(_code.end(),n,static_cast<Byte>(0));
	return static_cast<Word>(offset);
}
 
// Extends the code buffer with zero bytes until its size is a multiple of
// `size`, and returns the resulting buffer size (i.e. the RVA of whatever is
// appended next).
//
// An alignment of 0 or 1 requires no padding. Zero is handled explicitly
// because the modulo operation below would otherwise divide by zero.
LinkableObject::Word LinkableObject::addPadding(std::size_t size) {
	if(size>1) {
		const std::size_t remainder=_code.size()%size;
		if(remainder!=0) _code.resize(_code.size()+(size-remainder));
	}
	return static_cast<LinkableObject::Word>(_code.size());
}
 
// Reads up to four bytes starting at rva and assembles them into a word,
// least significant byte first. Bytes past the end of the buffer read as zero.
LinkableObject::Word LinkableObject::getWord(Word rva) const {
	Word result=0;
	for(unsigned int shift=0;shift<32&&rva<codeSize();shift+=8) {
		result|=static_cast<Word>(_code[rva++])<<shift;
	}
	return result;
}
 
// Overwrites the four bytes starting at rva with `value`, least significant
// byte first. The whole word must lie inside the buffer (checked by assert).
void LinkableObject::replaceWord(Word rva,Word value) {
	assert(rva+sizeof(Word)<=codeSize());
	// Byte order is fixed here and does not depend on the host machine
	for(unsigned int shift=0;shift<32;shift+=8) {
		_code[rva++]=static_cast<Byte>(value>>shift);
	}
}
 
void LinkableObject::addSymbol(const std::string &name,Word rva) {
auto &data=symbol(name);
if(data.type!=Unknown) throw std::runtime_error("Symbol \""+name+"\" is already defined");
data.type=Local;
data.rva=rva;
}
 
void LinkableObject::addImportedSymbol(const std::string &name) {
auto &data=symbol(name);
if(data.type!=Unknown) throw std::runtime_error("Symbol \""+name+"\" is already defined");
data.type=Imported;
}
 
void LinkableObject::exportSymbol(const std::string &name) {
auto it=_symbols.find(name);
if(it==_symbols.end()||it->second.type==Unknown) throw std::runtime_error("Undefined symbol \""+name+"\"");
if(it->second.type==Imported) throw std::runtime_error("Symbol \""+name+"\" can't be both imported and exported at the same time");
if(it->second.type==Exported) throw std::runtime_error("Symbol \""+name+"\" has been already exported");
it->second.type=Exported;
}
 
void LinkableObject::addReference(const std::string &symbolName,const Reference &ref) {
auto &data=symbol(symbolName);
data.refs.push_back(ref);
}
 
LinkableObject::SymbolData &LinkableObject::symbol(const std::string &name) {
return _symbols[name];
}
 
const LinkableObject::SymbolData &LinkableObject::symbol(const std::string &name) const {
auto const it=_symbols.find(name);
if(it==_symbols.end()) throw std::runtime_error("Undefined symbol \""+name+"\"");
return it->second;
}
 
const LinkableObject::SymbolTable &LinkableObject::symbols() const {
return _symbols;
}
 
void LinkableObject::serialize(const std::string &filename) const {
std::ofstream out(filename,std::ios_base::out);
if(!out) throw std::runtime_error("Cannot open \""+filename+"\" for writing");
out<<"LinkableObject"<<std::endl;
if(!_name.empty()) out<<"Name "<<Utils::urlEncode(_name)<<std::endl;
out<<"VirtualAddress 0x"<<Utils::hex(_virtualAddress)<<std::endl;
out<<std::endl;
out<<"Start Code"<<std::endl;
for(Word rva=0;rva<_code.size();rva+=sizeof(Word)) {
out<<"\t0x"<<Utils::hex(getWord(rva))<<std::endl;
}
out<<"End Code"<<std::endl;
for(auto const &sym: _symbols) {
if(sym.second.type==Unknown)
throw std::runtime_error("Undefined symbol: \""+sym.first+"\"");
out<<std::endl;
out<<"Start Symbol"<<std::endl;
out<<"\tName "<<Utils::urlEncode(sym.first)<<std::endl;
if(sym.second.type==Local) out<<"\tType Local"<<std::endl;
else if(sym.second.type==Exported) out<<"\tType Exported"<<std::endl;
else out<<"\tType Imported"<<std::endl;
if(sym.second.type!=Imported) out<<"\tRVA 0x"<<Utils::hex(sym.second.rva)<<std::endl;
for(auto const &ref: sym.second.refs) {
out<<"\tRef ";
out<<Utils::urlEncode(ref.source)<<" ";
out<<ref.line<<" ";
out<<"0x"<<Utils::hex(ref.rva)<<" ";
out<<ref.offset<<" ";
if(ref.type==Regular) out<<"Regular"<<std::endl;
else if(ref.type==Short) out<<"Short"<<std::endl;
}
out<<"End Symbol"<<std::endl;
}
}
 
void LinkableObject::deserialize(const std::string &filename) {
std::ifstream in(filename,std::ios_base::in);
if(!in) throw std::runtime_error("Cannot open \""+filename+"\"");
operator=(LinkableObject());
std::string line;
for(;;) {
if(!std::getline(in,line)) throw std::runtime_error("Bad object format");
auto tokens=tokenize(line);
if(tokens.empty()) continue;
else if(tokens[0]!="LinkableObject") throw std::runtime_error("Bad object format");
break;
}
while(std::getline(in,line)) {
auto tokens=tokenize(line);
if(tokens.empty()) continue;
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
else if(tokens[0]=="Name") _name=Utils::urlDecode(tokens[1]);
else if(tokens[0]=="VirtualAddress") _virtualAddress=std::strtoul(tokens[1].c_str(),NULL,0);
else if(tokens[0]=="Start") {
if(tokens[1]=="Code") deserializeCode(in);
else if(tokens[1]=="Symbol") deserializeSymbol(in);
else throw std::runtime_error("Unexpected token: \""+tokens[1]+"\"");
}
else throw std::runtime_error("Unexpected token: \""+tokens[0]+"\"");
}
}
 
/*
* Private members
*/
 
void LinkableObject::deserializeCode(std::istream &in) {
std::string line;
while(std::getline(in,line)) {
auto tokens=tokenize(line);
if(tokens.empty()) continue;
if(tokens[0]=="End") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Code") return;
throw std::runtime_error("Unexpected token: \""+tokens[1]+"\"");
}
auto w=static_cast<Word>(std::strtoul(tokens[0].c_str(),NULL,0));
addWord(w);
}
throw std::runtime_error("Unexpected end of file");
}
 
void LinkableObject::deserializeSymbol(std::istream &in) {
std::string line;
std::string name;
SymbolData data;
while(std::getline(in,line)) {
auto tokens=tokenize(line);
if(tokens.empty()) continue;
if(tokens[0]=="End") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Symbol") {
if(name.empty()) throw std::runtime_error("Symbol name is not defined");
if(data.type==Unknown) throw std::runtime_error("Bad symbol type");
_symbols.emplace(std::move(name),std::move(data));
return;
}
throw std::runtime_error("Unexpected token: \""+tokens[1]+"\"");
}
else if(tokens[0]=="Name") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
name=Utils::urlDecode(tokens[1]);
}
else if(tokens[0]=="Type") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Local") data.type=Local;
else if(tokens[1]=="Exported") data.type=Exported;
else if(tokens[1]=="Imported") data.type=Imported;
else throw std::runtime_error("Bad symbol type");
}
else if(tokens[0]=="RVA") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
data.rva=std::strtoul(tokens[1].c_str(),NULL,0);
}
else if(tokens[0]=="Ref") {
Reference ref;
if(tokens.size()<4) throw std::runtime_error("Unexpected end of line");
ref.source=Utils::urlDecode(tokens[1]);
ref.line=std::strtoul(tokens[2].c_str(),NULL,0);
ref.rva=std::strtoul(tokens[3].c_str(),NULL,0);
ref.offset=std::strtoll(tokens[4].c_str(),NULL,0);
if(tokens[5]=="Regular") ref.type=Regular;
else if(tokens[5]=="Short") ref.type=Short;
else throw std::runtime_error("Invalid reference type: \""+tokens[5]+"\"");
data.refs.push_back(std::move(ref));
}
}
throw std::runtime_error("Unexpected end of file");
}
 
// Splits `str` into tokens separated by runs of spaces, tabs, CR or LF.
// Returns an empty vector when the string holds no non-blank characters.
std::vector<std::string> LinkableObject::tokenize(const std::string &str) {
	static const char *const separators=" \t\r\n";
	std::vector<std::string> result;
	std::size_t first=str.find_first_not_of(separators,0);
	while(first!=std::string::npos) {
		const std::size_t last=str.find_first_of(separators,first);
		if(last==std::string::npos) {
			// Final token runs to the end of the string
			result.push_back(str.substr(first));
			break;
		}
		result.push_back(str.substr(first,last-first));
		first=str.find_first_not_of(separators,last);
	}
	return result;
}
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the LinkableObject class.
*/
 
#include "linkableobject.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <cstdlib>
 
std::string LinkableObject::name() const {
return _name;
}
 
void LinkableObject::setName(const std::string &str) {
_name=str;
}
 
LinkableObject::Word LinkableObject::virtualAddress() const {
return _virtualAddress;
}
 
void LinkableObject::setVirtualAddress(Word addr) {
_virtualAddress=addr;
}
 
LinkableObject::Byte *LinkableObject::code() {
return _code.data();
}
 
const LinkableObject::Byte *LinkableObject::code() const {
return _code.data();
}
 
std::size_t LinkableObject::codeSize() const {
return _code.size();
}
 
// Appends a 32-bit word to the code buffer after aligning the buffer to a
// word boundary. Returns the RVA at which the word was stored.
LinkableObject::Word LinkableObject::addWord(Word w) {
	const Word wordRva=addPadding(sizeof(Word));
	// Bytes are emitted least significant first, whatever the host endianness
	for(unsigned int shift=0;shift<32;shift+=8) {
		_code.push_back(static_cast<Byte>(w>>shift));
	}
	return wordRva;
}
 
// Appends a single byte (no alignment) and returns its RVA.
LinkableObject::Word LinkableObject::addByte(Byte b) {
	const std::size_t offset=_code.size();
	_code.push_back(b);
	return static_cast<Word>(offset);
}

// Appends n bytes starting at p (no alignment) and returns the RVA of the first one.
LinkableObject::Word LinkableObject::addBytes(const Byte *p,std::size_t n) {
	const std::size_t offset=_code.size();
	_code.insert(_code.end(),p,p+n);
	return static_cast<Word>(offset);
}

// Appends n zero bytes (no alignment) and returns the RVA of the first one.
LinkableObject::Word LinkableObject::addZeros(std::size_t n) {
	const std::size_t offset=_code.size();
	_code.insert(_code.end(),n,static_cast<Byte>(0));
	return static_cast<Word>(offset);
}
 
// Extends the code buffer with zero bytes until its size is a multiple of
// `size`, and returns the resulting buffer size (i.e. the RVA of whatever is
// appended next).
//
// An alignment of 0 or 1 requires no padding. Zero is handled explicitly
// because the modulo operation below would otherwise divide by zero.
LinkableObject::Word LinkableObject::addPadding(std::size_t size) {
	if(size>1) {
		const std::size_t remainder=_code.size()%size;
		if(remainder!=0) _code.resize(_code.size()+(size-remainder));
	}
	return static_cast<LinkableObject::Word>(_code.size());
}
 
// Reads up to four bytes starting at rva and assembles them into a word,
// least significant byte first. Bytes past the end of the buffer read as zero.
LinkableObject::Word LinkableObject::getWord(Word rva) const {
	Word result=0;
	for(unsigned int shift=0;shift<32&&rva<codeSize();shift+=8) {
		result|=static_cast<Word>(_code[rva++])<<shift;
	}
	return result;
}
 
// Overwrites the four bytes starting at rva with `value`, least significant
// byte first. The whole word must lie inside the buffer (checked by assert).
void LinkableObject::replaceWord(Word rva,Word value) {
	assert(rva+sizeof(Word)<=codeSize());
	// Byte order is fixed here and does not depend on the host machine
	for(unsigned int shift=0;shift<32;shift+=8) {
		_code[rva++]=static_cast<Byte>(value>>shift);
	}
}
 
void LinkableObject::addSymbol(const std::string &name,Word rva) {
auto &data=symbol(name);
if(data.type!=Unknown) throw std::runtime_error("Symbol \""+name+"\" is already defined");
data.type=Local;
data.rva=rva;
}
 
void LinkableObject::addImportedSymbol(const std::string &name) {
auto &data=symbol(name);
if(data.type!=Unknown) throw std::runtime_error("Symbol \""+name+"\" is already defined");
data.type=Imported;
}
 
void LinkableObject::exportSymbol(const std::string &name) {
auto it=_symbols.find(name);
if(it==_symbols.end()||it->second.type==Unknown) throw std::runtime_error("Undefined symbol \""+name+"\"");
if(it->second.type==Imported) throw std::runtime_error("Symbol \""+name+"\" can't be both imported and exported at the same time");
if(it->second.type==Exported) throw std::runtime_error("Symbol \""+name+"\" has been already exported");
it->second.type=Exported;
}
 
void LinkableObject::addReference(const std::string &symbolName,const Reference &ref) {
auto &data=symbol(symbolName);
data.refs.push_back(ref);
}
 
LinkableObject::SymbolData &LinkableObject::symbol(const std::string &name) {
return _symbols[name];
}
 
const LinkableObject::SymbolData &LinkableObject::symbol(const std::string &name) const {
auto const it=_symbols.find(name);
if(it==_symbols.end()) throw std::runtime_error("Undefined symbol \""+name+"\"");
return it->second;
}
 
const LinkableObject::SymbolTable &LinkableObject::symbols() const {
return _symbols;
}
 
void LinkableObject::serialize(const std::string &filename) const {
std::ofstream out(filename,std::ios_base::out);
if(!out) throw std::runtime_error("Cannot open \""+filename+"\" for writing");
out<<"LinkableObject"<<std::endl;
if(!_name.empty()) out<<"Name "<<Utils::urlEncode(_name)<<std::endl;
out<<"VirtualAddress 0x"<<Utils::hex(_virtualAddress)<<std::endl;
out<<std::endl;
out<<"Start Code"<<std::endl;
for(Word rva=0;rva<_code.size();rva+=sizeof(Word)) {
out<<"\t0x"<<Utils::hex(getWord(rva))<<std::endl;
}
out<<"End Code"<<std::endl;
for(auto const &sym: _symbols) {
if(sym.second.type==Unknown)
throw std::runtime_error("Undefined symbol: \""+sym.first+"\"");
out<<std::endl;
out<<"Start Symbol"<<std::endl;
out<<"\tName "<<Utils::urlEncode(sym.first)<<std::endl;
if(sym.second.type==Local) out<<"\tType Local"<<std::endl;
else if(sym.second.type==Exported) out<<"\tType Exported"<<std::endl;
else out<<"\tType Imported"<<std::endl;
if(sym.second.type!=Imported) out<<"\tRVA 0x"<<Utils::hex(sym.second.rva)<<std::endl;
for(auto const &ref: sym.second.refs) {
out<<"\tRef ";
out<<Utils::urlEncode(ref.source)<<" ";
out<<ref.line<<" ";
out<<"0x"<<Utils::hex(ref.rva)<<" ";
out<<ref.offset<<" ";
if(ref.type==Regular) out<<"Regular"<<std::endl;
else if(ref.type==Short) out<<"Short"<<std::endl;
}
out<<"End Symbol"<<std::endl;
}
}
 
void LinkableObject::deserialize(const std::string &filename) {
std::ifstream in(filename,std::ios_base::in);
if(!in) throw std::runtime_error("Cannot open \""+filename+"\"");
operator=(LinkableObject());
std::string line;
for(;;) {
if(!std::getline(in,line)) throw std::runtime_error("Bad object format");
auto tokens=tokenize(line);
if(tokens.empty()) continue;
else if(tokens[0]!="LinkableObject") throw std::runtime_error("Bad object format");
break;
}
while(std::getline(in,line)) {
auto tokens=tokenize(line);
if(tokens.empty()) continue;
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
else if(tokens[0]=="Name") _name=Utils::urlDecode(tokens[1]);
else if(tokens[0]=="VirtualAddress") _virtualAddress=std::strtoul(tokens[1].c_str(),NULL,0);
else if(tokens[0]=="Start") {
if(tokens[1]=="Code") deserializeCode(in);
else if(tokens[1]=="Symbol") deserializeSymbol(in);
else throw std::runtime_error("Unexpected token: \""+tokens[1]+"\"");
}
else throw std::runtime_error("Unexpected token: \""+tokens[0]+"\"");
}
}
 
/*
* Private members
*/
 
void LinkableObject::deserializeCode(std::istream &in) {
std::string line;
while(std::getline(in,line)) {
auto tokens=tokenize(line);
if(tokens.empty()) continue;
if(tokens[0]=="End") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Code") return;
throw std::runtime_error("Unexpected token: \""+tokens[1]+"\"");
}
auto w=static_cast<Word>(std::strtoul(tokens[0].c_str(),NULL,0));
addWord(w);
}
throw std::runtime_error("Unexpected end of file");
}
 
void LinkableObject::deserializeSymbol(std::istream &in) {
std::string line;
std::string name;
SymbolData data;
while(std::getline(in,line)) {
auto tokens=tokenize(line);
if(tokens.empty()) continue;
if(tokens[0]=="End") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Symbol") {
if(name.empty()) throw std::runtime_error("Symbol name is not defined");
if(data.type==Unknown) throw std::runtime_error("Bad symbol type");
_symbols.emplace(std::move(name),std::move(data));
return;
}
throw std::runtime_error("Unexpected token: \""+tokens[1]+"\"");
}
else if(tokens[0]=="Name") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
name=Utils::urlDecode(tokens[1]);
}
else if(tokens[0]=="Type") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
if(tokens[1]=="Local") data.type=Local;
else if(tokens[1]=="Exported") data.type=Exported;
else if(tokens[1]=="Imported") data.type=Imported;
else throw std::runtime_error("Bad symbol type");
}
else if(tokens[0]=="RVA") {
if(tokens.size()<2) throw std::runtime_error("Unexpected end of line");
data.rva=std::strtoul(tokens[1].c_str(),NULL,0);
}
else if(tokens[0]=="Ref") {
Reference ref;
if(tokens.size()<4) throw std::runtime_error("Unexpected end of line");
ref.source=Utils::urlDecode(tokens[1]);
ref.line=std::strtoul(tokens[2].c_str(),NULL,0);
ref.rva=std::strtoul(tokens[3].c_str(),NULL,0);
ref.offset=std::strtoll(tokens[4].c_str(),NULL,0);
if(tokens[5]=="Regular") ref.type=Regular;
else if(tokens[5]=="Short") ref.type=Short;
else throw std::runtime_error("Invalid reference type: \""+tokens[5]+"\"");
data.refs.push_back(std::move(ref));
}
}
throw std::runtime_error("Unexpected end of file");
}
 
// Splits `str` into tokens separated by runs of spaces, tabs, CR or LF.
// Returns an empty vector when the string holds no non-blank characters.
std::vector<std::string> LinkableObject::tokenize(const std::string &str) {
	static const char *const separators=" \t\r\n";
	std::vector<std::string> result;
	std::size_t first=str.find_first_not_of(separators,0);
	while(first!=std::string::npos) {
		const std::size_t last=str.find_first_of(separators,first);
		if(last==std::string::npos) {
			// Final token runs to the end of the string
			result.push_back(str.substr(first));
			break;
		}
		result.push_back(str.substr(first,last-first));
		first=str.find_first_not_of(separators,last);
	}
	return result;
}
/linkableobject.h
1,88 → 1,88
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the LinkableObject class which represents
* compiled LXP32 binary code.
*/
 
#ifndef LINKABLEOBJECT_H_INCLUDED
#define LINKABLEOBJECT_H_INCLUDED
 
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <cstdint>
 
// Compiled LXP32 binary code together with its symbol table.
//
// The code is kept as a flat byte buffer; words are stored least significant
// byte first. Positions inside the buffer are expressed as RVAs (offsets from
// the start of the buffer).
class LinkableObject {
public:
	typedef unsigned char Byte;
	typedef std::uint32_t Word;
	typedef std::int_least64_t Integer;
	enum SymbolType {Unknown,Local,Exported,Imported};
	enum RefType {Regular,Short};
	// A place in the code that refers to a symbol
	struct Reference {
		std::string source; // name of the source the reference comes from
		int line; // line number within that source
		Word rva; // location of the referencing word in the code buffer
		Integer offset; // value added to the symbol address on relocation
		RefType type;
	};
	// Everything known about one symbol
	struct SymbolData {
		SymbolType type=Unknown;
		// Initialized so that symbols without an RVA (e.g. imported ones)
		// never expose an indeterminate value.
		Word rva=0;
		std::vector<Reference> refs;
	};
	typedef std::map<std::string,SymbolData> SymbolTable;
private:
	std::string _name;
	std::vector<Byte> _code;
	SymbolTable _symbols;
	Word _virtualAddress=0;
public:
	std::string name() const;
	void setName(const std::string &str);
	Word virtualAddress() const;
	void setVirtualAddress(Word addr);
	// Pointer to the first byte of the code buffer / buffer size in bytes
	Byte *code();
	const Byte *code() const;
	std::size_t codeSize() const;
	// Appends a word after aligning the buffer to a word boundary; returns the word's RVA
	Word addWord(Word w);
	// Append raw data without alignment; each returns the RVA of the first byte added
	Word addByte(Byte b);
	Word addBytes(const Byte *p,std::size_t n);
	Word addZeros(std::size_t n);
	// Pads the buffer to a multiple of `size`; returns the buffer size after padding
	Word addPadding(std::size_t size=sizeof(LinkableObject::Word));
	// Reads a word at `rva`; bytes beyond the end of the buffer read as zero
	Word getWord(Word rva) const;
	// Overwrites the word at `rva`; the word must lie within the buffer
	void replaceWord(Word rva,Word value);
	// Define a local / imported symbol; throw std::runtime_error if already defined
	void addSymbol(const std::string &name,Word rva);
	void addImportedSymbol(const std::string &name);
	// Marks a defined symbol as exported; throws std::runtime_error if it is
	// undefined, imported or already exported
	void exportSymbol(const std::string &name);
	// Attaches a reference to a symbol, creating the symbol entry if needed
	void addReference(const std::string &symbolName,const Reference &ref);
	// The non-const overload inserts a missing symbol; the const one throws
	// std::runtime_error instead
	SymbolData &symbol(const std::string &name);
	const SymbolData &symbol(const std::string &name) const;
	const SymbolTable &symbols() const;
	// Write / read the textual object format; throw std::runtime_error on failure
	void serialize(const std::string &filename) const;
	void deserialize(const std::string &filename);

private:
	void deserializeCode(std::istream &in);
	void deserializeSymbol(std::istream &in);
	static std::vector<std::string> tokenize(const std::string &str);
};
 
#endif
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the LinkableObject class which represents
* compiled LXP32 binary code.
*/
 
#ifndef LINKABLEOBJECT_H_INCLUDED
#define LINKABLEOBJECT_H_INCLUDED
 
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <cstdint>
 
// Compiled LXP32 binary code together with its symbol table.
//
// The code is kept as a flat byte buffer; words are stored least significant
// byte first. Positions inside the buffer are expressed as RVAs (offsets from
// the start of the buffer).
class LinkableObject {
public:
	typedef unsigned char Byte;
	typedef std::uint32_t Word;
	typedef std::int_least64_t Integer;
	enum SymbolType {Unknown,Local,Exported,Imported};
	enum RefType {Regular,Short};
	// A place in the code that refers to a symbol
	struct Reference {
		std::string source; // name of the source the reference comes from
		int line; // line number within that source
		Word rva; // location of the referencing word in the code buffer
		Integer offset; // value added to the symbol address on relocation
		RefType type;
	};
	// Everything known about one symbol
	struct SymbolData {
		SymbolType type=Unknown;
		// Initialized so that symbols without an RVA (e.g. imported ones)
		// never expose an indeterminate value.
		Word rva=0;
		std::vector<Reference> refs;
	};
	typedef std::map<std::string,SymbolData> SymbolTable;
private:
	std::string _name;
	std::vector<Byte> _code;
	SymbolTable _symbols;
	Word _virtualAddress=0;
public:
	std::string name() const;
	void setName(const std::string &str);
	Word virtualAddress() const;
	void setVirtualAddress(Word addr);
	// Pointer to the first byte of the code buffer / buffer size in bytes
	Byte *code();
	const Byte *code() const;
	std::size_t codeSize() const;
	// Appends a word after aligning the buffer to a word boundary; returns the word's RVA
	Word addWord(Word w);
	// Append raw data without alignment; each returns the RVA of the first byte added
	Word addByte(Byte b);
	Word addBytes(const Byte *p,std::size_t n);
	Word addZeros(std::size_t n);
	// Pads the buffer to a multiple of `size`; returns the buffer size after padding
	Word addPadding(std::size_t size=sizeof(LinkableObject::Word));
	// Reads a word at `rva`; bytes beyond the end of the buffer read as zero
	Word getWord(Word rva) const;
	// Overwrites the word at `rva`; the word must lie within the buffer
	void replaceWord(Word rva,Word value);
	// Define a local / imported symbol; throw std::runtime_error if already defined
	void addSymbol(const std::string &name,Word rva);
	void addImportedSymbol(const std::string &name);
	// Marks a defined symbol as exported; throws std::runtime_error if it is
	// undefined, imported or already exported
	void exportSymbol(const std::string &name);
	// Attaches a reference to a symbol, creating the symbol entry if needed
	void addReference(const std::string &symbolName,const Reference &ref);
	// The non-const overload inserts a missing symbol; the const one throws
	// std::runtime_error instead
	SymbolData &symbol(const std::string &name);
	const SymbolData &symbol(const std::string &name) const;
	const SymbolTable &symbols() const;
	// Write / read the textual object format; throw std::runtime_error on failure
	void serialize(const std::string &filename) const;
	void deserialize(const std::string &filename);

private:
	void deserializeCode(std::istream &in);
	void deserializeSymbol(std::istream &in);
	static std::vector<std::string> tokenize(const std::string &str);
};
 
#endif
/linker.cpp
1,272 → 1,272
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the Linker class.
*/
 
#include "linker.h"
 
#include "linkableobject.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <sstream>
#include <map>
#include <stdexcept>
#include <cassert>
#include <algorithm>
 
// Registers `obj` for linking. Only its address is stored, so the object
// must stay alive for as long as the linker uses it.
void Linker::addObject(LinkableObject &obj) {
	LinkableObject *const object=&obj;
	_objects.push_back(object);
}
 
// Links all registered objects and emits the image through `writer`.
// Throws std::runtime_error if no objects were added or if several objects
// were added and none of them defines the entry symbol.
void Linker::link(OutputWriter &writer) {
	if(_objects.empty()) throw std::runtime_error("Object set is empty");

	// Merge symbol tables
	buildSymbolTable();

	// Determine entry point: a lone object is always the entry object
	const bool singleObject=(_objects.size()==1);
	if(singleObject) {
		_entryObject=_objects[0];
	}
	else if(!_entryObject) {
		throw std::runtime_error("Entry point not defined: cannot find \"entry\" or \"Entry\" symbol");
	}

	// Assign virtual addresses
	placeObjects();

	// Perform relocations
	for(std::size_t i=0;i<_objects.size();++i) relocateObject(_objects[i]);

	// Write binary data and remember how much was written
	writeObjects(writer);
	_bytesWritten=writer.size();
}
 
void Linker::setBase(LinkableObject::Word base) {
_base=base;
}
 
void Linker::setAlignment(std::size_t align) {
_align=align;
}
 
void Linker::setImageSize(std::size_t size) {
_imageSize=size;
}
 
void Linker::generateMap(std::ostream &s) {
// Calculate the maximum length of a symbol name
std::size_t len=0;
for(auto const &obj: _objects) {
for(auto const &sym: obj->symbols()) {
if(sym.second.type!=LinkableObject::Imported)
len=std::max(len,sym.first.size());
}
}
len=std::max(len+3,std::size_t(8)); // width of the first column
s<<"Image base address: "<<Utils::hex(_base)<<std::endl;
s<<"Object alignment: "<<_align<<std::endl;
s<<"Image size: "<<(_bytesWritten/4)<<" words"<<std::endl;
s<<"Number of objects: "<<_objects.size()<<std::endl;
s<<std::endl;
for(auto const &obj: _objects) {
s<<"Object \""<<obj->name()<<"\" at address "<<Utils::hex(obj->virtualAddress())<<std::endl;
s<<std::endl;
std::multimap<LinkableObject::Word,std::pair<std::string,LinkableObject::SymbolData> > sorted;
for(auto const &sym: obj->symbols()) sorted.emplace(sym.second.rva,sym);
for(auto const &sym: sorted) {
if(sym.second.second.type==LinkableObject::Imported) continue;
s<<sym.second.first;
s<<std::string(len-sym.second.first.size(),' ');
s<<Utils::hex(obj->virtualAddress()+sym.second.second.rva);
if(sym.second.second.type==LinkableObject::Local) s<<" Local";
else s<<" Exported";
s<<std::endl;
}
s<<std::endl;
}
}
 
/*
* Private members
*/
 
void Linker::buildSymbolTable() {
_globalSymbolTable.clear();
// Build a table of exported symbols from all modules
for(auto const &obj: _objects) {
auto const &table=obj->symbols();
for(auto const &item: table) {
if((item.first=="entry"||item.first=="Entry")&&item.second.type!=LinkableObject::Imported) {
if(_entryObject) {
std::ostringstream msg;
msg<<obj->name()<<": Duplicate definition of the entry symbol ";
msg<<"(previously defined in "<<_entryObject->name()<<")";
throw std::runtime_error(msg.str());
}
if(item.second.rva!=0) {
std::ostringstream msg;
msg<<obj->name()<<": ";
msg<<"Entry point must refer to the start of the object";
throw std::runtime_error(msg.str());
}
_entryObject=obj;
}
if(item.second.type==LinkableObject::Local) continue;
// Insert item to the global symbol table if it doesn't exist yet
auto it=_globalSymbolTable.emplace(item.first,GlobalSymbolData()).first;
 
// Check that the symbol has not been already defined in another object
if(item.second.type==LinkableObject::Exported) {
if(it->second.obj) {
std::ostringstream msg;
msg<<obj->name()<<": Duplicate definition of \""<<item.first;
msg<<"\" (previously defined in "<<it->second.obj->name()<<")";
throw std::runtime_error(msg.str());
}
it->second.obj=obj;
it->second.rva=item.second.rva;
}
if(!item.second.refs.empty()) it->second.refs.insert(obj);
}
}
// Check that local symbols don't shadow the public ones
for(auto const &obj: _objects) {
auto const &table=obj->symbols();
for(auto const &item: table) {
if(item.second.type!=LinkableObject::Local) continue;
auto it=_globalSymbolTable.find(item.first);
if(it==_globalSymbolTable.end()) continue;
if(!it->second.obj) continue;
if(item.first==it->first) {
std::ostringstream msg;
msg<<obj->name()<<": Local symbol \""<<item.first<<"\" shadows the public one ";
msg<<"(defined in "<<it->second.obj->name()<<")";
throw std::runtime_error(msg.str());
}
}
}
// Check that no undefined symbols remain
for(auto const &item: _globalSymbolTable) {
if(item.second.obj==nullptr&&!item.second.refs.empty()) {
std::ostringstream msg;
msg<<"Undefined symbol: \""<<item.first<<"\"";
auto const it=item.second.refs.begin();
msg<<" (referenced from "<<(*it)->name()<<")";
throw std::runtime_error(msg.str());
}
}
}
 
void Linker::placeObjects() {
auto currentBase=_base;
// Make entry object the first
if(_objects.size()>1) {
for(auto it=_objects.begin();it!=_objects.end();++it) {
if(*it==_entryObject) {
_objects.erase(it);
break;
}
}
_objects.insert(_objects.begin(),_entryObject);
}
// Remove unreferenced objects
if(_objects.size()>1) {
std::set<const LinkableObject*> used;
markAsUsed(_objects[0],used);
for(auto it=_objects.begin();it!=_objects.end();) {
if(used.find(*it)==used.end()) {
std::cerr<<"Linker warning: skipping an unreferenced object \"";
std::cerr<<(*it)->name()<<"\""<<std::endl;
for(auto sym=_globalSymbolTable.begin();sym!=_globalSymbolTable.end();) {
if(sym->second.obj==*it) sym=_globalSymbolTable.erase(sym);
else ++sym;
}
it=_objects.erase(it);
}
else ++it;
}
}
// Set base addresses
for(auto it=_objects.begin();it!=_objects.end();++it) {
(*it)->setVirtualAddress(currentBase);
if(it+1!=_objects.end()) (*it)->addPadding(_align);
else (*it)->addPadding();
currentBase+=static_cast<LinkableObject::Word>((*it)->codeSize());
}
}
 
// Patches every symbol reference inside `obj` with the final address of the
// symbol. Regular references receive the full word; short references get the
// low 21 bits of the address merged into the existing word.
// Throws std::runtime_error if a short reference target is out of range.
void Linker::relocateObject(LinkableObject *obj) {
	for(auto const &entry: obj->symbols()) {
		const LinkableObject::SymbolData &symData=entry.second;
		if(symData.refs.empty()) continue;

		// Resolve the absolute address of the symbol
		LinkableObject::Word symbolAddress;
		if(symData.type==LinkableObject::Local) {
			symbolAddress=obj->virtualAddress()+symData.rva;
		}
		else {
			const auto global=_globalSymbolTable.find(entry.first);
			assert(global!=_globalSymbolTable.end());
			assert(global->second.obj);
			symbolAddress=global->second.obj->virtualAddress()+global->second.rva;
		}

		for(auto const &ref: symData.refs) {
			auto target=static_cast<LinkableObject::Word>(symbolAddress+ref.offset);
			if(ref.type==LinkableObject::Regular) {
				obj->replaceWord(ref.rva,target);
				continue;
			}

			// Short reference: only addresses near either end of the address space fit
			if(target>0xFFFFF&&target<0xFFF00000) {
				std::ostringstream msg;
				msg<<"Address 0x"<<Utils::hex(target)<<" is out of the range for a short reference";
				msg<<" (referenced from "<<ref.source<<":"<<ref.line<<")";
				throw std::runtime_error(msg.str());
			}
			target&=0x1FFFFF;
			LinkableObject::Word patched=obj->getWord(ref.rva);
			patched|=(target&0xFFFF); // low 16 bits stay in place
			patched|=((target<<8)&0x1F000000); // upper 5 bits go to bits 24..28
			obj->replaceWord(ref.rva,patched);
		}
	}
}
 
// Sends the code of all objects to `writer`, entry object first, and pads the
// output up to the requested image size if one was set.
// Throws std::runtime_error if the code is larger than the requested size.
void Linker::writeObjects(OutputWriter &writer) {
	std::size_t written=0;
	auto emit=[&writer,&written](LinkableObject *object) {
		writer.write(reinterpret_cast<const char*>(object->code()),object->codeSize());
		written+=object->codeSize();
	};

	// Write entry object
	emit(_entryObject);

	// Write other objects
	for(auto const &object: _objects) {
		if(object!=_entryObject) emit(object);
	}

	// Pad file if requested
	if(_imageSize>0) {
		if(written>_imageSize) {
			throw std::runtime_error("Image size exceeds the specified value");
		}
		if(written<_imageSize) writer.pad(_imageSize-written);
	}
}
 
// Adds `obj` to `used` and then, recursively, every object that defines a
// global symbol referenced by `obj`.
void Linker::markAsUsed(const LinkableObject *obj,std::set<const LinkableObject*> &used) {
	// insert() reports whether the object was new; stop if it was seen before
	if(!used.insert(obj).second) return;
	for(auto const &entry: _globalSymbolTable) {
		const auto &global=entry.second;
		for(auto const &referrer: global.refs) {
			if(referrer==obj) markAsUsed(global.obj,used);
		}
	}
}
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the Linker class.
*/
 
#include "linker.h"
 
#include "linkableobject.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <sstream>
#include <map>
#include <stdexcept>
#include <cassert>
#include <algorithm>
 
// Registers `obj` for linking. Only its address is stored, so the object
// must stay alive for as long as the linker uses it.
void Linker::addObject(LinkableObject &obj) {
	LinkableObject *const object=&obj;
	_objects.push_back(object);
}
 
/*
 * Runs the complete link: builds the global symbol table, selects the
 * entry object, assigns addresses, applies relocations and writes the
 * image. Throws std::runtime_error when no objects were added or when no
 * entry point can be determined.
 */
void Linker::link(OutputWriter &writer) {
	if(_objects.empty()) {
		throw std::runtime_error("Object set is empty");
	}
	// Merge the symbol tables of all objects
	buildSymbolTable();
	// A lone object is always the entry object; otherwise an entry
	// symbol must have been found while building the symbol table
	if(_objects.size()==1) {
		_entryObject=_objects.front();
	}
	else if(!_entryObject) {
		throw std::runtime_error("Entry point not defined: cannot find \"entry\" or \"Entry\" symbol");
	}
	// Give every object its virtual address
	placeObjects();
	// Patch all symbol references
	for(std::size_t i=0;i<_objects.size();++i) relocateObject(_objects[i]);
	// Emit the image and remember how many bytes it took
	writeObjects(writer);
	_bytesWritten=writer.size();
}
 
// Sets the address at which the first object of the image is placed
void Linker::setBase(LinkableObject::Word base) {
	this->_base=base;
}
 
void Linker::setAlignment(std::size_t align) {
_align=align;
}
 
void Linker::setImageSize(std::size_t size) {
_imageSize=size;
}
 
void Linker::generateMap(std::ostream &s) {
// Calculate the maximum length of a symbol name
std::size_t len=0;
for(auto const &obj: _objects) {
for(auto const &sym: obj->symbols()) {
if(sym.second.type!=LinkableObject::Imported)
len=std::max(len,sym.first.size());
}
}
len=std::max(len+3,std::size_t(8)); // width of the first column
s<<"Image base address: "<<Utils::hex(_base)<<std::endl;
s<<"Object alignment: "<<_align<<std::endl;
s<<"Image size: "<<(_bytesWritten/4)<<" words"<<std::endl;
s<<"Number of objects: "<<_objects.size()<<std::endl;
s<<std::endl;
for(auto const &obj: _objects) {
s<<"Object \""<<obj->name()<<"\" at address "<<Utils::hex(obj->virtualAddress())<<std::endl;
s<<std::endl;
std::multimap<LinkableObject::Word,std::pair<std::string,LinkableObject::SymbolData> > sorted;
for(auto const &sym: obj->symbols()) sorted.emplace(sym.second.rva,sym);
for(auto const &sym: sorted) {
if(sym.second.second.type==LinkableObject::Imported) continue;
s<<sym.second.first;
s<<std::string(len-sym.second.first.size(),' ');
s<<Utils::hex(obj->virtualAddress()+sym.second.second.rva);
if(sym.second.second.type==LinkableObject::Local) s<<" Local";
else s<<" Exported";
s<<std::endl;
}
s<<std::endl;
}
}
 
/*
* Private members
*/
 
/*
 * Rebuilds _globalSymbolTable from the symbol tables of all added objects
 * and detects the entry object. Three passes are made:
 *   1. collect exported/imported symbols and record which objects
 *      reference them;
 *   2. reject local symbols whose name matches a defined global symbol;
 *   3. reject global symbols that are referenced but never defined.
 * Every violation is reported by throwing std::runtime_error.
 * Note that _entryObject is not reset here; it is only assigned.
 */
void Linker::buildSymbolTable() {
_globalSymbolTable.clear();
// Build a table of exported symbols from all modules
for(auto const &obj: _objects) {
auto const &table=obj->symbols();
for(auto const &item: table) {
// A non-imported symbol named "entry" or "Entry" marks its object as the
// entry object; only one such definition is accepted
if((item.first=="entry"||item.first=="Entry")&&item.second.type!=LinkableObject::Imported) {
if(_entryObject) {
std::ostringstream msg;
msg<<obj->name()<<": Duplicate definition of the entry symbol ";
msg<<"(previously defined in "<<_entryObject->name()<<")";
throw std::runtime_error(msg.str());
}
// The entry symbol must have a relative address of zero
if(item.second.rva!=0) {
std::ostringstream msg;
msg<<obj->name()<<": ";
msg<<"Entry point must refer to the start of the object";
throw std::runtime_error(msg.str());
}
_entryObject=obj;
}
// Local symbols never enter the global table
if(item.second.type==LinkableObject::Local) continue;
// Insert item to the global symbol table if it doesn't exist yet
auto it=_globalSymbolTable.emplace(item.first,GlobalSymbolData()).first;

// Check that the symbol has not been already defined in another object
if(item.second.type==LinkableObject::Exported) {
if(it->second.obj) {
std::ostringstream msg;
msg<<obj->name()<<": Duplicate definition of \""<<item.first;
msg<<"\" (previously defined in "<<it->second.obj->name()<<")";
throw std::runtime_error(msg.str());
}
// Remember the defining object and the symbol's address within it
it->second.obj=obj;
it->second.rva=item.second.rva;
}
// Remember that this object references the symbol
if(!item.second.refs.empty()) it->second.refs.insert(obj);
}
}
// Check that local symbols don't shadow the public ones
for(auto const &obj: _objects) {
auto const &table=obj->symbols();
for(auto const &item: table) {
if(item.second.type!=LinkableObject::Local) continue;
auto it=_globalSymbolTable.find(item.first);
if(it==_globalSymbolTable.end()) continue;
// Global entries without a definition do not count as shadowed
if(!it->second.obj) continue;
// This comparison always holds after a successful find() on the same key
if(item.first==it->first) {
std::ostringstream msg;
msg<<obj->name()<<": Local symbol \""<<item.first<<"\" shadows the public one ";
msg<<"(defined in "<<it->second.obj->name()<<")";
throw std::runtime_error(msg.str());
}
}
}
// Check that no undefined symbols remain
for(auto const &item: _globalSymbolTable) {
if(item.second.obj==nullptr&&!item.second.refs.empty()) {
std::ostringstream msg;
msg<<"Undefined symbol: \""<<item.first<<"\"";
// Report the first object in the (pointer-ordered) set of referrers
auto const it=item.second.refs.begin();
msg<<" (referenced from "<<(*it)->name()<<")";
throw std::runtime_error(msg.str());
}
}
}
 
void Linker::placeObjects() {
auto currentBase=_base;
// Make entry object the first
if(_objects.size()>1) {
for(auto it=_objects.begin();it!=_objects.end();++it) {
if(*it==_entryObject) {
_objects.erase(it);
break;
}
}
_objects.insert(_objects.begin(),_entryObject);
}
// Remove unreferenced objects
if(_objects.size()>1) {
std::set<const LinkableObject*> used;
markAsUsed(_objects[0],used);
for(auto it=_objects.begin();it!=_objects.end();) {
if(used.find(*it)==used.end()) {
std::cerr<<"Linker warning: skipping an unreferenced object \"";
std::cerr<<(*it)->name()<<"\""<<std::endl;
for(auto sym=_globalSymbolTable.begin();sym!=_globalSymbolTable.end();) {
if(sym->second.obj==*it) sym=_globalSymbolTable.erase(sym);
else ++sym;
}
it=_objects.erase(it);
}
else ++it;
}
}
// Set base addresses
for(auto it=_objects.begin();it!=_objects.end();++it) {
(*it)->setVirtualAddress(currentBase);
if(it+1!=_objects.end()) (*it)->addPadding(_align);
else (*it)->addPadding();
currentBase+=static_cast<LinkableObject::Word>((*it)->codeSize());
}
}
 
/*
 * Resolves every symbol reference inside obj. For each referenced symbol
 * the final address is computed and then patched into the object code at
 * each reference site. Throws std::runtime_error when a short reference
 * cannot hold the target address.
 */
void Linker::relocateObject(LinkableObject *obj) {
for(auto const &sym: obj->symbols()) {
LinkableObject::Word addr;
// Symbols that are never referenced need no patching
if(sym.second.refs.empty()) continue;
// Local symbols live in this object; all others are looked up globally
if(sym.second.type==LinkableObject::Local) addr=obj->virtualAddress()+sym.second.rva;
else {
auto it=_globalSymbolTable.find(sym.first);
assert(it!=_globalSymbolTable.end());
assert(it->second.obj);
addr=it->second.obj->virtualAddress()+it->second.rva;
}
for(auto const &ref: sym.second.refs) {
// Regular reference: the whole word at the reference site is replaced
if(ref.type==LinkableObject::Regular) obj->replaceWord(ref.rva,addr+ref.offset);
else {
// Short reference: only addresses up to 0xFFFFF or from 0xFFF00000 upwards
// are accepted (presumably the 21-bit signed range of a 32-bit Word -
// TODO confirm the width of LinkableObject::Word)
auto target=static_cast<LinkableObject::Word>(addr+ref.offset);
if(target>0xFFFFF&&target<0xFFF00000) {
std::ostringstream msg;
msg<<"Address 0x"<<Utils::hex(target)<<" is out of the range for a short reference";
msg<<" (referenced from "<<ref.source<<":"<<ref.line<<")";
throw std::runtime_error(msg.str());
}
// Keep the low 21 bits and merge them into the existing word:
// bits 0-15 stay in place, bits 16-20 are moved to bits 24-28.
// The bits are OR'ed in, so the fields are presumably zero beforehand
// (verify against the code that emits short references)
target&=0x1FFFFF;
auto w=obj->getWord(ref.rva);
w|=(target&0xFFFF);
w|=((target<<8)&0x1F000000);
obj->replaceWord(ref.rva,w);
}
}
}
}
 
/*
 * Emits the code of all objects into the writer, entry object first, and
 * then extends the image to the requested size when one has been set.
 * Throws std::runtime_error if the emitted code is larger than that size.
 */
void Linker::writeObjects(OutputWriter &writer) {
	std::size_t total=0;
	// Emit a single object and account for its size
	auto emit=[&writer,&total](LinkableObject *object) {
		writer.write(reinterpret_cast<const char*>(object->code()),object->codeSize());
		total+=object->codeSize();
	};
	// The entry object always comes first
	emit(_entryObject);
	// The remaining objects follow in their stored order
	for(auto const &object: _objects) {
		if(object!=_entryObject) emit(object);
	}
	// Without a requested image size there is nothing left to do
	if(_imageSize==0) return;
	if(total>_imageSize) throw std::runtime_error("Image size exceeds the specified value");
	if(total<_imageSize) writer.pad(_imageSize-total);
}
 
/*
 * Adds obj to the set of used objects and then, for every global symbol
 * that obj references, recursively marks the object defining that symbol.
 */
void Linker::markAsUsed(const LinkableObject *obj,std::set<const LinkableObject*> &used) {
	// insert() tells us whether the object was already in the set
	if(!used.insert(obj).second) return; // already processed
	for(auto const &entry: _globalSymbolTable) {
		auto const &symbol=entry.second;
		// refs is a set, so obj can occur in it at most once
		if(symbol.refs.count(obj)!=0) markAsUsed(symbol.obj,used);
	}
}
/linker.h
1,53 → 1,53
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the Linker class which performs
* linking of LXP32 binary objects.
*/
 
#ifndef LINKER_H_INCLUDED
#define LINKER_H_INCLUDED
 
#include "linkableobject.h"
#include "outputwriter.h"
 
#include <iostream>
#include <map>
#include <vector>
#include <string>
#include <set>
 
/*
 * Links a set of LinkableObject instances into a single image and writes
 * it through an OutputWriter. Objects are referenced by pointer and are
 * not owned by the linker.
 */
class Linker {
// Per-symbol entry of the global symbol table
struct GlobalSymbolData {
// Object that defines the symbol (nullptr while no definition is known)
LinkableObject *obj=nullptr;
// Address of the symbol relative to the start of the defining object
LinkableObject::Word rva=0;
// Objects that reference the symbol
std::set<const LinkableObject*> refs;
};
// Objects to be linked, in placement order
std::vector<LinkableObject*> _objects;
// Object placed first in the image
LinkableObject *_entryObject=nullptr;
// Non-local symbols of all objects, keyed by name
std::map<std::string,GlobalSymbolData> _globalSymbolTable;
// Various output options
LinkableObject::Word _base=0;
std::size_t _align=4;
// Requested image size in bytes; zero means "do not pad"
std::size_t _imageSize=0;
// Writer size recorded after the last link()
std::size_t _bytesWritten=0;
public:
// Adds an object to the set of objects to be linked
void addObject(LinkableObject &obj);
// Performs the link and writes the image; throws std::runtime_error on failure
void link(OutputWriter &writer);
void setBase(LinkableObject::Word base);
void setAlignment(std::size_t align);
void setImageSize(std::size_t size);
// Writes a textual map of objects and symbols to s
void generateMap(std::ostream &s);
private:
void buildSymbolTable();
void placeObjects();
void relocateObject(LinkableObject *obj);
void writeObjects(OutputWriter &writer);
void markAsUsed(const LinkableObject *obj,std::set<const LinkableObject*> &used);
};
 
#endif
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the Linker class which performs
* linking of LXP32 binary objects.
*/
 
#ifndef LINKER_H_INCLUDED
#define LINKER_H_INCLUDED
 
#include "linkableobject.h"
#include "outputwriter.h"
 
#include <iostream>
#include <map>
#include <vector>
#include <string>
#include <set>
 
/*
 * Links a set of LinkableObject instances into a single image and writes
 * it through an OutputWriter. Objects are referenced by pointer and are
 * not owned by the linker.
 */
class Linker {
// Per-symbol entry of the global symbol table
struct GlobalSymbolData {
// Object that defines the symbol (nullptr while no definition is known)
LinkableObject *obj=nullptr;
// Address of the symbol relative to the start of the defining object
LinkableObject::Word rva=0;
// Objects that reference the symbol
std::set<const LinkableObject*> refs;
};
// Objects to be linked, in placement order
std::vector<LinkableObject*> _objects;
// Object placed first in the image
LinkableObject *_entryObject=nullptr;
// Non-local symbols of all objects, keyed by name
std::map<std::string,GlobalSymbolData> _globalSymbolTable;
// Various output options
LinkableObject::Word _base=0;
std::size_t _align=4;
// Requested image size in bytes; zero means "do not pad"
std::size_t _imageSize=0;
// Writer size recorded after the last link()
std::size_t _bytesWritten=0;
public:
// Adds an object to the set of objects to be linked
void addObject(LinkableObject &obj);
// Performs the link and writes the image; throws std::runtime_error on failure
void link(OutputWriter &writer);
void setBase(LinkableObject::Word base);
void setAlignment(std::size_t align);
void setImageSize(std::size_t size);
// Writes a textual map of objects and symbols to s
void generateMap(std::ostream &s);
private:
void buildSymbolTable();
void placeObjects();
void relocateObject(LinkableObject *obj);
void writeObjects(OutputWriter &writer);
void markAsUsed(const LinkableObject *obj,std::set<const LinkableObject*> &used);
};
 
#endif
/main.cpp
1,308 → 1,308
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* Main translation unit for the LXP32 assembler/linker.
*/
 
#include "assembler.h"
#include "linker.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <exception>
#include <utility>
#include <memory>
#include <cstdlib>
#include <cstring>
#include <cassert>
 
/*
 * Settings collected from the command line
 */
struct Options {
// Output formats selectable with -f
enum OutputFormat {Bin,Textio,Dec,Hex};
// -c: assemble only, do not link
bool compileOnly=false;
// -o: output file name (empty: derive it from the input file name)
std::string outputFileName;
// -m: map file name (empty: no map file)
std::string mapFileName;
// -i: directories searched for included files
std::vector<std::string> includeSearchDirs;
// -b: image base address
LinkableObject::Word base=0;
// -a: object alignment
std::size_t align=4;
// -s: output image size (zero: not specified)
std::size_t imageSize=0;
OutputFormat fmt=Bin;
};
 
/*
 * Prints the command line help to os. program is the name shown in the
 * usage line (main passes argv[0]).
 */
static void displayUsage(std::ostream &os,const char *program) {
os<<std::endl;
os<<"Usage:"<<std::endl;
os<<" "<<program<<" [ option(s) | input file(s) ]"<<std::endl<<std::endl;
os<<"Options:"<<std::endl;
os<<" -a <align> Object alignment (default: 4)"<<std::endl;
os<<" -b <addr> Base address (default: 0)"<<std::endl;
os<<" -c Compile only (don't link)"<<std::endl;
os<<" -f <fmt> Output file format (see below)"<<std::endl;
os<<" -h, --help Display a short help message"<<std::endl;
os<<" -i <dir> Add directory to the list of directories used to search"<<std::endl;
os<<" for included files (multiple directories can be specified)"<<std::endl;
os<<" -m <file> Generate map file"<<std::endl;
os<<" -o <file> Output file name"<<std::endl;
os<<" -s <size> Output image size"<<std::endl;
os<<" -- Do not interpret subsequent arguments as options"<<std::endl;
os<<std::endl;
// Constraints on the numeric options
os<<"Object alignment must be a power of two and can't be less than 4."<<std::endl;
os<<"Base address must be a multiple of object alignment."<<std::endl;
os<<"Image size must be a multiple of 4."<<std::endl;
os<<std::endl;
// Values accepted by -f
os<<"Output file formats:"<<std::endl;
os<<" bin Raw binary image (default)"<<std::endl;
os<<" textio Text representation of binary data. Supported by"<<std::endl;
os<<" std.textio (VHDL) and $readmemb (Verilog)"<<std::endl;
os<<" dec Text format, one word per line (decimal)"<<std::endl;
os<<" hex Text format, one word per line (hexadecimal)"<<std::endl;
}
 
/*
 * Tells whether the given file starts with the "LinkableObject" signature.
 * Returns false when the file cannot be opened, is not seekable or is
 * shorter than the signature.
 */
static bool isLinkableObject(const std::string &filename) {
	static const char signature[]="LinkableObject";
	const std::size_t signatureSize=sizeof(signature)-1; // without the terminating zero
	std::ifstream file(filename,std::ios_base::in);
	if(!file) return false;
	// tellg() yields -1 when the stream is not seekable
	if(file.tellg()==static_cast<std::ifstream::pos_type>(-1)) return false;
	std::vector<char> header(signatureSize);
	file.read(header.data(),signatureSize);
	const bool complete=(static_cast<std::size_t>(file.gcount())==signatureSize);
	return complete&&std::memcmp(header.data(),signature,signatureSize)==0;
}
 
int main(int argc,char *argv[]) try {
std::vector<std::string> inputFiles;
Options options;
bool alignmentSpecified=false;
bool baseSpecified=false;
bool formatSpecified=false;
bool noMoreOptions=false;
std::cout<<"LXP32 Platform Assembler and Linker"<<std::endl;
std::cout<<"Copyright (c) 2016-2019 by Alex I. Kuznetsov"<<std::endl;
if(argc<=1) {
displayUsage(std::cout,argv[0]);
return 0;
}
for(int i=1;i<argc;i++) {
if(argv[i][0]!='-'||noMoreOptions) inputFiles.push_back(argv[i]);
else if(!strcmp(argv[i],"--")) noMoreOptions=true;
else if(!strcmp(argv[i],"-a")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
try {
options.align=std::stoul(argv[i],nullptr,0);
if(!Utils::isPowerOf2(options.align)) throw std::exception();
if(options.align<4) throw std::exception();
alignmentSpecified=true;
}
catch(std::exception &) {
throw std::runtime_error("Invalid object alignment");
}
}
else if(!strcmp(argv[i],"-b")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
try {
options.base=std::stoul(argv[i],nullptr,0);
baseSpecified=true;
}
catch(std::exception &) {
throw std::runtime_error("Invalid base address");
}
}
else if(!strcmp(argv[i],"-c")) {
options.compileOnly=true;
}
else if(!strcmp(argv[i],"-f")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
if(!strcmp(argv[i],"bin")) options.fmt=Options::Bin;
else if(!strcmp(argv[i],"textio")) options.fmt=Options::Textio;
else if(!strcmp(argv[i],"dec")) options.fmt=Options::Dec;
else if(!strcmp(argv[i],"hex")) options.fmt=Options::Hex;
else throw std::runtime_error("Unrecognized output format");
formatSpecified=true;
}
else if(!strcmp(argv[i],"-h")||!strcmp(argv[i],"--help")) {
displayUsage(std::cout,argv[0]);
return 0;
}
else if(!strcmp(argv[i],"-i")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.includeSearchDirs.push_back(argv[i]);
}
else if(!strcmp(argv[i],"-m")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.mapFileName=argv[i];
}
else if(!strcmp(argv[i],"-o")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.outputFileName=argv[i];
}
else if(!strcmp(argv[i],"-s")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
try {
options.imageSize=std::stoul(argv[i],nullptr,0);
if(options.imageSize%4!=0||options.imageSize==0) throw std::exception();
}
catch(std::exception &) {
throw std::runtime_error("Invalid image size");
}
}
else throw std::runtime_error(std::string("Unrecognized option: \"")+argv[i]+"\"");
}
if(options.base%options.align!=0)
throw std::runtime_error("Base address must be a multiple of object alignment");
if(options.compileOnly) {
if(alignmentSpecified)
std::cerr<<"Warning: Object alignment is ignored in compile-only mode"<<std::endl;
if(baseSpecified)
std::cerr<<"Warning: Base address is ignored in compile-only mode"<<std::endl;
if(formatSpecified)
std::cerr<<"Warning: Output format is ignored in compile-only mode"<<std::endl;
if(options.imageSize>0)
std::cerr<<"Warning: Image size is ignored in compile-only mode"<<std::endl;
if(!options.mapFileName.empty())
std::cerr<<"Warning: Map file is not generated in compile-only mode"<<std::endl;
}
if(inputFiles.empty())
throw std::runtime_error("No input files were specified");
if(options.compileOnly&&inputFiles.size()>1&&!options.outputFileName.empty())
throw std::runtime_error("Output file name cannot be specified "
"for multiple files in compile-only mode");
std::vector<Assembler> assemblers;
std::vector<LinkableObject> rawObjects;
for(auto const &filename: inputFiles) {
if(options.compileOnly||!isLinkableObject(filename)) {
Assembler as;
for(auto const &dir: options.includeSearchDirs) as.addIncludeSearchDir(dir);
try {
as.processFile(filename);
}
catch(std::exception &ex) {
std::cerr<<"Assembler error in "<<as.currentFileName();
if(as.line()>0) std::cerr<<":"<<as.line();
std::cerr<<": "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
if(!options.compileOnly) assemblers.push_back(std::move(as));
else {
std::string outputFileName=options.outputFileName;
if(outputFileName.empty()) {
outputFileName=filename;
auto pos=outputFileName.find_last_of('.');
if(pos!=std::string::npos) outputFileName.erase(pos);
outputFileName+=".lo";
}
as.object().serialize(outputFileName);
}
}
else {
LinkableObject lo;
try {
lo.deserialize(filename);
}
catch(std::exception &ex) {
std::cerr<<"Error reading object file "<<filename<<": "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
rawObjects.push_back(std::move(lo));
}
}
if(options.compileOnly) return 0;
Linker linker;
for(auto &lo: rawObjects) linker.addObject(lo);
for(auto &as: assemblers) linker.addObject(as.object());
linker.setBase(options.base);
linker.setAlignment(options.align);
linker.setImageSize(options.imageSize);
std::string outputFileName=options.outputFileName;
if(outputFileName.empty()) {
outputFileName=inputFiles[0];
auto pos=outputFileName.find_last_of('.');
if(pos!=std::string::npos) outputFileName.erase(pos);
if(options.fmt==Options::Bin) outputFileName+=".bin";
else outputFileName+=".txt";
}
std::unique_ptr<OutputWriter> writer;
switch(options.fmt) {
case Options::Bin:
writer=std::unique_ptr<OutputWriter>(new BinaryOutputWriter(outputFileName));
break;
case Options::Textio:
writer=std::unique_ptr<OutputWriter>(new TextOutputWriter(outputFileName,TextOutputWriter::Bin));
break;
case Options::Dec:
writer=std::unique_ptr<OutputWriter>(new TextOutputWriter(outputFileName,TextOutputWriter::Dec));
break;
case Options::Hex:
writer=std::unique_ptr<OutputWriter>(new TextOutputWriter(outputFileName,TextOutputWriter::Hex));
break;
default:
assert(false);
}
try {
linker.link(*writer);
}
catch(std::exception &ex) {
writer->abort();
std::cerr<<"Linker error: "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
std::cout<<writer->size()/4<<" words written"<<std::endl;
if(!options.mapFileName.empty()) {
std::ofstream out(options.mapFileName);
if(!out) throw std::runtime_error("Cannot open file \""+options.mapFileName+"\" for writing");
linker.generateMap(out);
}
}
catch(std::exception &ex) {
std::cerr<<"Error: "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* Main translation unit for the LXP32 assembler/linker.
*/
 
#include "assembler.h"
#include "linker.h"
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <exception>
#include <utility>
#include <memory>
#include <cstdlib>
#include <cstring>
#include <cassert>
 
/*
 * Settings collected from the command line
 */
struct Options {
// Output formats selectable with -f
enum OutputFormat {Bin,Textio,Dec,Hex};
// -c: assemble only, do not link
bool compileOnly=false;
// -o: output file name (empty: derive it from the input file name)
std::string outputFileName;
// -m: map file name (empty: no map file)
std::string mapFileName;
// -i: directories searched for included files
std::vector<std::string> includeSearchDirs;
// -b: image base address
LinkableObject::Word base=0;
// -a: object alignment
std::size_t align=4;
// -s: output image size (zero: not specified)
std::size_t imageSize=0;
OutputFormat fmt=Bin;
};
 
/*
 * Prints the command line help to os. program is the name shown in the
 * usage line (main passes argv[0]).
 */
static void displayUsage(std::ostream &os,const char *program) {
os<<std::endl;
os<<"Usage:"<<std::endl;
os<<" "<<program<<" [ option(s) | input file(s) ]"<<std::endl<<std::endl;
os<<"Options:"<<std::endl;
os<<" -a <align> Object alignment (default: 4)"<<std::endl;
os<<" -b <addr> Base address (default: 0)"<<std::endl;
os<<" -c Compile only (don't link)"<<std::endl;
os<<" -f <fmt> Output file format (see below)"<<std::endl;
os<<" -h, --help Display a short help message"<<std::endl;
os<<" -i <dir> Add directory to the list of directories used to search"<<std::endl;
os<<" for included files (multiple directories can be specified)"<<std::endl;
os<<" -m <file> Generate map file"<<std::endl;
os<<" -o <file> Output file name"<<std::endl;
os<<" -s <size> Output image size"<<std::endl;
os<<" -- Do not interpret subsequent arguments as options"<<std::endl;
os<<std::endl;
// Constraints on the numeric options
os<<"Object alignment must be a power of two and can't be less than 4."<<std::endl;
os<<"Base address must be a multiple of object alignment."<<std::endl;
os<<"Image size must be a multiple of 4."<<std::endl;
os<<std::endl;
// Values accepted by -f
os<<"Output file formats:"<<std::endl;
os<<" bin Raw binary image (default)"<<std::endl;
os<<" textio Text representation of binary data. Supported by"<<std::endl;
os<<" std.textio (VHDL) and $readmemb (Verilog)"<<std::endl;
os<<" dec Text format, one word per line (decimal)"<<std::endl;
os<<" hex Text format, one word per line (hexadecimal)"<<std::endl;
}
 
/*
 * Tells whether the given file starts with the "LinkableObject" signature.
 * Returns false when the file cannot be opened, is not seekable or is
 * shorter than the signature.
 */
static bool isLinkableObject(const std::string &filename) {
	static const char signature[]="LinkableObject";
	const std::size_t signatureSize=sizeof(signature)-1; // without the terminating zero
	std::ifstream file(filename,std::ios_base::in);
	if(!file) return false;
	// tellg() yields -1 when the stream is not seekable
	if(file.tellg()==static_cast<std::ifstream::pos_type>(-1)) return false;
	std::vector<char> header(signatureSize);
	file.read(header.data(),signatureSize);
	const bool complete=(static_cast<std::size_t>(file.gcount())==signatureSize);
	return complete&&std::memcmp(header.data(),signature,signatureSize)==0;
}
 
int main(int argc,char *argv[]) try {
std::vector<std::string> inputFiles;
Options options;
bool alignmentSpecified=false;
bool baseSpecified=false;
bool formatSpecified=false;
bool noMoreOptions=false;
std::cout<<"LXP32 Platform Assembler and Linker"<<std::endl;
std::cout<<"Copyright (c) 2016-2019 by Alex I. Kuznetsov"<<std::endl;
if(argc<=1) {
displayUsage(std::cout,argv[0]);
return 0;
}
for(int i=1;i<argc;i++) {
if(argv[i][0]!='-'||noMoreOptions) inputFiles.push_back(argv[i]);
else if(!strcmp(argv[i],"--")) noMoreOptions=true;
else if(!strcmp(argv[i],"-a")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
try {
options.align=std::stoul(argv[i],nullptr,0);
if(!Utils::isPowerOf2(options.align)) throw std::exception();
if(options.align<4) throw std::exception();
alignmentSpecified=true;
}
catch(std::exception &) {
throw std::runtime_error("Invalid object alignment");
}
}
else if(!strcmp(argv[i],"-b")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
try {
options.base=std::stoul(argv[i],nullptr,0);
baseSpecified=true;
}
catch(std::exception &) {
throw std::runtime_error("Invalid base address");
}
}
else if(!strcmp(argv[i],"-c")) {
options.compileOnly=true;
}
else if(!strcmp(argv[i],"-f")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
if(!strcmp(argv[i],"bin")) options.fmt=Options::Bin;
else if(!strcmp(argv[i],"textio")) options.fmt=Options::Textio;
else if(!strcmp(argv[i],"dec")) options.fmt=Options::Dec;
else if(!strcmp(argv[i],"hex")) options.fmt=Options::Hex;
else throw std::runtime_error("Unrecognized output format");
formatSpecified=true;
}
else if(!strcmp(argv[i],"-h")||!strcmp(argv[i],"--help")) {
displayUsage(std::cout,argv[0]);
return 0;
}
else if(!strcmp(argv[i],"-i")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.includeSearchDirs.push_back(argv[i]);
}
else if(!strcmp(argv[i],"-m")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.mapFileName=argv[i];
}
else if(!strcmp(argv[i],"-o")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
options.outputFileName=argv[i];
}
else if(!strcmp(argv[i],"-s")) {
if(++i==argc) {
displayUsage(std::cerr,argv[0]);
return EXIT_FAILURE;
}
try {
options.imageSize=std::stoul(argv[i],nullptr,0);
if(options.imageSize%4!=0||options.imageSize==0) throw std::exception();
}
catch(std::exception &) {
throw std::runtime_error("Invalid image size");
}
}
else throw std::runtime_error(std::string("Unrecognized option: \"")+argv[i]+"\"");
}
if(options.base%options.align!=0)
throw std::runtime_error("Base address must be a multiple of object alignment");
if(options.compileOnly) {
if(alignmentSpecified)
std::cerr<<"Warning: Object alignment is ignored in compile-only mode"<<std::endl;
if(baseSpecified)
std::cerr<<"Warning: Base address is ignored in compile-only mode"<<std::endl;
if(formatSpecified)
std::cerr<<"Warning: Output format is ignored in compile-only mode"<<std::endl;
if(options.imageSize>0)
std::cerr<<"Warning: Image size is ignored in compile-only mode"<<std::endl;
if(!options.mapFileName.empty())
std::cerr<<"Warning: Map file is not generated in compile-only mode"<<std::endl;
}
if(inputFiles.empty())
throw std::runtime_error("No input files were specified");
if(options.compileOnly&&inputFiles.size()>1&&!options.outputFileName.empty())
throw std::runtime_error("Output file name cannot be specified "
"for multiple files in compile-only mode");
std::vector<Assembler> assemblers;
std::vector<LinkableObject> rawObjects;
for(auto const &filename: inputFiles) {
if(options.compileOnly||!isLinkableObject(filename)) {
Assembler as;
for(auto const &dir: options.includeSearchDirs) as.addIncludeSearchDir(dir);
try {
as.processFile(filename);
}
catch(std::exception &ex) {
std::cerr<<"Assembler error in "<<as.currentFileName();
if(as.line()>0) std::cerr<<":"<<as.line();
std::cerr<<": "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
if(!options.compileOnly) assemblers.push_back(std::move(as));
else {
std::string outputFileName=options.outputFileName;
if(outputFileName.empty()) {
outputFileName=filename;
auto pos=outputFileName.find_last_of('.');
if(pos!=std::string::npos) outputFileName.erase(pos);
outputFileName+=".lo";
}
as.object().serialize(outputFileName);
}
}
else {
LinkableObject lo;
try {
lo.deserialize(filename);
}
catch(std::exception &ex) {
std::cerr<<"Error reading object file "<<filename<<": "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
rawObjects.push_back(std::move(lo));
}
}
if(options.compileOnly) return 0;
Linker linker;
for(auto &lo: rawObjects) linker.addObject(lo);
for(auto &as: assemblers) linker.addObject(as.object());
linker.setBase(options.base);
linker.setAlignment(options.align);
linker.setImageSize(options.imageSize);
std::string outputFileName=options.outputFileName;
if(outputFileName.empty()) {
outputFileName=inputFiles[0];
auto pos=outputFileName.find_last_of('.');
if(pos!=std::string::npos) outputFileName.erase(pos);
if(options.fmt==Options::Bin) outputFileName+=".bin";
else outputFileName+=".txt";
}
std::unique_ptr<OutputWriter> writer;
switch(options.fmt) {
case Options::Bin:
writer=std::unique_ptr<OutputWriter>(new BinaryOutputWriter(outputFileName));
break;
case Options::Textio:
writer=std::unique_ptr<OutputWriter>(new TextOutputWriter(outputFileName,TextOutputWriter::Bin));
break;
case Options::Dec:
writer=std::unique_ptr<OutputWriter>(new TextOutputWriter(outputFileName,TextOutputWriter::Dec));
break;
case Options::Hex:
writer=std::unique_ptr<OutputWriter>(new TextOutputWriter(outputFileName,TextOutputWriter::Hex));
break;
default:
assert(false);
}
try {
linker.link(*writer);
}
catch(std::exception &ex) {
writer->abort();
std::cerr<<"Linker error: "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
std::cout<<writer->size()/4<<" words written"<<std::endl;
if(!options.mapFileName.empty()) {
std::ofstream out(options.mapFileName);
if(!out) throw std::runtime_error("Cannot open file \""+options.mapFileName+"\" for writing");
linker.generateMap(out);
}
}
catch(std::exception &ex) {
std::cerr<<"Error: "<<ex.what()<<std::endl;
return EXIT_FAILURE;
}
/outputwriter.cpp
1,109 → 1,109
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the OutputWriter class
* and its derived classes.
*/
 
#include "outputwriter.h"
#include "utils.h"
 
#include <iostream>
#include <iomanip>
#include <stdexcept>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
 
/*
* OutputWriter members
*/
 
void OutputWriter::write(const char *data,std::size_t n) {
writeData(data,n);
_size+=n;
}
 
// Writes the given number of zero bytes, at most 256 per write() call
void OutputWriter::pad(std::size_t size) {
	static char zeros[256]; // static objects are zero-initialized
	const std::size_t chunk=sizeof(zeros);
	for(std::size_t left=size;left>0;) {
		const std::size_t n=(left<chunk)?left:chunk;
		write(zeros,n);
		left-=n;
	}
}
 
std::size_t OutputWriter::size() const {
return _size;
}
 
/*
* BinaryOutputWriter members
*/
 
// Opens the file for binary output; throws std::runtime_error if the
// stream cannot be opened
BinaryOutputWriter::BinaryOutputWriter(const std::string &filename):
	_filename(filename),
	_os(filename,std::ios_base::binary|std::ios_base::out)
{
	if(_os.fail()) {
		throw std::runtime_error("Cannot open \""+filename+"\" for writing");
	}
}
 
void BinaryOutputWriter::writeData(const char *data,std::size_t n) {
_os.write(data,n);
}
 
// Discards the output: closes the stream, then deletes the file by name
void BinaryOutputWriter::abort() {
	_os.close();
	const char *path=_filename.c_str();
	std::remove(path);
}
 
/*
* TextOutputWriter members
*/
 
// Opens the file for text output in the given format; throws
// std::runtime_error if the stream cannot be opened
TextOutputWriter::TextOutputWriter(const std::string &filename,Format f):
	_filename(filename),
	_os(filename,std::ios_base::out),
	_fmt(f)
{
	if(_os.fail()) {
		throw std::runtime_error("Cannot open \""+filename+"\" for writing");
	}
}
 
// Completes a partially buffered word with zero bytes so that it is
// written out
TextOutputWriter::~TextOutputWriter() {
	if(_buf.empty()) return;
	assert(_buf.size()<4);
	const std::size_t missing=4-_buf.size();
	pad(missing);
}
 
void TextOutputWriter::writeData(const char *data,std::size_t n) {
while(n>0) {
assert(_buf.size()<4);
auto count=std::min(4-_buf.size(),n);
_buf.append(data,count);
data+=count;
n-=count;
if(_buf.size()<4) continue;
assert(_buf.size()==4);
std::uint32_t word=(static_cast<unsigned char>(_buf[3])<<24)|
(static_cast<unsigned char>(_buf[2])<<16)|
(static_cast<unsigned char>(_buf[1])<<8)|
static_cast<unsigned char>(_buf[0]);
if(_fmt==Bin) _os<<Utils::bin(word)<<std::endl;
else if(_fmt==Dec) _os<<word<<std::endl;
else _os<<Utils::hex(word)<<std::endl;
_buf.clear();
}
}
 
// Discards the output: closes the stream, then deletes the file by name
void TextOutputWriter::abort() {
	_os.close();
	const char *path=_filename.c_str();
	std::remove(path);
}
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the OutputWriter class
* and its derived classes.
*/
 
#include "outputwriter.h"
#include "utils.h"
 
#include <iostream>
#include <iomanip>
#include <stdexcept>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
 
/*
* OutputWriter members
*/
 
void OutputWriter::write(const char *data,std::size_t n) {
writeData(data,n);
_size+=n;
}
 
// Writes the given number of zero bytes, at most 256 per write() call
void OutputWriter::pad(std::size_t size) {
	static char zeros[256]; // static objects are zero-initialized
	const std::size_t chunk=sizeof(zeros);
	for(std::size_t left=size;left>0;) {
		const std::size_t n=(left<chunk)?left:chunk;
		write(zeros,n);
		left-=n;
	}
}
 
std::size_t OutputWriter::size() const {
return _size;
}
 
/*
* BinaryOutputWriter members
*/
 
// Opens the file for binary output; throws std::runtime_error if the
// stream cannot be opened
BinaryOutputWriter::BinaryOutputWriter(const std::string &filename):
	_filename(filename),
	_os(filename,std::ios_base::binary|std::ios_base::out)
{
	if(_os.fail()) {
		throw std::runtime_error("Cannot open \""+filename+"\" for writing");
	}
}
 
void BinaryOutputWriter::writeData(const char *data,std::size_t n) {
_os.write(data,n);
}
 
// Discards the output: closes the stream, then deletes the file by name
void BinaryOutputWriter::abort() {
	_os.close();
	const char *path=_filename.c_str();
	std::remove(path);
}
 
/*
* TextOutputWriter members
*/
 
// Opens the file for text output in the given format; throws
// std::runtime_error if the stream cannot be opened
TextOutputWriter::TextOutputWriter(const std::string &filename,Format f):
	_filename(filename),
	_os(filename,std::ios_base::out),
	_fmt(f)
{
	if(_os.fail()) {
		throw std::runtime_error("Cannot open \""+filename+"\" for writing");
	}
}
 
// Completes a partially buffered word with zero bytes so that it is
// written out
TextOutputWriter::~TextOutputWriter() {
	if(_buf.empty()) return;
	assert(_buf.size()<4);
	const std::size_t missing=4-_buf.size();
	pad(missing);
}
 
void TextOutputWriter::writeData(const char *data,std::size_t n) {
while(n>0) {
assert(_buf.size()<4);
auto count=std::min(4-_buf.size(),n);
_buf.append(data,count);
data+=count;
n-=count;
if(_buf.size()<4) continue;
assert(_buf.size()==4);
std::uint32_t word=(static_cast<unsigned char>(_buf[3])<<24)|
(static_cast<unsigned char>(_buf[2])<<16)|
(static_cast<unsigned char>(_buf[1])<<8)|
static_cast<unsigned char>(_buf[0]);
if(_fmt==Bin) _os<<Utils::bin(word)<<std::endl;
else if(_fmt==Dec) _os<<word<<std::endl;
else _os<<Utils::hex(word)<<std::endl;
_buf.clear();
}
}
 
// Discards the output: closes the stream, then deletes the file by name
void TextOutputWriter::abort() {
	_os.close();
	const char *path=_filename.c_str();
	std::remove(path);
}
/outputwriter.h
1,67 → 1,67
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the OutputWriter abstract class and its
* derived classes. These classes are used to write LXP32 executable
* code in different formats.
*/
 
#ifndef OUTPUTWRITER_H_INCLUDED
#define OUTPUTWRITER_H_INCLUDED
 
#include <fstream>
#include <string>
 
/*
* An abstract base class for all writers
*/
 
class OutputWriter {
	// Counter reported by size()
	std::size_t _size=0;
public:
	virtual ~OutputWriter() = default;
	// Entry point for emitting "n" bytes starting at "data"
	virtual void write(const char *data,std::size_t n);
	// Does nothing by default; derived writers may override it
	virtual void abort() {}
	// Emits "size" zero bytes through write()
	void pad(std::size_t size);
	// Returns the _size counter
	std::size_t size() const;
protected:
	// Format-specific output routine, supplied by every concrete writer
	virtual void writeData(const char *data,std::size_t n)=0;
};
 
/*
* Write a regular binary file
*/
 
class BinaryOutputWriter : public OutputWriter {
std::string _filename;
std::ofstream _os;
public:
BinaryOutputWriter(const std::string &filename);
virtual void abort() override;
protected:
virtual void writeData(const char *data,std::size_t n) override;
};
 
/*
* Write a text file (one word per line)
*/
 
class TextOutputWriter : public OutputWriter {
public:
enum Format {Bin,Dec,Hex};
private:
std::string _filename;
std::ofstream _os;
std::string _buf;
Format _fmt;
public:
TextOutputWriter(const std::string &filename,Format f);
~TextOutputWriter();
virtual void abort() override;
protected:
virtual void writeData(const char *data,std::size_t n) override;
};
 
#endif
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module defines the OutputWriter abstract class and its
* derived classes. These classes are used to write LXP32 executable
* code in different formats.
*/
 
#ifndef OUTPUTWRITER_H_INCLUDED
#define OUTPUTWRITER_H_INCLUDED
 
#include <fstream>
#include <string>
 
/*
* An abstract base class for all writers
*/
 
class OutputWriter {
	// Counter reported by size()
	std::size_t _size=0;
public:
	virtual ~OutputWriter() = default;
	// Entry point for emitting "n" bytes starting at "data"
	virtual void write(const char *data,std::size_t n);
	// Does nothing by default; derived writers may override it
	virtual void abort() {}
	// Emits "size" zero bytes through write()
	void pad(std::size_t size);
	// Returns the _size counter
	std::size_t size() const;
protected:
	// Format-specific output routine, supplied by every concrete writer
	virtual void writeData(const char *data,std::size_t n)=0;
};
 
/*
* Write a regular binary file
*/
 
class BinaryOutputWriter : public OutputWriter {
std::string _filename;
std::ofstream _os;
public:
BinaryOutputWriter(const std::string &filename);
virtual void abort() override;
protected:
virtual void writeData(const char *data,std::size_t n) override;
};
 
/*
* Write a text file (one word per line)
*/
 
class TextOutputWriter : public OutputWriter {
public:
enum Format {Bin,Dec,Hex};
private:
std::string _filename;
std::ofstream _os;
std::string _buf;
Format _fmt;
public:
TextOutputWriter(const std::string &filename,Format f);
~TextOutputWriter();
virtual void abort() override;
protected:
virtual void writeData(const char *data,std::size_t n) override;
};
 
#endif
/utils.cpp
1,113 → 1,113
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the Utils namespace.
*/
 
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdexcept>
#include <cstring>
 
/*
 * Returns a copy of "str" in which every character other than
 * A-Z, a-z, 0-9, '-', '_', '.' and '~' is replaced by '%' followed
 * by the two-digit upper-case hexadecimal code produced by hex().
 */
std::string Utils::urlEncode(const std::string &str) {
	std::string encoded;
	for(const char c: str) {
		const bool upper=(c>='A'&&c<='Z');
		const bool lower=(c>='a'&&c<='z');
		const bool digit=(c>='0'&&c<='9');
		const bool mark=(c=='-'||c=='_'||c=='.'||c=='~');
		if(upper||lower||digit||mark) {
			encoded.push_back(c);
		}
		else {
			encoded+="%";
			encoded+=hex(c);
		}
	}
	return encoded;
}
 
/*
 * Reverses percent-encoding: every "%XY" sequence, where X and Y are
 * hexadecimal digits, is replaced by the character with code 0xXY;
 * all other characters are copied unchanged.
 *
 * Throws std::runtime_error if a '%' is not followed by exactly two
 * hexadecimal digits.
 */
std::string Utils::urlDecode(const std::string &str) {
	auto isHex=[](char c) {
		return (c>='0'&&c<='9')||(c>='A'&&c<='F')||(c>='a'&&c<='f');
	};
	std::string res;
	for(std::size_t i=0;i<str.size();i++) {
		char ch=str[i];
		if(ch!='%') {
			res.push_back(ch);
			continue;
		}
		auto hexcode=str.substr(i+1,2);
		// Both characters are validated explicitly: std::stoul() on its own
		// would accept leading whitespace, a sign or a "0x" prefix and would
		// silently stop at the first non-digit.
		if(hexcode.size()!=2||!isHex(hexcode[0])||!isHex(hexcode[1])) {
			throw std::runtime_error("Ill-formed URL-encoded string");
		}
		auto u=static_cast<unsigned char>(std::stoul(hexcode,nullptr,16));
		res.push_back(static_cast<char>(u));
		// Skip the two digits that have just been consumed
		i+=2;
	}
	return res;
}
 
/*
 * Returns a copy of "path". When _WIN32 is defined, every backslash
 * in the copy is replaced by a forward slash; otherwise the copy is
 * returned unchanged.
 */
std::string Utils::normalizeSeparators(const std::string &path) {
#ifdef _WIN32
	std::string result;
	result.reserve(path.size());
	for(const char c: path) result.push_back(c=='\\'?'/':c);
	return result;
#else
	return path;
#endif
}
 
/*
 * Returns a copy of "path". When _WIN32 is defined, every forward
 * slash in the copy is replaced by a backslash; otherwise the copy
 * is returned unchanged.
 */
std::string Utils::nativeSeparators(const std::string &path) {
#ifdef _WIN32
	std::string result;
	result.reserve(path.size());
	for(const char c: path) result.push_back(c=='/'?'\\':c);
	return result;
#else
	return path;
#endif
}
 
bool Utils::isAbsolutePath(const std::string &path) {
auto native=nativeSeparators(path);
if(native.empty()) return false;
if(native[0]=='/') return true;
#ifdef _WIN32
if(native.size()>1&&native[1]==':') return true;
#endif
return false;
}
 
/*
 * Returns true if "path" (converted with nativeSeparators()) can be
 * opened for reading, false otherwise.
 */
bool Utils::fileExists(const std::string &path) {
	const std::string nativePath=nativeSeparators(path);
	std::ifstream probe(nativePath,std::ios_base::in);
	return !probe.fail();
}
 
/*
 * Resolves "to" against the directory part of "from".
 *
 * An empty "to" gives an empty string. An absolute "to", or a "from"
 * without any directory separator, gives "to" itself. Otherwise "to"
 * is appended to everything in "from" up to and including its last
 * separator. Non-empty results are passed through nativeSeparators().
 */
std::string Utils::relativePath(const std::string &from,const std::string &to) {
	const std::string target=normalizeSeparators(to);
	if(target.empty()) return std::string();
	if(isAbsolutePath(target)) return nativeSeparators(target);

	const std::string base=normalizeSeparators(from);
	const std::string::size_type slash=base.rfind('/');
	if(slash==std::string::npos) return nativeSeparators(target);
	return nativeSeparators(base.substr(0,slash+1)+target);
}
 
/*
 * Strips the enclosing double quotes from "str" and returns the text
 * between them. Throws std::runtime_error if "str" is shorter than two
 * characters or does not both start and end with a double quote.
 */
std::string Utils::dequoteString(const std::string &str) {
	const bool quoted=str.size()>=2&&str.front()=='\"'&&str.back()=='\"';
	if(!quoted) throw std::runtime_error("String literal expected");
	return std::string(str.begin()+1,str.end()-1);
}
 
/*
 * Returns true if "ch" is one of 0-9, A-F or a-f.
 */
bool Utils::ishexdigit(char ch) {
	static const char *digits="0123456789ABCDEFabcdef";
	// std::strchr() treats the terminating NUL as part of the string and
	// would therefore "find" '\0', so NUL has to be rejected explicitly.
	return (ch!='\0')&&(std::strchr(digits,ch)!=nullptr);
}
 
/*
 * Returns true if "ch" is one of 0-7.
 */
bool Utils::isoctdigit(char ch) {
	static const char *digits="01234567";
	// std::strchr() treats the terminating NUL as part of the string and
	// would therefore "find" '\0', so NUL has to be rejected explicitly.
	return (ch!='\0')&&(std::strchr(digits,ch)!=nullptr);
}
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module implements members of the Utils namespace.
*/
 
#include "utils.h"
 
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdexcept>
#include <cstring>
 
/*
 * Returns a copy of "str" in which every character other than
 * A-Z, a-z, 0-9, '-', '_', '.' and '~' is replaced by '%' followed
 * by the two-digit upper-case hexadecimal code produced by hex().
 */
std::string Utils::urlEncode(const std::string &str) {
	std::string encoded;
	for(const char c: str) {
		const bool upper=(c>='A'&&c<='Z');
		const bool lower=(c>='a'&&c<='z');
		const bool digit=(c>='0'&&c<='9');
		const bool mark=(c=='-'||c=='_'||c=='.'||c=='~');
		if(upper||lower||digit||mark) {
			encoded.push_back(c);
		}
		else {
			encoded+="%";
			encoded+=hex(c);
		}
	}
	return encoded;
}
 
/*
 * Reverses percent-encoding: every "%XY" sequence, where X and Y are
 * hexadecimal digits, is replaced by the character with code 0xXY;
 * all other characters are copied unchanged.
 *
 * Throws std::runtime_error if a '%' is not followed by exactly two
 * hexadecimal digits.
 */
std::string Utils::urlDecode(const std::string &str) {
	auto isHex=[](char c) {
		return (c>='0'&&c<='9')||(c>='A'&&c<='F')||(c>='a'&&c<='f');
	};
	std::string res;
	for(std::size_t i=0;i<str.size();i++) {
		char ch=str[i];
		if(ch!='%') {
			res.push_back(ch);
			continue;
		}
		auto hexcode=str.substr(i+1,2);
		// Both characters are validated explicitly: std::stoul() on its own
		// would accept leading whitespace, a sign or a "0x" prefix and would
		// silently stop at the first non-digit.
		if(hexcode.size()!=2||!isHex(hexcode[0])||!isHex(hexcode[1])) {
			throw std::runtime_error("Ill-formed URL-encoded string");
		}
		auto u=static_cast<unsigned char>(std::stoul(hexcode,nullptr,16));
		res.push_back(static_cast<char>(u));
		// Skip the two digits that have just been consumed
		i+=2;
	}
	return res;
}
 
/*
 * Returns a copy of "path". When _WIN32 is defined, every backslash
 * in the copy is replaced by a forward slash; otherwise the copy is
 * returned unchanged.
 */
std::string Utils::normalizeSeparators(const std::string &path) {
#ifdef _WIN32
	std::string result;
	result.reserve(path.size());
	for(const char c: path) result.push_back(c=='\\'?'/':c);
	return result;
#else
	return path;
#endif
}
 
/*
 * Returns a copy of "path". When _WIN32 is defined, every forward
 * slash in the copy is replaced by a backslash; otherwise the copy
 * is returned unchanged.
 */
std::string Utils::nativeSeparators(const std::string &path) {
#ifdef _WIN32
	std::string result;
	result.reserve(path.size());
	for(const char c: path) result.push_back(c=='/'?'\\':c);
	return result;
#else
	return path;
#endif
}
 
bool Utils::isAbsolutePath(const std::string &path) {
auto native=nativeSeparators(path);
if(native.empty()) return false;
if(native[0]=='/') return true;
#ifdef _WIN32
if(native.size()>1&&native[1]==':') return true;
#endif
return false;
}
 
/*
 * Returns true if "path" (converted with nativeSeparators()) can be
 * opened for reading, false otherwise.
 */
bool Utils::fileExists(const std::string &path) {
	const std::string nativePath=nativeSeparators(path);
	std::ifstream probe(nativePath,std::ios_base::in);
	return !probe.fail();
}
 
/*
 * Resolves "to" against the directory part of "from".
 *
 * An empty "to" gives an empty string. An absolute "to", or a "from"
 * without any directory separator, gives "to" itself. Otherwise "to"
 * is appended to everything in "from" up to and including its last
 * separator. Non-empty results are passed through nativeSeparators().
 */
std::string Utils::relativePath(const std::string &from,const std::string &to) {
	const std::string target=normalizeSeparators(to);
	if(target.empty()) return std::string();
	if(isAbsolutePath(target)) return nativeSeparators(target);

	const std::string base=normalizeSeparators(from);
	const std::string::size_type slash=base.rfind('/');
	if(slash==std::string::npos) return nativeSeparators(target);
	return nativeSeparators(base.substr(0,slash+1)+target);
}
 
/*
 * Strips the enclosing double quotes from "str" and returns the text
 * between them. Throws std::runtime_error if "str" is shorter than two
 * characters or does not both start and end with a double quote.
 */
std::string Utils::dequoteString(const std::string &str) {
	const bool quoted=str.size()>=2&&str.front()=='\"'&&str.back()=='\"';
	if(!quoted) throw std::runtime_error("String literal expected");
	return std::string(str.begin()+1,str.end()-1);
}
 
/*
 * Returns true if "ch" is one of 0-9, A-F or a-f.
 */
bool Utils::ishexdigit(char ch) {
	static const char *digits="0123456789ABCDEFabcdef";
	// std::strchr() treats the terminating NUL as part of the string and
	// would therefore "find" '\0', so NUL has to be rejected explicitly.
	return (ch!='\0')&&(std::strchr(digits,ch)!=nullptr);
}
 
/*
 * Returns true if "ch" is one of 0-7.
 */
bool Utils::isoctdigit(char ch) {
	static const char *digits="01234567";
	// std::strchr() treats the terminating NUL as part of the string and
	// would therefore "find" '\0', so NUL has to be rejected explicitly.
	return (ch!='\0')&&(std::strchr(digits,ch)!=nullptr);
}
/utils.h
1,62 → 1,62
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module declares the members of the Utils namespace.
*/
 
#ifndef UTILS_H_INCLUDED
#define UTILS_H_INCLUDED
 
#include <string>
#include <type_traits>
 
namespace Utils {
	/*
	 * Formats "w" as upper-case hexadecimal, most significant nibble
	 * first. The result always has sizeof(T)*2 characters.
	 */
	template <typename T> std::string hex(const T &w) {
		static_assert(std::is_integral<T>::value,"Argument must be of integral type");
		const char *hexstr="0123456789ABCDEF";
		std::string res;
		res.reserve(sizeof(T)*2);
		for(int i=sizeof(T)*8-4;i>=0;i-=4) {
			res.push_back(hexstr[(w>>i)&0x0F]);
		}
		return res;
	}

	/*
	 * Formats "w" as a string of '0' and '1', most significant bit
	 * first. The result always has sizeof(T)*8 characters.
	 */
	template <typename T> std::string bin(const T &w) {
		static_assert(std::is_integral<T>::value,"Argument must be of integral type");
		std::string res;
		res.reserve(sizeof(T)*8);
		for(int i=sizeof(T)*8-1;i>=0;i--) {
			if(((w>>i)&1)!=0) res.push_back('1');
			else res.push_back('0');
		}
		return res;
	}

	std::string urlEncode(const std::string &str);
	std::string urlDecode(const std::string &str);
	std::string normalizeSeparators(const std::string &path);
	std::string nativeSeparators(const std::string &path);
	bool isAbsolutePath(const std::string &path);
	bool fileExists(const std::string &path);
	std::string relativePath(const std::string &from,const std::string &to);
	std::string dequoteString(const std::string &str);
	bool ishexdigit(char ch);
	bool isoctdigit(char ch);

	/*
	 * Returns true if "x" is a positive power of two (exactly one bit
	 * set). Zero and negative values yield false.
	 */
	template <typename T> bool isPowerOf2(const T &x) {
		static_assert(std::is_integral<T>::value,"Argument must be of integral type");
		// Requiring x>0 keeps x-1 from overflowing for the most negative
		// value of a signed type and rejects that value as well.
		return (x>0)&&((x&(x-1))==0);
	}
}
 
#endif
/*
* Copyright (c) 2016 by Alex I. Kuznetsov.
*
* Part of the LXP32 CPU IP core.
*
* This module declares the members of the Utils namespace.
*/
 
#ifndef UTILS_H_INCLUDED
#define UTILS_H_INCLUDED
 
#include <string>
#include <type_traits>
 
namespace Utils {
	/*
	 * Formats "w" as upper-case hexadecimal, most significant nibble
	 * first. The result always has sizeof(T)*2 characters.
	 */
	template <typename T> std::string hex(const T &w) {
		static_assert(std::is_integral<T>::value,"Argument must be of integral type");
		const char *hexstr="0123456789ABCDEF";
		std::string res;
		res.reserve(sizeof(T)*2);
		for(int i=sizeof(T)*8-4;i>=0;i-=4) {
			res.push_back(hexstr[(w>>i)&0x0F]);
		}
		return res;
	}

	/*
	 * Formats "w" as a string of '0' and '1', most significant bit
	 * first. The result always has sizeof(T)*8 characters.
	 */
	template <typename T> std::string bin(const T &w) {
		static_assert(std::is_integral<T>::value,"Argument must be of integral type");
		std::string res;
		res.reserve(sizeof(T)*8);
		for(int i=sizeof(T)*8-1;i>=0;i--) {
			if(((w>>i)&1)!=0) res.push_back('1');
			else res.push_back('0');
		}
		return res;
	}

	std::string urlEncode(const std::string &str);
	std::string urlDecode(const std::string &str);
	std::string normalizeSeparators(const std::string &path);
	std::string nativeSeparators(const std::string &path);
	bool isAbsolutePath(const std::string &path);
	bool fileExists(const std::string &path);
	std::string relativePath(const std::string &from,const std::string &to);
	std::string dequoteString(const std::string &str);
	bool ishexdigit(char ch);
	bool isoctdigit(char ch);

	/*
	 * Returns true if "x" is a positive power of two (exactly one bit
	 * set). Zero and negative values yield false.
	 */
	template <typename T> bool isPowerOf2(const T &x) {
		static_assert(std::is_integral<T>::value,"Argument must be of integral type");
		// Requiring x>0 keeps x-1 from overflowing for the most negative
		// value of a signed type and rejects that value as well.
		return (x>0)&&((x&(x-1))==0);
	}
}
 
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.