OpenCores
URL https://opencores.org/ocsvn/copyblaze/copyblaze/trunk

Subversion Repositories copyblaze

[/] [copyblaze/] [trunk/] [copyblaze/] [sw/] [tools/] [asm/] [pBlazASM/] [pBlazASM/] [pbLexer.c] - Diff between revs 2 and 46

Only display areas with differences | Details | Blame | View Log

Rev 2 Rev 46
/*
 *  Copyright © 2003..2010 : Henk van Kampen <henk@mediatronix.com>
 *
 *      This file is part of pBlazASM.
 *
 *  pBlazASM is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  pBlazASM is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with pBlazASM.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 
#include <ctype.h>
#include <ctype.h>
#include <string.h>
#include <string.h>
#include <stdint.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdlib.h>
 
 
#include "pbTypes.h"
#include "pbTypes.h"
#include "pbErrors.h"
#include "pbErrors.h"
 
 
// lexer states
// One enumerator per state of the state machine in lex(). The order is kept
// exactly as in the original so the numeric values do not change.
typedef enum {
        lsBin,          // collecting binary digits (after '%' or "0b")
        lsChar,         // inside a 'c' character literal
        lsComment,      // after ';', runs to the end of the line
        lsDec,          // collecting decimal digits
        lsCopy,         // token complete: store its text and move to the next slot
        lsError,        // input could not be tokenised
        lsHex,          // collecting hexadecimal digits (after '$' or "0x")
        lsHexBin,       // saw a leading '0': decide between "0x", "0b" and decimal
        lsIdent,        // collecting an identifier
        lsIdle,         // between tokens, looking at the first character of the next one
        lsInit,         // reset the current token slot before scanning
        lsOperator,     // single-character operator
        lsDoubleOp,     // two-character operator "<<" or ">>"
        lsPunct,        // punctuation ':' ',' '(' ')'
        lsIndex,        // '.'-prefixed identifier
        lsString        // inside a "string" literal
} LexState ;
 
 
// global token list
// 'tokens' holds the tokens of the line most recently passed to lex();
// the list is ended by an entry whose 'text' is NULL.
static symbol_t tokens[ 256 ] ;
// pointer to current token, index in 'tokens[]'; NULL until lex() or
// tok_first() has been called
static symbol_t * ptok = NULL ;
 
 
symbol_t * tok_first( void ) {
symbol_t * tok_first( void ) {
        ptok = tokens ;
        ptok = tokens ;
        return ptok ;
        return ptok ;
}
}
 
 
symbol_t * tok_current( void ) {
symbol_t * tok_current( void ) {
        return ptok ;
        return ptok ;
}
}
 
 
symbol_t * tok_next( void ) {
symbol_t * tok_next( void ) {
        if ( ptok < &tokens[ 256 ] )
        if ( ptok < &tokens[ 256 ] )
                return ptok++ ;
                return ptok++ ;
        else {
        else {
                ptok->type = tNONE ;
                ptok->type = tNONE ;
                return ptok ;
                return ptok ;
        }
        }
}
}
 
 
// Move the token cursor to 'back'.
// The pointer is stored as given; no check is made that it points into
// 'tokens'.
void tok_back( symbol_t * back ) {
        ptok = back ;
}
 
 
void tok_free( void ) {
void tok_free( void ) {
        for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {
        for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {
                free( ptok->text ) ;
                free( ptok->text ) ;
 
 
                ptok->type = tNONE ;
                ptok->type = tNONE ;
                ptok->subtype = stNONE ;
                ptok->subtype = stNONE ;
                ptok->text = NULL ;
                ptok->text = NULL ;
                ptok->value = 0 ;
                ptok->value = 0 ;
        }
        }
}
}
 
 
// state machine based lexer
// state machine based lexer
// tokens are recorded in 'tokens', ended by a NONE token
// tokens are recorded in 'tokens', ended by a NONE token
bool lex( char * line, const bool mode ) {
bool lex( char * line, const bool mode ) {
        char * start = NULL, *end = NULL, *s = line ;
        char * start = NULL, *end = NULL, *s = line ;
        char term[ 256 ], *pterm = NULL ;
        char term[ 256 ], *pterm = NULL ;
        LexState state = lsInit ;
        LexState state = lsInit ;
 
 
        // state machine
        // state machine
        for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {
        for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {
                switch ( state ) {
                switch ( state ) {
                case lsInit :
                case lsInit :
                        ptok->type = tNONE ;
                        ptok->type = tNONE ;
                        ptok->subtype = stNONE ;
                        ptok->subtype = stNONE ;
                        ptok->value = 0 ;
                        ptok->value = 0 ;
                        ptok->text = NULL ;
                        ptok->text = NULL ;
 
 
                        pterm = term ;
                        pterm = term ;
                        *pterm = '\0' ;
                        *pterm = '\0' ;
                        state = lsIdle ;
                        state = lsIdle ;
                        break ;
                        break ;
 
 
                case lsIdle :
                case lsIdle :
                        // starting characters of tokens to be
                        // starting characters of tokens to be
                        if ( *s == '\0' || *s == '\r' || *s == '\n' ) {
                        if ( *s == '\0' || *s == '\r' || *s == '\n' ) {
                                // end of line
                                // end of line
                                return true ;
                                return true ;
                        } else if ( *s == ' ' || iscntrl( *s ) ) {
                        } else if ( *s == ' ' || iscntrl( *s ) ) {
                                // white space, 'space' and all control characters, except \0, \r and \n
                                // white space, 'space' and all control characters, except \0, \r and \n
                                s++ ;
                                s++ ;
                        } else if ( mode && ( isalnum( *s ) ) ) {
                        } else if ( mode && ( isalnum( *s ) || *s == '_'  ) ) {
                                // KCPSM mode, all alphanum is accepted for idents, could be hex values
                                // KCPSM mode, all alphanum is accepted for idents, could be hex values
                                // ident
                                // ident
                                start = s++ ;
                                start = s++ ;
                                state = lsIdent ;
                                state = lsIdent ;
                        } else if ( !mode && ( isalpha( *s ) || *s == '_' ) ) {
                        } else if ( !mode && ( isalpha( *s ) || *s == '_' ) ) {
                                // ident
                                // ident
                                start = s++ ;
                                start = s++ ;
                                state = lsIdent ;
                                state = lsIdent ;
                        } else if ( *s == ';' ) {
                        } else if ( *s == ';' ) {
                                // comment
                                // comment
                                start = s++ ;
                                start = s++ ;
                                state = lsComment ;
                                state = lsComment ;
                        } else if ( *s == '0' ) {
                        } else if ( *s == '0' ) {
                                // maybe hex or bin
                                // maybe hex or bin
                                start = s++ ;
                                start = s++ ;
                                state = lsHexBin ;
                                state = lsHexBin ;
                        } else if ( isdigit( *s ) ) {
                        } else if ( isdigit( *s ) ) {
                                // decimal number
                                // decimal number
                                start = s++ ;
                                start = s++ ;
                                state = lsDec ;
                                state = lsDec ;
                        } else if ( *s == '$' ) {
                        } else if ( *s == '$' ) {
                                // hexadecimal number
                                // hexadecimal number
                                start = ++s ;
                                start = ++s ;
                                state = lsHex ;
                                state = lsHex ;
                        } else if ( *s == '%' ) {
                        } else if ( *s == '%' ) {
                                // binary number
                                // binary number
                                start = ++s ;
                                start = ++s ;
                                state = lsBin ;
                                state = lsBin ;
                        } else if ( *s == '.' ) {
                        } else if ( *s == '.' ) {
                                // directives, indexing, local labels, etc
                                // directives, indexing, local labels, etc
                                start = s++ ;
                                start = s++ ;
                                state = lsIndex ;
                                state = lsIndex ;
                        } else if ( *s == ':' || *s == ',' || *s == '(' || *s == ')' ) {
                        } else if ( *s == ':' || *s == ',' || *s == '(' || *s == ')' ) {
                                // punctuation ',', ':', '(', ')', '~'
                                // punctuation ',', ':', '(', ')', '~'
                                start = s++ ;
                                start = s++ ;
                                state = lsPunct ;
                                state = lsPunct ;
                        } else if ( *s == '*' || *s == '/' || *s == '#' || *s == '+' || *s == '-' ||
                        } else if ( *s == '*' || *s == '/' || *s == '#' || *s == '+' || *s == '-' ||
                                        *s == '|' || *s == '&' || *s == '^' || *s == '~' ) {
                                        *s == '|' || *s == '&' || *s == '^' || *s == '~' ) {
                                // operators
                                // operators
                                start = s++ ;
                                start = s++ ;
                                state = lsOperator ;
                                state = lsOperator ;
                        } else if ( *s == '<' || *s == '>' ) {
                        } else if ( *s == '<' || *s == '>' ) {
                                // double char operators
                                // double char operators
                                start = s++ ;
                                start = s++ ;
                                state = lsDoubleOp ;
                                state = lsDoubleOp ;
                        } else if ( *s == '\'' ) {
                        } else if ( *s == '\'' ) {
                                // 'c'
                                // 'c'
                                start = ++s ;
                                start = ++s ;
                                state = lsChar ;
                                state = lsChar ;
                        } else if ( *s == '"' ) {
                        } else if ( *s == '"' ) {
                                // "string"
                                // "string"
                                start = ++s ;
                                start = ++s ;
                                state = lsString ;
                                state = lsString ;
                        } else
                        } else
                                state = lsError ;
                                state = lsError ;
                        break ;
                        break ;
 
 
                case lsComment :
                case lsComment :
                        if ( *s != '\0' && *s != '\r' && *s != '\n' )
                        if ( *s != '\0' && *s != '\r' && *s != '\n' )
                                // anything till end of line
                                // anything till end of line
                                s++ ;
                                s++ ;
                        else {
                        else {
                                end = s ;
                                end = s ;
                                ptok->type = tNONE ;
                                ptok->type = tNONE ;
                                ptok->subtype = stCOMMENT ;
                                ptok->subtype = stCOMMENT ;
                                state = lsCopy ;
                                state = lsCopy ;
                        }
                        }
                        break ;
                        break ;
 
 
                case lsChar :
                case lsChar :
                        if ( *s == '\'' ) {
                        if ( *s == '\'' ) {
                                ptok->type = tCHAR ;
                                ptok->type = tCHAR ;
                                end = s++ ;
                                end = s++ ;
                                state = lsCopy ;
                                state = lsCopy ;
                        } else if ( *s == '\\' ) {
                        } else if ( *s == '\\' ) {
                                s += 1 ;
                                s += 1 ;
                                if ( *s != '\0' )
                                if ( *s != '\0' )
                                        s += 1 ;
                                        s += 1 ;
                        } else if ( isgraph( *s ) || *s == ' ' ) {
                        } else if ( isgraph( *s ) || *s == ' ' ) {
                                s++ ;
                                s++ ;
                        } else
                        } else
                                state = lsError ;
                                state = lsError ;
                        break ;
                        break ;
 
 
                case lsString :
                case lsString :
                        if ( *s == '"' ) {
                        if ( *s == '"' ) {
                                ptok->type = tSTRING ;
                                ptok->type = tSTRING ;
                                end = s++ ;
                                end = s++ ;
                                state = lsCopy ;
                                state = lsCopy ;
                        } else if ( *s == '\\' ) {
                        } else if ( *s == '\\' ) {
                                s += 1 ;
                                s += 1 ;
                                if ( *s != '\0' )
                                if ( *s != '\0' )
                                        s += 1 ;
                                        s += 1 ;
                        } else if ( isgraph( *s ) || *s == ' ' )
                        } else if ( isgraph( *s ) || *s == ' ' )
                                s++ ;
                                s++ ;
                        else
                        else
                                state = lsError ;
                                state = lsError ;
                        break ;
                        break ;
 
 
                case lsIdent :
                case lsIdent :
                        if ( isalnum( *s ) || *s == '_' )
                        if ( isalnum( *s ) || *s == '_' )
                                s++ ;
                                s++ ;
                        else {
                        else {
                                end = s ;
                                end = s ;
                                ptok->type = tIDENT ;
                                ptok->type = tIDENT ;
                                ptok->subtype = stNONE ;
                                ptok->subtype = stNONE ;
                                state = lsCopy ;
                                state = lsCopy ;
                        }
                        }
                        break ;
                        break ;
 
 
                case lsHexBin :
                case lsHexBin :
                        if ( *s == 'x' ) {
                        if ( *s == 'x' ) {
                                start = ++s ;
                                start = ++s ;
                                state = lsHex ;
                                state = lsHex ;
                        } else if ( *s == 'b' ) {
                        } else if ( *s == 'b' ) {
                                start = ++s ;
                                start = ++s ;
                                state = lsBin ;
                                state = lsBin ;
                        } else
                        } else
                                // missing the first '0' doesn't hurt here
                                // missing the first '0' doesn't hurt here
                                state = lsDec ;
                                state = lsDec ;
                        break ;
                        break ;
 
 
                case lsHex :
                case lsHex :
                        if ( isxdigit( *s ) )
                        if ( isxdigit( *s ) )
                                s++ ;
                                s++ ;
                        else {
                        else {
                                end = s ;
                                end = s ;
                                ptok->type = tHEX ;
                                ptok->type = tHEX ;
                                state = lsCopy ;
                                state = lsCopy ;
                        }
                        }
                        break ;
                        break ;
 
 
                case lsBin :
                case lsBin :
                        if ( *s == '0' || *s == '1' )
                        if ( *s == '0' || *s == '1' )
                                s++ ;
                                s++ ;
                        else {
                        else {
                                end = s ;
                                end = s ;
                                ptok->type = tBIN ;
                                ptok->type = tBIN ;
                                state = lsCopy ;
                                state = lsCopy ;
                        }
                        }
                        break ;
                        break ;
 
 
                case lsDec :
                case lsDec :
                        if ( isdigit( *s ) )
                        if ( isdigit( *s ) )
                                s++ ;
                                s++ ;
                        else {
                        else {
                                end = s ;
                                end = s ;
                                ptok->type = tDEC ;
                                ptok->type = tDEC ;
                                state = lsCopy ;
                                state = lsCopy ;
                        }
                        }
                        break ;
                        break ;
 
 
                case lsOperator :
                case lsOperator :
                        ptok->type = tOPERATOR ;
                        ptok->type = tOPERATOR ;
                        switch ( *start ) {
                        switch ( *start ) {
                        case '*' :
                        case '*' :
                                ptok->subtype = stMUL ;
                                ptok->subtype = stMUL ;
                                break ;
                                break ;
                        case '/' :
                        case '/' :
                                ptok->subtype = stDIV ;
                                ptok->subtype = stDIV ;
                                break ;
                                break ;
                        case '#' :
                        case '#' :
                                ptok->subtype = stMOD ;
                                ptok->subtype = stMOD ;
                                break ;
                                break ;
                        case '+' :
                        case '+' :
                                ptok->subtype = stADD ;
                                ptok->subtype = stADD ;
                                break ;
                                break ;
                        case '-' :
                        case '-' :
                                ptok->subtype = stSUB ;
                                ptok->subtype = stSUB ;
                                break ;
                                break ;
                        case '|' :
                        case '|' :
                                ptok->subtype = stIOR ;
                                ptok->subtype = stIOR ;
                                break ;
                                break ;
                        case '&' :
                        case '&' :
                                ptok->subtype = stAND ;
                                ptok->subtype = stAND ;
                                break ;
                                break ;
                        case '^' :
                        case '^' :
                                ptok->subtype = stXOR ;
                                ptok->subtype = stXOR ;
                                break ;
                                break ;
                        case '~' :
                        case '~' :
                                ptok->subtype = stTILDA ;
                                ptok->subtype = stTILDA ;
                                break ;
                                break ;
                        }
                        }
                        end = s ;
                        end = s ;
                        state = lsCopy ;
                        state = lsCopy ;
                        break ;
                        break ;
 
 
                case lsDoubleOp :
                case lsDoubleOp :
                        if ( *start == *s ) { // << or >>
                        if ( *start == *s ) { // << or >>
                                ptok->type = tOPERATOR ;
                                ptok->type = tOPERATOR ;
                                switch ( *start ) {
                                switch ( *start ) {
                                case '<' :
                                case '<' :
                                        ptok->subtype = stSHL ;
                                        ptok->subtype = stSHL ;
                                        break ;
                                        break ;
                                case '>' :
                                case '>' :
                                        ptok->subtype = stSHR ;
                                        ptok->subtype = stSHR ;
                                        break ;
                                        break ;
                                }
                                }
                                end = ++s ;
                                end = ++s ;
                                state = lsCopy ;
                                state = lsCopy ;
                        } else
                        } else
                                state = lsError ;
                                state = lsError ;
                        break ;
                        break ;
 
 
                case lsPunct :
                case lsPunct :
                        end = s ;
                        end = s ;
                        state = lsCopy ;
                        state = lsCopy ;
                        switch ( *start ) {
                        switch ( *start ) {
                        case ':' :
                        case ':' :
                                ptok->type = tCOLON ;
                                ptok->type = tCOLON ;
                                break ;
                                break ;
                        case '(' :
                        case '(' :
                                ptok->type = tLPAREN ;
                                ptok->type = tLPAREN ;
                                break ;
                                break ;
                        case ')' :
                        case ')' :
                                ptok->type = tRPAREN ;
                                ptok->type = tRPAREN ;
                                break ;
                                break ;
                        case ',' :
                        case ',' :
                                ptok->type = tCOMMA ;
                                ptok->type = tCOMMA ;
                                break ;
                                break ;
                        default :
                        default :
                                state = lsError ;
                                state = lsError ;
                        }
                        }
                        break ;
                        break ;
 
 
                case lsIndex :
                case lsIndex :
                        // any of .IX, .IX++, .--IX, .-IX+
                        // any of .IX, .IX++, .--IX, .-IX+
                        if ( isalnum( *s ) || *s == '-' || *s == '+' )
                        if ( isalnum( *s ) || *s == '-' || *s == '+' )
                                s++ ;
                                s++ ;
                        else {
                        else {
                                end = s ;
                                end = s ;
                                ptok->type = tIDENT ;
                                ptok->type = tIDENT ;
                                ptok->subtype = stDOT ;
                                ptok->subtype = stDOT ;
                                state = lsCopy ;
                                state = lsCopy ;
                        }
                        }
                        break ;
                        break ;
 
 
                        // final token collector
                        // final token collector
                case lsCopy :
                case lsCopy :
                        while ( start < end )
                        while ( start < end )
                                *pterm++ = *start++ ;
                                *pterm++ = *start++ ;
                        *pterm = '\0' ;
                        *pterm = '\0' ;
                        ptok->text = strdup( term ) ;
                        ptok->text = strdup( term ) ;
                        ptok++ ;
                        ptok++ ;
                        state = lsInit ;
                        state = lsInit ;
                        break ;
                        break ;
 
 
                        // any errors
                        // any errors
                case lsError :
                case lsError :
                        *pterm = '\0' ;
                        *pterm = '\0' ;
                        ptok->type = tERROR ;
                        ptok->type = tERROR ;
                        return false ;
                        return false ;
                }
                }
        }
        }
        return false ;
        return false ;
}
}
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.