OpenCores

Rev 2	Rev 46
`/*`	`/*`
`* Copyright © 2003..2010 : Henk van Kampen <henk@mediatronix.com>`	`* Copyright © 2003..2010 : Henk van Kampen <henk@mediatronix.com>`
`*`	`*`
`* This file is part of pBlazASM.`	`* This file is part of pBlazASM.`
`*`	`*`
`* pBlazASM is free software: you can redistribute it and/or modify`	`* pBlazASM is free software: you can redistribute it and/or modify`
`* it under the terms of the GNU General Public License as published by`	`* it under the terms of the GNU General Public License as published by`
`* the Free Software Foundation, either version 3 of the License, or`	`* the Free Software Foundation, either version 3 of the License, or`
`* (at your option) any later version.`	`* (at your option) any later version.`
`*`	`*`
`* pBlazASM is distributed in the hope that it will be useful,`	`* pBlazASM is distributed in the hope that it will be useful,`
`* but WITHOUT ANY WARRANTY; without even the implied warranty of`	`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`	`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
`* GNU General Public License for more details.`	`* GNU General Public License for more details.`
`*`	`*`
`* You should have received a copy of the GNU General Public License`	`* You should have received a copy of the GNU General Public License`
`* along with pBlazASM. If not, see <http://www.gnu.org/licenses/>.`	`* along with pBlazASM. If not, see <http://www.gnu.org/licenses/>.`
`*/`	`*/`

`#include <ctype.h>`	`#include <ctype.h>`
`#include <string.h>`	`#include <string.h>`
`#include <stdint.h>`	`#include <stdint.h>`
`#include <stdlib.h>`	`#include <stdlib.h>`

`#include "pbTypes.h"`	`#include "pbTypes.h"`
`#include "pbErrors.h"`	`#include "pbErrors.h"`

`// lexer states`	`// lexer states`
`typedef enum {`	`typedef enum {`
`lsBin,`	`lsBin,`
`lsChar,`	`lsChar,`
`lsComment,`	`lsComment,`
`lsDec,`	`lsDec,`
`lsCopy,`	`lsCopy,`
`lsError,`	`lsError,`
`lsHex,`	`lsHex,`
`lsHexBin,`	`lsHexBin,`
`lsIdent,`	`lsIdent,`
`lsIdle,`	`lsIdle,`
`lsInit,`	`lsInit,`
`lsOperator,`	`lsOperator,`
`lsDoubleOp,`	`lsDoubleOp,`
`lsPunct,`	`lsPunct,`
`lsIndex,`	`lsIndex,`
`lsString`	`lsString`
`} LexState ;`	`} LexState ;`

`// global token list`	`// global token list`
`static symbol_t tokens[ 256 ] ; // global token list`	`static symbol_t tokens[ 256 ] ; // global token list`
`static symbol_t * ptok = 0 ; // pointer to current token, index in 'tokens[]'`	`static symbol_t * ptok = 0 ; // pointer to current token, index in 'tokens[]'`

`symbol_t * tok_first( void ) {`	`symbol_t * tok_first( void ) {`
`ptok = tokens ;`	`ptok = tokens ;`
`return ptok ;`	`return ptok ;`
`}`	`}`

`symbol_t * tok_current( void ) {`	`symbol_t * tok_current( void ) {`
`return ptok ;`	`return ptok ;`
`}`	`}`

`symbol_t * tok_next( void ) {`	`symbol_t * tok_next( void ) {`
`if ( ptok < &tokens[ 256 ] )`	`if ( ptok < &tokens[ 256 ] )`
`return ptok++ ;`	`return ptok++ ;`
`else {`	`else {`
`ptok->type = tNONE ;`	`ptok->type = tNONE ;`
`return ptok ;`	`return ptok ;`
`}`	`}`
`}`	`}`

`void tok_back(symbol_t * back ){`	`void tok_back(symbol_t * back ){`
`ptok = back ;`	`ptok = back ;`
`}`	`}`

`void tok_free( void ) {`	`void tok_free( void ) {`
`for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {`	`for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {`
`free( ptok->text ) ;`	`free( ptok->text ) ;`

`ptok->type = tNONE ;`	`ptok->type = tNONE ;`
`ptok->subtype = stNONE ;`	`ptok->subtype = stNONE ;`
`ptok->text = NULL ;`	`ptok->text = NULL ;`
`ptok->value = 0 ;`	`ptok->value = 0 ;`
`}`	`}`
`}`	`}`

`// state machine based lexer`	`// state machine based lexer`
`// tokens are recorded in 'tokens', ended by a NONE token`	`// tokens are recorded in 'tokens', ended by a NONE token`
`bool lex( char * line, const bool mode ) {`	`bool lex( char * line, const bool mode ) {`
`char * start = NULL, *end = NULL, *s = line ;`	`char * start = NULL, *end = NULL, *s = line ;`
`char term[ 256 ], *pterm = NULL ;`	`char term[ 256 ], *pterm = NULL ;`
`LexState state = lsInit ;`	`LexState state = lsInit ;`

`// state machine`	`// state machine`
`for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {`	`for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {`
`switch ( state ) {`	`switch ( state ) {`
`case lsInit :`	`case lsInit :`
`ptok->type = tNONE ;`	`ptok->type = tNONE ;`
`ptok->subtype = stNONE ;`	`ptok->subtype = stNONE ;`
`ptok->value = 0 ;`	`ptok->value = 0 ;`
`ptok->text = NULL ;`	`ptok->text = NULL ;`

`pterm = term ;`	`pterm = term ;`
`*pterm = '\0' ;`	`*pterm = '\0' ;`
`state = lsIdle ;`	`state = lsIdle ;`
`break ;`	`break ;`

`case lsIdle :`	`case lsIdle :`
`// starting characters of tokens to be`	`// starting characters of tokens to be`
`if ( *s == '\0' \|\| *s == '\r' \|\| *s == '\n' ) {`	`if ( *s == '\0' \|\| *s == '\r' \|\| *s == '\n' ) {`
`// end of line`	`// end of line`
`return true ;`	`return true ;`
`} else if ( *s == ' ' \|\| iscntrl( *s ) ) {`	`} else if ( *s == ' ' \|\| iscntrl( *s ) ) {`
`// white space, 'space' and all control characters, except \0, \r and \n`	`// white space, 'space' and all control characters, except \0, \r and \n`
`s++ ;`	`s++ ;`
`} else if ( mode && ( isalnum( *s ) ) ) {`	`} else if ( mode && ( isalnum( *s ) \|\| *s == '_' ) ) {`
`// KCPSM mode, all alphanum is accepted for idents, could be hex values`	`// KCPSM mode, all alphanum is accepted for idents, could be hex values`
`// ident`	`// ident`
`start = s++ ;`	`start = s++ ;`
`state = lsIdent ;`	`state = lsIdent ;`
`} else if ( !mode && ( isalpha( *s ) \|\| *s == '_' ) ) {`	`} else if ( !mode && ( isalpha( *s ) \|\| *s == '_' ) ) {`
`// ident`	`// ident`
`start = s++ ;`	`start = s++ ;`
`state = lsIdent ;`	`state = lsIdent ;`
`} else if ( *s == ';' ) {`	`} else if ( *s == ';' ) {`
`// comment`	`// comment`
`start = s++ ;`	`start = s++ ;`
`state = lsComment ;`	`state = lsComment ;`
`} else if ( *s == '0' ) {`	`} else if ( *s == '0' ) {`
`// maybe hex or bin`	`// maybe hex or bin`
`start = s++ ;`	`start = s++ ;`
`state = lsHexBin ;`	`state = lsHexBin ;`
`} else if ( isdigit( *s ) ) {`	`} else if ( isdigit( *s ) ) {`
`// decimal number`	`// decimal number`
`start = s++ ;`	`start = s++ ;`
`state = lsDec ;`	`state = lsDec ;`
`} else if ( *s == '$' ) {`	`} else if ( *s == '$' ) {`
`// hexadecimal number`	`// hexadecimal number`
`start = ++s ;`	`start = ++s ;`
`state = lsHex ;`	`state = lsHex ;`
`} else if ( *s == '%' ) {`	`} else if ( *s == '%' ) {`
`// binary number`	`// binary number`
`start = ++s ;`	`start = ++s ;`
`state = lsBin ;`	`state = lsBin ;`
`} else if ( *s == '.' ) {`	`} else if ( *s == '.' ) {`
`// directives, indexing, local labels, etc`	`// directives, indexing, local labels, etc`
`start = s++ ;`	`start = s++ ;`
`state = lsIndex ;`	`state = lsIndex ;`
`} else if ( *s == ':' \|\| *s == ',' \|\| *s == '(' \|\| *s == ')' ) {`	`} else if ( *s == ':' \|\| *s == ',' \|\| *s == '(' \|\| *s == ')' ) {`
`// punctuation ',', ':', '(', ')', '~'`	`// punctuation ',', ':', '(', ')', '~'`
`start = s++ ;`	`start = s++ ;`
`state = lsPunct ;`	`state = lsPunct ;`
`} else if ( *s == '*' \|\| *s == '/' \|\| *s == '#' \|\| *s == '+' \|\| *s == '-' \|\|`	`} else if ( *s == '*' \|\| *s == '/' \|\| *s == '#' \|\| *s == '+' \|\| *s == '-' \|\|`
`*s == '\|' \|\| *s == '&' \|\| *s == '^' \|\| *s == '~' ) {`	`*s == '\|' \|\| *s == '&' \|\| *s == '^' \|\| *s == '~' ) {`
`// operators`	`// operators`
`start = s++ ;`	`start = s++ ;`
`state = lsOperator ;`	`state = lsOperator ;`
`} else if ( s == '<' \|\| s == '>' ) {`	`} else if ( s == '<' \|\| s == '>' ) {`
`// double char operators`	`// double char operators`
`start = s++ ;`	`start = s++ ;`
`state = lsDoubleOp ;`	`state = lsDoubleOp ;`
`} else if ( *s == '\'' ) {`	`} else if ( *s == '\'' ) {`
`// 'c'`	`// 'c'`
`start = ++s ;`	`start = ++s ;`
`state = lsChar ;`	`state = lsChar ;`
`} else if ( *s == '"' ) {`	`} else if ( *s == '"' ) {`
`// "string"`	`// "string"`
`start = ++s ;`	`start = ++s ;`
`state = lsString ;`	`state = lsString ;`
`} else`	`} else`
`state = lsError ;`	`state = lsError ;`
`break ;`	`break ;`

`case lsComment :`	`case lsComment :`
`if ( *s != '\0' && *s != '\r' && *s != '\n' )`	`if ( *s != '\0' && *s != '\r' && *s != '\n' )`
`// anything till end of line`	`// anything till end of line`
`s++ ;`	`s++ ;`
`else {`	`else {`
`end = s ;`	`end = s ;`
`ptok->type = tNONE ;`	`ptok->type = tNONE ;`
`ptok->subtype = stCOMMENT ;`	`ptok->subtype = stCOMMENT ;`
`state = lsCopy ;`	`state = lsCopy ;`
`}`	`}`
`break ;`	`break ;`

`case lsChar :`	`case lsChar :`
`if ( *s == '\'' ) {`	`if ( *s == '\'' ) {`
`ptok->type = tCHAR ;`	`ptok->type = tCHAR ;`
`end = s++ ;`	`end = s++ ;`
`state = lsCopy ;`	`state = lsCopy ;`
`} else if ( *s == '\\' ) {`	`} else if ( *s == '\\' ) {`
`s += 1 ;`	`s += 1 ;`
`if ( *s != '\0' )`	`if ( *s != '\0' )`
`s += 1 ;`	`s += 1 ;`
`} else if ( isgraph( *s ) \|\| *s == ' ' ) {`	`} else if ( isgraph( *s ) \|\| *s == ' ' ) {`
`s++ ;`	`s++ ;`
`} else`	`} else`
`state = lsError ;`	`state = lsError ;`
`break ;`	`break ;`

`case lsString :`	`case lsString :`
`if ( *s == '"' ) {`	`if ( *s == '"' ) {`
`ptok->type = tSTRING ;`	`ptok->type = tSTRING ;`
`end = s++ ;`	`end = s++ ;`
`state = lsCopy ;`	`state = lsCopy ;`
`} else if ( *s == '\\' ) {`	`} else if ( *s == '\\' ) {`
`s += 1 ;`	`s += 1 ;`
`if ( *s != '\0' )`	`if ( *s != '\0' )`
`s += 1 ;`	`s += 1 ;`
`} else if ( isgraph( *s ) \|\| *s == ' ' )`	`} else if ( isgraph( *s ) \|\| *s == ' ' )`
`s++ ;`	`s++ ;`
`else`	`else`
`state = lsError ;`	`state = lsError ;`
`break ;`	`break ;`

`case lsIdent :`	`case lsIdent :`
`if ( isalnum( *s ) \|\| *s == '_' )`	`if ( isalnum( *s ) \|\| *s == '_' )`
`s++ ;`	`s++ ;`
`else {`	`else {`
`end = s ;`	`end = s ;`
`ptok->type = tIDENT ;`	`ptok->type = tIDENT ;`
`ptok->subtype = stNONE ;`	`ptok->subtype = stNONE ;`
`state = lsCopy ;`	`state = lsCopy ;`
`}`	`}`
`break ;`	`break ;`

`case lsHexBin :`	`case lsHexBin :`
`if ( *s == 'x' ) {`	`if ( *s == 'x' ) {`
`start = ++s ;`	`start = ++s ;`
`state = lsHex ;`	`state = lsHex ;`
`} else if ( *s == 'b' ) {`	`} else if ( *s == 'b' ) {`
`start = ++s ;`	`start = ++s ;`
`state = lsBin ;`	`state = lsBin ;`
`} else`	`} else`
`// missing the first '0' doesn't hurt here`	`// missing the first '0' doesn't hurt here`
`state = lsDec ;`	`state = lsDec ;`
`break ;`	`break ;`

`case lsHex :`	`case lsHex :`
`if ( isxdigit( *s ) )`	`if ( isxdigit( *s ) )`
`s++ ;`	`s++ ;`
`else {`	`else {`
`end = s ;`	`end = s ;`
`ptok->type = tHEX ;`	`ptok->type = tHEX ;`
`state = lsCopy ;`	`state = lsCopy ;`
`}`	`}`
`break ;`	`break ;`

`case lsBin :`	`case lsBin :`
`if ( *s == '0' \|\| *s == '1' )`	`if ( *s == '0' \|\| *s == '1' )`
`s++ ;`	`s++ ;`
`else {`	`else {`
`end = s ;`	`end = s ;`
`ptok->type = tBIN ;`	`ptok->type = tBIN ;`
`state = lsCopy ;`	`state = lsCopy ;`
`}`	`}`
`break ;`	`break ;`

`case lsDec :`	`case lsDec :`
`if ( isdigit( *s ) )`	`if ( isdigit( *s ) )`
`s++ ;`	`s++ ;`
`else {`	`else {`
`end = s ;`	`end = s ;`
`ptok->type = tDEC ;`	`ptok->type = tDEC ;`
`state = lsCopy ;`	`state = lsCopy ;`
`}`	`}`
`break ;`	`break ;`

`case lsOperator :`	`case lsOperator :`
`ptok->type = tOPERATOR ;`	`ptok->type = tOPERATOR ;`
`switch ( *start ) {`	`switch ( *start ) {`
`case '*' :`	`case '*' :`
`ptok->subtype = stMUL ;`	`ptok->subtype = stMUL ;`
`break ;`	`break ;`
`case '/' :`	`case '/' :`
`ptok->subtype = stDIV ;`	`ptok->subtype = stDIV ;`
`break ;`	`break ;`
`case '#' :`	`case '#' :`
`ptok->subtype = stMOD ;`	`ptok->subtype = stMOD ;`
`break ;`	`break ;`
`case '+' :`	`case '+' :`
`ptok->subtype = stADD ;`	`ptok->subtype = stADD ;`
`break ;`	`break ;`
`case '-' :`	`case '-' :`
`ptok->subtype = stSUB ;`	`ptok->subtype = stSUB ;`
`break ;`	`break ;`
`case '\|' :`	`case '\|' :`
`ptok->subtype = stIOR ;`	`ptok->subtype = stIOR ;`
`break ;`	`break ;`
`case '&' :`	`case '&' :`
`ptok->subtype = stAND ;`	`ptok->subtype = stAND ;`
`break ;`	`break ;`
`case '^' :`	`case '^' :`
`ptok->subtype = stXOR ;`	`ptok->subtype = stXOR ;`
`break ;`	`break ;`
`case '~' :`	`case '~' :`
`ptok->subtype = stTILDA ;`	`ptok->subtype = stTILDA ;`
`break ;`	`break ;`
`}`	`}`
`end = s ;`	`end = s ;`
`state = lsCopy ;`	`state = lsCopy ;`
`break ;`	`break ;`

`case lsDoubleOp :`	`case lsDoubleOp :`
`if ( *start == *s ) { // << or >>`	`if ( *start == *s ) { // << or >>`
`ptok->type = tOPERATOR ;`	`ptok->type = tOPERATOR ;`
`switch ( *start ) {`	`switch ( *start ) {`
`case '<' :`	`case '<' :`
`ptok->subtype = stSHL ;`	`ptok->subtype = stSHL ;`
`break ;`	`break ;`
`case '>' :`	`case '>' :`
`ptok->subtype = stSHR ;`	`ptok->subtype = stSHR ;`
`break ;`	`break ;`
`}`	`}`
`end = ++s ;`	`end = ++s ;`
`state = lsCopy ;`	`state = lsCopy ;`
`} else`	`} else`
`state = lsError ;`	`state = lsError ;`
`break ;`	`break ;`

`case lsPunct :`	`case lsPunct :`
`end = s ;`	`end = s ;`
`state = lsCopy ;`	`state = lsCopy ;`
`switch ( *start ) {`	`switch ( *start ) {`
`case ':' :`	`case ':' :`
`ptok->type = tCOLON ;`	`ptok->type = tCOLON ;`
`break ;`	`break ;`
`case '(' :`	`case '(' :`
`ptok->type = tLPAREN ;`	`ptok->type = tLPAREN ;`
`break ;`	`break ;`
`case ')' :`	`case ')' :`
`ptok->type = tRPAREN ;`	`ptok->type = tRPAREN ;`
`break ;`	`break ;`
`case ',' :`	`case ',' :`
`ptok->type = tCOMMA ;`	`ptok->type = tCOMMA ;`
`break ;`	`break ;`
`default :`	`default :`
`state = lsError ;`	`state = lsError ;`
`}`	`}`
`break ;`	`break ;`

`case lsIndex :`	`case lsIndex :`
`// any of .IX, .IX++, .--IX, .-IX+`	`// any of .IX, .IX++, .--IX, .-IX+`
`if ( isalnum( *s ) \|\| *s == '-' \|\| *s == '+' )`	`if ( isalnum( *s ) \|\| *s == '-' \|\| *s == '+' )`
`s++ ;`	`s++ ;`
`else {`	`else {`
`end = s ;`	`end = s ;`
`ptok->type = tIDENT ;`	`ptok->type = tIDENT ;`
`ptok->subtype = stDOT ;`	`ptok->subtype = stDOT ;`
`state = lsCopy ;`	`state = lsCopy ;`
`}`	`}`
`break ;`	`break ;`

`// final token collector`	`// final token collector`
`case lsCopy :`	`case lsCopy :`
`while ( start < end )`	`while ( start < end )`
`*pterm++ = *start++ ;`	`*pterm++ = *start++ ;`
`*pterm = '\0' ;`	`*pterm = '\0' ;`
`ptok->text = strdup( term ) ;`	`ptok->text = strdup( term ) ;`
`ptok++ ;`	`ptok++ ;`
`state = lsInit ;`	`state = lsInit ;`
`break ;`	`break ;`

`// any errors`	`// any errors`
`case lsError :`	`case lsError :`
`*pterm = '\0' ;`	`*pterm = '\0' ;`
`ptok->type = tERROR ;`	`ptok->type = tERROR ;`
`return false ;`	`return false ;`
`}`	`}`
`}`	`}`
`return false ;`	`return false ;`
`}`	`}`

/*

/*

 *  Copyright © 2003..2010 : Henk van Kampen <henk@mediatronix.com>

 *  Copyright © 2003..2010 : Henk van Kampen <henk@mediatronix.com>

 *      This file is part of pBlazASM.

 *      This file is part of pBlazASM.

 *  pBlazASM is free software: you can redistribute it and/or modify

 *  pBlazASM is free software: you can redistribute it and/or modify

 *  it under the terms of the GNU General Public License as published by

 *  it under the terms of the GNU General Public License as published by

 *  the Free Software Foundation, either version 3 of the License, or

 *  the Free Software Foundation, either version 3 of the License, or

 *  (at your option) any later version.

 *  (at your option) any later version.

 *  pBlazASM is distributed in the hope that it will be useful,

 *  pBlazASM is distributed in the hope that it will be useful,

 *  but WITHOUT ANY WARRANTY; without even the implied warranty of

 *  but WITHOUT ANY WARRANTY; without even the implied warranty of

 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 *  GNU General Public License for more details.

 *  GNU General Public License for more details.

 *  You should have received a copy of the GNU General Public License

 *  You should have received a copy of the GNU General Public License

 *  along with pBlazASM.  If not, see <http://www.gnu.org/licenses/>.

 *  along with pBlazASM.  If not, see <http://www.gnu.org/licenses/>.

*/

*/

#include <ctype.h>

#include <ctype.h>

#include <string.h>

#include <string.h>

#include <stdint.h>

#include <stdint.h>

#include <stdlib.h>

#include <stdlib.h>

#include "pbTypes.h"

#include "pbTypes.h"

#include "pbErrors.h"

#include "pbErrors.h"

// lexer states

// lexer states

typedef enum {

typedef enum {

        lsBin,

        lsBin,

        lsChar,

        lsChar,

        lsComment,

        lsComment,

        lsDec,

        lsDec,

        lsCopy,

        lsCopy,

        lsError,

        lsError,

        lsHex,

        lsHex,

        lsHexBin,

        lsHexBin,

        lsIdent,

        lsIdent,

        lsIdle,

        lsIdle,

        lsInit,

        lsInit,

        lsOperator,

        lsOperator,

        lsDoubleOp,

        lsDoubleOp,

        lsPunct,

        lsPunct,

        lsIndex,

        lsIndex,

        lsString

        lsString

} LexState ;

} LexState ;

// global token list

// global token list

static symbol_t tokens[ 256 ] ; // global token list

static symbol_t tokens[ 256 ] ; // global token list

static symbol_t * ptok = 0 ; // pointer to current token, index in 'tokens[]'

static symbol_t * ptok = 0 ; // pointer to current token, index in 'tokens[]'

symbol_t * tok_first( void ) {

symbol_t * tok_first( void ) {

        ptok = tokens ;

        ptok = tokens ;

        return ptok ;

        return ptok ;

symbol_t * tok_current( void ) {

symbol_t * tok_current( void ) {

        return ptok ;

        return ptok ;

symbol_t * tok_next( void ) {

symbol_t * tok_next( void ) {

        if ( ptok < &tokens[ 256 ] )

        if ( ptok < &tokens[ 256 ] )

                return ptok++ ;

                return ptok++ ;

        else {

        else {

                ptok->type = tNONE ;

                ptok->type = tNONE ;

                return ptok ;

                return ptok ;

void tok_back(symbol_t * back ){

void tok_back(symbol_t * back ){

        ptok = back ;

        ptok = back ;

void tok_free( void ) {

void tok_free( void ) {

        for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {

        for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {

                free( ptok->text ) ;

                free( ptok->text ) ;

                ptok->type = tNONE ;

                ptok->type = tNONE ;

                ptok->subtype = stNONE ;

                ptok->subtype = stNONE ;

                ptok->text = NULL ;

                ptok->text = NULL ;

                ptok->value = 0 ;

                ptok->value = 0 ;

// state machine based lexer

// state machine based lexer

// tokens are recorded in 'tokens', ended by a NONE token

// tokens are recorded in 'tokens', ended by a NONE token

bool lex( char * line, const bool mode ) {

bool lex( char * line, const bool mode ) {

        char * start = NULL, *end = NULL, *s = line ;

        char * start = NULL, *end = NULL, *s = line ;

        char term[ 256 ], *pterm = NULL ;

        char term[ 256 ], *pterm = NULL ;

        LexState state = lsInit ;

        LexState state = lsInit ;

        // state machine

        // state machine

        for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {

        for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {

                switch ( state ) {

                switch ( state ) {

                case lsInit :

                case lsInit :

                        ptok->type = tNONE ;

                        ptok->type = tNONE ;

                        ptok->subtype = stNONE ;

                        ptok->subtype = stNONE ;

                        ptok->value = 0 ;

                        ptok->value = 0 ;

                        ptok->text = NULL ;

                        ptok->text = NULL ;

                        pterm = term ;

                        pterm = term ;

                        *pterm = '\0' ;

                        *pterm = '\0' ;

                        state = lsIdle ;

                        state = lsIdle ;

                        break ;

                        break ;

                case lsIdle :

                case lsIdle :

                        // starting characters of tokens to be

                        // starting characters of tokens to be

                        if ( *s == '\0' || *s == '\r' || *s == '\n' ) {

                        if ( *s == '\0' || *s == '\r' || *s == '\n' ) {

                                // end of line

                                // end of line

                                return true ;

                                return true ;

                        } else if ( *s == ' ' || iscntrl( *s ) ) {

                        } else if ( *s == ' ' || iscntrl( *s ) ) {

                                // white space, 'space' and all control characters, except \0, \r and \n

                                // white space, 'space' and all control characters, except \0, \r and \n

                                s++ ;

                                s++ ;

                        } else if ( mode && ( isalnum( *s ) ) ) {

                        } else if ( mode && ( isalnum( *s ) || *s == '_'  ) ) {

                                // KCPSM mode, all alphanum is accepted for idents, could be hex values

                                // KCPSM mode, all alphanum is accepted for idents, could be hex values

                                // ident

                                // ident

                                start = s++ ;

                                start = s++ ;

                                state = lsIdent ;

                                state = lsIdent ;

                        } else if ( !mode && ( isalpha( *s ) || *s == '_' ) ) {

                        } else if ( !mode && ( isalpha( *s ) || *s == '_' ) ) {

                                // ident

                                // ident

                                start = s++ ;

                                start = s++ ;

                                state = lsIdent ;

                                state = lsIdent ;

                        } else if ( *s == ';' ) {

                        } else if ( *s == ';' ) {

                                // comment

                                // comment

                                start = s++ ;

                                start = s++ ;

                                state = lsComment ;

                                state = lsComment ;

                        } else if ( *s == '0' ) {

                        } else if ( *s == '0' ) {

                                // maybe hex or bin

                                // maybe hex or bin

                                start = s++ ;

                                start = s++ ;

                                state = lsHexBin ;

                                state = lsHexBin ;

                        } else if ( isdigit( *s ) ) {

                        } else if ( isdigit( *s ) ) {

                                // decimal number

                                // decimal number

                                start = s++ ;

                                start = s++ ;

                                state = lsDec ;

                                state = lsDec ;

                        } else if ( *s == '$' ) {

                        } else if ( *s == '$' ) {

                                // hexadecimal number

                                // hexadecimal number

                                start = ++s ;

                                start = ++s ;

                                state = lsHex ;

                                state = lsHex ;

                        } else if ( *s == '%' ) {

                        } else if ( *s == '%' ) {

                                // binary number

                                // binary number

                                start = ++s ;

                                start = ++s ;

                                state = lsBin ;

                                state = lsBin ;

                        } else if ( *s == '.' ) {

                        } else if ( *s == '.' ) {

                                // directives, indexing, local labels, etc

                                // directives, indexing, local labels, etc

                                start = s++ ;

                                start = s++ ;

                                state = lsIndex ;

                                state = lsIndex ;

                        } else if ( *s == ':' || *s == ',' || *s == '(' || *s == ')' ) {

                        } else if ( *s == ':' || *s == ',' || *s == '(' || *s == ')' ) {

                                // punctuation ',', ':', '(', ')', '~'

                                // punctuation ',', ':', '(', ')', '~'

                                start = s++ ;

                                start = s++ ;

                                state = lsPunct ;

                                state = lsPunct ;

                        } else if ( *s == '*' || *s == '/' || *s == '#' || *s == '+' || *s == '-' ||

                        } else if ( *s == '*' || *s == '/' || *s == '#' || *s == '+' || *s == '-' ||

                                        *s == '|' || *s == '&' || *s == '^' || *s == '~' ) {

                                        *s == '|' || *s == '&' || *s == '^' || *s == '~' ) {

                                // operators

                                // operators

                                start = s++ ;

                                start = s++ ;

                                state = lsOperator ;

                                state = lsOperator ;

                        } else if ( *s == '<' || *s == '>' ) {

                        } else if ( *s == '<' || *s == '>' ) {

                                // double char operators

                                // double char operators

                                start = s++ ;

                                start = s++ ;

                                state = lsDoubleOp ;

                                state = lsDoubleOp ;

                        } else if ( *s == '\'' ) {

                        } else if ( *s == '\'' ) {

                                // 'c'

                                // 'c'

                                start = ++s ;

                                start = ++s ;

                                state = lsChar ;

                                state = lsChar ;

                        } else if ( *s == '"' ) {

                        } else if ( *s == '"' ) {

                                // "string"

                                // "string"

                                start = ++s ;

                                start = ++s ;

                                state = lsString ;

                                state = lsString ;

                        } else

                        } else

                                state = lsError ;

                                state = lsError ;

                        break ;

                        break ;

                case lsComment :

                case lsComment :

                        if ( *s != '\0' && *s != '\r' && *s != '\n' )

                        if ( *s != '\0' && *s != '\r' && *s != '\n' )

                                // anything till end of line

                                // anything till end of line

                                s++ ;

                                s++ ;

                        else {

                        else {

                                end = s ;

                                end = s ;

                                ptok->type = tNONE ;

                                ptok->type = tNONE ;

                                ptok->subtype = stCOMMENT ;

                                ptok->subtype = stCOMMENT ;

                                state = lsCopy ;

                                state = lsCopy ;

                        break ;

                        break ;

                case lsChar :

                case lsChar :

                        if ( *s == '\'' ) {

                        if ( *s == '\'' ) {

                                ptok->type = tCHAR ;

                                ptok->type = tCHAR ;

                                end = s++ ;

                                end = s++ ;

                                state = lsCopy ;

                                state = lsCopy ;

                        } else if ( *s == '\\' ) {

                        } else if ( *s == '\\' ) {

                                s += 1 ;

                                s += 1 ;

                                if ( *s != '\0' )

                                if ( *s != '\0' )

                                        s += 1 ;

                                        s += 1 ;

                        } else if ( isgraph( *s ) || *s == ' ' ) {

                        } else if ( isgraph( *s ) || *s == ' ' ) {

                                s++ ;

                                s++ ;

                        } else

                        } else

                                state = lsError ;

                                state = lsError ;

                        break ;

                        break ;

                case lsString :

                case lsString :

                        if ( *s == '"' ) {

                        if ( *s == '"' ) {

                                ptok->type = tSTRING ;

                                ptok->type = tSTRING ;

                                end = s++ ;

                                end = s++ ;

                                state = lsCopy ;

                                state = lsCopy ;

                        } else if ( *s == '\\' ) {

                        } else if ( *s == '\\' ) {

                                s += 1 ;

                                s += 1 ;

                                if ( *s != '\0' )

                                if ( *s != '\0' )

                                        s += 1 ;

                                        s += 1 ;

                        } else if ( isgraph( *s ) || *s == ' ' )

                        } else if ( isgraph( *s ) || *s == ' ' )

                                s++ ;

                                s++ ;

                        else

                        else

                                state = lsError ;

                                state = lsError ;

                        break ;

                        break ;

                case lsIdent :

                case lsIdent :

                        if ( isalnum( *s ) || *s == '_' )

                        if ( isalnum( *s ) || *s == '_' )

                                s++ ;

                                s++ ;

                        else {

                        else {

                                end = s ;

                                end = s ;

                                ptok->type = tIDENT ;

                                ptok->type = tIDENT ;

                                ptok->subtype = stNONE ;

                                ptok->subtype = stNONE ;

                                state = lsCopy ;

                                state = lsCopy ;

                        break ;

                        break ;

                case lsHexBin :

                case lsHexBin :

                        if ( *s == 'x' ) {

                        if ( *s == 'x' ) {

                                start = ++s ;

                                start = ++s ;

                                state = lsHex ;

                                state = lsHex ;

                        } else if ( *s == 'b' ) {

                        } else if ( *s == 'b' ) {

                                start = ++s ;

                                start = ++s ;

                                state = lsBin ;

                                state = lsBin ;

                        } else

                        } else

                                // missing the first '0' doesn't hurt here

                                // missing the first '0' doesn't hurt here

                                state = lsDec ;

                                state = lsDec ;

                        break ;

                        break ;

                case lsHex :

                case lsHex :

                        if ( isxdigit( *s ) )

                        if ( isxdigit( *s ) )

                                s++ ;

                                s++ ;

                        else {

                        else {

                                end = s ;

                                end = s ;

                                ptok->type = tHEX ;

                                ptok->type = tHEX ;

                                state = lsCopy ;

                                state = lsCopy ;

                        break ;

                        break ;

                case lsBin :

                case lsBin :

                        if ( *s == '0' || *s == '1' )

                        if ( *s == '0' || *s == '1' )

                                s++ ;

                                s++ ;

                        else {

                        else {

                                end = s ;

                                end = s ;

                                ptok->type = tBIN ;

                                ptok->type = tBIN ;

                                state = lsCopy ;

                                state = lsCopy ;

                        break ;

                        break ;

                case lsDec :

                case lsDec :

                        if ( isdigit( *s ) )

                        if ( isdigit( *s ) )

                                s++ ;

                                s++ ;

                        else {

                        else {

                                end = s ;

                                end = s ;

                                ptok->type = tDEC ;

                                ptok->type = tDEC ;

                                state = lsCopy ;

                                state = lsCopy ;

                        break ;

                        break ;

                case lsOperator :

                case lsOperator :

                        ptok->type = tOPERATOR ;

                        ptok->type = tOPERATOR ;

                        switch ( *start ) {

                        switch ( *start ) {

                        case '*' :

                        case '*' :

                                ptok->subtype = stMUL ;

                                ptok->subtype = stMUL ;

                                break ;

                                break ;

                        case '/' :

                        case '/' :

                                ptok->subtype = stDIV ;

                                ptok->subtype = stDIV ;

                                break ;

                                break ;

                        case '#' :

                        case '#' :

                                ptok->subtype = stMOD ;

                                ptok->subtype = stMOD ;

                                break ;

                                break ;

                        case '+' :

                        case '+' :

                                ptok->subtype = stADD ;

                                ptok->subtype = stADD ;

                                break ;

                                break ;

                        case '-' :

                        case '-' :

                                ptok->subtype = stSUB ;

                                ptok->subtype = stSUB ;

                                break ;

                                break ;

                        case '|' :

                        case '|' :

                                ptok->subtype = stIOR ;

                                ptok->subtype = stIOR ;

                                break ;

                                break ;

                        case '&' :

                        case '&' :

                                ptok->subtype = stAND ;

                                ptok->subtype = stAND ;

                                break ;

                                break ;

                        case '^' :

                        case '^' :

                                ptok->subtype = stXOR ;

                                ptok->subtype = stXOR ;

                                break ;

                                break ;

                        case '~' :

                        case '~' :

                                ptok->subtype = stTILDA ;

                                ptok->subtype = stTILDA ;

                                break ;

                                break ;

                        end = s ;

                        end = s ;

                        state = lsCopy ;

                        state = lsCopy ;

                        break ;

                        break ;

                case lsDoubleOp :

                case lsDoubleOp :

                        if ( *start == *s ) { // << or >>

                        if ( *start == *s ) { // << or >>

                                ptok->type = tOPERATOR ;

                                ptok->type = tOPERATOR ;

                                switch ( *start ) {

                                switch ( *start ) {

                                case '<' :

                                case '<' :

                                        ptok->subtype = stSHL ;

                                        ptok->subtype = stSHL ;

                                        break ;

                                        break ;

                                case '>' :

                                case '>' :

                                        ptok->subtype = stSHR ;

                                        ptok->subtype = stSHR ;

                                        break ;

                                        break ;

                                end = ++s ;

                                end = ++s ;

                                state = lsCopy ;

                                state = lsCopy ;

                        } else

                        } else

                                state = lsError ;

                                state = lsError ;

                        break ;

                        break ;

                case lsPunct :

                case lsPunct :

                        end = s ;

                        end = s ;

                        state = lsCopy ;

                        state = lsCopy ;

                        switch ( *start ) {

                        switch ( *start ) {

                        case ':' :

                        case ':' :

                                ptok->type = tCOLON ;

                                ptok->type = tCOLON ;

                                break ;

                                break ;

                        case '(' :

                        case '(' :

                                ptok->type = tLPAREN ;

                                ptok->type = tLPAREN ;

                                break ;

                                break ;

                        case ')' :

                        case ')' :

                                ptok->type = tRPAREN ;

                                ptok->type = tRPAREN ;

                                break ;

                                break ;

                        case ',' :

                        case ',' :

                                ptok->type = tCOMMA ;

                                ptok->type = tCOMMA ;

                                break ;

                                break ;

                        default :

                        default :

                                state = lsError ;

                                state = lsError ;

                        break ;

                        break ;

                case lsIndex :

                case lsIndex :

                        // any of .IX, .IX++, .--IX, .-IX+

                        // any of .IX, .IX++, .--IX, .-IX+

                        if ( isalnum( *s ) || *s == '-' || *s == '+' )

                        if ( isalnum( *s ) || *s == '-' || *s == '+' )

                                s++ ;

                                s++ ;

                        else {

                        else {

                                end = s ;

                                end = s ;

                                ptok->type = tIDENT ;

                                ptok->type = tIDENT ;

                                ptok->subtype = stDOT ;

                                ptok->subtype = stDOT ;

                                state = lsCopy ;

                                state = lsCopy ;

                        break ;

                        break ;

                        // final token collector

                        // final token collector

                case lsCopy :

                case lsCopy :

                        while ( start < end )

                        while ( start < end )

                                *pterm++ = *start++ ;

                                *pterm++ = *start++ ;

                        *pterm = '\0' ;

                        *pterm = '\0' ;

                        ptok->text = strdup( term ) ;

                        ptok->text = strdup( term ) ;

                        ptok++ ;

                        ptok++ ;

                        state = lsInit ;

                        state = lsInit ;

                        break ;

                        break ;

                        // any errors

                        // any errors

                case lsError :

                case lsError :

                        *pterm = '\0' ;

                        *pterm = '\0' ;

                        ptok->type = tERROR ;

                        ptok->type = tERROR ;

                        return false ;

                        return false ;

        return false ;

        return false ;

Browse

Tools

Subversion Repositories copyblaze

[/] [copyblaze/] [trunk/] [copyblaze/] [sw/] [tools/] [asm/] [pBlazASM/] [pBlazASM/] [pbLexer.c] - Diff between revs 2 and 46