URL: https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
openrisc/trunk/gnu-dev/or1k-gcc/libgo/go/go/scanner/scanner.go - Rev 747
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package scanner implements a scanner for Go source text. Takes a []byte as
// source which can then be tokenized through repeated calls to the Scan
// function. Typical use:
//
//	var s scanner.Scanner
//	fset := token.NewFileSet()                             // position information is relative to fset
//	file := fset.AddFile(filename, fset.Base(), len(src))  // register file
//	s.Init(file, src, nil /* no error handler */, 0)
//	for {
//		pos, tok, lit := s.Scan()
//		if tok == token.EOF {
//			break
//		}
//		// do something here with pos, tok, and lit
//	}
//
package scanner

import (
	"bytes"
	"fmt"
	"go/token"
	"path/filepath"
	"strconv"
	"unicode"
	"unicode/utf8"
)

// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
type Scanner struct {
	// immutable state
	file *token.File  // source file handle
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	err  ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch         rune // current character
	offset     int  // character offset
	rdOffset   int  // reading offset (position after current character)
	lineOffset int  // current line offset
	insertSemi bool // insert a semicolon before next newline

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}

// Read the next Unicode char into s.ch.
// s.ch < 0 means end-of-file.
//
func (s *Scanner) next() {
	if s.rdOffset < len(s.src) {
		s.offset = s.rdOffset
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		r, w := rune(s.src[s.rdOffset]), 1
		switch {
		case r == 0:
			s.error(s.offset, "illegal character NUL")
		case r >= 0x80:
			// not ASCII
			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
			if r == utf8.RuneError && w == 1 {
				s.error(s.offset, "illegal UTF-8 encoding")
			}
		}
		s.rdOffset += w
		s.ch = r
	} else {
		s.offset = len(s.src)
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		s.ch = -1 // eof
	}
}

// A mode value is set of flags (or 0).
// They control scanner behavior.
//
type Mode uint

const (
	ScanComments Mode = 1 << iota // return comments as COMMENT tokens
	dontInsertSemis               // do not automatically insert semicolons - for testing only
)

// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same file as
// line information which is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will use the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic("file size does not match src len")
	}
	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.src = src
	s.err = err
	s.mode = mode

	s.ch = ' '
	s.offset = 0
	s.rdOffset = 0
	s.lineOffset = 0
	s.insertSemi = false
	s.ErrorCount = 0

	s.next()
}

func (s *Scanner) error(offs int, msg string) {
	if s.err != nil {
		s.err.Error(s.file.Position(s.file.Pos(offs)), msg)
	}
	s.ErrorCount++
}

var prefix = []byte("//line ")

func (s *Scanner) interpretLineComment(text []byte) {
	if bytes.HasPrefix(text, prefix) {
		// get filename and line number, if any
		if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
			if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
				// valid //line filename:line comment;
				filename := filepath.Clean(string(text[len(prefix):i]))
				if !filepath.IsAbs(filename) {
					// make filename relative to current directory
					filename = filepath.Join(s.dir, filename)
				}
				// update scanner position
				s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
			}
		}
	}
}

func (s *Scanner) scanComment() string {
	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
	offs := s.offset - 1 // position of initial '/'

	if s.ch == '/' {
		//-style comment
		s.next()
		for s.ch != '\n' && s.ch >= 0 {
			s.next()
		}
		if offs == s.lineOffset {
			// comment starts at the beginning of the current line
			s.interpretLineComment(s.src[offs:s.offset])
		}
		goto exit
	}

	/*-style comment */
	s.next()
	for s.ch >= 0 {
		ch := s.ch
		s.next()
		if ch == '*' && s.ch == '/' {
			s.next()
			goto exit
		}
	}

	s.error(offs, "comment not terminated")

exit:
	return string(s.src[offs:s.offset])
}

func (s *Scanner) findLineEnd() bool {
	// initial '/' already consumed

	defer func(offs int) {
		// reset scanner state to where it was upon calling findLineEnd
		s.ch = '/'
		s.offset = offs
		s.rdOffset = offs + 1
		s.next() // consume initial '/' again
	}(s.offset - 1)

	// read ahead until a newline, EOF, or non-comment token is found
	for s.ch == '/' || s.ch == '*' {
		if s.ch == '/' {
			//-style comment always contains a newline
			return true
		}
		/*-style comment: look for newline */
		s.next()
		for s.ch >= 0 {
			ch := s.ch
			if ch == '\n' {
				return true
			}
			s.next()
			if ch == '*' && s.ch == '/' {
				s.next()
				break
			}
		}
		s.skipWhitespace() // s.insertSemi is set
		if s.ch < 0 || s.ch == '\n' {
			return true
		}
		if s.ch != '/' {
			// non-comment token
			return false
		}
		s.next() // consume '/'
	}

	return false
}

func isLetter(ch rune) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}

func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
}

func (s *Scanner) scanIdentifier() string {
	offs := s.offset
	for isLetter(s.ch) || isDigit(s.ch) {
		s.next()
	}
	return string(s.src[offs:s.offset])
}

func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch - 'a' + 10)
	case 'A' <= ch && ch <= 'F':
		return int(ch - 'A' + 10)
	}
	return 16 // larger than any legal digit val
}

func (s *Scanner) scanMantissa(base int) {
	for digitVal(s.ch) < base {
		s.next()
	}
}

func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
	// digitVal(s.ch) < 10
	offs := s.offset
	tok := token.INT

	if seenDecimalPoint {
		offs--
		tok = token.FLOAT
		s.scanMantissa(10)
		goto exponent
	}

	if s.ch == '0' {
		// int or float
		offs := s.offset
		s.next()
		if s.ch == 'x' || s.ch == 'X' {
			// hexadecimal int
			s.next()
			s.scanMantissa(16)
			if s.offset-offs <= 2 {
				// only scanned "0x" or "0X"
				s.error(offs, "illegal hexadecimal number")
			}
		} else {
			// octal int or float
			seenDecimalDigit := false
			s.scanMantissa(8)
			if s.ch == '8' || s.ch == '9' {
				// illegal octal int or float
				seenDecimalDigit = true
				s.scanMantissa(10)
			}
			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
				goto fraction
			}
			// octal int
			if seenDecimalDigit {
				s.error(offs, "illegal octal number")
			}
		}
		goto exit
	}

	// decimal int or float
	s.scanMantissa(10)

fraction:
	if s.ch == '.' {
		tok = token.FLOAT
		s.next()
		s.scanMantissa(10)
	}

exponent:
	if s.ch == 'e' || s.ch == 'E' {
		tok = token.FLOAT
		s.next()
		if s.ch == '-' || s.ch == '+' {
			s.next()
		}
		s.scanMantissa(10)
	}

	if s.ch == 'i' {
		tok = token.IMAG
		s.next()
	}

exit:
	return tok, string(s.src[offs:s.offset])
}

func (s *Scanner) scanEscape(quote rune) {
	offs := s.offset

	var i, base, max uint32
	switch s.ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		s.next()
		return
	case '0', '1', '2', '3', '4', '5', '6', '7':
		i, base, max = 3, 8, 255
	case 'x':
		s.next()
		i, base, max = 2, 16, 255
	case 'u':
		s.next()
		i, base, max = 4, 16, unicode.MaxRune
	case 'U':
		s.next()
		i, base, max = 8, 16, unicode.MaxRune
	default:
		s.next() // always make progress
		s.error(offs, "unknown escape sequence")
		return
	}

	var x uint32
	for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
		d := uint32(digitVal(s.ch))
		if d >= base {
			s.error(s.offset, "illegal character in escape sequence")
			break
		}
		x = x*base + d
		s.next()
	}
	// in case of an error, consume remaining chars
	for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
		s.next()
	}

	if x > max || 0xd800 <= x && x < 0xe000 {
		s.error(offs, "escape sequence is invalid Unicode code point")
	}
}

func (s *Scanner) scanChar() string {
	// '\'' opening already consumed
	offs := s.offset - 1

	n := 0
	for s.ch != '\'' {
		ch := s.ch
		n++
		s.next()
		if ch == '\n' || ch < 0 {
			s.error(offs, "character literal not terminated")
			n = 1
			break
		}
		if ch == '\\' {
			s.scanEscape('\'')
		}
	}

	s.next()

	if n != 1 {
		s.error(offs, "illegal character literal")
	}

	return string(s.src[offs:s.offset])
}

func (s *Scanner) scanString() string {
	// '"' opening already consumed
	offs := s.offset - 1

	for s.ch != '"' {
		ch := s.ch
		s.next()
		if ch == '\n' || ch < 0 {
			s.error(offs, "string not terminated")
			break
		}
		if ch == '\\' {
			s.scanEscape('"')
		}
	}

	s.next()

	return string(s.src[offs:s.offset])
}

func stripCR(b []byte) []byte {
	c := make([]byte, len(b))
	i := 0
	for _, ch := range b {
		if ch != '\r' {
			c[i] = ch
			i++
		}
	}
	return c[:i]
}

func (s *Scanner) scanRawString() string {
	// '`' opening already consumed
	offs := s.offset - 1

	hasCR := false
	for s.ch != '`' {
		ch := s.ch
		s.next()
		if ch == '\r' {
			hasCR = true
		}
		if ch < 0 {
			s.error(offs, "string not terminated")
			break
		}
	}

	s.next()

	lit := s.src[offs:s.offset]
	if hasCR {
		lit = stripCR(lit)
	}

	return string(lit)
}

func (s *Scanner) skipWhitespace() {
	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
		s.next()
	}
}

// Helper functions for scanning multi-byte tokens such as >> += >>= .
// Different routines recognize different length tok_i based on matches
// of ch_i. If a token ends in '=', the result is tok1 or tok3
// respectively. Otherwise, the result is tok0 if there was no other
// matching character, or tok2 if the matching character was ch2.

func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	return tok0
}

func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	if s.ch == ch2 {
		s.next()
		return tok2
	}
	return tok0
}

func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	if s.ch == ch2 {
		s.next()
		if s.ch == '=' {
			s.next()
			return tok3
		}
		return tok2
	}
	return tok0
}

// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// token.EOF.
//
// If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
// has the corresponding value.
//
// If the returned token is token.SEMICOLON, the corresponding
// literal string is ";" if the semicolon was present in the source,
// and "\n" if the semicolon was inserted because of a newline or
// at EOF.
//
// If the returned token is token.ILLEGAL, the literal string is the
// offending character.
//
// In all other cases, Scan returns an empty literal string.
//
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
// Scan adds line information to the file added to the file
// set with Init. Token positions are relative to that file
// and thus relative to the file set.
//
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
	s.skipWhitespace()

	// current token start
	pos = s.file.Pos(s.offset)

	// determine token value
	insertSemi := false
	switch ch := s.ch; {
	case isLetter(ch):
		lit = s.scanIdentifier()
		tok = token.Lookup(lit)
		switch tok {
		case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
			insertSemi = true
		}
	case digitVal(ch) < 10:
		insertSemi = true
		tok, lit = s.scanNumber(false)
	default:
		s.next() // always make progress
		switch ch {
		case -1:
			if s.insertSemi {
				s.insertSemi = false // EOF consumed
				return pos, token.SEMICOLON, "\n"
			}
			tok = token.EOF
		case '\n':
			// we only reach here if s.insertSemi was
			// set in the first place and exited early
			// from s.skipWhitespace()
			s.insertSemi = false // newline consumed
			return pos, token.SEMICOLON, "\n"
		case '"':
			insertSemi = true
			tok = token.STRING
			lit = s.scanString()
		case '\'':
			insertSemi = true
			tok = token.CHAR
			lit = s.scanChar()
		case '`':
			insertSemi = true
			tok = token.STRING
			lit = s.scanRawString()
		case ':':
			tok = s.switch2(token.COLON, token.DEFINE)
		case '.':
			if digitVal(s.ch) < 10 {
				insertSemi = true
				tok, lit = s.scanNumber(true)
			} else if s.ch == '.' {
				s.next()
				if s.ch == '.' {
					s.next()
					tok = token.ELLIPSIS
				}
			} else {
				tok = token.PERIOD
			}
		case ',':
			tok = token.COMMA
		case ';':
			tok = token.SEMICOLON
			lit = ";"
		case '(':
			tok = token.LPAREN
		case ')':
			insertSemi = true
			tok = token.RPAREN
		case '[':
			tok = token.LBRACK
		case ']':
			insertSemi = true
			tok = token.RBRACK
		case '{':
			tok = token.LBRACE
		case '}':
			insertSemi = true
			tok = token.RBRACE
		case '+':
			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
			if tok == token.INC {
				insertSemi = true
			}
		case '-':
			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
			if tok == token.DEC {
				insertSemi = true
			}
		case '*':
			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
		case '/':
			if s.ch == '/' || s.ch == '*' {
				// comment
				if s.insertSemi && s.findLineEnd() {
					// reset position to the beginning of the comment
					s.ch = '/'
					s.offset = s.file.Offset(pos)
					s.rdOffset = s.offset + 1
					s.insertSemi = false // newline consumed
					return pos, token.SEMICOLON, "\n"
				}
				lit = s.scanComment()
				if s.mode&ScanComments == 0 {
					// skip comment
					s.insertSemi = false // newline consumed
					goto scanAgain
				}
				tok = token.COMMENT
			} else {
				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
			}
		case '%':
			tok = s.switch2(token.REM, token.REM_ASSIGN)
		case '^':
			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
		case '<':
			if s.ch == '-' {
				s.next()
				tok = token.ARROW
			} else {
				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
			}
		case '>':
			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
		case '=':
			tok = s.switch2(token.ASSIGN, token.EQL)
		case '!':
			tok = s.switch2(token.NOT, token.NEQ)
		case '&':
			if s.ch == '^' {
				s.next()
				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
			} else {
				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
			}
		case '|':
			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
		default:
			s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
			insertSemi = s.insertSemi // preserve insertSemi info
			tok = token.ILLEGAL
			lit = string(ch)
		}
	}
	if s.mode&dontInsertSemis == 0 {
		s.insertSemi = insertSemi
	}

	return
}
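For reference, a minimal sketch of client code driving this scanner, following the "Typical use" pattern from the package comment above. The file name "example.go" and the source snippet are illustrative assumptions, not part of this revision; a nil error handler and the ScanComments mode are used so comments are returned as COMMENT tokens.

	package main

	import (
		"fmt"
		"go/scanner"
		"go/token"
	)

	func main() {
		// illustrative source to tokenize (any Go text works)
		src := []byte("package main\n\nfunc add(x, y int) int { return x + y } // sum\n")

		fset := token.NewFileSet()                                // position information is relative to fset
		file := fset.AddFile("example.go", fset.Base(), len(src)) // register the source file

		var s scanner.Scanner
		s.Init(file, src, nil /* no error handler */, scanner.ScanComments)

		// repeatedly call Scan until token.EOF is returned
		for {
			pos, tok, lit := s.Scan()
			if tok == token.EOF {
				break
			}
			fmt.Printf("%s\t%s\t%q\n", fset.Position(pos), tok, lit)
		}
	}

Running such a program prints one line per token with its file position, token kind, and literal, including the automatically inserted semicolons described in the Scan documentation.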
