URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [go/] [scanner/] [scanner.go] - Blame information for rev 774

Go to most recent revision | Details | Compare with Previous | View Log


// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
 
// Package scanner implements a scanner for Go source text. Takes a []byte as
// source which can then be tokenized through repeated calls to the Scan
// function. Typical use:
//
//      var s scanner.Scanner
//      fset := token.NewFileSet()  // position information is relative to fset
//      file := fset.AddFile(filename, fset.Base(), len(src))  // register file
//      s.Init(file, src, nil /* no error handler */, 0)
//      for {
//              pos, tok, lit := s.Scan()
//              if tok == token.EOF {
//                      break
//              }
//              // do something here with pos, tok, and lit
//      }
//
package scanner
 
import (
        "bytes"
        "fmt"
        "go/token"
        "path/filepath"
        "strconv"
        "unicode"
        "unicode/utf8"
)
 
// A Scanner holds the scanner's internal state while processing
// a given text.  It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
// The unexported fields are split into state that is set once by Init
// and state that is advanced by next and Scan while tokenizing.
//
type Scanner struct {
        // immutable state
        file *token.File  // source file handle
        dir  string       // directory portion of file.Name()
        src  []byte       // source
        err  ErrorHandler // error reporting; or nil
        mode Mode         // scanning mode

        // scanning state
        ch         rune // current character; < 0 means end-of-file
        offset     int  // character offset (index of ch in src)
        rdOffset   int  // reading offset (position after current character)
        lineOffset int  // current line offset (offset at which the current line starts)
        insertSemi bool // insert a semicolon before next newline

        // public state - ok to modify
        ErrorCount int // number of errors encountered
}
 
// next advances the scanner by one character: the character at the
// read offset is decoded into s.ch and the offsets are moved past it.
// At the end of the source s.ch is set to -1 (s.ch < 0 means
// end-of-file). If the character being left behind is a newline, the
// new offset is recorded as the start of a line.
//
func (s *Scanner) next() {
        atEOF := s.rdOffset >= len(s.src)

        // Step onto the next position, clamped to the end of the source.
        if atEOF {
                s.offset = len(s.src)
        } else {
                s.offset = s.rdOffset
        }

        // The previous character ended a line, so a new line starts here.
        if s.ch == '\n' {
                s.lineOffset = s.offset
                s.file.AddLine(s.offset)
        }

        if atEOF {
                s.ch = -1 // eof
                return
        }

        // Assume a single-byte character and only run the UTF-8 decoder
        // when the lead byte says otherwise.
        width := 1
        char := rune(s.src[s.rdOffset])
        if char == 0 {
                s.error(s.offset, "illegal character NUL")
        } else if char >= 0x80 {
                // not ASCII
                char, width = utf8.DecodeRune(s.src[s.rdOffset:])
                if width == 1 && char == utf8.RuneError {
                        s.error(s.offset, "illegal UTF-8 encoding")
                }
        }
        s.rdOffset += width
        s.ch = char
}
 
// A Mode value is a set of flags (or 0).
// The flags control scanner behavior; Scan tests them with a bitwise AND.
//
type Mode uint

// Flags accepted by Init. Each flag occupies its own bit (1 << iota).
const (
        ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
        dontInsertSemis                  // do not automatically insert semicolons - for testing only
)
 
// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same file as
// line information which is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will use the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
        // Explicitly initialize all fields since a scanner may be reused.
        if file.Size() != len(src) {
                panic("file size does not match src len")
        }
        s.file = file
        s.dir, _ = filepath.Split(file.Name())
        s.src = src
        s.err = err
        s.mode = mode
 
        s.ch = ' '
        s.offset = 0
        s.rdOffset = 0
        s.lineOffset = 0
        s.insertSemi = false
        s.ErrorCount = 0
 
        s.next()
}
 
// error reports msg for the source offset offs to the installed error
// handler, if there is one, and then counts the error in ErrorCount.
func (s *Scanner) error(offs int, msg string) {
        if handler := s.err; handler != nil {
                where := s.file.Position(s.file.Pos(offs))
                handler.Error(where, msg)
        }
        s.ErrorCount++
}
 
// prefix is the leading text that interpretLineComment requires before it
// treats a comment as a "//line filename:line" directive.
var prefix = []byte("//line ")
 
// interpretLineComment inspects the text of a //-style comment. If it
// has the form "//line filename:line" with a positive line number, the
// file's position information is adjusted for the line that follows
// the comment. Any other comment is ignored.
func (s *Scanner) interpretLineComment(text []byte) {
        if !bytes.HasPrefix(text, prefix) {
                return
        }

        // The line number follows the last colon.
        colon := bytes.LastIndex(text, []byte{':'})
        if colon <= 0 {
                return
        }
        line, err := strconv.Atoi(string(text[colon+1:]))
        if err != nil || line <= 0 {
                return
        }

        // valid //line filename:line comment;
        // relative filenames are resolved against the scanned file's directory
        name := filepath.Clean(string(text[len(prefix):colon]))
        if !filepath.IsAbs(name) {
                name = filepath.Join(s.dir, name)
        }

        // +len(text)+1 since the comment applies to the next line
        s.file.AddLineInfo(s.lineOffset+len(text)+1, name, line)
}
 
func (s *Scanner) scanComment() string {
        // initial '/' already consumed; s.ch == '/' || s.ch == '*'
        offs := s.offset - 1 // position of initial '/'
 
        if s.ch == '/' {
                //-style comment
                s.next()
                for s.ch != '\n' && s.ch >= 0 {
                        s.next()
                }
                if offs == s.lineOffset {
                        // comment starts at the beginning of the current line
                        s.interpretLineComment(s.src[offs:s.offset])
                }
                goto exit
        }
 
        /*-style comment */
        s.next()
        for s.ch >= 0 {
                ch := s.ch
                s.next()
                if ch == '*' && s.ch == '/' {
                        s.next()
                        goto exit
                }
        }
 
        s.error(offs, "comment not terminated")
 
exit:
        return string(s.src[offs:s.offset])
}
 
// findLineEnd looks ahead from the start of a comment and reports whether
// a newline or the end of the source is reached before any non-comment
// token. It reads past consecutive comments and the whitespace between
// them. The deferred function puts s.ch, s.offset and s.rdOffset back to
// where they were on entry, so the look-ahead does not move the scanner.
//
// The initial '/' of the comment must already have been consumed.
func (s *Scanner) findLineEnd() bool {
        // initial '/' already consumed

        // The argument s.offset - 1 is evaluated now, i.e. it is the offset
        // of the initial '/' at the time of the call.
        defer func(offs int) {
                // reset scanner state to where it was upon calling findLineEnd
                s.ch = '/'
                s.offset = offs
                s.rdOffset = offs + 1
                s.next() // consume initial '/' again
        }(s.offset - 1)

        // read ahead until a newline, EOF, or non-comment token is found
        for s.ch == '/' || s.ch == '*' {
                if s.ch == '/' {
                        //-style comment always contains a newline
                        return true
                }
                /*-style comment: look for newline */
                s.next()
                for s.ch >= 0 {
                        ch := s.ch
                        if ch == '\n' {
                                return true
                        }
                        s.next()
                        if ch == '*' && s.ch == '/' {
                                // end of this comment; step past the closing '/'
                                s.next()
                                break
                        }
                }
                // skipWhitespace stops at a newline here because s.insertSemi is set
                s.skipWhitespace() // s.insertSemi is set
                if s.ch < 0 || s.ch == '\n' {
                        return true
                }
                if s.ch != '/' {
                        // non-comment token
                        return false
                }
                // possibly another comment: the loop condition checks what follows the '/'
                s.next() // consume '/'
        }

        return false
}
 
// isLetter reports whether ch is an ASCII letter, an underscore, or a
// non-ASCII character that unicode.IsLetter classifies as a letter.
func isLetter(ch rune) bool {
        switch {
        case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', ch == '_':
                return true
        case ch >= 0x80:
                // only consult the Unicode tables outside the ASCII range
                return unicode.IsLetter(ch)
        }
        return false
}
 
// isDigit reports whether ch is an ASCII decimal digit or a non-ASCII
// character that unicode.IsDigit classifies as a digit.
func isDigit(ch rune) bool {
        if ch >= 0x80 {
                // only consult the Unicode tables outside the ASCII range
                return unicode.IsDigit(ch)
        }
        return '0' <= ch && ch <= '9'
}
 
// scanIdentifier consumes the longest run of letters and digits that
// starts at the current character and returns it as a string.
func (s *Scanner) scanIdentifier() string {
        start := s.offset
        for {
                if !isLetter(s.ch) && !isDigit(s.ch) {
                        break
                }
                s.next()
        }
        return string(s.src[start:s.offset])
}
 
// digitVal returns the numeric value of ch interpreted as a hexadecimal
// digit (0-9, a-f, A-F). For any other character it returns 16, which is
// larger than any legal digit value in the bases used by the scanner.
func digitVal(ch rune) int {
        if '0' <= ch && ch <= '9' {
                return int(ch - '0')
        }
        if 'a' <= ch && ch <= 'f' {
                return int(ch - 'a' + 10)
        }
        if 'A' <= ch && ch <= 'F' {
                return int(ch - 'A' + 10)
        }
        return 16 // larger than any legal digit val
}
 
// scanMantissa consumes characters for as long as the current character
// is a valid digit in the given base.
func (s *Scanner) scanMantissa(base int) {
        for {
                if digitVal(s.ch) >= base {
                        return
                }
                s.next()
        }
}
 
// scanNumber scans an integer, floating-point or imaginary literal and
// returns its token kind (token.INT, token.FLOAT or token.IMAG) together
// with the literal text.
//
// If seenDecimalPoint is set, the caller has already consumed a leading
// '.'; the literal then starts one character before the current offset
// and is scanned as a fraction. Otherwise the current character is the
// first digit of the number.
func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
        // digitVal(s.ch) < 10
        offs := s.offset
        tok := token.INT

        if seenDecimalPoint {
                // include the already consumed '.' in the literal
                offs--
                tok = token.FLOAT
                s.scanMantissa(10)
                goto exponent
        }

        if s.ch == '0' {
                // int or float
                // NOTE: this offs shadows the outer one; both hold the same
                // value here because the outer offs is only decremented in
                // the seenDecimalPoint branch above.
                offs := s.offset
                s.next()
                if s.ch == 'x' || s.ch == 'X' {
                        // hexadecimal int
                        s.next()
                        s.scanMantissa(16)
                        if s.offset-offs <= 2 {
                                // only scanned "0x" or "0X"
                                s.error(offs, "illegal hexadecimal number")
                        }
                } else {
                        // octal int or float
                        seenDecimalDigit := false
                        s.scanMantissa(8)
                        if s.ch == '8' || s.ch == '9' {
                                // illegal octal int or float
                                seenDecimalDigit = true
                                s.scanMantissa(10)
                        }
                        if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
                                // not an octal int after all: the digits are
                                // the integer part of a float or imaginary literal
                                goto fraction
                        }
                        // octal int
                        if seenDecimalDigit {
                                s.error(offs, "illegal octal number")
                        }
                }
                goto exit
        }

        // decimal int or float
        s.scanMantissa(10)

fraction:
        if s.ch == '.' {
                tok = token.FLOAT
                s.next()
                s.scanMantissa(10)
        }

exponent:
        if s.ch == 'e' || s.ch == 'E' {
                tok = token.FLOAT
                s.next()
                // optional sign of the exponent
                if s.ch == '-' || s.ch == '+' {
                        s.next()
                }
                s.scanMantissa(10)
        }

        // a trailing 'i' turns the literal into an imaginary one
        if s.ch == 'i' {
                tok = token.IMAG
                s.next()
        }

exit:
        return tok, string(s.src[offs:s.offset])
}
 
func (s *Scanner) scanEscape(quote rune) {
        offs := s.offset
 
        var i, base, max uint32
        switch s.ch {
        case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
                s.next()
                return
        case '0', '1', '2', '3', '4', '5', '6', '7':
                i, base, max = 3, 8, 255
        case 'x':
                s.next()
                i, base, max = 2, 16, 255
        case 'u':
                s.next()
                i, base, max = 4, 16, unicode.MaxRune
        case 'U':
                s.next()
                i, base, max = 8, 16, unicode.MaxRune
        default:
                s.next() // always make progress
                s.error(offs, "unknown escape sequence")
                return
        }
 
        var x uint32
        for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
                d := uint32(digitVal(s.ch))
                if d >= base {
                        s.error(s.offset, "illegal character in escape sequence")
                        break
                }
                x = x*base + d
                s.next()
        }
        // in case of an error, consume remaining chars
        for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
                s.next()
        }
        if x > max || 0xd800 <= x && x < 0xe000 {
                s.error(offs, "escape sequence is invalid Unicode code point")
        }
}
 
func (s *Scanner) scanChar() string {
        // '\'' opening already consumed
        offs := s.offset - 1
 
        n := 0
        for s.ch != '\'' {
                ch := s.ch
                n++
                s.next()
                if ch == '\n' || ch < 0 {
                        s.error(offs, "character literal not terminated")
                        n = 1
                        break
                }
                if ch == '\\' {
                        s.scanEscape('\'')
                }
        }
 
        s.next()
 
        if n != 1 {
                s.error(offs, "illegal character literal")
        }
 
        return string(s.src[offs:s.offset])
}
 
func (s *Scanner) scanString() string {
        // '"' opening already consumed
        offs := s.offset - 1
 
        for s.ch != '"' {
                ch := s.ch
                s.next()
                if ch == '\n' || ch < 0 {
                        s.error(offs, "string not terminated")
                        break
                }
                if ch == '\\' {
                        s.scanEscape('"')
                }
        }
 
        s.next()
 
        return string(s.src[offs:s.offset])
}
 
// stripCR returns a copy of b with all carriage returns ('\r') removed.
// The input slice is left untouched.
func stripCR(b []byte) []byte {
        out := make([]byte, 0, len(b))
        for i := 0; i < len(b); i++ {
                if c := b[i]; c != '\r' {
                        out = append(out, c)
                }
        }
        return out
}
 
func (s *Scanner) scanRawString() string {
        // '`' opening already consumed
        offs := s.offset - 1
 
        hasCR := false
        for s.ch != '`' {
                ch := s.ch
                s.next()
                if ch == '\r' {
                        hasCR = true
                }
                if ch < 0 {
                        s.error(offs, "string not terminated")
                        break
                }
        }
 
        s.next()
 
        lit := s.src[offs:s.offset]
        if hasCR {
                lit = stripCR(lit)
        }
 
        return string(lit)
}
 
// skipWhitespace consumes blanks, tabs and carriage returns. Newlines are
// consumed as well, unless s.insertSemi is set: in that case the scanner
// stops at the newline.
func (s *Scanner) skipWhitespace() {
        for {
                switch s.ch {
                case ' ', '\t', '\r':
                        // always skipped
                case '\n':
                        if s.insertSemi {
                                return
                        }
                default:
                        return
                }
                s.next()
        }
}
 
// Helper functions for scanning multi-byte tokens such as >> += >>= .
// Different routines recognize different length tok_i based on matches
// of ch_i. If a token ends in '=', the result is tok1 or tok3
// respectively. Otherwise, the result is tok0 if there was no other
// matching character, or tok2 if the matching character was ch2.
 
// switch2 chooses between a one-character operator and its form with a
// trailing '='. If the current character is '=', it is consumed and tok1
// is returned; otherwise nothing is consumed and tok0 is returned.
func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
        if s.ch != '=' {
                return tok0
        }
        s.next()
        return tok1
}
 
// switch3 extends switch2 by one alternative. If the current character
// is '=', it is consumed and tok1 is returned. If it is ch2, it is
// consumed and tok2 is returned. Otherwise nothing is consumed and tok0
// is returned.
func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
        switch s.ch {
        case '=':
                s.next()
                return tok1
        case ch2:
                s.next()
                return tok2
        }
        return tok0
}
 
// switch4 extends switch3: after a consumed ch2, a following '=' is
// consumed as well and selects tok3 instead of tok2. A leading '='
// selects tok1; if neither '=' nor ch2 is present, nothing is consumed
// and tok0 is returned.
func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
        switch s.ch {
        case '=':
                s.next()
                return tok1
        case ch2:
                s.next()
                if s.ch != '=' {
                        return tok2
                }
                s.next()
                return tok3
        }
        return tok0
}
 
// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// token.EOF.
//
// If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
// has the corresponding value.
//
// If the returned token is token.SEMICOLON, the corresponding
// literal string is ";" if the semicolon was present in the source,
// and "\n" if the semicolon was inserted because of a newline or
// at EOF.
//
// If the returned token is token.ILLEGAL, the literal string is the
// offending character.
//
// In all other cases, Scan returns an empty literal string.
//
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
// Scan adds line information to the file added to the file
// set with Init. Token positions are relative to that file
// and thus relative to the file set.
//
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
        s.skipWhitespace()
 
        // current token start
        pos = s.file.Pos(s.offset)
 
        // determine token value
        insertSemi := false
        switch ch := s.ch; {
        case isLetter(ch):
                lit = s.scanIdentifier()
                tok = token.Lookup(lit)
                switch tok {
                case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
                        insertSemi = true
                }
        case digitVal(ch) < 10:
                insertSemi = true
                tok, lit = s.scanNumber(false)
        default:
                s.next() // always make progress
                switch ch {
                case -1:
                        if s.insertSemi {
                                s.insertSemi = false // EOF consumed
                                return pos, token.SEMICOLON, "\n"
                        }
                        tok = token.EOF
                case '\n':
                        // we only reach here if s.insertSemi was
                        // set in the first place and exited early
                        // from s.skipWhitespace()
                        s.insertSemi = false // newline consumed
                        return pos, token.SEMICOLON, "\n"
                case '"':
                        insertSemi = true
                        tok = token.STRING
                        lit = s.scanString()
                case '\'':
                        insertSemi = true
                        tok = token.CHAR
                        lit = s.scanChar()
                case '`':
                        insertSemi = true
                        tok = token.STRING
                        lit = s.scanRawString()
                case ':':
                        tok = s.switch2(token.COLON, token.DEFINE)
                case '.':
                        if digitVal(s.ch) < 10 {
                                insertSemi = true
                                tok, lit = s.scanNumber(true)
                        } else if s.ch == '.' {
                                s.next()
                                if s.ch == '.' {
                                        s.next()
                                        tok = token.ELLIPSIS
                                }
                        } else {
                                tok = token.PERIOD
                        }
                case ',':
                        tok = token.COMMA
                case ';':
                        tok = token.SEMICOLON
                        lit = ";"
                case '(':
                        tok = token.LPAREN
                case ')':
                        insertSemi = true
                        tok = token.RPAREN
                case '[':
                        tok = token.LBRACK
                case ']':
                        insertSemi = true
                        tok = token.RBRACK
                case '{':
                        tok = token.LBRACE
                case '}':
                        insertSemi = true
                        tok = token.RBRACE
                case '+':
                        tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
                        if tok == token.INC {
                                insertSemi = true
                        }
                case '-':
                        tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
                        if tok == token.DEC {
                                insertSemi = true
                        }
                case '*':
                        tok = s.switch2(token.MUL, token.MUL_ASSIGN)
                case '/':
                        if s.ch == '/' || s.ch == '*' {
                                // comment
                                if s.insertSemi && s.findLineEnd() {
                                        // reset position to the beginning of the comment
                                        s.ch = '/'
                                        s.offset = s.file.Offset(pos)
                                        s.rdOffset = s.offset + 1
                                        s.insertSemi = false // newline consumed
                                        return pos, token.SEMICOLON, "\n"
                                }
                                lit = s.scanComment()
                                if s.mode&ScanComments == 0 {
                                        // skip comment
                                        s.insertSemi = false // newline consumed
                                        goto scanAgain
                                }
                                tok = token.COMMENT
                        } else {
                                tok = s.switch2(token.QUO, token.QUO_ASSIGN)
                        }
                case '%':
                        tok = s.switch2(token.REM, token.REM_ASSIGN)
                case '^':
                        tok = s.switch2(token.XOR, token.XOR_ASSIGN)
                case '<':
                        if s.ch == '-' {
                                s.next()
                                tok = token.ARROW
                        } else {
                                tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
                        }
                case '>':
                        tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
                case '=':
                        tok = s.switch2(token.ASSIGN, token.EQL)
                case '!':
                        tok = s.switch2(token.NOT, token.NEQ)
                case '&':
                        if s.ch == '^' {
                                s.next()
                                tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
                        } else {
                                tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
                        }
                case '|':
                        tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
                default:
                        s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
                        insertSemi = s.insertSemi // preserve insertSemi info
                        tok = token.ILLEGAL
                        lit = string(ch)
                }
        }
        if s.mode&dontInsertSemis == 0 {
                s.insertSemi = insertSemi
        }
 
        return
}

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [go/] [scanner/] [scanner.go] - Blame information for rev 774

Line No.	Rev	Author	Line
1	747	jeremybenn	`// Copyright 2009 The Go Authors. All rights reserved.`
2			`// Use of this source code is governed by a BSD-style`
3			`// license that can be found in the LICENSE file.`
4
5			`// Package scanner implements a scanner for Go source text. Takes a []byte as`
6			`// source which can then be tokenized through repeated calls to the Scan`
7			`// function. Typical use:`
8			`//`
9			`// var s scanner.Scanner`
10			`// fset := token.NewFileSet() // position information is relative to fset`
11			`// file := fset.AddFile(filename, fset.Base(), len(src)) // register file`
12			`// s.Init(file, src, nil /* no error handler */, 0)`
13			`// for {`
14			`// pos, tok, lit := s.Scan()`
15			`// if tok == token.EOF {`
16			`// break`
17			`// }`
18			`// // do something here with pos, tok, and lit`
19			`// }`
20			`//`
21			`package scanner`
22
23			`import (`
24			`"bytes"`
25			`"fmt"`
26			`"go/token"`
27			`"path/filepath"`
28			`"strconv"`
29			`"unicode"`
30			`"unicode/utf8"`
31			`)`
32
33			`// A Scanner holds the scanner's internal state while processing`
34			`// a given text. It can be allocated as part of another data`
35			`// structure but must be initialized via Init before use.`
36			`//`
37			`type Scanner struct {`
38			`// immutable state`
39			`file *token.File // source file handle`
40			`dir string // directory portion of file.Name()`
41			`src []byte // source`
42			`err ErrorHandler // error reporting; or nil`
43			`mode Mode // scanning mode`
44
45			`// scanning state`
46			`ch rune // current character`
47			`offset int // character offset`
48			`rdOffset int // reading offset (position after current character)`
49			`lineOffset int // current line offset`
50			`insertSemi bool // insert a semicolon before next newline`
51
52			`// public state - ok to modify`
53			`ErrorCount int // number of errors encountered`
54			`}`
55
56			`// Read the next Unicode char into s.ch.`
57			`// s.ch < 0 means end-of-file.`
58			`//`
59			`func (s *Scanner) next() {`
60			`if s.rdOffset < len(s.src) {`
61			`s.offset = s.rdOffset`
62			`if s.ch == '\n' {`
63			`s.lineOffset = s.offset`
64			`s.file.AddLine(s.offset)`
65			`}`
66			`r, w := rune(s.src[s.rdOffset]), 1`
67			`switch {`
68			`case r == 0:`
69			`s.error(s.offset, "illegal character NUL")`
70			`case r >= 0x80:`
71			`// not ASCII`
72			`r, w = utf8.DecodeRune(s.src[s.rdOffset:])`
73			`if r == utf8.RuneError && w == 1 {`
74			`s.error(s.offset, "illegal UTF-8 encoding")`
75			`}`
76			`}`
77			`s.rdOffset += w`
78			`s.ch = r`
79			`} else {`
80			`s.offset = len(s.src)`
81			`if s.ch == '\n' {`
82			`s.lineOffset = s.offset`
83			`s.file.AddLine(s.offset)`
84			`}`
85			`s.ch = -1 // eof`
86			`}`
87			`}`
88
89			`// A mode value is set of flags (or 0).`
90			`// They control scanner behavior.`
91			`//`
92			`type Mode uint`
93
94			`const (`
95			`ScanComments Mode = 1 << iota // return comments as COMMENT tokens`
96			`dontInsertSemis // do not automatically insert semicolons - for testing only`
97			`)`
98
99			`// Init prepares the scanner s to tokenize the text src by setting the`
100			`// scanner at the beginning of src. The scanner uses the file set file`
101			`// for position information and it adds line information for each line.`
102			`// It is ok to re-use the same file when re-scanning the same file as`
103			`// line information which is already present is ignored. Init causes a`
104			`// panic if the file size does not match the src size.`
105			`//`
106			`// Calls to Scan will use the error handler err if they encounter a`
107			`// syntax error and err is not nil. Also, for each error encountered,`
108			`// the Scanner field ErrorCount is incremented by one. The mode parameter`
109			`// determines how comments are handled.`
110			`//`
111			`// Note that Init may call err if there is an error in the first character`
112			`// of the file.`
113			`//`
114			`func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {`
115			`// Explicitly initialize all fields since a scanner may be reused.`
116			`if file.Size() != len(src) {`
117			`panic("file size does not match src len")`
118			`}`
119			`s.file = file`
120			`s.dir, _ = filepath.Split(file.Name())`
121			`s.src = src`
122			`s.err = err`
123			`s.mode = mode`
124
125			`s.ch = ' '`
126			`s.offset = 0`
127			`s.rdOffset = 0`
128			`s.lineOffset = 0`
129			`s.insertSemi = false`
130			`s.ErrorCount = 0`
131
132			`s.next()`
133			`}`
134
135			`func (s *Scanner) error(offs int, msg string) {`
136			`if s.err != nil {`
137			`s.err.Error(s.file.Position(s.file.Pos(offs)), msg)`
138			`}`
139			`s.ErrorCount++`
140			`}`
141
142			`var prefix = []byte("//line ")`
143
144			`func (s *Scanner) interpretLineComment(text []byte) {`
145			`if bytes.HasPrefix(text, prefix) {`
146			`// get filename and line number, if any`
147			`if i := bytes.LastIndex(text, []byte{':'}); i > 0 {`
148			`if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {`
149			`// valid //line filename:line comment;`
150			`filename := filepath.Clean(string(text[len(prefix):i]))`
151			`if !filepath.IsAbs(filename) {`
152			`// make filename relative to current directory`
153			`filename = filepath.Join(s.dir, filename)`
154			`}`
155			`// update scanner position`
156			`s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line`
157			`}`
158			`}`
159			`}`
160			`}`
161
162			`func (s *Scanner) scanComment() string {`
163			`// initial '/' already consumed; s.ch == '/' \|\| s.ch == '*'`
164			`offs := s.offset - 1 // position of initial '/'`
165
166			`if s.ch == '/' {`
167			`//-style comment`
168			`s.next()`
169			`for s.ch != '\n' && s.ch >= 0 {`
170			`s.next()`
171			`}`
172			`if offs == s.lineOffset {`
173			`// comment starts at the beginning of the current line`
174			`s.interpretLineComment(s.src[offs:s.offset])`
175			`}`
176			`goto exit`
177			`}`
178
179			`/*-style comment */`
180			`s.next()`
181			`for s.ch >= 0 {`
182			`ch := s.ch`
183			`s.next()`
184			`if ch == '*' && s.ch == '/' {`
185			`s.next()`
186			`goto exit`
187			`}`
188			`}`
189
190			`s.error(offs, "comment not terminated")`
191
192			`exit:`
193			`return string(s.src[offs:s.offset])`
194			`}`
195
196			`func (s *Scanner) findLineEnd() bool {`
197			`// initial '/' already consumed`
198
199			`defer func(offs int) {`
200			`// reset scanner state to where it was upon calling findLineEnd`
201			`s.ch = '/'`
202			`s.offset = offs`
203			`s.rdOffset = offs + 1`
204			`s.next() // consume initial '/' again`
205			`}(s.offset - 1)`
206
207			`// read ahead until a newline, EOF, or non-comment token is found`
208			`for s.ch == '/' \|\| s.ch == '*' {`
209			`if s.ch == '/' {`
210			`//-style comment always contains a newline`
211			`return true`
212			`}`
213			`/*-style comment: look for newline */`
214			`s.next()`
215			`for s.ch >= 0 {`
216			`ch := s.ch`
217			`if ch == '\n' {`
218			`return true`
219			`}`
220			`s.next()`
221			`if ch == '*' && s.ch == '/' {`
222			`s.next()`
223			`break`
224			`}`
225			`}`
226			`s.skipWhitespace() // s.insertSemi is set`
227			`if s.ch < 0 \|\| s.ch == '\n' {`
228			`return true`
229			`}`
230			`if s.ch != '/' {`
231			`// non-comment token`
232			`return false`
233			`}`
234			`s.next() // consume '/'`
235			`}`
236
237			`return false`
238			`}`
239
// isLetter reports whether ch may appear in an identifier as a letter:
// an ASCII letter, an underscore, or a non-ASCII rune that
// unicode.IsLetter accepts.
func isLetter(ch rune) bool {
	switch {
	case ch == '_', 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	case ch >= 0x80:
		return unicode.IsLetter(ch)
	}
	return false
}
243
// isDigit reports whether ch is an ASCII decimal digit or a non-ASCII rune
// that unicode.IsDigit accepts.
func isDigit(ch rune) bool {
	if ch < 0x80 {
		return '0' <= ch && ch <= '9'
	}
	return unicode.IsDigit(ch)
}
247
248			`func (s *Scanner) scanIdentifier() string {`
249			`offs := s.offset`
250			`for isLetter(s.ch) \|\| isDigit(s.ch) {`
251			`s.next()`
252			`}`
253			`return string(s.src[offs:s.offset])`
254			`}`
255
// digitVal returns the numeric value of the hexadecimal digit ch
// ('0'-'9', 'a'-'f', 'A'-'F'). For any other rune it returns 16, which is
// larger than any legal digit value.
func digitVal(ch rune) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if ch >= 'A' && ch <= 'F' {
		return int(ch-'A') + 10
	}
	return 16
}
267
268			`func (s *Scanner) scanMantissa(base int) {`
269			`for digitVal(s.ch) < base {`
270			`s.next()`
271			`}`
272			`}`
273
274			`func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {`
275			`// digitVal(s.ch) < 10`
276			`offs := s.offset`
277			`tok := token.INT`
278
279			`if seenDecimalPoint {`
280			`offs--`
281			`tok = token.FLOAT`
282			`s.scanMantissa(10)`
283			`goto exponent`
284			`}`
285
286			`if s.ch == '0' {`
287			`// int or float`
288			`offs := s.offset`
289			`s.next()`
290			`if s.ch == 'x' \|\| s.ch == 'X' {`
291			`// hexadecimal int`
292			`s.next()`
293			`s.scanMantissa(16)`
294			`if s.offset-offs <= 2 {`
295			`// only scanned "0x" or "0X"`
296			`s.error(offs, "illegal hexadecimal number")`
297			`}`
298			`} else {`
299			`// octal int or float`
300			`seenDecimalDigit := false`
301			`s.scanMantissa(8)`
302			`if s.ch == '8' \|\| s.ch == '9' {`
303			`// illegal octal int or float`
304			`seenDecimalDigit = true`
305			`s.scanMantissa(10)`
306			`}`
307			`if s.ch == '.' \|\| s.ch == 'e' \|\| s.ch == 'E' \|\| s.ch == 'i' {`
308			`goto fraction`
309			`}`
310			`// octal int`
311			`if seenDecimalDigit {`
312			`s.error(offs, "illegal octal number")`
313			`}`
314			`}`
315			`goto exit`
316			`}`
317
318			`// decimal int or float`
319			`s.scanMantissa(10)`
320
321			`fraction:`
322			`if s.ch == '.' {`
323			`tok = token.FLOAT`
324			`s.next()`
325			`s.scanMantissa(10)`
326			`}`
327
328			`exponent:`
329			`if s.ch == 'e' \|\| s.ch == 'E' {`
330			`tok = token.FLOAT`
331			`s.next()`
332			`if s.ch == '-' \|\| s.ch == '+' {`
333			`s.next()`
334			`}`
335			`s.scanMantissa(10)`
336			`}`
337
338			`if s.ch == 'i' {`
339			`tok = token.IMAG`
340			`s.next()`
341			`}`
342
343			`exit:`
344			`return tok, string(s.src[offs:s.offset])`
345			`}`
346
347			`func (s *Scanner) scanEscape(quote rune) {`
348			`offs := s.offset`
349
350			`var i, base, max uint32`
351			`switch s.ch {`
352			`case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:`
353			`s.next()`
354			`return`
355			`case '0', '1', '2', '3', '4', '5', '6', '7':`
356			`i, base, max = 3, 8, 255`
357			`case 'x':`
358			`s.next()`
359			`i, base, max = 2, 16, 255`
360			`case 'u':`
361			`s.next()`
362			`i, base, max = 4, 16, unicode.MaxRune`
363			`case 'U':`
364			`s.next()`
365			`i, base, max = 8, 16, unicode.MaxRune`
366			`default:`
367			`s.next() // always make progress`
368			`s.error(offs, "unknown escape sequence")`
369			`return`
370			`}`
371
372			`var x uint32`
373			`for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {`
374			`d := uint32(digitVal(s.ch))`
375			`if d >= base {`
376			`s.error(s.offset, "illegal character in escape sequence")`
377			`break`
378			`}`
379			`x = x*base + d`
380			`s.next()`
381			`}`
382			`// in case of an error, consume remaining chars`
383			`for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {`
384			`s.next()`
385			`}`
386			`if x > max \|\| 0xd800 <= x && x < 0xe000 {`
387			`s.error(offs, "escape sequence is invalid Unicode code point")`
388			`}`
389			`}`
390
391			`func (s *Scanner) scanChar() string {`
392			`// '\'' opening already consumed`
393			`offs := s.offset - 1`
394
395			`n := 0`
396			`for s.ch != '\'' {`
397			`ch := s.ch`
398			`n++`
399			`s.next()`
400			`if ch == '\n' \|\| ch < 0 {`
401			`s.error(offs, "character literal not terminated")`
402			`n = 1`
403			`break`
404			`}`
405			`if ch == '\\' {`
406			`s.scanEscape('\'')`
407			`}`
408			`}`
409
410			`s.next()`
411
412			`if n != 1 {`
413			`s.error(offs, "illegal character literal")`
414			`}`
415
416			`return string(s.src[offs:s.offset])`
417			`}`
418
419			`func (s *Scanner) scanString() string {`
420			`// '"' opening already consumed`
421			`offs := s.offset - 1`
422
423			`for s.ch != '"' {`
424			`ch := s.ch`
425			`s.next()`
426			`if ch == '\n' \|\| ch < 0 {`
427			`s.error(offs, "string not terminated")`
428			`break`
429			`}`
430			`if ch == '\\' {`
431			`s.scanEscape('"')`
432			`}`
433			`}`
434
435			`s.next()`
436
437			`return string(s.src[offs:s.offset])`
438			`}`
439
// stripCR returns a newly allocated copy of b with every carriage return
// ('\r') byte removed. b itself is not modified.
func stripCR(b []byte) []byte {
	out := make([]byte, 0, len(b))
	for _, c := range b {
		if c == '\r' {
			continue
		}
		out = append(out, c)
	}
	return out
}
451
452			`func (s *Scanner) scanRawString() string {`
453			// '`' opening already consumed
454			`offs := s.offset - 1`
455
456			`hasCR := false`
457			for s.ch != '`' {
458			`ch := s.ch`
459			`s.next()`
460			`if ch == '\r' {`
461			`hasCR = true`
462			`}`
463			`if ch < 0 {`
464			`s.error(offs, "string not terminated")`
465			`break`
466			`}`
467			`}`
468
469			`s.next()`
470
471			`lit := s.src[offs:s.offset]`
472			`if hasCR {`
473			`lit = stripCR(lit)`
474			`}`
475
476			`return string(lit)`
477			`}`
478
479			`func (s *Scanner) skipWhitespace() {`
480			`for s.ch == ' ' \|\| s.ch == '\t' \|\| s.ch == '\n' && !s.insertSemi \|\| s.ch == '\r' {`
481			`s.next()`
482			`}`
483			`}`
484
485			`// Helper functions for scanning multi-byte tokens such as >> += >>= .`
486			`// Different routines recognize different length tok_i based on matches`
487			`// of ch_i. If a token ends in '=', the result is tok1 or tok3`
488			`// respectively. Otherwise, the result is tok0 if there was no other`
489			`// matching character, or tok2 if the matching character was ch2.`
490
491			`func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {`
492			`if s.ch == '=' {`
493			`s.next()`
494			`return tok1`
495			`}`
496			`return tok0`
497			`}`
498
499			`func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {`
500			`if s.ch == '=' {`
501			`s.next()`
502			`return tok1`
503			`}`
504			`if s.ch == ch2 {`
505			`s.next()`
506			`return tok2`
507			`}`
508			`return tok0`
509			`}`
510
511			`func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {`
512			`if s.ch == '=' {`
513			`s.next()`
514			`return tok1`
515			`}`
516			`if s.ch == ch2 {`
517			`s.next()`
518			`if s.ch == '=' {`
519			`s.next()`
520			`return tok3`
521			`}`
522			`return tok2`
523			`}`
524			`return tok0`
525			`}`
526
527			`// Scan scans the next token and returns the token position, the token,`
528			`// and its literal string if applicable. The source end is indicated by`
529			`// token.EOF.`
530			`//`
531			`// If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,`
532			`// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string`
533			`// has the corresponding value.`
534			`//`
535			`// If the returned token is token.SEMICOLON, the corresponding`
536			`// literal string is ";" if the semicolon was present in the source,`
537			`// and "\n" if the semicolon was inserted because of a newline or`
538			`// at EOF.`
539			`//`
540			`// If the returned token is token.ILLEGAL, the literal string is the`
541			`// offending character.`
542			`//`
543			`// In all other cases, Scan returns an empty literal string.`
544			`//`
545			`// For more tolerant parsing, Scan will return a valid token if`
546			`// possible even if a syntax error was encountered. Thus, even`
547			`// if the resulting token sequence contains no illegal tokens,`
548			`// a client may not assume that no error occurred. Instead it`
549			`// must check the scanner's ErrorCount or the number of calls`
550			`// of the error handler, if there was one installed.`
551			`//`
552			`// Scan adds line information to the file added to the file`
553			`// set with Init. Token positions are relative to that file`
554			`// and thus relative to the file set.`
555			`//`
556			`func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {`
557			`scanAgain:`
558			`s.skipWhitespace()`
559
560			`// current token start`
561			`pos = s.file.Pos(s.offset)`
562
563			`// determine token value`
564			`insertSemi := false`
565			`switch ch := s.ch; {`
566			`case isLetter(ch):`
567			`lit = s.scanIdentifier()`
568			`tok = token.Lookup(lit)`
569			`switch tok {`
570			`case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:`
571			`insertSemi = true`
572			`}`
573			`case digitVal(ch) < 10:`
574			`insertSemi = true`
575			`tok, lit = s.scanNumber(false)`
576			`default:`
577			`s.next() // always make progress`
578			`switch ch {`
579			`case -1:`
580			`if s.insertSemi {`
581			`s.insertSemi = false // EOF consumed`
582			`return pos, token.SEMICOLON, "\n"`
583			`}`
584			`tok = token.EOF`
585			`case '\n':`
586			`// we only reach here if s.insertSemi was`
587			`// set in the first place and exited early`
588			`// from s.skipWhitespace()`
589			`s.insertSemi = false // newline consumed`
590			`return pos, token.SEMICOLON, "\n"`
591			`case '"':`
592			`insertSemi = true`
593			`tok = token.STRING`
594			`lit = s.scanString()`
595			`case '\'':`
596			`insertSemi = true`
597			`tok = token.CHAR`
598			`lit = s.scanChar()`
599			case '`':
600			`insertSemi = true`
601			`tok = token.STRING`
602			`lit = s.scanRawString()`
603			`case ':':`
604			`tok = s.switch2(token.COLON, token.DEFINE)`
605			`case '.':`
606			`if digitVal(s.ch) < 10 {`
607			`insertSemi = true`
608			`tok, lit = s.scanNumber(true)`
609			`} else if s.ch == '.' {`
610			`s.next()`
611			`if s.ch == '.' {`
612			`s.next()`
613			`tok = token.ELLIPSIS`
614			`}`
615			`} else {`
616			`tok = token.PERIOD`
617			`}`
618			`case ',':`
619			`tok = token.COMMA`
620			`case ';':`
621			`tok = token.SEMICOLON`
622			`lit = ";"`
623			`case '(':`
624			`tok = token.LPAREN`
625			`case ')':`
626			`insertSemi = true`
627			`tok = token.RPAREN`
628			`case '[':`
629			`tok = token.LBRACK`
630			`case ']':`
631			`insertSemi = true`
632			`tok = token.RBRACK`
633			`case '{':`
634			`tok = token.LBRACE`
635			`case '}':`
636			`insertSemi = true`
637			`tok = token.RBRACE`
638			`case '+':`
639			`tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)`
640			`if tok == token.INC {`
641			`insertSemi = true`
642			`}`
643			`case '-':`
644			`tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)`
645			`if tok == token.DEC {`
646			`insertSemi = true`
647			`}`
648			`case '*':`
649			`tok = s.switch2(token.MUL, token.MUL_ASSIGN)`
650			`case '/':`
651			`if s.ch == '/' \|\| s.ch == '*' {`
652			`// comment`
653			`if s.insertSemi && s.findLineEnd() {`
654			`// reset position to the beginning of the comment`
655			`s.ch = '/'`
656			`s.offset = s.file.Offset(pos)`
657			`s.rdOffset = s.offset + 1`
658			`s.insertSemi = false // newline consumed`
659			`return pos, token.SEMICOLON, "\n"`
660			`}`
661			`lit = s.scanComment()`
662			`if s.mode&ScanComments == 0 {`
663			`// skip comment`
664			`s.insertSemi = false // newline consumed`
665			`goto scanAgain`
666			`}`
667			`tok = token.COMMENT`
668			`} else {`
669			`tok = s.switch2(token.QUO, token.QUO_ASSIGN)`
670			`}`
671			`case '%':`
672			`tok = s.switch2(token.REM, token.REM_ASSIGN)`
673			`case '^':`
674			`tok = s.switch2(token.XOR, token.XOR_ASSIGN)`
675			`case '<':`
676			`if s.ch == '-' {`
677			`s.next()`
678			`tok = token.ARROW`
679			`} else {`
680			`tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)`
681			`}`
682			`case '>':`
683			`tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)`
684			`case '=':`
685			`tok = s.switch2(token.ASSIGN, token.EQL)`
686			`case '!':`
687			`tok = s.switch2(token.NOT, token.NEQ)`
688			`case '&':`
689			`if s.ch == '^' {`
690			`s.next()`
691			`tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)`
692			`} else {`
693			`tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)`
694			`}`
695			`case '\|':`
696			`tok = s.switch3(token.OR, token.OR_ASSIGN, '\|', token.LOR)`
697			`default:`
698			`s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))`
699			`insertSemi = s.insertSemi // preserve insertSemi info`
700			`tok = token.ILLEGAL`
701			`lit = string(ch)`
702			`}`
703			`}`
704			`if s.mode&dontInsertSemis == 0 {`
705			`s.insertSemi = insertSemi`
706			`}`
707
708			`return`
709			`}`