URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [ebnf/] [ebnf.go] - Blame information for rev 747

Details | Compare with Previous | View Log


// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
 
// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
// satisfying the following grammar (represented itself in EBNF):
//
//      Production  = name "=" [ Expression ] "." .
//      Expression  = Alternative { "|" Alternative } .
//      Alternative = Term { Term } .
//      Term        = name | token [ "…" token ] | Group | Option | Repetition .
//      Group       = "(" Expression ")" .
//      Option      = "[" Expression "]" .
//      Repetition  = "{" Expression "}" .
//
// A name is a Go identifier, a token is a Go string, and comments
// and white space follow the same rules as for the Go language.
// Production names starting with an uppercase Unicode letter denote
// non-terminal productions (i.e., productions which allow white-space
// and comments between tokens); all other production names denote
// lexical productions.
//
package ebnf
 
import (
        "errors"
        "fmt"
        "sort"
        "text/scanner"
        "unicode"
        "unicode/utf8"
)
 
// ----------------------------------------------------------------------------
// Error handling
 
// errorList is an ordered collection of errors that itself implements
// the error interface.
type errorList []error

// Err converts the list into an error value: it yields nil for an empty
// list and the list itself otherwise.
func (list errorList) Err() error {
        if len(list) > 0 {
                return list
        }
        return nil
}

// Error formats the list as a single message. An empty list reads
// "no errors", a single-element list reads as that element, and a longer
// list reads as its first element followed by a count of the remaining ones.
func (list errorList) Error() string {
        n := len(list)
        if n == 0 {
                return "no errors"
        }
        if n == 1 {
                return list[0].Error()
        }
        return fmt.Sprintf("%s (and %d more errors)", list[0], n-1)
}
 
// newError builds an error whose text is the position pos, a colon and a
// space, and then msg.
func newError(pos scanner.Position, msg string) error {
        text := fmt.Sprintf("%s: %s", pos, msg)
        return errors.New(text)
}
 
// ----------------------------------------------------------------------------
// Internal representation
 
type (
        // An Expression node represents a production expression.
        // Each node type declared in this group has a Pos method.
        Expression interface {
                // Pos is the position of the first character of the syntactic construct
                Pos() scanner.Position
        }

        // An Alternative node represents a non-empty list of alternative expressions.
        Alternative []Expression // x | y | z

        // A Sequence node represents a non-empty list of sequential expressions.
        Sequence []Expression // x y z

        // A Name node represents a production name.
        Name struct {
                // StringPos is the position reported by Pos.
                StringPos scanner.Position
                // String is the name; the verifier uses it as the key into a Grammar.
                String    string
        }

        // A Token node represents a literal.
        Token struct {
                // StringPos is the position reported by Pos.
                StringPos scanner.Position
                // String is the literal text. When the token bounds a Range,
                // the verifier requires it to hold exactly one rune.
                String    string
        }

        // A Range node represents a range of characters.
        Range struct {
                Begin, End *Token // begin ... end
        }

        // A Group node represents a grouped expression.
        Group struct {
                // Lparen is the position reported by Pos.
                Lparen scanner.Position
                Body   Expression // (body)
        }

        // An Option node represents an optional expression.
        Option struct {
                // Lbrack is the position reported by Pos.
                Lbrack scanner.Position
                Body   Expression // [body]
        }

        // A Repetition node represents a repeated expression.
        Repetition struct {
                // Lbrace is the position reported by Pos.
                Lbrace scanner.Position
                Body   Expression // {body}
        }

        // A Production node represents an EBNF production.
        Production struct {
                // Name is the production's name; Pos reports the name's position.
                Name *Name
                // Expr is the production's expression; the verifier treats a nil
                // Expr as an empty expression.
                Expr Expression
        }

        // A Bad node stands for pieces of source code that lead to a parse error.
        Bad struct {
                // TokPos is the position reported by Pos.
                TokPos scanner.Position
                Error  string // parser error message
        }

        // A Grammar is a set of EBNF productions. The map
        // is indexed by production name.
        //
        Grammar map[string]*Production
)
 
// Pos reports the position of the first alternative. It indexes x[0]
// unconditionally: the parser always generates non-empty Alternatives.
func (x Alternative) Pos() scanner.Position {
        first := x[0]
        return first.Pos()
}

// Pos reports the position of the first element of the sequence. It indexes
// x[0] unconditionally: the parser always generates non-empty Sequences.
func (x Sequence) Pos() scanner.Position {
        first := x[0]
        return first.Pos()
}

// Pos reports the position stored with the name.
func (x *Name) Pos() scanner.Position {
        return x.StringPos
}

// Pos reports the position stored with the token.
func (x *Token) Pos() scanner.Position {
        return x.StringPos
}

// Pos reports the position of the range's Begin token.
func (x *Range) Pos() scanner.Position {
        begin := x.Begin
        return begin.Pos()
}

// Pos reports the position of the group's opening parenthesis.
func (x *Group) Pos() scanner.Position {
        return x.Lparen
}

// Pos reports the position of the option's opening bracket.
func (x *Option) Pos() scanner.Position {
        return x.Lbrack
}

// Pos reports the position of the repetition's opening brace.
func (x *Repetition) Pos() scanner.Position {
        return x.Lbrace
}

// Pos reports the position of the production's name.
func (x *Production) Pos() scanner.Position {
        name := x.Name
        return name.Pos()
}

// Pos reports the position recorded for the bad node.
func (x *Bad) Pos() scanner.Position {
        return x.TokPos
}
 
// ----------------------------------------------------------------------------
// Grammar verification
 
// isLexical reports whether name denotes a lexical production, i.e. whether
// its first rune is not an uppercase letter. The empty name counts as lexical.
func isLexical(name string) bool {
        first, _ := utf8.DecodeRuneInString(name)
        if unicode.IsUpper(first) {
                return false
        }
        return true
}
 
// verifier holds the state of one grammar verification run.
type verifier struct {
        // errors collects every problem found, in the order reported.
        errors   errorList
        // worklist holds productions that have been reached but not yet
        // checked; verify pops entries from its end.
        worklist []*Production
        reached  Grammar // set of productions reached from (and including) the root production
        // grammar is the grammar being verified.
        grammar  Grammar
}
 
// error records a problem with message msg at position pos by appending it
// to the verifier's error list.
func (v *verifier) error(pos scanner.Position, msg string) {
        err := newError(pos, msg)
        v.errors = append(v.errors, err)
}
 
// push schedules prod for verification and marks it as reached. A production
// whose name is already in the reached set is ignored, so each production is
// scheduled at most once.
func (v *verifier) push(prod *Production) {
        key := prod.Name.String
        if _, seen := v.reached[key]; seen {
                return
        }
        v.worklist = append(v.worklist, prod)
        v.reached[key] = prod
}
 
// verifyChar checks that the token x consists of exactly one rune and
// returns that rune. Otherwise it records an error at the token's position
// and returns 0.
func (v *verifier) verifyChar(x *Token) rune {
        lit := x.String
        if utf8.RuneCountInString(lit) == 1 {
                r, _ := utf8.DecodeRuneInString(lit)
                return r
        }
        v.error(x.Pos(), "single char expected, found "+lit)
        return 0
}
 
// verifyExpr recursively checks expr and records every problem it finds.
// The lexical flag states whether expr belongs to a lexical production; it
// is passed unchanged to all sub-expressions. An expression of a type not
// declared in this package causes a panic.
func (v *verifier) verifyExpr(expr Expression, lexical bool) {
        switch node := expr.(type) {
        case nil:
                // an empty expression has nothing to check
                return

        case Alternative:
                for i := range node {
                        v.verifyExpr(node[i], lexical)
                }

        case Sequence:
                for i := range node {
                        v.verifyExpr(node[i], lexical)
                }

        case *Name:
                name := node.String
                // the referenced production must exist; schedule it
                // for verification unless that already happened
                prod, defined := v.grammar[name]
                if !defined {
                        v.error(node.Pos(), "missing production "+name)
                } else {
                        v.push(prod)
                }
                // a lexical production may refer only to
                // other lexical productions
                if !lexical || isLexical(name) {
                        return
                }
                v.error(node.Pos(), "reference to non-lexical production "+name)

        case *Token:
                // nothing to do for now

        case *Range:
                // both bounds must be single characters, and the
                // lower bound must be strictly below the upper bound
                lo := v.verifyChar(node.Begin)
                hi := v.verifyChar(node.End)
                if hi <= lo {
                        v.error(node.Pos(), "decreasing character range")
                }

        case *Group:
                v.verifyExpr(node.Body, lexical)

        case *Option:
                v.verifyExpr(node.Body, lexical)

        case *Repetition:
                v.verifyExpr(node.Body, lexical)

        case *Bad:
                // surface the parser's error message
                v.error(node.Pos(), node.Error)

        default:
                panic(fmt.Sprintf("internal error: unexpected type %T", expr))
        }
}
 
func (v *verifier) verify(grammar Grammar, start string) {
        // find root production
        root, found := grammar[start]
        if !found {
                var noPos scanner.Position
                v.error(noPos, "no start production "+start)
                return
        }
 
        // initialize verifier
        v.worklist = v.worklist[0:0]
        v.reached = make(Grammar)
        v.grammar = grammar
 
        // work through the worklist
        v.push(root)
        for {
                n := len(v.worklist) - 1
                if n < 0 {
                        break
                }
                prod := v.worklist[n]
                v.worklist = v.worklist[0:n]
                v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
        }
 
        // check if all productions were reached
        if len(v.reached) < len(v.grammar) {
                for name, prod := range v.grammar {
                        if _, found := v.reached[name]; !found {
                                v.error(prod.Pos(), name+" is unreachable")
                        }
                }
        }
}
 
// Verify checks that:
//      - all productions used are defined
//      - all productions defined are used when beginning at start
//      - lexical productions refer only to other lexical productions
//
// It also reports character ranges whose bounds are not single characters
// or are not increasing, and parse errors recorded in Bad nodes. The result
// is nil if no problem was found.
//
func Verify(grammar Grammar, start string) error {
        v := new(verifier)
        v.verify(grammar, start)
        return v.errors.Err()
}

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [ebnf/] [ebnf.go] - Blame information for rev 747

Line No.	Rev	Author	Line
1	747	jeremybenn	`// Copyright 2009 The Go Authors. All rights reserved.`
2			`// Use of this source code is governed by a BSD-style`
3			`// license that can be found in the LICENSE file.`
4
5			`// Package ebnf is a library for EBNF grammars. The input is text ([]byte)`
6			`// satisfying the following grammar (represented itself in EBNF):`
7			`//`
8			`// Production = name "=" [ Expression ] "." .`
9			`// Expression = Alternative { "\|" Alternative } .`
10			`// Alternative = Term { Term } .`
11			`// Term = name \| token [ "…" token ] \| Group \| Option \| Repetition .`
12			`// Group = "(" Expression ")" .`
13			`// Option = "[" Expression "]" .`
14			`// Repetition = "{" Expression "}" .`
15			`//`
16			`// A name is a Go identifier, a token is a Go string, and comments`
17			`// and white space follow the same rules as for the Go language.`
18			`// Production names starting with an uppercase Unicode letter denote`
19			`// non-terminal productions (i.e., productions which allow white-space`
20			`// and comments between tokens); all other production names denote`
21			`// lexical productions.`
22			`//`
23			`package ebnf`
24
25			`import (`
26			`"errors"`
27			`"fmt"`
28			`"text/scanner"`
29			`"unicode"`
30			`"unicode/utf8"`
31			`)`
32
33			`// ----------------------------------------------------------------------------`
34			`// Error handling`
35
36			`type errorList []error`
37
38			`func (list errorList) Err() error {`
39			`if len(list) == 0 {`
40			`return nil`
41			`}`
42			`return list`
43			`}`
44
45			`func (list errorList) Error() string {`
46			`switch len(list) {`
47			`case 0:`
48			`return "no errors"`
49			`case 1:`
50			`return list[0].Error()`
51			`}`
52			`return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)`
53			`}`
54
55			`func newError(pos scanner.Position, msg string) error {`
56			`return errors.New(fmt.Sprintf("%s: %s", pos, msg))`
57			`}`
58
59			`// ----------------------------------------------------------------------------`
60			`// Internal representation`
61
62			`type (`
63			`// An Expression node represents a production expression.`
64			`Expression interface {`
65			`// Pos is the position of the first character of the syntactic construct`
66			`Pos() scanner.Position`
67			`}`
68
69			`// An Alternative node represents a non-empty list of alternative expressions.`
70			`Alternative []Expression // x \| y \| z`
71
72			`// A Sequence node represents a non-empty list of sequential expressions.`
73			`Sequence []Expression // x y z`
74
75			`// A Name node represents a production name.`
76			`Name struct {`
77			`StringPos scanner.Position`
78			`String string`
79			`}`
80
81			`// A Token node represents a literal.`
82			`Token struct {`
83			`StringPos scanner.Position`
84			`String string`
85			`}`
86
87			`// A List node represents a range of characters.`
88			`Range struct {`
89			`Begin, End *Token // begin ... end`
90			`}`
91
92			`// A Group node represents a grouped expression.`
93			`Group struct {`
94			`Lparen scanner.Position`
95			`Body Expression // (body)`
96			`}`
97
98			`// An Option node represents an optional expression.`
99			`Option struct {`
100			`Lbrack scanner.Position`
101			`Body Expression // [body]`
102			`}`
103
104			`// A Repetition node represents a repeated expression.`
105			`Repetition struct {`
106			`Lbrace scanner.Position`
107			`Body Expression // {body}`
108			`}`
109
110			`// A Production node represents an EBNF production.`
111			`Production struct {`
112			`Name *Name`
113			`Expr Expression`
114			`}`
115
116			`// A Bad node stands for pieces of source code that lead to a parse error.`
117			`Bad struct {`
118			`TokPos scanner.Position`
119			`Error string // parser error message`
120			`}`
121
122			`// A Grammar is a set of EBNF productions. The map`
123			`// is indexed by production name.`
124			`//`
125			`Grammar map[string]*Production`
126			`)`
127
128			`func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative`
129			`func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences`
130			`func (x *Name) Pos() scanner.Position { return x.StringPos }`
131			`func (x *Token) Pos() scanner.Position { return x.StringPos }`
132			`func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }`
133			`func (x *Group) Pos() scanner.Position { return x.Lparen }`
134			`func (x *Option) Pos() scanner.Position { return x.Lbrack }`
135			`func (x *Repetition) Pos() scanner.Position { return x.Lbrace }`
136			`func (x *Production) Pos() scanner.Position { return x.Name.Pos() }`
137			`func (x *Bad) Pos() scanner.Position { return x.TokPos }`
138
139			`// ----------------------------------------------------------------------------`
140			`// Grammar verification`
141
142			`func isLexical(name string) bool {`
143			`ch, _ := utf8.DecodeRuneInString(name)`
144			`return !unicode.IsUpper(ch)`
145			`}`
146
147			`type verifier struct {`
148			`errors errorList`
149			`worklist []*Production`
150			`reached Grammar // set of productions reached from (and including) the root production`
151			`grammar Grammar`
152			`}`
153
154			`func (v *verifier) error(pos scanner.Position, msg string) {`
155			`v.errors = append(v.errors, newError(pos, msg))`
156			`}`
157
158			`func (v *verifier) push(prod *Production) {`
159			`name := prod.Name.String`
160			`if _, found := v.reached[name]; !found {`
161			`v.worklist = append(v.worklist, prod)`
162			`v.reached[name] = prod`
163			`}`
164			`}`
165
166			`func (v *verifier) verifyChar(x *Token) rune {`
167			`s := x.String`
168			`if utf8.RuneCountInString(s) != 1 {`
169			`v.error(x.Pos(), "single char expected, found "+s)`
170			`return 0`
171			`}`
172			`ch, _ := utf8.DecodeRuneInString(s)`
173			`return ch`
174			`}`
175
176			`func (v *verifier) verifyExpr(expr Expression, lexical bool) {`
177			`switch x := expr.(type) {`
178			`case nil:`
179			`// empty expression`
180			`case Alternative:`
181			`for _, e := range x {`
182			`v.verifyExpr(e, lexical)`
183			`}`
184			`case Sequence:`
185			`for _, e := range x {`
186			`v.verifyExpr(e, lexical)`
187			`}`
188			`case *Name:`
189			`// a production with this name must exist;`
190			`// add it to the worklist if not yet processed`
191			`if prod, found := v.grammar[x.String]; found {`
192			`v.push(prod)`
193			`} else {`
194			`v.error(x.Pos(), "missing production "+x.String)`
195			`}`
196			`// within a lexical production references`
197			`// to non-lexical productions are invalid`
198			`if lexical && !isLexical(x.String) {`
199			`v.error(x.Pos(), "reference to non-lexical production "+x.String)`
200			`}`
201			`case *Token:`
202			`// nothing to do for now`
203			`case *Range:`
204			`i := v.verifyChar(x.Begin)`
205			`j := v.verifyChar(x.End)`
206			`if i >= j {`
207			`v.error(x.Pos(), "decreasing character range")`
208			`}`
209			`case *Group:`
210			`v.verifyExpr(x.Body, lexical)`
211			`case *Option:`
212			`v.verifyExpr(x.Body, lexical)`
213			`case *Repetition:`
214			`v.verifyExpr(x.Body, lexical)`
215			`case *Bad:`
216			`v.error(x.Pos(), x.Error)`
217			`default:`
218			`panic(fmt.Sprintf("internal error: unexpected type %T", expr))`
219			`}`
220			`}`
221
222			`func (v *verifier) verify(grammar Grammar, start string) {`
223			`// find root production`
224			`root, found := grammar[start]`
225			`if !found {`
226			`var noPos scanner.Position`
227			`v.error(noPos, "no start production "+start)`
228			`return`
229			`}`
230
231			`// initialize verifier`
232			`v.worklist = v.worklist[0:0]`
233			`v.reached = make(Grammar)`
234			`v.grammar = grammar`
235
236			`// work through the worklist`
237			`v.push(root)`
238			`for {`
239			`n := len(v.worklist) - 1`
240			`if n < 0 {`
241			`break`
242			`}`
243			`prod := v.worklist[n]`
244			`v.worklist = v.worklist[0:n]`
245			`v.verifyExpr(prod.Expr, isLexical(prod.Name.String))`
246			`}`
247
248			`// check if all productions were reached`
249			`if len(v.reached) < len(v.grammar) {`
250			`for name, prod := range v.grammar {`
251			`if _, found := v.reached[name]; !found {`
252			`v.error(prod.Pos(), name+" is unreachable")`
253			`}`
254			`}`
255			`}`
256			`}`
257
258			`// Verify checks that:`
259			`// - all productions used are defined`
260			`// - all productions defined are used when beginning at start`
261			`// - lexical productions refer only to other lexical productions`
262			`//`
263			`// Position information is interpreted relative to the file set fset.`
264			`//`
265			`func Verify(grammar Grammar, start string) error {`
266			`var v verifier`
267			`v.verify(grammar, start)`
268			`return v.errors.Err()`
269			`}`