OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [html/] [template/] [transition.go] - Blame information for rev 747

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
package template
6
 
7
import (
8
        "bytes"
9
        "strings"
10
)
11
 
12
// transitionFunc is the array of context transition functions for text nodes.
13
// A transition function takes a context and template text input, and returns
14
// the updated context and the number of bytes consumed from the front of the
15
// input.
16
var transitionFunc = [...]func(context, []byte) (context, int){
17
        stateText:        tText,
18
        stateTag:         tTag,
19
        stateAttrName:    tAttrName,
20
        stateAfterName:   tAfterName,
21
        stateBeforeValue: tBeforeValue,
22
        stateHTMLCmt:     tHTMLCmt,
23
        stateRCDATA:      tSpecialTagEnd,
24
        stateAttr:        tAttr,
25
        stateURL:         tURL,
26
        stateJS:          tJS,
27
        stateJSDqStr:     tJSDelimited,
28
        stateJSSqStr:     tJSDelimited,
29
        stateJSRegexp:    tJSDelimited,
30
        stateJSBlockCmt:  tBlockCmt,
31
        stateJSLineCmt:   tLineCmt,
32
        stateCSS:         tCSS,
33
        stateCSSDqStr:    tCSSStr,
34
        stateCSSSqStr:    tCSSStr,
35
        stateCSSDqURL:    tCSSStr,
36
        stateCSSSqURL:    tCSSStr,
37
        stateCSSURL:      tCSSStr,
38
        stateCSSBlockCmt: tBlockCmt,
39
        stateCSSLineCmt:  tLineCmt,
40
        stateError:       tError,
41
}
42
 
43
// commentStart and commentEnd are the byte sequences that open and close an
// HTML comment. tText compares a four-byte window against commentStart, and
// tHTMLCmt searches for commentEnd and skips three bytes past the match.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
45
 
46
// tText is the context transition function for the text state.
47
func tText(c context, s []byte) (context, int) {
48
        k := 0
49
        for {
50
                i := k + bytes.IndexByte(s[k:], '<')
51
                if i < k || i+1 == len(s) {
52
                        return c, len(s)
53
                } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
54
                        return context{state: stateHTMLCmt}, i + 4
55
                }
56
                i++
57
                end := false
58
                if s[i] == '/' {
59
                        if i+1 == len(s) {
60
                                return c, len(s)
61
                        }
62
                        end, i = true, i+1
63
                }
64
                j, e := eatTagName(s, i)
65
                if j != i {
66
                        if end {
67
                                e = elementNone
68
                        }
69
                        // We've found an HTML tag.
70
                        return context{state: stateTag, element: e}, j
71
                }
72
                k = j
73
        }
74
        panic("unreachable")
75
}
76
 
77
var elementContentType = [...]state{
78
        elementNone:     stateText,
79
        elementScript:   stateJS,
80
        elementStyle:    stateCSS,
81
        elementTextarea: stateRCDATA,
82
        elementTitle:    stateRCDATA,
83
}
84
 
85
// tTag is the context transition function for the tag state.
86
func tTag(c context, s []byte) (context, int) {
87
        // Find the attribute name.
88
        i := eatWhiteSpace(s, 0)
89
        if i == len(s) {
90
                return c, len(s)
91
        }
92
        if s[i] == '>' {
93
                return context{
94
                        state:   elementContentType[c.element],
95
                        element: c.element,
96
                }, i + 1
97
        }
98
        j, err := eatAttrName(s, i)
99
        if err != nil {
100
                return context{state: stateError, err: err}, len(s)
101
        }
102
        state, attr := stateTag, attrNone
103
        if i == j {
104
                return context{
105
                        state: stateError,
106
                        err:   errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
107
                }, len(s)
108
        }
109
        switch attrType(string(s[i:j])) {
110
        case contentTypeURL:
111
                attr = attrURL
112
        case contentTypeCSS:
113
                attr = attrStyle
114
        case contentTypeJS:
115
                attr = attrScript
116
        }
117
        if j == len(s) {
118
                state = stateAttrName
119
        } else {
120
                state = stateAfterName
121
        }
122
        return context{state: state, element: c.element, attr: attr}, j
123
}
124
 
125
// tAttrName is the context transition function for stateAttrName.
126
func tAttrName(c context, s []byte) (context, int) {
127
        i, err := eatAttrName(s, 0)
128
        if err != nil {
129
                return context{state: stateError, err: err}, len(s)
130
        } else if i != len(s) {
131
                c.state = stateAfterName
132
        }
133
        return c, i
134
}
135
 
136
// tAfterName is the context transition function for stateAfterName.
137
func tAfterName(c context, s []byte) (context, int) {
138
        // Look for the start of the value.
139
        i := eatWhiteSpace(s, 0)
140
        if i == len(s) {
141
                return c, len(s)
142
        } else if s[i] != '=' {
143
                // Occurs due to tag ending '>', and valueless attribute.
144
                c.state = stateTag
145
                return c, i
146
        }
147
        c.state = stateBeforeValue
148
        // Consume the "=".
149
        return c, i + 1
150
}
151
 
152
var attrStartStates = [...]state{
153
        attrNone:   stateAttr,
154
        attrScript: stateJS,
155
        attrStyle:  stateCSS,
156
        attrURL:    stateURL,
157
}
158
 
159
// tBeforeValue is the context transition function for stateBeforeValue.
160
func tBeforeValue(c context, s []byte) (context, int) {
161
        i := eatWhiteSpace(s, 0)
162
        if i == len(s) {
163
                return c, len(s)
164
        }
165
        // Find the attribute delimiter.
166
        delim := delimSpaceOrTagEnd
167
        switch s[i] {
168
        case '\'':
169
                delim, i = delimSingleQuote, i+1
170
        case '"':
171
                delim, i = delimDoubleQuote, i+1
172
        }
173
        c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
174
        return c, i
175
}
176
 
177
// tHTMLCmt is the context transition function for stateHTMLCmt.
178
func tHTMLCmt(c context, s []byte) (context, int) {
179
        if i := bytes.Index(s, commentEnd); i != -1 {
180
                return context{}, i + 3
181
        }
182
        return c, len(s)
183
}
184
 
185
// specialTagEndMarkers maps element types to the character sequence that
186
// case-insensitively signals the end of the special tag body.
187
var specialTagEndMarkers = [...]string{
188
        elementScript:   "
189
        elementStyle:    "
190
        elementTextarea: "
191
        elementTitle:    "
192
}
193
 
194
// tSpecialTagEnd is the context transition function for raw text and RCDATA
195
// element states.
196
func tSpecialTagEnd(c context, s []byte) (context, int) {
197
        if c.element != elementNone {
198
                if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
199
                        return context{}, i
200
                }
201
        }
202
        return c, len(s)
203
}
204
 
205
// tAttr is the context transition function for the attribute state.
206
func tAttr(c context, s []byte) (context, int) {
207
        return c, len(s)
208
}
209
 
210
// tURL is the context transition function for the URL state.
211
func tURL(c context, s []byte) (context, int) {
212
        if bytes.IndexAny(s, "#?") >= 0 {
213
                c.urlPart = urlPartQueryOrFrag
214
        } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
215
                // HTML5 uses "Valid URL potentially surrounded by spaces" for
216
                // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
217
                c.urlPart = urlPartPreQuery
218
        }
219
        return c, len(s)
220
}
221
 
222
// tJS is the context transition function for the JS state.
223
func tJS(c context, s []byte) (context, int) {
224
        i := bytes.IndexAny(s, `"'/`)
225
        if i == -1 {
226
                // Entire input is non string, comment, regexp tokens.
227
                c.jsCtx = nextJSCtx(s, c.jsCtx)
228
                return c, len(s)
229
        }
230
        c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
231
        switch s[i] {
232
        case '"':
233
                c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
234
        case '\'':
235
                c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
236
        case '/':
237
                switch {
238
                case i+1 < len(s) && s[i+1] == '/':
239
                        c.state, i = stateJSLineCmt, i+1
240
                case i+1 < len(s) && s[i+1] == '*':
241
                        c.state, i = stateJSBlockCmt, i+1
242
                case c.jsCtx == jsCtxRegexp:
243
                        c.state = stateJSRegexp
244
                case c.jsCtx == jsCtxDivOp:
245
                        c.jsCtx = jsCtxRegexp
246
                default:
247
                        return context{
248
                                state: stateError,
249
                                err:   errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]),
250
                        }, len(s)
251
                }
252
        default:
253
                panic("unreachable")
254
        }
255
        return c, i + 1
256
}
257
 
258
// tJSDelimited is the context transition function for the JS string and regexp
259
// states.
260
func tJSDelimited(c context, s []byte) (context, int) {
261
        specials := `\"`
262
        switch c.state {
263
        case stateJSSqStr:
264
                specials = `\'`
265
        case stateJSRegexp:
266
                specials = `\/[]`
267
        }
268
 
269
        k, inCharset := 0, false
270
        for {
271
                i := k + bytes.IndexAny(s[k:], specials)
272
                if i < k {
273
                        break
274
                }
275
                switch s[i] {
276
                case '\\':
277
                        i++
278
                        if i == len(s) {
279
                                return context{
280
                                        state: stateError,
281
                                        err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
282
                                }, len(s)
283
                        }
284
                case '[':
285
                        inCharset = true
286
                case ']':
287
                        inCharset = false
288
                default:
289
                        // end delimiter
290
                        if !inCharset {
291
                                c.state, c.jsCtx = stateJS, jsCtxDivOp
292
                                return c, i + 1
293
                        }
294
                }
295
                k = i + 1
296
        }
297
 
298
        if inCharset {
299
                // This can be fixed by making context richer if interpolation
300
                // into charsets is desired.
301
                return context{
302
                        state: stateError,
303
                        err:   errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
304
                }, len(s)
305
        }
306
 
307
        return c, len(s)
308
}
309
 
310
// blockCommentEnd is the terminator that tBlockCmt searches for in both the
// JS and the CSS block comment states.
var blockCommentEnd = []byte("*/")
311
 
312
// tBlockCmt is the context transition function for /*comment*/ states.
313
func tBlockCmt(c context, s []byte) (context, int) {
314
        i := bytes.Index(s, blockCommentEnd)
315
        if i == -1 {
316
                return c, len(s)
317
        }
318
        switch c.state {
319
        case stateJSBlockCmt:
320
                c.state = stateJS
321
        case stateCSSBlockCmt:
322
                c.state = stateCSS
323
        default:
324
                panic(c.state.String())
325
        }
326
        return c, i + 2
327
}
328
 
329
// tLineCmt is the context transition function for //comment states.
330
func tLineCmt(c context, s []byte) (context, int) {
331
        var lineTerminators string
332
        var endState state
333
        switch c.state {
334
        case stateJSLineCmt:
335
                lineTerminators, endState = "\n\r\u2028\u2029", stateJS
336
        case stateCSSLineCmt:
337
                lineTerminators, endState = "\n\f\r", stateCSS
338
                // Line comments are not part of any published CSS standard but
339
                // are supported by the 4 major browsers.
340
                // This defines line comments as
341
                //     LINECOMMENT ::= "//" [^\n\f\d]*
342
                // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
343
                // newlines:
344
                //     nl ::= #xA | #xD #xA | #xD | #xC
345
        default:
346
                panic(c.state.String())
347
        }
348
 
349
        i := bytes.IndexAny(s, lineTerminators)
350
        if i == -1 {
351
                return c, len(s)
352
        }
353
        c.state = endState
354
        // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
355
        // "However, the LineTerminator at the end of the line is not
356
        // considered to be part of the single-line comment; it is
357
        // recognized separately by the lexical grammar and becomes part
358
        // of the stream of input elements for the syntactic grammar."
359
        return c, i
360
}
361
 
362
// tCSS is the context transition function for the CSS state.
363
func tCSS(c context, s []byte) (context, int) {
364
        // CSS quoted strings are almost never used except for:
365
        // (1) URLs as in background: "/foo.png"
366
        // (2) Multiword font-names as in font-family: "Times New Roman"
367
        // (3) List separators in content values as in inline-lists:
368
        //    
374
        //    
  • One
  • Two
  • Three
375
        // (4) Attribute value selectors as in a[href="http://example.com/"]
376
        //
377
        // We conservatively treat all strings as URLs, but make some
378
        // allowances to avoid confusion.
379
        //
380
        // In (1), our conservative assumption is justified.
381
        // In (2), valid font names do not contain ':', '?', or '#', so our
382
        // conservative assumption is fine since we will never transition past
383
        // urlPartPreQuery.
384
        // In (3), our protocol heuristic should not be tripped, and there
385
        // should not be non-space content after a '?' or '#', so as long as
386
        // we only %-encode RFC 3986 reserved characters we are ok.
387
        // In (4), we should URL escape for URL attributes, and for others we
388
        // have the attribute name available if our conservative assumption
389
        // proves problematic for real code.
390
 
391
        k := 0
392
        for {
393
                i := k + bytes.IndexAny(s[k:], `("'/`)
394
                if i < k {
395
                        return c, len(s)
396
                }
397
                switch s[i] {
398
                case '(':
399
                        // Look for url to the left.
400
                        p := bytes.TrimRight(s[:i], "\t\n\f\r ")
401
                        if endsWithCSSKeyword(p, "url") {
402
                                j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
403
                                switch {
404
                                case j != len(s) && s[j] == '"':
405
                                        c.state, j = stateCSSDqURL, j+1
406
                                case j != len(s) && s[j] == '\'':
407
                                        c.state, j = stateCSSSqURL, j+1
408
                                default:
409
                                        c.state = stateCSSURL
410
                                }
411
                                return c, j
412
                        }
413
                case '/':
414
                        if i+1 < len(s) {
415
                                switch s[i+1] {
416
                                case '/':
417
                                        c.state = stateCSSLineCmt
418
                                        return c, i + 2
419
                                case '*':
420
                                        c.state = stateCSSBlockCmt
421
                                        return c, i + 2
422
                                }
423
                        }
424
                case '"':
425
                        c.state = stateCSSDqStr
426
                        return c, i + 1
427
                case '\'':
428
                        c.state = stateCSSSqStr
429
                        return c, i + 1
430
                }
431
                k = i + 1
432
        }
433
        panic("unreachable")
434
}
435
 
436
// tCSSStr is the context transition function for the CSS string and URL states.
437
func tCSSStr(c context, s []byte) (context, int) {
438
        var endAndEsc string
439
        switch c.state {
440
        case stateCSSDqStr, stateCSSDqURL:
441
                endAndEsc = `\"`
442
        case stateCSSSqStr, stateCSSSqURL:
443
                endAndEsc = `\'`
444
        case stateCSSURL:
445
                // Unquoted URLs end with a newline or close parenthesis.
446
                // The below includes the wc (whitespace character) and nl.
447
                endAndEsc = "\\\t\n\f\r )"
448
        default:
449
                panic(c.state.String())
450
        }
451
 
452
        k := 0
453
        for {
454
                i := k + bytes.IndexAny(s[k:], endAndEsc)
455
                if i < k {
456
                        c, nread := tURL(c, decodeCSS(s[k:]))
457
                        return c, k + nread
458
                }
459
                if s[i] == '\\' {
460
                        i++
461
                        if i == len(s) {
462
                                return context{
463
                                        state: stateError,
464
                                        err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
465
                                }, len(s)
466
                        }
467
                } else {
468
                        c.state = stateCSS
469
                        return c, i + 1
470
                }
471
                c, _ = tURL(c, decodeCSS(s[:i+1]))
472
                k = i + 1
473
        }
474
        panic("unreachable")
475
}
476
 
477
// tError is the context transition function for the error state.
478
func tError(c context, s []byte) (context, int) {
479
        return c, len(s)
480
}
481
 
482
// eatAttrName returns the largest j such that s[i:j] is an attribute name.
483
// It returns an error if s[i:] does not look like it begins with an
484
// attribute name, such as encountering a quote mark without a preceding
485
// equals sign.
486
func eatAttrName(s []byte, i int) (int, *Error) {
487
        for j := i; j < len(s); j++ {
488
                switch s[j] {
489
                case ' ', '\t', '\n', '\f', '\r', '=', '>':
490
                        return j, nil
491
                case '\'', '"', '<':
492
                        // These result in a parse warning in HTML5 and are
493
                        // indicative of serious problems if seen in an attr
494
                        // name in a template.
495
                        return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s)
496
                default:
497
                        // No-op.
498
                }
499
        }
500
        return len(s), nil
501
}
502
 
503
var elementNameMap = map[string]element{
504
        "script":   elementScript,
505
        "style":    elementStyle,
506
        "textarea": elementTextarea,
507
        "title":    elementTitle,
508
}
509
 
510
// asciiAlpha returns whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
}
514
 
515
// asciiAlphaNum returns whether c is an ASCII letter or digit.
516
func asciiAlphaNum(c byte) bool {
517
        return asciiAlpha(c) || '0' <= c && c <= '9'
518
}
519
 
520
// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
521
func eatTagName(s []byte, i int) (int, element) {
522
        if i == len(s) || !asciiAlpha(s[i]) {
523
                return i, elementNone
524
        }
525
        j := i + 1
526
        for j < len(s) {
527
                x := s[j]
528
                if asciiAlphaNum(x) {
529
                        j++
530
                        continue
531
                }
532
                // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
533
                if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
534
                        j += 2
535
                        continue
536
                }
537
                break
538
        }
539
        return j, elementNameMap[strings.ToLower(string(s[i:j]))]
540
}
541
 
542
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space here means space, tab, line feed, form feed and carriage
// return. If s[i:] contains nothing else, it returns len(s).
func eatWhiteSpace(s []byte, i int) int {
	for j := i; j < len(s); j++ {
		switch s[j] {
		case ' ', '\t', '\n', '\f', '\r':
			// No-op.
		default:
			return j
		}
	}
	return len(s)
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.