URL https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [html/] [template/] [transition.go] - Rev 761

Go to most recent revision | Compare with Previous | Blame | View Log
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package template

import (
        "bytes"
        "strings"
)

// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is indexed by state constant. Several states share a single
// function: the two JS string states and the JS regexp state all use
// tJSDelimited, every CSS string and URL state uses tCSSStr, and the JS and
// CSS comment states share tBlockCmt and tLineCmt. Those shared functions
// inspect c.state to tell the cases apart.
var transitionFunc = [...]func(context, []byte) (context, int){
        stateText:        tText,
        stateTag:         tTag,
        stateAttrName:    tAttrName,
        stateAfterName:   tAfterName,
        stateBeforeValue: tBeforeValue,
        stateHTMLCmt:     tHTMLCmt,
        stateRCDATA:      tSpecialTagEnd,
        stateAttr:        tAttr,
        stateURL:         tURL,
        stateJS:          tJS,
        stateJSDqStr:     tJSDelimited,
        stateJSSqStr:     tJSDelimited,
        stateJSRegexp:    tJSDelimited,
        stateJSBlockCmt:  tBlockCmt,
        stateJSLineCmt:   tLineCmt,
        stateCSS:         tCSS,
        stateCSSDqStr:    tCSSStr,
        stateCSSSqStr:    tCSSStr,
        stateCSSDqURL:    tCSSStr,
        stateCSSSqURL:    tCSSStr,
        stateCSSURL:      tCSSStr,
        stateCSSBlockCmt: tBlockCmt,
        stateCSSLineCmt:  tLineCmt,
        stateError:       tError,
}

// Delimiters of an HTML comment.
var (
        // commentStart is the byte sequence that opens an HTML comment.
        commentStart = []byte("<!--")
        // commentEnd is the byte sequence that closes an HTML comment.
        commentEnd = []byte("-->")
)

// tText is the context transition function for the text state.
//
// It searches s for the next '<' that opens either an HTML comment or a tag.
// "<!--" switches to stateHTMLCmt with the opener consumed. "<name" and
// "</name" switch to stateTag with the tag name consumed; an end tag always
// reports elementNone. A '<' that is followed by neither is plain text and
// the search resumes after it. When s runs out first, all of s is consumed
// and c is returned unchanged.
func tText(c context, s []byte) (context, int) {
        for pos := 0; ; {
                off := bytes.IndexByte(s[pos:], '<')
                if off < 0 {
                        // No more candidates.
                        return c, len(s)
                }
                lt := pos + off
                if lt+1 == len(s) {
                        // A trailing '<' cannot be classified yet.
                        return c, len(s)
                }
                if lt+4 <= len(s) && bytes.Equal(commentStart, s[lt:lt+4]) {
                        return context{state: stateHTMLCmt}, lt + 4
                }

                // Step over '<' and an optional '/' to reach the tag name.
                nameStart := lt + 1
                isEndTag := s[nameStart] == '/'
                if isEndTag {
                        nameStart++
                        if nameStart == len(s) {
                                return c, len(s)
                        }
                }

                nameEnd, elem := eatTagName(s, nameStart)
                if nameEnd == nameStart {
                        // Not a tag name; keep looking from here.
                        pos = nameEnd
                        continue
                }
                if isEndTag {
                        elem = elementNone
                }
                // We've found an HTML tag.
                return context{state: stateTag, element: elem}, nameEnd
        }
        panic("unreachable")
}

// elementContentType maps an element type to the state used for that
// element's content. tTag switches to this state once it has consumed the
// '>' that closes the tag.
var elementContentType = [...]state{
        elementNone:     stateText,
        elementScript:   stateJS,
        elementStyle:    stateCSS,
        elementTextarea: stateRCDATA,
        elementTitle:    stateRCDATA,
}

// tTag is the context transition function for the tag state.
//
// After skipping white space it either consumes the closing '>' and moves to
// the content state of the current element, or consumes an attribute name
// and records what kind of value that attribute takes. Input that contains
// only white space is consumed with c unchanged. Anything that is neither
// '>' nor the start of an attribute name yields stateError.
func tTag(c context, s []byte) (context, int) {
        start := eatWhiteSpace(s, 0)
        if start == len(s) {
                return c, len(s)
        }

        // End of tag: switch to the state for the element's content.
        if s[start] == '>' {
                return context{
                        state:   elementContentType[c.element],
                        element: c.element,
                }, start + 1
        }

        // Otherwise an attribute name must follow.
        end, err := eatAttrName(s, start)
        if err != nil {
                return context{state: stateError, err: err}, len(s)
        }
        if end == start {
                return context{
                        state: stateError,
                        err:   errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[start:]),
                }, len(s)
        }

        kind := attrNone
        switch attrType(string(s[start:end])) {
        case contentTypeURL:
                kind = attrURL
        case contentTypeCSS:
                kind = attrStyle
        case contentTypeJS:
                kind = attrScript
        }

        // If the name runs to the end of s it may continue in the next chunk
        // of text, so stay in stateAttrName; otherwise the name is complete.
        next := stateAfterName
        if end == len(s) {
                next = stateAttrName
        }
        return context{state: next, element: c.element, attr: kind}, end
}

// tAttrName is the context transition function for stateAttrName.
// It consumes the rest of an attribute name. If the name ends inside s the
// context moves to stateAfterName; if it reaches the end of s the state is
// left as it is. A character that eatAttrName rejects yields stateError.
func tAttrName(c context, s []byte) (context, int) {
        end, err := eatAttrName(s, 0)
        switch {
        case err != nil:
                return context{state: stateError, err: err}, len(s)
        case end != len(s):
                c.state = stateAfterName
        }
        return c, end
}

// tAfterName is the context transition function for stateAfterName.
// It skips white space after an attribute name and then looks for '='.
// With '=' the '=' is consumed and the context moves to stateBeforeValue.
// With any other byte nothing but the white space is consumed and the
// context returns to stateTag. All-white-space input is consumed with c
// unchanged.
func tAfterName(c context, s []byte) (context, int) {
        pos := eatWhiteSpace(s, 0)
        switch {
        case pos == len(s):
                return c, len(s)
        case s[pos] == '=':
                // Consume the "=" and expect a value next.
                c.state = stateBeforeValue
                return c, pos + 1
        }
        // Occurs due to tag ending '>', and valueless attribute.
        c.state = stateTag
        return c, pos
}

// attrStartStates maps an attribute kind to the state in which that
// attribute's value is processed. tBeforeValue indexes it with c.attr once
// it has found the start of the value.
var attrStartStates = [...]state{
        attrNone:   stateAttr,
        attrScript: stateJS,
        attrStyle:  stateCSS,
        attrURL:    stateURL,
}

// tBeforeValue is the context transition function for stateBeforeValue.
// It skips white space before an attribute value, records how the value is
// delimited (single quote, double quote, or unquoted), consumes the opening
// quote if there is one, and moves to the start state for the attribute kind
// held in c.attr. c.attr is then reset to attrNone. All-white-space input is
// consumed with c unchanged.
func tBeforeValue(c context, s []byte) (context, int) {
        pos := eatWhiteSpace(s, 0)
        if pos == len(s) {
                return c, len(s)
        }

        // Look up the value state before c.attr is cleared.
        c.state = attrStartStates[c.attr]
        c.attr = attrNone

        // Find the attribute delimiter.
        switch s[pos] {
        case '\'':
                c.delim = delimSingleQuote
                pos++
        case '"':
                c.delim = delimDoubleQuote
                pos++
        default:
                c.delim = delimSpaceOrTagEnd
        }
        return c, pos
}

// tHTMLCmt is the context transition function for stateHTMLCmt.
// If s contains the comment terminator, everything up to and including it is
// consumed and the context is reset to its zero value. Otherwise all of s is
// consumed and c is returned unchanged.
func tHTMLCmt(c context, s []byte) (context, int) {
        end := bytes.Index(s, commentEnd)
        if end < 0 {
                return c, len(s)
        }
        // Skip the three bytes of "-->" as well.
        return context{}, end + 3
}

// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
// Each marker is written in lower case and stops short of the closing '>';
// tSpecialTagEnd searches for it in a lower-cased copy of the input.
var specialTagEndMarkers = [...]string{
        elementScript:   "</script",
        elementStyle:    "</style",
        elementTextarea: "</textarea",
        elementTitle:    "</title",
}

// tSpecialTagEnd is the context transition function for raw text and RCDATA
// element states.
//
// If c.element is not elementNone, it looks in s for that element's end
// marker from specialTagEndMarkers, ignoring ASCII letter case. When the
// marker is found, the bytes before it are consumed and the context is reset
// to its zero value, leaving the marker itself as the next input. Otherwise
// all of s is consumed and c is returned unchanged.
func tSpecialTagEnd(c context, s []byte) (context, int) {
        if c.element == elementNone {
                return c, len(s)
        }
        marker := specialTagEndMarkers[c.element]

        // Fold ASCII upper-case letters only, one byte in and one byte out,
        // so that an index into the folded copy is also a valid index into s.
        // A Unicode-aware lower-casing can change the byte length of the
        // text in front of the marker, which would make the returned offset
        // point at the wrong place in s, and it would also let non-ASCII
        // letters that lower-case to ASCII match the marker.
        folded := make([]byte, len(s))
        for i, b := range s {
                if 'A' <= b && b <= 'Z' {
                        b += 'a' - 'A'
                }
                folded[i] = b
        }

        if i := bytes.Index(folded, []byte(marker)); i != -1 {
                return context{}, i
        }
        return c, len(s)
}

// tAttr is the context transition function for the attribute state.
// It consumes all of s and returns c unchanged.
func tAttr(c context, s []byte) (context, int) {
        return c, len(s)
}

// tURL is the context transition function for the URL state.
// It always consumes all of s and only updates c.urlPart: if s contains '#'
// or '?' the part becomes urlPartQueryOrFrag; otherwise, if nothing has been
// seen yet (urlPartNone) and s is not entirely white space, it becomes
// urlPartPreQuery.
func tURL(c context, s []byte) (context, int) {
        switch {
        case bytes.IndexAny(s, "#?") >= 0:
                c.urlPart = urlPartQueryOrFrag
        case c.urlPart == urlPartNone && eatWhiteSpace(s, 0) != len(s):
                // HTML5 uses "Valid URL potentially surrounded by spaces" for
                // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
                // so leading white space alone does not start the URL.
                c.urlPart = urlPartPreQuery
        }
        return c, len(s)
}

// tJS is the context transition function for the JS state.
//
// It consumes input up to and including the first quote or slash. The text
// before that byte is fed to nextJSCtx to update c.jsCtx. A quote opens the
// matching string state. A slash opens a line comment ("//"), a block
// comment ("/*"), a regexp, or is taken as a division operator, depending on
// the next byte and on c.jsCtx; if c.jsCtx allows neither reading, the
// result is stateError. Without any quote or slash, all of s is consumed.
func tJS(c context, s []byte) (context, int) {
        tok := bytes.IndexAny(s, `"'/`)
        if tok < 0 {
                // Entire input is non string, comment, regexp tokens.
                c.jsCtx = nextJSCtx(s, c.jsCtx)
                return c, len(s)
        }
        c.jsCtx = nextJSCtx(s[:tok], c.jsCtx)

        switch s[tok] {
        case '"':
                c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
                return c, tok + 1
        case '\'':
                c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
                return c, tok + 1
        case '/':
                hasNext := tok+1 < len(s)
                if hasNext && s[tok+1] == '/' {
                        // Consume both bytes of "//".
                        c.state = stateJSLineCmt
                        return c, tok + 2
                }
                if hasNext && s[tok+1] == '*' {
                        // Consume both bytes of "/*".
                        c.state = stateJSBlockCmt
                        return c, tok + 2
                }
                if c.jsCtx == jsCtxRegexp {
                        c.state = stateJSRegexp
                        return c, tok + 1
                }
                if c.jsCtx == jsCtxDivOp {
                        // A division operator; what follows it may be a regexp.
                        c.jsCtx = jsCtxRegexp
                        return c, tok + 1
                }
                return context{
                        state: stateError,
                        err:   errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[tok:]),
                }, len(s)
        }
        panic("unreachable")
}

// tJSDelimited is the context transition function for the JS string and regexp
// states.
//
// It scans for the closing delimiter of the current literal: '"' or '\''
// for strings and '/' for a regexp. A backslash skips the byte after it. In
// a regexp, a '/' between '[' and ']' does not end the literal. On the
// closing delimiter the context returns to stateJS with jsCtxDivOp. If s
// ends right after a backslash, or inside a regexp charset, the result is
// stateError; otherwise all of s is consumed with c unchanged.
func tJSDelimited(c context, s []byte) (context, int) {
        var specials string
        switch c.state {
        case stateJSSqStr:
                specials = `\'`
        case stateJSRegexp:
                specials = `\/[]`
        default:
                specials = `\"`
        }

        inCharset := false
        for pos := 0; ; {
                off := bytes.IndexAny(s[pos:], specials)
                if off < 0 {
                        break
                }
                at := pos + off
                pos = at + 1
                switch s[at] {
                case '\\':
                        if pos == len(s) {
                                return context{
                                        state: stateError,
                                        err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
                                }, len(s)
                        }
                        // Step over the escaped byte as well.
                        pos++
                case '[':
                        inCharset = true
                case ']':
                        inCharset = false
                default:
                        // end delimiter
                        if !inCharset {
                                c.state, c.jsCtx = stateJS, jsCtxDivOp
                                return c, pos
                        }
                }
        }

        if inCharset {
                // This can be fixed by making context richer if interpolation
                // into charsets is desired.
                return context{
                        state: stateError,
                        err:   errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
                }, len(s)
        }

        return c, len(s)
}

// blockCommentEnd is the byte sequence that closes a /*block*/ comment.
var blockCommentEnd = []byte{'*', '/'}

// tBlockCmt is the context transition function for /*comment*/ states.
// If s contains the comment terminator, the comment and terminator are
// consumed and the context returns to stateJS or stateCSS, whichever the
// comment was opened from. Otherwise all of s is consumed with c unchanged.
// It panics if a terminator is found while c.state is neither block comment
// state.
func tBlockCmt(c context, s []byte) (context, int) {
        end := bytes.Index(s, blockCommentEnd)
        if end < 0 {
                return c, len(s)
        }
        if c.state == stateJSBlockCmt {
                c.state = stateJS
        } else if c.state == stateCSSBlockCmt {
                c.state = stateCSS
        } else {
                panic(c.state.String())
        }
        // Skip the two bytes of "*/" as well.
        return c, end + 2
}

// tLineCmt is the context transition function for //comment states.
// It consumes input up to, but not including, the first line terminator and
// returns to stateJS or stateCSS. Without a terminator all of s is consumed
// with c unchanged. It panics if c.state is neither line comment state.
func tLineCmt(c context, s []byte) (context, int) {
        var terminators string
        var resume state
        if c.state == stateJSLineCmt {
                terminators, resume = "\n\r\u2028\u2029", stateJS
        } else if c.state == stateCSSLineCmt {
                // Line comments are not part of any published CSS standard but
                // are supported by the 4 major browsers.
                // This defines line comments as
                //     LINECOMMENT ::= "//" [^\n\f\r]*
                // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
                // newlines:
                //     nl ::= #xA | #xD #xA | #xD | #xC
                terminators, resume = "\n\f\r", stateCSS
        } else {
                panic(c.state.String())
        }

        if end := bytes.IndexAny(s, terminators); end >= 0 {
                // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
                // "However, the LineTerminator at the end of the line is not
                // considered to be part of the single-line comment; it is
                // recognized separately by the lexical grammar and becomes part
                // of the stream of input elements for the syntactic grammar."
                // So the terminator itself is left unconsumed.
                c.state = resume
                return c, end
        }
        return c, len(s)
}

// tCSS is the context transition function for the CSS state.
//
// It scans s for the first construct that leaves plain CSS: a quote opens a
// string state, "//" or "/*" opens a comment state, and a '(' whose preceding
// text (ignoring trailing white space) ends with the keyword "url" according
// to endsWithCSSKeyword opens a URL state. If none is found, all of s is
// consumed with c unchanged.
func tCSS(c context, s []byte) (context, int) {
        // Quoted strings are rare in CSS. They mostly show up as:
        // (1) URLs as in background: "/foo.png"
        // (2) Multiword font-names as in font-family: "Times New Roman"
        // (3) List separators in content values as in inline-lists:
        //    <style>
        //    ul.inlineList { list-style: none; padding:0 }
        //    ul.inlineList > li { display: inline }
        //    ul.inlineList > li:before { content: ", " }
        //    ul.inlineList > li:first-child:before { content: "" }
        //    </style>
        //    <ul class=inlineList><li>One<li>Two<li>Three</ul>
        // (4) Attribute value selectors as in a[href="http://example.com/"]
        //
        // All strings are conservatively treated as URLs, with some
        // allowances to avoid confusion.
        //
        // For (1) the conservative assumption is simply right.
        // For (2), valid font names do not contain ':', '?', or '#', so the
        // URL part never advances past urlPartPreQuery and the assumption
        // does no harm.
        // For (3), the protocol heuristic should not be tripped, and there
        // should be no non-space content after a '?' or '#', so as long as
        // only RFC 3986 reserved characters are %-encoded the output is ok.
        // For (4), URL attributes should be URL escaped anyway, and for
        // other attributes the attribute name is available should the
        // conservative assumption prove problematic for real code.

        const cssSpace = "\t\n\f\r "
        for pos := 0; ; {
                off := bytes.IndexAny(s[pos:], `("'/`)
                if off < 0 {
                        return c, len(s)
                }
                at := pos + off
                switch s[at] {
                case '"':
                        c.state = stateCSSDqStr
                        return c, at + 1
                case '\'':
                        c.state = stateCSSSqStr
                        return c, at + 1
                case '/':
                        if at+1 < len(s) {
                                if s[at+1] == '/' {
                                        c.state = stateCSSLineCmt
                                        return c, at + 2
                                }
                                if s[at+1] == '*' {
                                        c.state = stateCSSBlockCmt
                                        return c, at + 2
                                }
                        }
                case '(':
                        // Look for url to the left.
                        before := bytes.TrimRight(s[:at], cssSpace)
                        if endsWithCSSKeyword(before, "url") {
                                // Skip white space after '(' and, if the URL
                                // is quoted, the opening quote too.
                                rest := bytes.TrimLeft(s[at+1:], cssSpace)
                                consumed := len(s) - len(rest)
                                switch {
                                case len(rest) > 0 && rest[0] == '"':
                                        c.state = stateCSSDqURL
                                        consumed++
                                case len(rest) > 0 && rest[0] == '\'':
                                        c.state = stateCSSSqURL
                                        consumed++
                                default:
                                        c.state = stateCSSURL
                                }
                                return c, consumed
                        }
                }
                // Not a transition after all; resume after this byte.
                pos = at + 1
        }
        panic("unreachable")
}

// tCSSStr is the context transition function for the CSS string and URL states.
//
// It scans for the byte that ends the current string or URL: the matching
// quote for quoted forms, or white space or ')' for an unquoted URL. A
// backslash skips the byte after it. On the terminator the context returns
// to stateCSS with the terminator consumed. Along the way the CSS-decoded
// text is passed to tURL so that the URL part in c is kept up to date. If s
// ends right after a backslash the result is stateError. It panics if
// c.state is not one of the CSS string or URL states.
func tCSSStr(c context, s []byte) (context, int) {
        var endAndEsc string
        switch c.state {
        case stateCSSDqStr, stateCSSDqURL:
                endAndEsc = `\"`
        case stateCSSSqStr, stateCSSSqURL:
                endAndEsc = `\'`
        case stateCSSURL:
                // Unquoted URLs end with a newline or close parenthesis.
                // The below includes the wc (whitespace character) and nl.
                endAndEsc = "\\\t\n\f\r )"
        default:
                panic(c.state.String())
        }

        for pos := 0; ; {
                off := bytes.IndexAny(s[pos:], endAndEsc)
                if off < 0 {
                        // No terminator or escape in the rest of s: treat the
                        // remainder as URL text.
                        next, n := tURL(c, decodeCSS(s[pos:]))
                        return next, pos + n
                }
                at := pos + off
                if s[at] != '\\' {
                        // The terminator: consume it and go back to plain CSS.
                        c.state = stateCSS
                        return c, at + 1
                }

                // An escape: the byte after the backslash belongs to it.
                esc := at + 1
                if esc == len(s) {
                        return context{
                                state: stateError,
                                err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
                        }, len(s)
                }
                // Only the updated context matters here; the byte count that
                // tURL reports refers to the decoded text and is not used.
                c, _ = tURL(c, decodeCSS(s[:esc+1]))
                pos = esc + 1
        }
        panic("unreachable")
}

// tError is the context transition function for the error state.
// It consumes all of s and returns c unchanged, so the context stays in the
// error state.
func tError(c context, s []byte) (context, int) {
        return c, len(s)
}

// eatAttrName returns the largest j such that s[i:j] is an attribute name.
// The name ends at the first white space, '=' or '>', or at the end of s.
// It returns -1 and an error if a quote mark or '<' is met before the name
// ends, such as a quote mark without a preceding equals sign.
func eatAttrName(s []byte, i int) (int, *Error) {
        for j := i; j < len(s); j++ {
                b := s[j]
                if b == '\'' || b == '"' || b == '<' {
                        // These result in a parse warning in HTML5 and are
                        // indicative of serious problems if seen in an attr
                        // name in a template.
                        return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s)
                }
                if b == '=' || b == '>' || b == ' ' || b == '\t' || b == '\n' || b == '\f' || b == '\r' {
                        return j, nil
                }
        }
        return len(s), nil
}

// elementNameMap maps lower-case tag names to their element type.
// eatTagName lower-cases a tag name before looking it up here; a name that
// is not listed yields the zero element value.
var elementNameMap = map[string]element{
        "script":   elementScript,
        "style":    elementStyle,
        "textarea": elementTextarea,
        "title":    elementTitle,
}

// asciiAlpha reports whether c is an ASCII letter, 'A'-'Z' or 'a'-'z'.
func asciiAlpha(c byte) bool {
        // Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves 'a'-'z' as
        // they are; no other byte lands in the lower-case range.
        lower := c | 0x20
        return 'a' <= lower && lower <= 'z'
}

// asciiAlphaNum reports whether c is an ASCII letter or an ASCII digit.
func asciiAlphaNum(c byte) bool {
        if asciiAlpha(c) {
                return true
        }
        return '0' <= c && c <= '9'
}

// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
// A tag name starts with an ASCII letter and continues with ASCII letters
// and digits; a single ':' or '-' is accepted only between two such
// characters. If no name starts at i it returns i and elementNone. The tag
// type is looked up in elementNameMap under the lower-cased name.
func eatTagName(s []byte, i int) (int, element) {
        if i == len(s) || !asciiAlpha(s[i]) {
                return i, elementNone
        }
        end := i + 1
scan:
        for end < len(s) {
                b := s[end]
                switch {
                case asciiAlphaNum(b):
                        end++
                case (b == ':' || b == '-') && end+1 < len(s) && asciiAlphaNum(s[end+1]):
                        // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y":
                        // take the separator together with the byte after it.
                        end += 2
                default:
                        break scan
                }
        }
        return end, elementNameMap[strings.ToLower(string(s[i:end]))]
}

// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space is space, tab, line feed, form feed and carriage return. If
// only white space follows i, or i is past the end of s, it returns len(s).
func eatWhiteSpace(s []byte, i int) int {
        for ; i < len(s); i++ {
                if b := s[i]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
                        return i
                }
        }
        return len(s)
}
Go to most recent revision | Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [html/] [template/] [transition.go] - Rev 761