URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [html/] [escape.go] - Rev 747
Compare with Previous | Blame | View Log
// Copyright 2010 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package htmlimport ("bytes""strings""unicode/utf8")// These replacements permit compatibility with old numeric entities that// assumed Windows-1252 encoding.// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-referencevar replacementTable = [...]rune{'\u20AC', // First entry is what 0x80 should be replaced with.'\u0081','\u201A','\u0192','\u201E','\u2026','\u2020','\u2021','\u02C6','\u2030','\u0160','\u2039','\u0152','\u008D','\u017D','\u008F','\u0090','\u2018','\u2019','\u201C','\u201D','\u2022','\u2013','\u2014','\u02DC','\u2122','\u0161','\u203A','\u0153','\u009D','\u017E','\u0178', // Last entry is 0x9F.// 0x00->'\uFFFD' is handled programmatically.// 0x0D->'\u000D' is a no-op.}// unescapeEntity reads an entity like "<" from b[src:] and writes the// corresponding "<" to b[dst:], returning the incremented dst and src cursors.// Precondition: b[src] == '&' && dst <= src.// attribute should be true if parsing an attribute value.func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference// i starts at 1 because we already know that s[0] == '&'.i, s := 1, b[src:]if len(s) <= 1 {b[dst] = b[src]return dst + 1, src + 1}if s[i] == '#' {if len(s) <= 3 { // We need to have at least "&#.".b[dst] = b[src]return dst + 1, src + 1}i++c := s[i]hex := falseif c == 'x' || c == 'X' {hex = truei++}x := '\x00'for i < len(s) {c = s[i]i++if hex {if '0' <= c && c <= '9' {x = 16*x + rune(c) - '0'continue} else if 'a' <= c && c <= 'f' {x = 16*x + rune(c) - 'a' + 10continue} else if 'A' <= c && c <= 'F' {x = 16*x + rune(c) - 'A' + 10continue}} else if '0' <= c && c <= '9' {x = 10*x + rune(c) - '0'continue}if c != ';' {i--}break}if i <= 3 { // No characters matched.b[dst] = b[src]return dst + 1, src + 1}if 0x80 <= x && x <= 0x9F {// Replace characters from Windows-1252 with UTF-8 equivalents.x = replacementTable[x-0x80]} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {// Replace invalid characters with the replacement character.x = '\uFFFD'}return dst + utf8.EncodeRune(b[dst:], x), src + i}// Consume the maximum number of characters possible, with the// consumed characters matching one of the named references.for i < len(s) {c := s[i]i++// Lower-cased characters are more common in entities, so we check for them first.if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {continue}if c != ';' {i--}break}entityName := string(s[1:i])if entityName == "" {// No-op.} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {// No-op.} else if x := entity[entityName]; x != 0 {return dst + utf8.EncodeRune(b[dst:], x), src + i} else if x := entity2[entityName]; x[0] != 0 {dst1 := dst + utf8.EncodeRune(b[dst:], x[0])return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i} else if !attribute {maxLen := len(entityName) - 1if maxLen > longestEntityWithoutSemicolon {maxLen = longestEntityWithoutSemicolon}for j := maxLen; j > 1; j-- {if x := entity[entityName[:j]]; x != 0 {return dst + utf8.EncodeRune(b[dst:], x), src + j + 1}}}dst1, src1 = dst+i, src+icopy(b[dst:dst1], b[src:src1])return dst1, src1}// unescape unescapes b's entities in-place, so that "a<b" becomes "a<b".func unescape(b []byte) []byte {for i, c := range b {if c == '&' {dst, src := unescapeEntity(b, i, i, false)for src < len(b) {c := b[src]if c == '&' {dst, src = unescapeEntity(b, dst, src, false)} else {b[dst] = cdst, src = dst+1, src+1}}return b[0:dst]}}return b}// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".func lower(b []byte) []byte {for i, c := range b {if 'A' <= c && c <= 'Z' {b[i] = c + 'a' - 'A'}}return b}const escapedChars = `&'<>"`func escape(w writer, s string) error {i := strings.IndexAny(s, escapedChars)for i != -1 {if _, err := w.WriteString(s[:i]); err != nil {return err}var esc stringswitch s[i] {case '&':esc = "&"case '\'':esc = "'"case '<':esc = "<"case '>':esc = ">"case '"':esc = """default:panic("unrecognized escape character")}s = s[i+1:]if _, err := w.WriteString(esc); err != nil {return err}i = strings.IndexAny(s, escapedChars)}_, err := w.WriteString(s)return err}// EscapeString escapes special characters like "<" to become "<". It// escapes only five such characters: amp, apos, lt, gt and quot.// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't// always true.func EscapeString(s string) string {if strings.IndexAny(s, escapedChars) == -1 {return s}var buf bytes.Bufferescape(&buf, s)return buf.String()}// UnescapeString unescapes entities like "<" to become "<". It unescapes a// larger range of entities than EscapeString escapes. For example, "á"// unescapes to "á", as does "á" and "&xE1;".// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't// always true.func UnescapeString(s string) string {for _, c := range s {if c == '&' {return string(unescape([]byte(s)))}}return s}
