OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [html/] [template/] [escape.go] - Blame information for rev 801

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
package template
6
 
7
import (
8
        "bytes"
9
        "fmt"
10
        "html"
11
        "text/template"
12
        "text/template/parse"
13
)
14
 
15
// escapeTemplates rewrites the named templates, which must be
16
// associated with t, to guarantee that the output of any of the named
17
// templates is properly escaped.  Names should include the names of
18
// all templates that might be Executed but need not include helper
19
// templates.  If no error is returned, then the named templates have
20
// been modified.  Otherwise the named templates have been rendered
21
// unusable.
22
func escapeTemplates(tmpl *Template, names ...string) error {
23
        e := newEscaper(tmpl)
24
        for _, name := range names {
25
                c, _ := e.escapeTree(context{}, name, 0)
26
                var err error
27
                if c.err != nil {
28
                        err, c.err.Name = c.err, name
29
                } else if c.state != stateText {
30
                        err = &Error{ErrEndContext, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
31
                }
32
                if err != nil {
33
                        // Prevent execution of unsafe templates.
34
                        for _, name := range names {
35
                                if t := tmpl.set[name]; t != nil {
36
                                        t.text.Tree = nil
37
                                }
38
                        }
39
                        return err
40
                }
41
                tmpl.escaped = true
42
        }
43
        e.commit()
44
        return nil
45
}
46
 
47
// funcMap maps command names to functions that render their inputs safe.
48
var funcMap = template.FuncMap{
49
        "exp_template_html_attrescaper":     attrEscaper,
50
        "exp_template_html_commentescaper":  commentEscaper,
51
        "exp_template_html_cssescaper":      cssEscaper,
52
        "exp_template_html_cssvaluefilter":  cssValueFilter,
53
        "exp_template_html_htmlnamefilter":  htmlNameFilter,
54
        "exp_template_html_htmlescaper":     htmlEscaper,
55
        "exp_template_html_jsregexpescaper": jsRegexpEscaper,
56
        "exp_template_html_jsstrescaper":    jsStrEscaper,
57
        "exp_template_html_jsvalescaper":    jsValEscaper,
58
        "exp_template_html_nospaceescaper":  htmlNospaceEscaper,
59
        "exp_template_html_rcdataescaper":   rcdataEscaper,
60
        "exp_template_html_urlescaper":      urlEscaper,
61
        "exp_template_html_urlfilter":       urlFilter,
62
        "exp_template_html_urlnormalizer":   urlNormalizer,
63
}
64
 
65
// equivEscapers maps a contextual escaper name to the name of the template
// builtin that is treated as equivalent to it when comparing escapers (see
// escFnsEq). Escapers without an entry have no equivalent builtin.
var equivEscapers = map[string]string{
	// Equivalent to the "html" builtin.
	"exp_template_html_attrescaper":    "html",
	"exp_template_html_htmlescaper":    "html",
	"exp_template_html_nospaceescaper": "html",
	"exp_template_html_rcdataescaper":  "html",

	// Equivalent to the "urlquery" builtin.
	"exp_template_html_urlescaper":    "urlquery",
	"exp_template_html_urlnormalizer": "urlquery",
}
74
 
75
// escaper collects type inferences about templates and changes needed to make
76
// templates injection safe.
77
type escaper struct {
78
        tmpl *Template
79
        // output[templateName] is the output context for a templateName that
80
        // has been mangled to include its input context.
81
        output map[string]context
82
        // derived[c.mangle(name)] maps to a template derived from the template
83
        // named name templateName for the start context c.
84
        derived map[string]*template.Template
85
        // called[templateName] is a set of called mangled template names.
86
        called map[string]bool
87
        // xxxNodeEdits are the accumulated edits to apply during commit.
88
        // Such edits are not applied immediately in case a template set
89
        // executes a given template in different escaping contexts.
90
        actionNodeEdits   map[*parse.ActionNode][]string
91
        templateNodeEdits map[*parse.TemplateNode]string
92
        textNodeEdits     map[*parse.TextNode][]byte
93
}
94
 
95
// newEscaper creates a blank escaper for the given set.
96
func newEscaper(t *Template) *escaper {
97
        return &escaper{
98
                t,
99
                map[string]context{},
100
                map[string]*template.Template{},
101
                map[string]bool{},
102
                map[*parse.ActionNode][]string{},
103
                map[*parse.TemplateNode]string{},
104
                map[*parse.TextNode][]byte{},
105
        }
106
}
107
 
108
// filterFailsafe is the innocuous word that sanitizer functions emit in
// place of a value they consider unsafe. It is non-empty, contains no
// special characters, is not a keyword in any programming language, and is
// distinctive enough that a developer who sees it in output can locate the
// cause of the problem with a search engine.
const filterFailsafe = "ZgotmplZ"
114
 
115
// escape escapes a template node.
116
func (e *escaper) escape(c context, n parse.Node) context {
117
        switch n := n.(type) {
118
        case *parse.ActionNode:
119
                return e.escapeAction(c, n)
120
        case *parse.IfNode:
121
                return e.escapeBranch(c, &n.BranchNode, "if")
122
        case *parse.ListNode:
123
                return e.escapeList(c, n)
124
        case *parse.RangeNode:
125
                return e.escapeBranch(c, &n.BranchNode, "range")
126
        case *parse.TemplateNode:
127
                return e.escapeTemplate(c, n)
128
        case *parse.TextNode:
129
                return e.escapeText(c, n)
130
        case *parse.WithNode:
131
                return e.escapeBranch(c, &n.BranchNode, "with")
132
        }
133
        panic("escaping " + n.String() + " is unimplemented")
134
}
135
 
136
// escapeAction escapes an action template node.
137
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
138
        if len(n.Pipe.Decl) != 0 {
139
                // A local variable assignment, not an interpolation.
140
                return c
141
        }
142
        c = nudge(c)
143
        s := make([]string, 0, 3)
144
        switch c.state {
145
        case stateError:
146
                return c
147
        case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
148
                switch c.urlPart {
149
                case urlPartNone:
150
                        s = append(s, "exp_template_html_urlfilter")
151
                        fallthrough
152
                case urlPartPreQuery:
153
                        switch c.state {
154
                        case stateCSSDqStr, stateCSSSqStr:
155
                                s = append(s, "exp_template_html_cssescaper")
156
                        default:
157
                                s = append(s, "exp_template_html_urlnormalizer")
158
                        }
159
                case urlPartQueryOrFrag:
160
                        s = append(s, "exp_template_html_urlescaper")
161
                case urlPartUnknown:
162
                        return context{
163
                                state: stateError,
164
                                err:   errorf(ErrAmbigContext, n.Line, "%s appears in an ambiguous URL context", n),
165
                        }
166
                default:
167
                        panic(c.urlPart.String())
168
                }
169
        case stateJS:
170
                s = append(s, "exp_template_html_jsvalescaper")
171
                // A slash after a value starts a div operator.
172
                c.jsCtx = jsCtxDivOp
173
        case stateJSDqStr, stateJSSqStr:
174
                s = append(s, "exp_template_html_jsstrescaper")
175
        case stateJSRegexp:
176
                s = append(s, "exp_template_html_jsregexpescaper")
177
        case stateCSS:
178
                s = append(s, "exp_template_html_cssvaluefilter")
179
        case stateText:
180
                s = append(s, "exp_template_html_htmlescaper")
181
        case stateRCDATA:
182
                s = append(s, "exp_template_html_rcdataescaper")
183
        case stateAttr:
184
                // Handled below in delim check.
185
        case stateAttrName, stateTag:
186
                c.state = stateAttrName
187
                s = append(s, "exp_template_html_htmlnamefilter")
188
        default:
189
                if isComment(c.state) {
190
                        s = append(s, "exp_template_html_commentescaper")
191
                } else {
192
                        panic("unexpected state " + c.state.String())
193
                }
194
        }
195
        switch c.delim {
196
        case delimNone:
197
                // No extra-escaping needed for raw text content.
198
        case delimSpaceOrTagEnd:
199
                s = append(s, "exp_template_html_nospaceescaper")
200
        default:
201
                s = append(s, "exp_template_html_attrescaper")
202
        }
203
        e.editActionNode(n, s)
204
        return c
205
}
206
 
207
// ensurePipelineContains ensures that the pipeline has commands with
208
// the identifiers in s in order.
209
// If the pipeline already has some of the sanitizers, do not interfere.
210
// For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it
211
// has one matching, "html", and one to insert, "escapeJSVal", to produce
212
// (.X | escapeJSVal | html).
213
func ensurePipelineContains(p *parse.PipeNode, s []string) {
214
        if len(s) == 0 {
215
                return
216
        }
217
        n := len(p.Cmds)
218
        // Find the identifiers at the end of the command chain.
219
        idents := p.Cmds
220
        for i := n - 1; i >= 0; i-- {
221
                if cmd := p.Cmds[i]; len(cmd.Args) != 0 {
222
                        if id, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
223
                                if id.Ident == "noescape" {
224
                                        return
225
                                }
226
                                continue
227
                        }
228
                }
229
                idents = p.Cmds[i+1:]
230
        }
231
        dups := 0
232
        for _, id := range idents {
233
                if escFnsEq(s[dups], (id.Args[0].(*parse.IdentifierNode)).Ident) {
234
                        dups++
235
                        if dups == len(s) {
236
                                return
237
                        }
238
                }
239
        }
240
        newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups)
241
        copy(newCmds, p.Cmds)
242
        // Merge existing identifier commands with the sanitizers needed.
243
        for _, id := range idents {
244
                i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s, escFnsEq)
245
                if i != -1 {
246
                        for _, name := range s[:i] {
247
                                newCmds = appendCmd(newCmds, newIdentCmd(name))
248
                        }
249
                        s = s[i+1:]
250
                }
251
                newCmds = appendCmd(newCmds, id)
252
        }
253
        // Create any remaining sanitizers.
254
        for _, name := range s {
255
                newCmds = appendCmd(newCmds, newIdentCmd(name))
256
        }
257
        p.Cmds = newCmds
258
}
259
 
260
// redundantFuncs records pairs of sanitizers where applying the second after
// the first changes nothing: redundantFuncs[a][b] implies that
// funcMap[b](funcMap[a](x)) == funcMap[a](x) for all x.
var redundantFuncs = map[string]map[string]bool{
	"exp_template_html_commentescaper": map[string]bool{
		"exp_template_html_attrescaper":    true,
		"exp_template_html_nospaceescaper": true,
		"exp_template_html_htmlescaper":    true,
	},
	"exp_template_html_cssescaper": map[string]bool{
		"exp_template_html_attrescaper": true,
	},
	"exp_template_html_jsregexpescaper": map[string]bool{
		"exp_template_html_attrescaper": true,
	},
	"exp_template_html_jsstrescaper": map[string]bool{
		"exp_template_html_attrescaper": true,
	},
	"exp_template_html_urlescaper": map[string]bool{
		"exp_template_html_urlnormalizer": true,
	},
}
281
 
282
// appendCmd appends the given command to the end of the command pipeline
283
// unless it is redundant with the last command.
284
func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
285
        if n := len(cmds); n != 0 {
286
                last, ok := cmds[n-1].Args[0].(*parse.IdentifierNode)
287
                next, _ := cmd.Args[0].(*parse.IdentifierNode)
288
                if ok && redundantFuncs[last.Ident][next.Ident] {
289
                        return cmds
290
                }
291
        }
292
        return append(cmds, cmd)
293
}
294
 
295
// indexOfStr returns the smallest index i for which eq(s, strs[i]) is true,
// or -1 if there is no such index. eq is always called with s as its first
// argument.
func indexOfStr(s string, strs []string, eq func(a, b string) bool) int {
	for i := 0; i < len(strs); i++ {
		if !eq(s, strs[i]) {
			continue
		}
		return i
	}
	return -1
}
304
 
305
// escFnsEq returns whether the two escaping functions are equivalent.
306
func escFnsEq(a, b string) bool {
307
        if e := equivEscapers[a]; e != "" {
308
                a = e
309
        }
310
        if e := equivEscapers[b]; e != "" {
311
                b = e
312
        }
313
        return a == b
314
}
315
 
316
// newIdentCmd returns a new command node whose only argument is an
// identifier node for the given identifier.
func newIdentCmd(identifier string) *parse.CommandNode {
	cmd := new(parse.CommandNode)
	cmd.NodeType = parse.NodeCommand
	cmd.Args = []parse.Node{parse.NewIdentifier(identifier)}
	return cmd
}
323
 
324
// nudge returns the context that would result from following empty string
325
// transitions from the input context.
326
// For example, parsing:
327
//     `
328
// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
329
//     `
330
// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
331
// There are two transitions that happen when the 'x' is seen:
332
// (1) Transition from a before-value state to a start-of-value state without
333
//     consuming any character.
334
// (2) Consume 'x' and transition past the first value character.
335
// In this case, nudging produces the context after (1) happens.
336
func nudge(c context) context {
337
        switch c.state {
338
        case stateTag:
339
                // In `
340
                c.state = stateAttrName
341
        case stateBeforeValue:
342
                // In `
343
                c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
344
        case stateAfterName:
345
                // In `
346
                c.state, c.attr = stateAttrName, attrNone
347
        }
348
        return c
349
}
350
 
351
// join joins the two contexts of a branch template node. The result is an
352
// error context if either of the input contexts are error contexts, or if the
353
// the input contexts differ.
354
func join(a, b context, line int, nodeName string) context {
355
        if a.state == stateError {
356
                return a
357
        }
358
        if b.state == stateError {
359
                return b
360
        }
361
        if a.eq(b) {
362
                return a
363
        }
364
 
365
        c := a
366
        c.urlPart = b.urlPart
367
        if c.eq(b) {
368
                // The contexts differ only by urlPart.
369
                c.urlPart = urlPartUnknown
370
                return c
371
        }
372
 
373
        c = a
374
        c.jsCtx = b.jsCtx
375
        if c.eq(b) {
376
                // The contexts differ only by jsCtx.
377
                c.jsCtx = jsCtxUnknown
378
                return c
379
        }
380
 
381
        // Allow a nudged context to join with an unnudged one.
382
        // This means that
383
        //   

384
        // ends in an unquoted value state even though the else branch
385
        // ends in stateBeforeValue.
386
        if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
387
                if e := join(c, d, line, nodeName); e.state != stateError {
388
                        return e
389
                }
390
        }
391
 
392
        return context{
393
                state: stateError,
394
                err:   errorf(ErrBranchEnd, line, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
395
        }
396
}
397
 
398
// escapeBranch escapes a branch template node: "if", "range" and "with".
399
func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
400
        c0 := e.escapeList(c, n.List)
401
        if nodeName == "range" && c0.state != stateError {
402
                // The "true" branch of a "range" node can execute multiple times.
403
                // We check that executing n.List once results in the same context
404
                // as executing n.List twice.
405
                c1, _ := e.escapeListConditionally(c0, n.List, nil)
406
                c0 = join(c0, c1, n.Line, nodeName)
407
                if c0.state == stateError {
408
                        // Make clear that this is a problem on loop re-entry
409
                        // since developers tend to overlook that branch when
410
                        // debugging templates.
411
                        c0.err.Line = n.Line
412
                        c0.err.Description = "on range loop re-entry: " + c0.err.Description
413
                        return c0
414
                }
415
        }
416
        c1 := e.escapeList(c, n.ElseList)
417
        return join(c0, c1, n.Line, nodeName)
418
}
419
 
420
// escapeList escapes a list template node.
421
func (e *escaper) escapeList(c context, n *parse.ListNode) context {
422
        if n == nil {
423
                return c
424
        }
425
        for _, m := range n.Nodes {
426
                c = e.escape(c, m)
427
        }
428
        return c
429
}
430
 
431
// escapeListConditionally escapes a list node but only preserves edits and
432
// inferences in e if the inferences and output context satisfy filter.
433
// It returns the best guess at an output context, and the result of the filter
434
// which is the same as whether e was updated.
435
func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
436
        e1 := newEscaper(e.tmpl)
437
        // Make type inferences available to f.
438
        for k, v := range e.output {
439
                e1.output[k] = v
440
        }
441
        c = e1.escapeList(c, n)
442
        ok := filter != nil && filter(e1, c)
443
        if ok {
444
                // Copy inferences and edits from e1 back into e.
445
                for k, v := range e1.output {
446
                        e.output[k] = v
447
                }
448
                for k, v := range e1.derived {
449
                        e.derived[k] = v
450
                }
451
                for k, v := range e1.called {
452
                        e.called[k] = v
453
                }
454
                for k, v := range e1.actionNodeEdits {
455
                        e.editActionNode(k, v)
456
                }
457
                for k, v := range e1.templateNodeEdits {
458
                        e.editTemplateNode(k, v)
459
                }
460
                for k, v := range e1.textNodeEdits {
461
                        e.editTextNode(k, v)
462
                }
463
        }
464
        return c, ok
465
}
466
 
467
// escapeTemplate escapes a {{template}} call node.
468
func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
469
        c, name := e.escapeTree(c, n.Name, n.Line)
470
        if name != n.Name {
471
                e.editTemplateNode(n, name)
472
        }
473
        return c
474
}
475
 
476
// escapeTree escapes the named template starting in the given context as
477
// necessary and returns its output context.
478
func (e *escaper) escapeTree(c context, name string, line int) (context, string) {
479
        // Mangle the template name with the input context to produce a reliable
480
        // identifier.
481
        dname := c.mangle(name)
482
        e.called[dname] = true
483
        if out, ok := e.output[dname]; ok {
484
                // Already escaped.
485
                return out, dname
486
        }
487
        t := e.template(name)
488
        if t == nil {
489
                // Two cases: The template exists but is empty, or has never been mentioned at
490
                // all. Distinguish the cases in the error messages.
491
                if e.tmpl.set[name] != nil {
492
                        return context{
493
                                state: stateError,
494
                                err:   errorf(ErrNoSuchTemplate, line, "%q is an incomplete or empty template", name),
495
                        }, dname
496
                }
497
                return context{
498
                        state: stateError,
499
                        err:   errorf(ErrNoSuchTemplate, line, "no such template %q", name),
500
                }, dname
501
        }
502
        if dname != name {
503
                // Use any template derived during an earlier call to escapeTemplate
504
                // with different top level templates, or clone if necessary.
505
                dt := e.template(dname)
506
                if dt == nil {
507
                        dt = template.New(dname)
508
                        dt.Tree = &parse.Tree{Name: dname, Root: cloneList(t.Root)}
509
                        e.derived[dname] = dt
510
                }
511
                t = dt
512
        }
513
        return e.computeOutCtx(c, t), dname
514
}
515
 
516
// computeOutCtx takes a template and its start context and computes the output
517
// context while storing any inferences in e.
518
func (e *escaper) computeOutCtx(c context, t *template.Template) context {
519
        // Propagate context over the body.
520
        c1, ok := e.escapeTemplateBody(c, t)
521
        if !ok {
522
                // Look for a fixed point by assuming c1 as the output context.
523
                if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
524
                        c1, ok = c2, true
525
                }
526
                // Use c1 as the error context if neither assumption worked.
527
        }
528
        if !ok && c1.state != stateError {
529
                return context{
530
                        state: stateError,
531
                        // TODO: Find the first node with a line in t.text.Tree.Root
532
                        err: errorf(ErrOutputContext, 0, "cannot compute output context for template %s", t.Name()),
533
                }
534
        }
535
        return c1
536
}
537
 
538
// escapeTemplateBody escapes the given template assuming the given output
539
// context, and returns the best guess at the output context and whether the
540
// assumption was correct.
541
func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
542
        filter := func(e1 *escaper, c1 context) bool {
543
                if c1.state == stateError {
544
                        // Do not update the input escaper, e.
545
                        return false
546
                }
547
                if !e1.called[t.Name()] {
548
                        // If t is not recursively called, then c1 is an
549
                        // accurate output context.
550
                        return true
551
                }
552
                // c1 is accurate if it matches our assumed output context.
553
                return c.eq(c1)
554
        }
555
        // We need to assume an output context so that recursive template calls
556
        // take the fast path out of escapeTree instead of infinitely recursing.
557
        // Naively assuming that the input context is the same as the output
558
        // works >90% of the time.
559
        e.output[t.Name()] = c
560
        return e.escapeListConditionally(c, t.Tree.Root, filter)
561
}
562
 
563
// delimEnds maps each delim to a string of characters that terminate it.
564
var delimEnds = [...]string{
565
        delimDoubleQuote: `"`,
566
        delimSingleQuote: "'",
567
        // Determined empirically by running the below in various browsers.
568
        // var div = document.createElement("DIV");
569
        // for (var i = 0; i < 0x10000; ++i) {
570
        //   div.innerHTML = "";
571
        //   if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
572
        //     document.write("

U+" + i.toString(16));

573
        // }
574
        delimSpaceOrTagEnd: " \t\n\f\r>",
575
}
576
 
577
// doctypeBytes is the prefix that marks a '<' in text as the start of a
// doctype declaration; escapeText leaves such a '<' unescaped.
var doctypeBytes = []byte("<!DOCTYPE")
578
 
579
// escapeText escapes a text template node.
580
func (e *escaper) escapeText(c context, n *parse.TextNode) context {
581
        s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
582
        for i != len(s) {
583
                c1, nread := contextAfterText(c, s[i:])
584
                i1 := i + nread
585
                if c.state == stateText || c.state == stateRCDATA {
586
                        end := i1
587
                        if c1.state != c.state {
588
                                for j := end - 1; j >= i; j-- {
589
                                        if s[j] == '<' {
590
                                                end = j
591
                                                break
592
                                        }
593
                                }
594
                        }
595
                        for j := i; j < end; j++ {
596
                                if s[j] == '<' && !bytes.HasPrefix(s[j:], doctypeBytes) {
597
                                        b.Write(s[written:j])
598
                                        b.WriteString("<")
599
                                        written = j + 1
600
                                }
601
                        }
602
                } else if isComment(c.state) && c.delim == delimNone {
603
                        switch c.state {
604
                        case stateJSBlockCmt:
605
                                // http://es5.github.com/#x7.4:
606
                                // "Comments behave like white space and are
607
                                // discarded except that, if a MultiLineComment
608
                                // contains a line terminator character, then
609
                                // the entire comment is considered to be a
610
                                // LineTerminator for purposes of parsing by
611
                                // the syntactic grammar."
612
                                if bytes.IndexAny(s[written:i1], "\n\r\u2028\u2029") != -1 {
613
                                        b.WriteByte('\n')
614
                                } else {
615
                                        b.WriteByte(' ')
616
                                }
617
                        case stateCSSBlockCmt:
618
                                b.WriteByte(' ')
619
                        }
620
                        written = i1
621
                }
622
                if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
623
                        // Preserve the portion between written and the comment start.
624
                        cs := i1 - 2
625
                        if c1.state == stateHTMLCmt {
626
                                // "