OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [html/] [parse.go] - Blame information for rev 747

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2010 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
package html
6
 
7
import (
8
        "io"
9
        "strings"
10
)
11
 
12
// A parser implements the HTML5 parsing algorithm:
13
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
14
type parser struct {
15
        // tokenizer provides the tokens for the parser.
16
        tokenizer *Tokenizer
17
        // tok is the most recently read token.
18
        tok Token
19
        // Self-closing tags like 
are re-interpreted as a two-token sequence:
20
        // 
followed by . hasSelfClosingToken is true if we have just read
21
        // the synthetic start tag and the next one due is the matching end tag.
22
        hasSelfClosingToken bool
23
        // doc is the document root element.
24
        doc *Node
25
        // The stack of open elements (section 12.2.3.2) and active formatting
26
        // elements (section 12.2.3.3).
27
        oe, afe nodeStack
28
        // Element pointers (section 12.2.3.4).
29
        head, form *Node
30
        // Other parsing state flags (section 12.2.3.5).
31
        scripting, framesetOK bool
32
        // im is the current insertion mode.
33
        im insertionMode
34
        // originalIM is the insertion mode to go back to after completing a text
35
        // or inTableText insertion mode.
36
        originalIM insertionMode
37
        // fosterParenting is whether new elements should be inserted according to
38
        // the foster parenting rules (section 12.2.5.3).
39
        fosterParenting bool
40
        // quirks is whether the parser is operating in "quirks mode."
41
        quirks bool
42
        // context is the context element when parsing an HTML fragment
43
        // (section 12.4).
44
        context *Node
45
}
46
 
47
// top returns the node at the top of the stack of open elements, or the
// document root when that stack yields no node.
func (p *parser) top() *Node {
	n := p.oe.top()
	if n == nil {
		return p.doc
	}
	return n
}
53
 
54
// defaultScopeStopTags maps a namespace ("" for plain HTML) to the tags that
// terminate a scope search in popUntil and friends. The lists come from
// section 12.2.3.2.
var defaultScopeStopTags = map[string][]string{
	"": []string{
		"applet", "caption", "html", "table", "td", "th", "marquee", "object",
	},
	"math": []string{
		"annotation-xml", "mi", "mn", "mo", "ms", "mtext",
	},
	"svg": []string{
		"desc", "foreignObject", "title",
	},
}
62
 
63
// scope selects which set of stop tags bounds a search of the stack of open
// elements.
type scope int

// The scopes accepted by the stack-searching helpers in this file.
const (
	// defaultScope stops only at the per-namespace default stop tags.
	defaultScope scope = iota
	// listItemScope additionally stops at "ol" and "ul".
	listItemScope
	// buttonScope additionally stops at "button".
	buttonScope
	// tableScope stops at "html" and "table".
	tableScope
	// tableRowScope is used by clearStackToContext, where it stops at
	// "html" and "tr".
	tableRowScope
)
72
 
73
// popUntil pops the stack of open elements at the highest element whose tag
74
// is in matchTags, provided there is no higher element in the scope's stop
75
// tags (as defined in section 12.2.3.2). It returns whether or not there was
76
// such an element. If there was not, popUntil leaves the stack unchanged.
77
//
78
// For example, the set of stop tags for table scope is: "html", "table". If
79
// the stack was:
80
// ["html", "body", "font", "table", "b", "i", "u"]
81
// then popUntil(tableScope, "font") would return false, but
82
// popUntil(tableScope, "i") would return true and the stack would become:
83
// ["html", "body", "font", "table", "b"]
84
//
85
// If an element's tag is in both the stop tags and matchTags, then the stack
86
// will be popped and the function returns true (provided, of course, there was
87
// no higher element in the stack that was also in the stop tags). For example,
88
// popUntil(tableScope, "table") returns true and leaves:
89
// ["html", "body", "font"]
90
func (p *parser) popUntil(s scope, matchTags ...string) bool {
91
        if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
92
                p.oe = p.oe[:i]
93
                return true
94
        }
95
        return false
96
}
97
 
98
// indexOfElementInScope returns the index in p.oe of the highest element whose
99
// tag is in matchTags that is in scope. If no matching element is in scope, it
100
// returns -1.
101
func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
102
        for i := len(p.oe) - 1; i >= 0; i-- {
103
                tag := p.oe[i].Data
104
                if p.oe[i].Namespace == "" {
105
                        for _, t := range matchTags {
106
                                if t == tag {
107
                                        return i
108
                                }
109
                        }
110
                        switch s {
111
                        case defaultScope:
112
                                // No-op.
113
                        case listItemScope:
114
                                if tag == "ol" || tag == "ul" {
115
                                        return -1
116
                                }
117
                        case buttonScope:
118
                                if tag == "button" {
119
                                        return -1
120
                                }
121
                        case tableScope:
122
                                if tag == "html" || tag == "table" {
123
                                        return -1
124
                                }
125
                        default:
126
                                panic("unreachable")
127
                        }
128
                }
129
                switch s {
130
                case defaultScope, listItemScope, buttonScope:
131
                        for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
132
                                if t == tag {
133
                                        return -1
134
                                }
135
                        }
136
                }
137
        }
138
        return -1
139
}
140
 
141
// elementInScope is like popUntil, except that it doesn't modify the stack of
142
// open elements.
143
func (p *parser) elementInScope(s scope, matchTags ...string) bool {
144
        return p.indexOfElementInScope(s, matchTags...) != -1
145
}
146
 
147
// clearStackToContext pops elements off the stack of open elements until a
148
// scope-defined element is found.
149
func (p *parser) clearStackToContext(s scope) {
150
        for i := len(p.oe) - 1; i >= 0; i-- {
151
                tag := p.oe[i].Data
152
                switch s {
153
                case tableScope:
154
                        if tag == "html" || tag == "table" {
155
                                p.oe = p.oe[:i+1]
156
                                return
157
                        }
158
                case tableRowScope:
159
                        if tag == "html" || tag == "tr" {
160
                                p.oe = p.oe[:i+1]
161
                                return
162
                        }
163
                default:
164
                        panic("unreachable")
165
                }
166
        }
167
}
168
 
169
// addChild adds a child node n to the top element, and pushes n onto the stack
170
// of open elements if it is an element node.
171
func (p *parser) addChild(n *Node) {
172
        if p.fosterParenting {
173
                p.fosterParent(n)
174
        } else {
175
                p.top().Add(n)
176
        }
177
 
178
        if n.Type == ElementNode {
179
                p.oe = append(p.oe, n)
180
        }
181
}
182
 
183
// fosterParent adds a child node according to the foster parenting rules.
184
// Section 12.2.5.3, "foster parenting".
185
func (p *parser) fosterParent(n *Node) {
186
        p.fosterParenting = false
187
        var table, parent *Node
188
        var i int
189
        for i = len(p.oe) - 1; i >= 0; i-- {
190
                if p.oe[i].Data == "table" {
191
                        table = p.oe[i]
192
                        break
193
                }
194
        }
195
 
196
        if table == nil {
197
                // The foster parent is the html element.
198
                parent = p.oe[0]
199
        } else {
200
                parent = table.Parent
201
        }
202
        if parent == nil {
203
                parent = p.oe[i-1]
204
        }
205
 
206
        var child *Node
207
        for i, child = range parent.Child {
208
                if child == table {
209
                        break
210
                }
211
        }
212
 
213
        if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode {
214
                parent.Child[i-1].Data += n.Data
215
                return
216
        }
217
 
218
        if i == len(parent.Child) {
219
                parent.Add(n)
220
        } else {
221
                // Insert n into parent.Child at index i.
222
                parent.Child = append(parent.Child[:i+1], parent.Child[i:]...)
223
                parent.Child[i] = n
224
                n.Parent = parent
225
        }
226
}
227
 
228
// addText adds text to the preceding node if it is a text node, or else it
229
// calls addChild with a new text node.
230
func (p *parser) addText(text string) {
231
        // TODO: distinguish whitespace text from others.
232
        t := p.top()
233
        if i := len(t.Child); i > 0 && t.Child[i-1].Type == TextNode {
234
                t.Child[i-1].Data += text
235
                return
236
        }
237
        p.addChild(&Node{
238
                Type: TextNode,
239
                Data: text,
240
        })
241
}
242
 
243
// addElement calls addChild with an element node.
244
func (p *parser) addElement(tag string, attr []Attribute) {
245
        p.addChild(&Node{
246
                Type: ElementNode,
247
                Data: tag,
248
                Attr: attr,
249
        })
250
}
251
 
252
// Section 12.2.3.3.
253
func (p *parser) addFormattingElement(tag string, attr []Attribute) {
254
        p.addElement(tag, attr)
255
        p.afe = append(p.afe, p.top())
256
        // TODO.
257
}
258
 
259
// Section 12.2.3.3.
260
func (p *parser) clearActiveFormattingElements() {
261
        for {
262
                n := p.afe.pop()
263
                if len(p.afe) == 0 || n.Type == scopeMarkerNode {
264
                        return
265
                }
266
        }
267
}
268
 
269
// Section 12.2.3.3.
270
func (p *parser) reconstructActiveFormattingElements() {
271
        n := p.afe.top()
272
        if n == nil {
273
                return
274
        }
275
        if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
276
                return
277
        }
278
        i := len(p.afe) - 1
279
        for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
280
                if i == 0 {
281
                        i = -1
282
                        break
283
                }
284
                i--
285
                n = p.afe[i]
286
        }
287
        for {
288
                i++
289
                clone := p.afe[i].clone()
290
                p.addChild(clone)
291
                p.afe[i] = clone
292
                if i == len(p.afe)-1 {
293
                        break
294
                }
295
        }
296
}
297
 
298
// read reads the next token. This is usually from the tokenizer, but it may
299
// be the synthesized end tag implied by a self-closing tag.
300
func (p *parser) read() error {
301
        if p.hasSelfClosingToken {
302
                p.hasSelfClosingToken = false
303
                p.tok.Type = EndTagToken
304
                p.tok.Attr = nil
305
                return nil
306
        }
307
        p.tokenizer.Next()
308
        p.tok = p.tokenizer.Token()
309
        switch p.tok.Type {
310
        case ErrorToken:
311
                return p.tokenizer.Err()
312
        case SelfClosingTagToken:
313
                p.hasSelfClosingToken = true
314
                p.tok.Type = StartTagToken
315
        }
316
        return nil
317
}
318
 
319
// Section 12.2.4.
320
func (p *parser) acknowledgeSelfClosingTag() {
321
        p.hasSelfClosingToken = false
322
}
323
 
324
// An insertion mode (section 12.2.3.1) is the state transition function from
325
// a particular state in the HTML5 parser's state machine. It updates the
326
// parser's fields depending on parser.tok (where ErrorToken means EOF).
327
// It returns whether the token was consumed.
328
type insertionMode func(*parser) bool
329
 
330
// setOriginalIM sets the insertion mode to return to after completing a text or
331
// inTableText insertion mode.
332
// Section 12.2.3.1, "using the rules for".
333
func (p *parser) setOriginalIM() {
334
        if p.originalIM != nil {
335
                panic("html: bad parser state: originalIM was set twice")
336
        }
337
        p.originalIM = p.im
338
}
339
 
340
// Section 12.2.3.1, "reset the insertion mode".
341
func (p *parser) resetInsertionMode() {
342
        for i := len(p.oe) - 1; i >= 0; i-- {
343
                n := p.oe[i]
344
                if i == 0 && p.context != nil {
345
                        n = p.context
346
                }
347
 
348
                switch n.Data {
349
                case "select":
350
                        p.im = inSelectIM
351
                case "td", "th":
352
                        p.im = inCellIM
353
                case "tr":
354
                        p.im = inRowIM
355
                case "tbody", "thead", "tfoot":
356
                        p.im = inTableBodyIM
357
                case "caption":
358
                        p.im = inCaptionIM
359
                case "colgroup":
360
                        p.im = inColumnGroupIM
361
                case "table":
362
                        p.im = inTableIM
363
                case "head":
364
                        p.im = inBodyIM
365
                case "body":
366
                        p.im = inBodyIM
367
                case "frameset":
368
                        p.im = inFramesetIM
369
                case "html":
370
                        p.im = beforeHeadIM
371
                default:
372
                        continue
373
                }
374
                return
375
        }
376
        p.im = inBodyIM
377
}
378
 
379
// whitespace is the cutset of characters that the insertion modes trim from
// the start of text tokens: space, tab, carriage return, newline, form feed.
const whitespace = " \t\r\n\f"
380
 
381
// Section 12.2.5.4.1.
382
func initialIM(p *parser) bool {
383
        switch p.tok.Type {
384
        case TextToken:
385
                p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
386
                if len(p.tok.Data) == 0 {
387
                        // It was all whitespace, so ignore it.
388
                        return true
389
                }
390
        case CommentToken:
391
                p.doc.Add(&Node{
392
                        Type: CommentNode,
393
                        Data: p.tok.Data,
394
                })
395
                return true
396
        case DoctypeToken:
397
                n, quirks := parseDoctype(p.tok.Data)
398
                p.doc.Add(n)
399
                p.quirks = quirks
400
                p.im = beforeHTMLIM
401
                return true
402
        }
403
        p.quirks = true
404
        p.im = beforeHTMLIM
405
        return false
406
}
407
 
408
// Section 12.2.5.4.2.
409
func beforeHTMLIM(p *parser) bool {
410
        switch p.tok.Type {
411
        case TextToken:
412
                p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
413
                if len(p.tok.Data) == 0 {
414
                        // It was all whitespace, so ignore it.
415
                        return true
416
                }
417
        case StartTagToken:
418
                if p.tok.Data == "html" {
419
                        p.addElement(p.tok.Data, p.tok.Attr)
420
                        p.im = beforeHeadIM
421
                        return true
422
                }
423
        case EndTagToken:
424
                switch p.tok.Data {
425
                case "head", "body", "html", "br":
426
                        // Drop down to creating an implied  tag.
427
                default:
428
                        // Ignore the token.
429
                        return true
430
                }
431
        case CommentToken:
432
                p.doc.Add(&Node{
433
                        Type: CommentNode,
434
                        Data: p.tok.Data,
435
                })
436
                return true
437
        }
438
        // Create an implied  tag.
439
        p.addElement("html", nil)
440
        p.im = beforeHeadIM
441
        return false
442
}
443
 
444
// Section 12.2.5.4.3.
445
func beforeHeadIM(p *parser) bool {
446
        var (
447
                add     bool
448
                attr    []Attribute
449
                implied bool
450
        )
451
        switch p.tok.Type {
452
        case ErrorToken:
453
                implied = true
454
        case TextToken:
455
                p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
456
                if len(p.tok.Data) == 0 {
457
                        // It was all whitespace, so ignore it.
458
                        return true
459
                }
460
                implied = true
461
        case StartTagToken:
462
                switch p.tok.Data {
463
                case "head":
464
                        add = true
465
                        attr = p.tok.Attr
466
                case "html":
467
                        return inBodyIM(p)
468
                default:
469
                        implied = true
470
                }
471
        case EndTagToken:
472
                switch p.tok.Data {
473
                case "head", "body", "html", "br":
474
                        implied = true
475
                default:
476
                        // Ignore the token.
477
                }
478
        case CommentToken:
479
                p.addChild(&Node{
480
                        Type: CommentNode,
481
                        Data: p.tok.Data,
482
                })
483
                return true
484
        }
485
        if add || implied {
486
                p.addElement("head", attr)
487
                p.head = p.top()
488
        }
489
        p.im = inHeadIM
490
        return !implied
491
}
492
 
493
// Section 12.2.5.4.4.
494
func inHeadIM(p *parser) bool {
495
        var (
496
                pop     bool
497
                implied bool
498
        )
499
        switch p.tok.Type {
500
        case ErrorToken:
501
                implied = true
502
        case TextToken:
503
                s := strings.TrimLeft(p.tok.Data, whitespace)
504
                if len(s) < len(p.tok.Data) {
505
                        // Add the initial whitespace to the current node.
506
                        p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
507
                        if s == "" {
508
                                return true
509
                        }
510
                        p.tok.Data = s
511
                }
512
                implied = true
513
        case StartTagToken:
514
                switch p.tok.Data {
515
                case "html":
516
                        return inBodyIM(p)
517
                case "base", "basefont", "bgsound", "command", "link", "meta":
518
                        p.addElement(p.tok.Data, p.tok.Attr)
519
                        p.oe.pop()
520
                        p.acknowledgeSelfClosingTag()
521
                case "script", "title", "noscript", "noframes", "style":
522
                        p.addElement(p.tok.Data, p.tok.Attr)
523
                        p.setOriginalIM()
524
                        p.im = textIM
525
                        return true
526
                case "head":
527
                        // Ignore the token.
528
                        return true
529
                default:
530
                        implied = true
531
                }
532
        case EndTagToken:
533
                switch p.tok.Data {
534
                case "head":
535
                        pop = true
536
                case "body", "html", "br":
537
                        implied = true
538
                default:
539
                        // Ignore the token.
540
                        return true
541
                }
542
        case CommentToken:
543
                p.addChild(&Node{
544
                        Type: CommentNode,
545
                        Data: p.tok.Data,
546
                })
547
                return true
548
        }
549
        if pop || implied {
550
                n := p.oe.pop()
551
                if n.Data != "head" {
552
                        panic("html: bad parser state:  element not found, in the in-head insertion mode")
553
                }
554
                p.im = afterHeadIM
555
                return !implied
556
        }
557
        return true
558
}
559
 
560
// Section 12.2.5.4.6.
561
func afterHeadIM(p *parser) bool {
562
        var (
563
                add        bool
564
                attr       []Attribute
565
                framesetOK bool
566
                implied    bool
567
        )
568
        switch p.tok.Type {
569
        case ErrorToken:
570
                implied = true
571
                framesetOK = true
572
        case TextToken:
573
                s := strings.TrimLeft(p.tok.Data, whitespace)
574
                if len(s) < len(p.tok.Data) {
575
                        // Add the initial whitespace to the current node.
576
                        p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
577
                        if s == "" {
578
                                return true
579
                        }
580
                        p.tok.Data = s
581
                }
582
                implied = true
583
                framesetOK = true
584
        case StartTagToken:
585
                switch p.tok.Data {
586
                case "html":
587
                        // TODO.
588
                case "body":
589
                        add = true
590
                        attr = p.tok.Attr
591
                        framesetOK = false
592
                case "frameset":
593
                        p.addElement(p.tok.Data, p.tok.Attr)
594
                        p.im = inFramesetIM
595
                        return true
596
                case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
597
                        p.oe = append(p.oe, p.head)
598
                        defer p.oe.pop()
599
                        return inHeadIM(p)
600
                case "head":
601
                        // Ignore the token.
602
                        return true
603
                default:
604
                        implied = true
605
                        framesetOK = true
606
                }
607
        case EndTagToken:
608
                switch p.tok.Data {
609
                case "body", "html", "br":
610
                        implied = true
611
                        framesetOK = true
612
                default:
613
                        // Ignore the token.
614
                        return true
615
                }
616
        case CommentToken:
617
                p.addChild(&Node{
618
                        Type: CommentNode,
619
                        Data: p.tok.Data,
620
                })
621
                return true
622
        }
623
        if add || implied {
624
                p.addElement("body", attr)
625
                p.framesetOK = framesetOK
626
        }
627
        p.im = inBodyIM
628
        return !implied
629
}
630
 
631
// copyAttributes copies attributes of src not found on dst to dst.
632
func copyAttributes(dst *Node, src Token) {
633
        if len(src.Attr) == 0 {
634
                return
635
        }
636
        attr := map[string]string{}
637
        for _, a := range dst.Attr {
638
                attr[a.Key] = a.Val
639
        }
640
        for _, a := range src.Attr {
641
                if _, ok := attr[a.Key]; !ok {
642
                        dst.Attr = append(dst.Attr, a)
643
                        attr[a.Key] = a.Val
644
                }
645
        }
646
}
647
 
648
// Section 12.2.5.4.7.
649
func inBodyIM(p *parser) bool {
650
        switch p.tok.Type {
651
        case TextToken:
652
                switch n := p.oe.top(); n.Data {
653
                case "pre", "listing", "textarea":
654
                        if len(n.Child) == 0 {
655
                                // Ignore a newline at the start of a 
 block.
656
                                d := p.tok.Data
657
                                if d != "" && d[0] == '\r' {
658
                                        d = d[1:]
659
                                }
660
                                if d != "" && d[0] == '\n' {
661
                                        d = d[1:]
662
                                }
663
                                if d == "" {
664
                                        return true
665
                                }
666
                                p.tok.Data = d
667
                        }
668
                }
669
                p.reconstructActiveFormattingElements()
670
                p.addText(p.tok.Data)
671
                p.framesetOK = false
672
        case StartTagToken:
673
                switch p.tok.Data {
674
                case "html":
675
                        copyAttributes(p.oe[0], p.tok)
676
                case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
677
                        p.popUntil(buttonScope, "p")
678
                        p.addElement(p.tok.Data, p.tok.Attr)
679
                case "h1", "h2", "h3", "h4", "h5", "h6":
680
                        p.popUntil(buttonScope, "p")
681
                        switch n := p.top(); n.Data {
682
                        case "h1", "h2", "h3", "h4", "h5", "h6":
683
                                p.oe.pop()
684
                        }
685
                        p.addElement(p.tok.Data, p.tok.Attr)
686
                case "a":
687
                        for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
688
                                if n := p.afe[i]; n.Type == ElementNode && n.Data == "a" {
689
                                        p.inBodyEndTagFormatting("a")
690
                                        p.oe.remove(n)
691
                                        p.afe.remove(n)
692
                                        break
693
                                }
694
                        }
695
                        p.reconstructActiveFormattingElements()
696
                        p.addFormattingElement(p.tok.Data, p.tok.Attr)
697
                case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u":
698
                        p.reconstructActiveFormattingElements()
699
                        p.addFormattingElement(p.tok.Data, p.tok.Attr)
700
                case "nobr":
701
                        p.reconstructActiveFormattingElements()
702
                        if p.elementInScope(defaultScope, "nobr") {
703
                                p.inBodyEndTagFormatting("nobr")
704
                                p.reconstructActiveFormattingElements()
705
                        }
706
                        p.addFormattingElement(p.tok.Data, p.tok.Attr)
707
                case "applet", "marquee", "object":
708
                        p.reconstructActiveFormattingElements()
709
                        p.addElement(p.tok.Data, p.tok.Attr)
710
                        p.afe = append(p.afe, &scopeMarker)
711
                        p.framesetOK = false
712
                case "area", "br", "embed", "img", "input", "keygen", "wbr":
713
                        p.reconstructActiveFormattingElements()
714
                        p.addElement(p.tok.Data, p.tok.Attr)
715
                        p.oe.pop()
716
                        p.acknowledgeSelfClosingTag()
717
                        p.framesetOK = false
718
                case "table":
719
                        if !p.quirks {
720
                                p.popUntil(buttonScope, "p")
721
                        }
722
                        p.addElement(p.tok.Data, p.tok.Attr)
723
                        p.framesetOK = false
724
                        p.im = inTableIM
725
                        return true
726
                case "hr":
727
                        p.popUntil(buttonScope, "p")
728
                        p.addElement(p.tok.Data, p.tok.Attr)
729
                        p.oe.pop()
730
                        p.acknowledgeSelfClosingTag()
731
                        p.framesetOK = false
732
                case "select":
733
                        p.reconstructActiveFormattingElements()
734
                        p.addElement(p.tok.Data, p.tok.Attr)
735
                        p.framesetOK = false
736
                        p.im = inSelectIM
737
                        return true
738
                case "form":
739
                        if p.form == nil {
740
                                p.popUntil(buttonScope, "p")
741
                                p.addElement(p.tok.Data, p.tok.Attr)
742
                                p.form = p.top()
743
                        }
744
                case "li":
745
                        p.framesetOK = false
746
                        for i := len(p.oe) - 1; i >= 0; i-- {
747
                                node := p.oe[i]
748
                                switch node.Data {
749
                                case "li":
750
                                        p.popUntil(listItemScope, "li")
751
                                case "address", "div", "p":
752
                                        continue
753
                                default:
754
                                        if !isSpecialElement(node) {
755
                                                continue
756
                                        }
757
                                }
758
                                break
759
                        }
760
                        p.popUntil(buttonScope, "p")
761
                        p.addElement(p.tok.Data, p.tok.Attr)
762
                case "dd", "dt":
763
                        p.framesetOK = false
764
                        for i := len(p.oe) - 1; i >= 0; i-- {
765
                                node := p.oe[i]
766
                                switch node.Data {
767
                                case "dd", "dt":
768
                                        p.oe = p.oe[:i]
769
                                case "address", "div", "p":
770
                                        continue
771
                                default:
772
                                        if !isSpecialElement(node) {
773
                                                continue
774
                                        }
775
                                }
776
                                break
777
                        }
778
                        p.popUntil(buttonScope, "p")
779
                        p.addElement(p.tok.Data, p.tok.Attr)
780
                case "plaintext":
781
                        p.popUntil(buttonScope, "p")
782
                        p.addElement(p.tok.Data, p.tok.Attr)
783
                case "button":
784
                        p.popUntil(defaultScope, "button")
785
                        p.reconstructActiveFormattingElements()
786
                        p.addElement(p.tok.Data, p.tok.Attr)
787
                        p.framesetOK = false
788
                case "optgroup", "option":
789
                        if p.top().Data == "option" {
790
                                p.oe.pop()
791
                        }
792
                        p.reconstructActiveFormattingElements()
793
                        p.addElement(p.tok.Data, p.tok.Attr)
794
                case "body":
795
                        if len(p.oe) >= 2 {
796
                                body := p.oe[1]
797
                                if body.Type == ElementNode && body.Data == "body" {
798
                                        p.framesetOK = false
799
                                        copyAttributes(body, p.tok)
800
                                }
801
                        }
802
                case "frameset":
803
                        if !p.framesetOK || len(p.oe) < 2 || p.oe[1].Data != "body" {
804
                                // Ignore the token.
805
                                return true
806
                        }
807
                        body := p.oe[1]
808
                        if body.Parent != nil {
809
                                body.Parent.Remove(body)
810
                        }
811
                        p.oe = p.oe[:1]
812
                        p.addElement(p.tok.Data, p.tok.Attr)
813
                        p.im = inFramesetIM
814
                        return true
815
                case "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title":
816
                        return inHeadIM(p)
817
                case "image":
818
                        p.tok.Data = "img"
819
                        return false
820
                case "isindex":
821
                        if p.form != nil {
822
                                // Ignore the token.
823
                                return true
824
                        }
825
                        action := ""
826
                        prompt := "This is a searchable index. Enter search keywords: "
827
                        attr := []Attribute{{Key: "name", Val: "isindex"}}
828
                        for _, a := range p.tok.Attr {
829
                                switch a.Key {
830
                                case "action":
831
                                        action = a.Val
832
                                case "name":
833
                                        // Ignore the attribute.
834
                                case "prompt":
835
                                        prompt = a.Val
836
                                default:
837
                                        attr = append(attr, a)
838
                                }
839
                        }
840
                        p.acknowledgeSelfClosingTag()
841
                        p.popUntil(buttonScope, "p")
842
                        p.addElement("form", nil)
843
                        p.form = p.top()
844
                        if action != "" {
845
                                p.form.Attr = []Attribute{{Key: "action", Val: action}}
846
                        }
847
                        p.addElement("hr", nil)
848
                        p.oe.pop()
849
                        p.addElement("label", nil)
850
                        p.addText(prompt)
851
                        p.addElement("input", attr)
852
                        p.oe.pop()
853
                        p.oe.pop()
854
                        p.addElement("hr", nil)
855
                        p.oe.pop()
856
                        p.oe.pop()
857
                        p.form = nil
858
                case "xmp":
859
                        p.popUntil(buttonScope, "p")
860
                        p.reconstructActiveFormattingElements()
861
                        p.framesetOK = false
862
                        p.addElement(p.tok.Data, p.tok.Attr)
863
                case "math", "svg":
864
                        p.reconstructActiveFormattingElements()
865
                        if p.tok.Data == "math" {
866
                                // TODO: adjust MathML attributes.
867
                        } else {
868
                                // TODO: adjust SVG attributes.
869
                        }
870
                        adjustForeignAttributes(p.tok.Attr)
871
                        p.addElement(p.tok.Data, p.tok.Attr)
872
                        p.top().Namespace = p.tok.Data
873
                        return true
874
                case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
875
                        // Ignore the token.
876
                default:
877
                        // TODO.
878
                        p.addElement(p.tok.Data, p.tok.Attr)
879
                }
880
        case EndTagToken:
881
                switch p.tok.Data {
882
                case "body":
883
                        // TODO: autoclose the stack of open elements.
884
                        p.im = afterBodyIM
885
                        return true
886
                case "p":
887
                        if !p.elementInScope(buttonScope, "p") {
888
                                p.addElement("p", nil)
889
                        }
890
                        p.popUntil(buttonScope, "p")
891
                case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
892
                        p.inBodyEndTagFormatting(p.tok.Data)
893
                case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
894
                        p.popUntil(defaultScope, p.tok.Data)
895
                case "applet", "marquee", "object":
896
                        if p.popUntil(defaultScope, p.tok.Data) {
897
                                p.clearActiveFormattingElements()
898
                        }
899
                case "br":
900
                        p.tok.Type = StartTagToken
901
                        return false
902
                default:
903
                        p.inBodyEndTagOther(p.tok.Data)
904
                }
905
        case CommentToken:
906
                p.addChild(&Node{
907
                        Type: CommentNode,
908
                        Data: p.tok.Data,
909
                })
910
        }
911
 
912
        return true
913
}
914
 
915
func (p *parser) inBodyEndTagFormatting(tag string) {
916
        // This is the "adoption agency" algorithm, described at
917
        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency
918
 
919
        // TODO: this is a fairly literal line-by-line translation of that algorithm.
920
        // Once the code successfully parses the comprehensive test suite, we should
921
        // refactor this code to be more idiomatic.
922
 
923
        // Steps 1-3. The outer loop.
924
        for i := 0; i < 8; i++ {
925
                // Step 4. Find the formatting element.
926
                var formattingElement *Node
927
                for j := len(p.afe) - 1; j >= 0; j-- {
928
                        if p.afe[j].Type == scopeMarkerNode {
929
                                break
930
                        }
931
                        if p.afe[j].Data == tag {
932
                                formattingElement = p.afe[j]
933
                                break
934
                        }
935
                }
936
                if formattingElement == nil {
937
                        p.inBodyEndTagOther(tag)
938
                        return
939
                }
940
                feIndex := p.oe.index(formattingElement)
941
                if feIndex == -1 {
942
                        p.afe.remove(formattingElement)
943
                        return
944
                }
945
                if !p.elementInScope(defaultScope, tag) {
946
                        // Ignore the tag.
947
                        return
948
                }
949
 
950
                // Steps 5-6. Find the furthest block.
951
                var furthestBlock *Node
952
                for _, e := range p.oe[feIndex:] {
953
                        if isSpecialElement(e) {
954
                                furthestBlock = e
955
                                break
956
                        }
957
                }
958
                if furthestBlock == nil {
959
                        e := p.oe.pop()
960
                        for e != formattingElement {
961
                                e = p.oe.pop()
962
                        }
963
                        p.afe.remove(e)
964
                        return
965
                }
966
 
967
                // Steps 7-8. Find the common ancestor and bookmark node.
968
                commonAncestor := p.oe[feIndex-1]
969
                bookmark := p.afe.index(formattingElement)
970
 
971
                // Step 9. The inner loop. Find the lastNode to reparent.
972
                lastNode := furthestBlock
973
                node := furthestBlock
974
                x := p.oe.index(node)
975
                // Steps 9.1-9.3.
976
                for j := 0; j < 3; j++ {
977
                        // Step 9.4.
978
                        x--
979
                        node = p.oe[x]
980
                        // Step 9.5.
981
                        if p.afe.index(node) == -1 {
982
                                p.oe.remove(node)
983
                                continue
984
                        }
985
                        // Step 9.6.
986
                        if node == formattingElement {
987
                                break
988
                        }
989
                        // Step 9.7.
990
                        clone := node.clone()
991
                        p.afe[p.afe.index(node)] = clone
992
                        p.oe[p.oe.index(node)] = clone
993
                        node = clone
994
                        // Step 9.8.
995
                        if lastNode == furthestBlock {
996
                                bookmark = p.afe.index(node) + 1
997
                        }
998
                        // Step 9.9.
999
                        if lastNode.Parent != nil {
1000
                                lastNode.Parent.Remove(lastNode)
1001
                        }
1002
                        node.Add(lastNode)
1003
                        // Step 9.10.
1004
                        lastNode = node
1005
                }
1006
 
1007
                // Step 10. Reparent lastNode to the common ancestor,
1008
                // or for misnested table nodes, to the foster parent.
1009
                if lastNode.Parent != nil {
1010
                        lastNode.Parent.Remove(lastNode)
1011
                }
1012
                switch commonAncestor.Data {
1013
                case "table", "tbody", "tfoot", "thead", "tr":
1014
                        p.fosterParent(lastNode)
1015
                default:
1016
                        commonAncestor.Add(lastNode)
1017
                }
1018
 
1019
                // Steps 11-13. Reparent nodes from the furthest block's children
1020
                // to a clone of the formatting element.
1021
                clone := formattingElement.clone()
1022
                reparentChildren(clone, furthestBlock)
1023
                furthestBlock.Add(clone)
1024
 
1025
                // Step 14. Fix up the list of active formatting elements.
1026
                if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
1027
                        // Move the bookmark with the rest of the list.
1028
                        bookmark--
1029
                }
1030
                p.afe.remove(formattingElement)
1031
                p.afe.insert(bookmark, clone)
1032
 
1033
                // Step 15. Fix up the stack of open elements.
1034
                p.oe.remove(formattingElement)
1035
                p.oe.insert(p.oe.index(furthestBlock)+1, clone)
1036
        }
1037
}
1038
 
1039
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1040
func (p *parser) inBodyEndTagOther(tag string) {
1041
        for i := len(p.oe) - 1; i >= 0; i-- {
1042
                if p.oe[i].Data == tag {
1043
                        p.oe = p.oe[:i]
1044
                        break
1045
                }
1046
                if isSpecialElement(p.oe[i]) {
1047
                        break
1048
                }
1049
        }
1050
}
1051
 
1052
// Section 12.2.5.4.8.
1053
func textIM(p *parser) bool {
1054
        switch p.tok.Type {
1055
        case ErrorToken:
1056
                p.oe.pop()
1057
        case TextToken:
1058
                p.addText(p.tok.Data)
1059
                return true
1060
        case EndTagToken:
1061
                p.oe.pop()
1062
        }
1063
        p.im = p.originalIM
1064
        p.originalIM = nil
1065
        return p.tok.Type == EndTagToken
1066
}
1067
 
1068
// Section 12.2.5.4.9.
1069
func inTableIM(p *parser) bool {
1070
        switch p.tok.Type {
1071
        case ErrorToken:
1072
                // Stop parsing.
1073
                return true
1074
        case TextToken:
1075
                // TODO.
1076
        case StartTagToken:
1077
                switch p.tok.Data {
1078
                case "caption":
1079
                        p.clearStackToContext(tableScope)
1080
                        p.afe = append(p.afe, &scopeMarker)
1081
                        p.addElement(p.tok.Data, p.tok.Attr)
1082
                        p.im = inCaptionIM
1083
                        return true
1084
                case "tbody", "tfoot", "thead":
1085
                        p.clearStackToContext(tableScope)
1086
                        p.addElement(p.tok.Data, p.tok.Attr)
1087
                        p.im = inTableBodyIM
1088
                        return true
1089
                case "td", "th", "tr":
1090
                        p.clearStackToContext(tableScope)
1091
                        p.addElement("tbody", nil)
1092
                        p.im = inTableBodyIM
1093
                        return false
1094
                case "table":
1095
                        if p.popUntil(tableScope, "table") {
1096
                                p.resetInsertionMode()
1097
                                return false
1098
                        }
1099
                        // Ignore the token.
1100
                        return true
1101
                case "colgroup":
1102
                        p.clearStackToContext(tableScope)
1103
                        p.addElement(p.tok.Data, p.tok.Attr)
1104
                        p.im = inColumnGroupIM
1105
                        return true
1106
                case "col":
1107
                        p.clearStackToContext(tableScope)
1108
                        p.addElement("colgroup", p.tok.Attr)
1109
                        p.im = inColumnGroupIM
1110
                        return false
1111
                case "select":
1112
                        p.reconstructActiveFormattingElements()
1113
                        switch p.top().Data {
1114
                        case "table", "tbody", "tfoot", "thead", "tr":
1115
                                p.fosterParenting = true
1116
                        }
1117
                        p.addElement(p.tok.Data, p.tok.Attr)
1118
                        p.fosterParenting = false
1119
                        p.framesetOK = false
1120
                        p.im = inSelectInTableIM
1121
                        return true
1122
                default:
1123
                        // TODO.
1124
                }
1125
        case EndTagToken:
1126
                switch p.tok.Data {
1127
                case "table":
1128
                        if p.popUntil(tableScope, "table") {
1129
                                p.resetInsertionMode()
1130
                                return true
1131
                        }
1132
                        // Ignore the token.
1133
                        return true
1134
                case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
1135
                        // Ignore the token.
1136
                        return true
1137
                }
1138
        case CommentToken:
1139
                p.addChild(&Node{
1140
                        Type: CommentNode,
1141
                        Data: p.tok.Data,
1142
                })
1143
                return true
1144
        }
1145
 
1146
        switch p.top().Data {
1147
        case "table", "tbody", "tfoot", "thead", "tr":
1148
                p.fosterParenting = true
1149
                defer func() { p.fosterParenting = false }()
1150
        }
1151
 
1152
        return inBodyIM(p)
1153
}
1154
 
1155
// Section 12.2.5.4.11.
1156
func inCaptionIM(p *parser) bool {
1157
        switch p.tok.Type {
1158
        case StartTagToken:
1159
                switch p.tok.Data {
1160
                case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr":
1161
                        if p.popUntil(tableScope, "caption") {
1162
                                p.clearActiveFormattingElements()
1163
                                p.im = inTableIM
1164
                                return false
1165
                        } else {
1166
                                // Ignore the token.
1167
                                return true
1168
                        }
1169
                case "select":
1170
                        p.reconstructActiveFormattingElements()
1171
                        p.addElement(p.tok.Data, p.tok.Attr)
1172
                        p.framesetOK = false
1173
                        p.im = inSelectInTableIM
1174
                        return true
1175
                }
1176
        case EndTagToken:
1177
                switch p.tok.Data {
1178
                case "caption":
1179
                        if p.popUntil(tableScope, "caption") {
1180
                                p.clearActiveFormattingElements()
1181
                                p.im = inTableIM
1182
                        }
1183
                        return true
1184
                case "table":
1185
                        if p.popUntil(tableScope, "caption") {
1186
                                p.clearActiveFormattingElements()
1187
                                p.im = inTableIM
1188
                                return false
1189
                        } else {
1190
                                // Ignore the token.
1191
                                return true
1192
                        }
1193
                case "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
1194
                        // Ignore the token.
1195
                        return true
1196
                }
1197
        }
1198
        return inBodyIM(p)
1199
}
1200
 
1201
// Section 12.2.5.4.12.
1202
func inColumnGroupIM(p *parser) bool {
1203
        switch p.tok.Type {
1204
        case CommentToken:
1205
                p.addChild(&Node{
1206
                        Type: CommentNode,
1207
                        Data: p.tok.Data,
1208
                })
1209
                return true
1210
        case DoctypeToken:
1211
                // Ignore the token.
1212
                return true
1213
        case StartTagToken:
1214
                switch p.tok.Data {
1215
                case "html":
1216
                        return inBodyIM(p)
1217
                case "col":
1218
                        p.addElement(p.tok.Data, p.tok.Attr)
1219
                        p.oe.pop()
1220
                        p.acknowledgeSelfClosingTag()
1221
                        return true
1222
                }
1223
        case EndTagToken:
1224
                switch p.tok.Data {
1225
                case "colgroup":
1226
                        if p.oe.top().Data != "html" {
1227
                                p.oe.pop()
1228
                                p.im = inTableIM
1229
                        }
1230
                        return true
1231
                case "col":
1232
                        // Ignore the token.
1233
                        return true
1234
                }
1235
        }
1236
        if p.oe.top().Data != "html" {
1237
                p.oe.pop()
1238
                p.im = inTableIM
1239
                return false
1240
        }
1241
        return true
1242
}
1243
 
1244
// Section 12.2.5.4.13.
1245
func inTableBodyIM(p *parser) bool {
1246
        var (
1247
                add      bool
1248
                data     string
1249
                attr     []Attribute
1250
                consumed bool
1251
        )
1252
        switch p.tok.Type {
1253
        case ErrorToken:
1254
                // TODO.
1255
        case TextToken:
1256
                // TODO.
1257
        case StartTagToken:
1258
                switch p.tok.Data {
1259
                case "tr":
1260
                        add = true
1261
                        data = p.tok.Data
1262
                        attr = p.tok.Attr
1263
                        consumed = true
1264
                case "td", "th":
1265
                        add = true
1266
                        data = "tr"
1267
                        consumed = false
1268
                case "caption", "col", "colgroup", "tbody", "tfoot", "thead":
1269
                        if !p.popUntil(tableScope, "tbody", "thead", "tfoot") {
1270
                                // Ignore the token.
1271
                                return true
1272
                        }
1273
                        p.im = inTableIM
1274
                        return false
1275
                default:
1276
                        // TODO.
1277
                }
1278
        case EndTagToken:
1279
                switch p.tok.Data {
1280
                case "table":
1281
                        if p.popUntil(tableScope, "tbody", "thead", "tfoot") {
1282
                                p.im = inTableIM
1283
                                return false
1284
                        }
1285
                        // Ignore the token.
1286
                        return true
1287
                case "body", "caption", "col", "colgroup", "html", "td", "th", "tr":
1288
                        // Ignore the token.
1289
                        return true
1290
                }
1291
        case CommentToken:
1292
                p.addChild(&Node{
1293
                        Type: CommentNode,
1294
                        Data: p.tok.Data,
1295
                })
1296
                return true
1297
        }
1298
        if add {
1299
                // TODO: clear the stack back to a table body context.
1300
                p.addElement(data, attr)
1301
                p.im = inRowIM
1302
                return consumed
1303
        }
1304
        return inTableIM(p)
1305
}
1306
 
1307
// Section 12.2.5.4.14.
1308
func inRowIM(p *parser) bool {
1309
        switch p.tok.Type {
1310
        case ErrorToken:
1311
                // TODO.
1312
        case TextToken:
1313
                // TODO.
1314
        case StartTagToken:
1315
                switch p.tok.Data {
1316
                case "td", "th":
1317
                        p.clearStackToContext(tableRowScope)
1318
                        p.addElement(p.tok.Data, p.tok.Attr)
1319
                        p.afe = append(p.afe, &scopeMarker)
1320
                        p.im = inCellIM
1321
                        return true
1322
                case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr":
1323
                        if p.popUntil(tableScope, "tr") {
1324
                                p.im = inTableBodyIM
1325
                                return false
1326
                        }
1327
                        // Ignore the token.
1328
                        return true
1329
                default:
1330
                        // TODO.
1331
                }
1332
        case EndTagToken:
1333
                switch p.tok.Data {
1334
                case "tr":
1335
                        if p.popUntil(tableScope, "tr") {
1336
                                p.im = inTableBodyIM
1337
                                return true
1338
                        }
1339
                        // Ignore the token.
1340
                        return true
1341
                case "table":
1342
                        if p.popUntil(tableScope, "tr") {
1343
                                p.im = inTableBodyIM
1344
                                return false
1345
                        }
1346
                        // Ignore the token.
1347
                        return true
1348
                case "tbody", "tfoot", "thead":
1349
                        // TODO.
1350
                case "body", "caption", "col", "colgroup", "html", "td", "th":
1351
                        // Ignore the token.
1352
                        return true
1353
                default:
1354
                        // TODO.
1355
                }
1356
        case CommentToken:
1357
                p.addChild(&Node{
1358
                        Type: CommentNode,
1359
                        Data: p.tok.Data,
1360
                })
1361
                return true
1362
        }
1363
        return inTableIM(p)
1364
}
1365
 
1366
// Section 12.2.5.4.15.
1367
func inCellIM(p *parser) bool {
1368
        var (
1369
                closeTheCellAndReprocess bool
1370
        )
1371
        switch p.tok.Type {
1372
        case StartTagToken:
1373
                switch p.tok.Data {
1374
                case "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr":
1375
                        // TODO: check for "td" or "th" in table scope.
1376
                        closeTheCellAndReprocess = true
1377
                case "select":
1378
                        p.reconstructActiveFormattingElements()
1379
                        p.addElement(p.tok.Data, p.tok.Attr)
1380
                        p.framesetOK = false
1381
                        p.im = inSelectInTableIM
1382
                        return true
1383
                }
1384
        case EndTagToken:
1385
                switch p.tok.Data {
1386
                case "td", "th":
1387
                        if !p.popUntil(tableScope, p.tok.Data) {
1388
                                // Ignore the token.
1389
                                return true
1390
                        }
1391
                        p.clearActiveFormattingElements()
1392
                        p.im = inRowIM
1393
                        return true
1394
                case "body", "caption", "col", "colgroup", "html":
1395
                        // TODO.
1396
                case "table", "tbody", "tfoot", "thead", "tr":
1397
                        // TODO: check for matching element in table scope.
1398
                        closeTheCellAndReprocess = true
1399
                }
1400
        case CommentToken:
1401
                p.addChild(&Node{
1402
                        Type: CommentNode,
1403
                        Data: p.tok.Data,
1404
                })
1405
                return true
1406
        }
1407
        if closeTheCellAndReprocess {
1408
                if p.popUntil(tableScope, "td") || p.popUntil(tableScope, "th") {
1409
                        p.clearActiveFormattingElements()
1410
                        p.im = inRowIM
1411
                        return false
1412
                }
1413
        }
1414
        return inBodyIM(p)
1415
}
1416
 
1417
// Section 12.2.5.4.16.
1418
func inSelectIM(p *parser) bool {
1419
        endSelect := false
1420
        switch p.tok.Type {
1421
        case ErrorToken:
1422
                // TODO.
1423
        case TextToken:
1424
                p.addText(p.tok.Data)
1425
        case StartTagToken:
1426
                switch p.tok.Data {
1427
                case "html":
1428
                        // TODO.
1429
                case "option":
1430
                        if p.top().Data == "option" {
1431
                                p.oe.pop()
1432
                        }
1433
                        p.addElement(p.tok.Data, p.tok.Attr)
1434
                case "optgroup":
1435
                        if p.top().Data == "option" {
1436
                                p.oe.pop()
1437
                        }
1438
                        if p.top().Data == "optgroup" {
1439
                                p.oe.pop()
1440
                        }
1441
                        p.addElement(p.tok.Data, p.tok.Attr)
1442
                case "select":
1443
                        endSelect = true
1444
                case "input", "keygen", "textarea":
1445
                        // TODO.
1446
                case "script":
1447
                        // TODO.
1448
                default:
1449
                        // Ignore the token.
1450
                }
1451
        case EndTagToken:
1452
                switch p.tok.Data {
1453
                case "option":
1454
                        if p.top().Data == "option" {
1455
                                p.oe.pop()
1456
                        }
1457
                case "optgroup":
1458
                        i := len(p.oe) - 1
1459
                        if p.oe[i].Data == "option" {
1460
                                i--
1461
                        }
1462
                        if p.oe[i].Data == "optgroup" {
1463
                                p.oe = p.oe[:i]
1464
                        }
1465
                case "select":
1466
                        endSelect = true
1467
                default:
1468
                        // Ignore the token.
1469
                }
1470
        case CommentToken:
1471
                p.doc.Add(&Node{
1472
                        Type: CommentNode,
1473
                        Data: p.tok.Data,
1474
                })
1475
        }
1476
        if endSelect {
1477
                p.endSelect()
1478
        }
1479
        return true
1480
}
1481
 
1482
// Section 12.2.5.4.17.
1483
func inSelectInTableIM(p *parser) bool {
1484
        switch p.tok.Type {
1485
        case StartTagToken, EndTagToken:
1486
                switch p.tok.Data {
1487
                case "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th":
1488
                        if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.Data) {
1489
                                p.endSelect()
1490
                                return false
1491
                        } else {
1492
                                // Ignore the token.
1493
                                return true
1494
                        }
1495
                }
1496
        }
1497
        return inSelectIM(p)
1498
}
1499
 
1500
func (p *parser) endSelect() {
1501
        for i := len(p.oe) - 1; i >= 0; i-- {
1502
                switch p.oe[i].Data {
1503
                case "option", "optgroup":
1504
                        continue
1505
                case "select":
1506
                        p.oe = p.oe[:i]
1507
                        p.resetInsertionMode()
1508
                }
1509
                return
1510
        }
1511
}
1512
 
1513
// Section 12.2.5.4.18.
1514
func afterBodyIM(p *parser) bool {
1515
        switch p.tok.Type {
1516
        case ErrorToken:
1517
                // Stop parsing.
1518
                return true
1519
        case StartTagToken:
1520
                if p.tok.Data == "html" {
1521
                        return inBodyIM(p)
1522
                }
1523
        case EndTagToken:
1524
                if p.tok.Data == "html" {
1525
                        p.im = afterAfterBodyIM
1526
                        return true
1527
                }
1528
        case CommentToken:
1529
                // The comment is attached to the  element.
1530
                if len(p.oe) < 1 || p.oe[0].Data != "html" {
1531
                        panic("html: bad parser state:  element not found, in the after-body insertion mode")
1532
                }
1533
                p.oe[0].Add(&Node{
1534
                        Type: CommentNode,
1535
                        Data: p.tok.Data,
1536
                })
1537
                return true
1538
        }
1539
        p.im = inBodyIM
1540
        return false
1541
}
1542
 
1543
// Section 12.2.5.4.19.
1544
func inFramesetIM(p *parser) bool {
1545
        switch p.tok.Type {
1546
        case CommentToken:
1547
                p.addChild(&Node{
1548
                        Type: CommentNode,
1549
                        Data: p.tok.Data,
1550
                })
1551
        case TextToken:
1552
                // Ignore all text but whitespace.
1553
                s := strings.Map(func(c rune) rune {
1554
                        switch c {
1555
                        case ' ', '\t', '\n', '\f', '\r':
1556
                                return c
1557
                        }
1558
                        return -1
1559
                }, p.tok.Data)
1560
                if s != "" {
1561
                        p.addText(s)
1562
                }
1563
        case StartTagToken:
1564
                switch p.tok.Data {
1565
                case "html":
1566
                        return inBodyIM(p)
1567
                case "frameset":
1568
                        p.addElement(p.tok.Data, p.tok.Attr)
1569
                case "frame":
1570
                        p.addElement(p.tok.Data, p.tok.Attr)
1571
                        p.oe.pop()
1572
                        p.acknowledgeSelfClosingTag()
1573
                case "noframes":
1574
                        return inHeadIM(p)
1575
                }
1576
        case EndTagToken:
1577
                switch p.tok.Data {
1578
                case "frameset":
1579
                        if p.oe.top().Data != "html" {
1580
                                p.oe.pop()
1581
                                if p.oe.top().Data != "frameset" {
1582
                                        p.im = afterFramesetIM
1583
                                        return true
1584
                                }
1585
                        }
1586
                }
1587
        default:
1588
                // Ignore the token.
1589
        }
1590
        return true
1591
}
1592
 
1593
// Section 12.2.5.4.20.
1594
func afterFramesetIM(p *parser) bool {
1595
        switch p.tok.Type {
1596
        case CommentToken:
1597
                p.addChild(&Node{
1598
                        Type: CommentNode,
1599
                        Data: p.tok.Data,
1600
                })
1601
        case TextToken:
1602
                // Ignore all text but whitespace.
1603
                s := strings.Map(func(c rune) rune {
1604
                        switch c {
1605
                        case ' ', '\t', '\n', '\f', '\r':
1606
                                return c
1607
                        }
1608
                        return -1
1609
                }, p.tok.Data)
1610
                if s != "" {
1611
                        p.addText(s)
1612
                }
1613
        case StartTagToken:
1614
                switch p.tok.Data {
1615
                case "html":
1616
                        return inBodyIM(p)
1617
                case "noframes":
1618
                        return inHeadIM(p)
1619
                }
1620
        case EndTagToken:
1621
                switch p.tok.Data {
1622
                case "html":
1623
                        p.im = afterAfterFramesetIM
1624
                        return true
1625
                }
1626
        default:
1627
                // Ignore the token.
1628
        }
1629
        return true
1630
}
1631
 
1632
// Section 12.2.5.4.21.
1633
func afterAfterBodyIM(p *parser) bool {
1634
        switch p.tok.Type {
1635
        case ErrorToken:
1636
                // Stop parsing.
1637
                return true
1638
        case TextToken:
1639
                // TODO.
1640
        case StartTagToken:
1641
                if p.tok.Data == "html" {
1642
                        return inBodyIM(p)
1643
                }
1644
        case CommentToken:
1645
                p.doc.Add(&Node{
1646
                        Type: CommentNode,
1647
                        Data: p.tok.Data,
1648
                })
1649
                return true
1650
        }
1651
        p.im = inBodyIM
1652
        return false
1653
}
1654
 
1655
// Section 12.2.5.4.22.
1656
func afterAfterFramesetIM(p *parser) bool {
1657
        switch p.tok.Type {
1658
        case CommentToken:
1659
                p.addChild(&Node{
1660
                        Type: CommentNode,
1661
                        Data: p.tok.Data,
1662
                })
1663
        case TextToken:
1664
                // Ignore all text but whitespace.
1665
                s := strings.Map(func(c rune) rune {
1666
                        switch c {
1667
                        case ' ', '\t', '\n', '\f', '\r':
1668
                                return c
1669
                        }
1670
                        return -1
1671
                }, p.tok.Data)
1672
                if s != "" {
1673
                        p.reconstructActiveFormattingElements()
1674
                        p.addText(s)
1675
                }
1676
        case StartTagToken:
1677
                switch p.tok.Data {
1678
                case "html":
1679
                        return inBodyIM(p)
1680
                case "noframes":
1681
                        return inHeadIM(p)
1682
                }
1683
        default:
1684
                // Ignore the token.
1685
        }
1686
        return true
1687
}
1688
 
1689
// Section 12.2.5.5.
1690
func parseForeignContent(p *parser) bool {
1691
        switch p.tok.Type {
1692
        case TextToken:
1693
                // TODO: HTML integration points.
1694
                if p.top().Namespace == "" {
1695
                        inBodyIM(p)
1696
                        p.resetInsertionMode()
1697
                        return true
1698
                }
1699
                if p.framesetOK {
1700
                        p.framesetOK = strings.TrimLeft(p.tok.Data, whitespace) == ""
1701
                }
1702
                p.addText(p.tok.Data)
1703
        case CommentToken:
1704
                p.addChild(&Node{
1705
                        Type: CommentNode,
1706
                        Data: p.tok.Data,
1707
                })
1708
        case StartTagToken:
1709
                if htmlIntegrationPoint(p.top()) {
1710
                        inBodyIM(p)
1711
                        p.resetInsertionMode()
1712
                        return true
1713
                }
1714
                if breakout[p.tok.Data] {
1715
                        for i := len(p.oe) - 1; i >= 0; i-- {
1716
                                // TODO: MathML integration points.
1717
                                if p.oe[i].Namespace == "" || htmlIntegrationPoint(p.oe[i]) {
1718
                                        p.oe = p.oe[:i+1]
1719
                                        break
1720
                                }
1721
                        }
1722
                        return false
1723
                }
1724
                switch p.top().Namespace {
1725
                case "math":
1726
                        // TODO: adjust MathML attributes.
1727
                case "svg":
1728
                        // Adjust SVG tag names. The tokenizer lower-cases tag names, but
1729
                        // SVG wants e.g. "foreignObject" with a capital second "O".
1730
                        if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
1731
                                p.tok.Data = x
1732
                        }
1733
                        // TODO: adjust SVG attributes.
1734
                default:
1735
                        panic("html: bad parser state: unexpected namespace")
1736
                }
1737
                adjustForeignAttributes(p.tok.Attr)
1738
                namespace := p.top().Namespace
1739
                p.addElement(p.tok.Data, p.tok.Attr)
1740
                p.top().Namespace = namespace
1741
        case EndTagToken:
1742
                for i := len(p.oe) - 1; i >= 0; i-- {
1743
                        if p.oe[i].Namespace == "" {
1744
                                return p.im(p)
1745
                        }
1746
                        if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
1747
                                p.oe = p.oe[:i]
1748
                                break
1749
                        }
1750
                }
1751
                return true
1752
        default:
1753
                // Ignore the token.
1754
        }
1755
        return true
1756
}
1757
 
1758
// Section 12.2.5.
1759
func (p *parser) inForeignContent() bool {
1760
        if len(p.oe) == 0 {
1761
                return false
1762
        }
1763
        n := p.oe[len(p.oe)-1]
1764
        if n.Namespace == "" {
1765
                return false
1766
        }
1767
        // TODO: MathML, HTML integration points.
1768
        // TODO: MathML's annotation-xml combining with SVG's svg.
1769
        return true
1770
}
1771
 
1772
// parse runs the tokenizer to completion, dispatching each token either to
// parseForeignContent or to the current insertion mode. A handler returning
// false leaves the token unconsumed, so it is dispatched again without
// reading a new one. It returns the first read error other than io.EOF.
func (p *parser) parse() error {
	// Iterate until EOF. Any other error will cause an early return.
	advance := true
	for {
		if advance {
			err := p.read()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
		}
		if p.inForeignContent() {
			advance = parseForeignContent(p)
			continue
		}
		advance = p.im(p)
	}
	// Keep dispatching the final token (the ErrorToken signifying EOF) until
	// some insertion mode consumes it.
	for !p.im(p) {
	}
	return nil
}
1798
 
1799
// Parse returns the parse tree for the HTML from the given Reader.
1800
// The input is assumed to be UTF-8 encoded.
1801
func Parse(r io.Reader) (*Node, error) {
1802
        p := &parser{
1803
                tokenizer: NewTokenizer(r),
1804
                doc: &Node{
1805
                        Type: DocumentNode,
1806
                },
1807
                scripting:  true,
1808
                framesetOK: true,
1809
                im:         initialIM,
1810
        }
1811
        err := p.parse()
1812
        if err != nil {
1813
                return nil, err
1814
        }
1815
        return p.doc, nil
1816
}
1817
 
1818
// ParseFragment parses a fragment of HTML and returns the nodes that were
1819
// found. If the fragment is the InnerHTML for an existing element, pass that
1820
// element in context.
1821
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
1822
        p := &parser{
1823
                tokenizer: NewTokenizer(r),
1824
                doc: &Node{
1825
                        Type: DocumentNode,
1826
                },
1827
                scripting: true,
1828
                context:   context,
1829
        }
1830
 
1831
        if context != nil {
1832
                switch context.Data {
1833
                case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
1834
                        p.tokenizer.rawTag = context.Data
1835
                }
1836
        }
1837
 
1838
        root := &Node{
1839
                Type: ElementNode,
1840
                Data: "html",
1841
        }
1842
        p.doc.Add(root)
1843
        p.oe = nodeStack{root}
1844
        p.resetInsertionMode()
1845
 
1846
        for n := context; n != nil; n = n.Parent {
1847
                if n.Type == ElementNode && n.Data == "form" {
1848
                        p.form = n
1849
                        break
1850
                }
1851
        }
1852
 
1853
        err := p.parse()
1854
        if err != nil {
1855
                return nil, err
1856
        }
1857
 
1858
        parent := p.doc
1859
        if context != nil {
1860
                parent = root
1861
        }
1862
 
1863
        result := parent.Child
1864
        parent.Child = nil
1865
        for _, n := range result {
1866
                n.Parent = nil
1867
        }
1868
        return result, nil
1869
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.