OpenCores
URL https://opencores.org/ocsvn/openmsp430/openmsp430/trunk

Subversion Repositories openmsp430

[/] [openmsp430/] [trunk/] [tools/] [lib/] [tcl-lib/] [xml.tcl] - Blame information for rev 186

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 110 olivier.gi
 #############################################################################
2
 #
3
 # xml.tcl -- Simple XML parser
4
 # by Keith Vetter, March 2004
5
 #
6
 
7
 namespace eval ::XML { variable XML "" loc 0}
8
 
9
 proc ::XML::Init {xmlData} {
10
    variable XML
11
    variable loc
12
 
13
    set XML [string trim $xmlData];
14
    regsub -all {<!--.*?-->} $XML {} XML        ;# Remove all comments
15
    set loc 0
16
 }
17
 
18
 # Returns {XML|TXT|EOF|PI value attributes START|END|EMPTY}
19
 proc ::XML::NextToken {{peek 0}} {
20
    variable XML
21
    variable loc
22
 
23
    set n [regexp -start $loc -indices {(.*?)\s*?<(/?)(.*?)(/?)>} \
24
               $XML all txt stok tok etok]
25
    if {! $n} {return [list EOF]}
26
    foreach {all0 all1} $all {txt0 txt1} $txt \
27
        {stok0 stok1} $stok {tok0 tok1} $tok {etok0 etok1} $etok break
28
 
29
    if {$txt1 >= $txt0} {                       ;# Got text
30
        set txt [string range $XML $txt0 $txt1]
31
        if {! $peek} {set loc [expr {$txt1 + 1}]}
32
        return [list TXT $txt]
33
    }
34
 
35
    set token [string range $XML $tok0 $tok1]   ;# Got something in brackets
36
    if {! $peek} {set loc [expr {$all1 + 1}]}
37
    if {[regexp {^!\[CDATA\[(.*)\]\]} $token => txt]} { ;# Is it CDATA stuff?
38
        return [list TXT $txt]
39
    }
40
 
41
    # Check for Processing Instruction <?...?>
42
    set type XML
43
    if {[regexp {^\?(.*)\?$} $token => token]} {
44
        set type PI
45
    }
46
    set attr ""
47
    regexp {^(.*?)\s+(.*?)$} $token => token attr
48
 
49
    set etype START                             ;# Entity type
50
    if {$etok0 <= $etok1} {
51
        if {$stok0 <= $stok1} { set token "/$token"} ;# Bad XML
52
        set etype EMPTY
53
    } elseif {$stok0 <= $stok1} {
54
        set etype END
55
    }
56
    return [list $type $token $attr $etype]
57
 }
58
 # ::XML::IsWellFormed
59
 #  checks if the XML is well-formed )http://www.w3.org/TR/1998/REC-xml-19980210)
60
 #
61
 # Returns "" if well-formed, error message otherwise
62
 # missing:
63
 #  characters: doesn't check valid extended characters
64
 #  attributes: doesn't check anything: quotes, equals, unique, etc.
65
 #  text stuff: references, entities, parameters, etc.
66
 #  doctype internal stuff
67
 #
68
 proc ::XML::IsWellFormed {} {
69
    set result [::XML::_IsWellFormed]
70
    set ::XML::loc 0
71
    return $result
72
 }
73
 ;proc ::XML::_IsWellFormed {} {
74
    array set emsg {
75
        XMLDECLFIRST "The XML declaration must come first"
76
        MULTIDOCTYPE "Only one DOCTYPE is allowed"
77
        INVALID "Invalid document structure"
78
        MISMATCH "Ending tag '$val' doesn't match starting tag"
79
        BADELEMENT "Bad element name '$val'"
80
        EOD "Only processing instructions allowed at end of document"
81
        BADNAME "Bad name '$val'"
82
        BADPI "No processing instruction starts with 'xml'"
83
    }
84
 
85
    # [1] document ::= prolog element Misc*
86
    # [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
87
    # [27] Misc ::= Comment | PI | S
88
    # [28] doctypedecl ::= <!DOCTYPE...>
89
    # [16] PI ::= <? Name ...?>
90
    set seen 0                                  ;# 1 xml, 2 pi, 4 doctype
91
    while {1} {
92
        foreach {type val attr etype} [::XML::NextToken] break
93
        if {$type eq "PI"} {
94
            if {! [regexp {^[a-zA-Z_:][a-zA-Z0-9.-_:\xB7]+$} $val]} {
95
                return [subst $emsg(BADNAME)]
96
            }
97
            if {$val eq "xml"} {                ;# XMLDecl
98
                if {$seen != 0} { return $emsg(XMLDECLFIRST) }
99
                # TODO: check version number exist and only encoding and
100
                # standalone attributes are allowed
101
                incr seen                       ;# Mark as seen XMLDecl
102
                continue
103
            }
104
            if {[string equal -nocase "xml" $val]} {return $emsg(BADPI)}
105
            set seen [expr {$seen | 2}]         ;# Mark as seen PI
106
            continue
107
        } elseif {$type eq "XML" && $val eq "!DOCTYPE"} { ;# Doctype
108
            if {$seen & 4} { return $emsg(MULTIDOCTYPE) }
109
            set seen [expr {$seen | 4}]
110
            continue
111
        }
112
        break
113
    }
114
 
115
    # [39] element ::= EmptyElemTag | STag content ETag
116
    # [40] STag ::= < Name (S Attribute)* S? >
117
    # [42] ETag ::= </ Name S? >
118
    # [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
119
    # [44] EmptyElemTag ::= < Name (S Attribute)* S? />
120
    #
121
 
122
    set stack {}
123
    set first 1
124
    while {1} {
125
        if {! $first} {                         ;# Skip first time in
126
            foreach {type val attr etype} [::XML::NextToken] break
127
        } else {
128
            if {$type ne "XML" && $type ne "EOF"} { return $emsg(INVALID) }
129
            set first 0
130
        }
131
 
132
        if {$type eq "EOF"} break
133
        ;# TODO: check attributes: quotes, equals and unique
134
 
135
        if {$type eq "TXT"} continue
136
        if {! [regexp {^[a-zA-Z_:][a-zA-Z0-9.-_:\xB7]+$} $val]} {
137
            return [subst $emsg(BADNAME)]
138
        }
139
 
140
        if {$type eq "PI"} {
141
            if {[string equal -nocase xml $val]} { return $emsg(BADPI) }
142
            continue
143
        }
144
        if {$etype eq "START"} {                ;# Starting tag
145
            lappend stack $val
146
        } elseif {$etype eq "END"} {            ;# </tag>
147
            if {$val ne [lindex $stack end]} { return [subst $emsg(MISMATCH)] }
148
            set stack [lrange $stack 0 end-1]
149
            if {[llength $stack] == 0} break    ;# Empty stack
150
        } elseif {$etype eq "EMPTY"} {          ;# <tag/>
151
        }
152
    }
153
 
154
    # End-of-Document can only contain processing instructions
155
    while {1} {
156
        foreach {type val attr etype} [::XML::NextToken] break
157
        if {$type eq "EOF"} break
158
        if {$type eq "PI"} {
159
            if {[string equal -nocase xml $val]} { return $emsg(BADPI) }
160
            continue
161
        }
162
        return $emsg(EOD)
163
    }
164
    return ""
165
 }
166
 
167
 ################################################################
168
 #
169
 # Demo code
170
 #
171
 #set xml {<?xml version="1.0" encoding="ISO-8859-1"?>
172
 #   <loc version="1.0" src="Groundspeak">
173
 #   <waypoint>
174
 #   <name id="GCGPXK"><![CDATA[Playing Poker with the Squirrels by Rino 'n Rinette]]></name>
175
 #   <coord lat="40.1548166" lon="-82.5202833"/>
176
 #   <type>Geocache</type>
177
 #   <link text="Cache Details">http://www.geocaching.com/seek/cache_details.aspx?wp=GCGPXK</link>
178
 #   </waypoint><waypoint>
179
 #   <name id="GC19DF"><![CDATA[Great Playground Caper by Treasure Hunters Inc.]]></name>
180
 #   <coord lat="40.0667166666667" lon="-82.5358"/>
181
 #   <type>Geocache</type>
182
 #   <link text="Cache Details">http://www.geocaching.com/seek/cache_details.aspx?wp=GC19DF</link>
183
 #   </waypoint>
184
 #   </loc>
185
 #}
186
 
187
 #::XML::Init $xml
188
 #set wellFormed [::XML::IsWellFormed]
189
 #if {$wellFormed ne ""} {
190
 #   puts "The xml is not well-formed: $wellFormed"
191
 #} else {
192
 #   puts "The xml is well-formed"
193
 #   while {1} {
194
 #      foreach {type val attr etype} [::XML::NextToken] break
195
 #      puts "looking at: $type '$val' '$attr' '$etype'"
196
 #      if {$type == "EOF"} break
197
 #   }
198
 #}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.