OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [gnu/] [xml/] [pipeline/] [ValidationConsumer.java] - Blame information for rev 769

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 769 jeremybenn
/* ValidationConsumer.java --
2
   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
package gnu.xml.pipeline;
39
 
40
import java.io.IOException;
41
import java.io.StringReader;
42
import java.io.StringWriter;
43
import java.util.EmptyStackException;
44
import java.util.Enumeration;
45
import java.util.Hashtable;
46
import java.util.Stack;
47
import java.util.StringTokenizer;
48
import java.util.Vector;
49
 
50
import org.xml.sax.Attributes;
51
import org.xml.sax.EntityResolver;
52
import org.xml.sax.ErrorHandler;
53
import org.xml.sax.InputSource;
54
import org.xml.sax.Locator;
55
import org.xml.sax.SAXException;
56
import org.xml.sax.SAXParseException;
57
import org.xml.sax.XMLReader;
58
import org.xml.sax.helpers.XMLReaderFactory;
59
 
60
/**
61
 * This class checks SAX2 events to report validity errors; it works as
62
 * both a filter and a terminus on an event pipeline.  It relies on the
63
 * producer of SAX events to: <ol>
64
 *
65
 *      <li> Conform to the specification of a non-validating XML parser that
66
 *      reads all external entities, reported using SAX2 events. </li>
67
 *
68
 *      <li> Report ignorable whitespace as such (through the ContentHandler
69
 *      interface).  This is, strictly speaking, optional for nonvalidating
70
 *      XML processors.  </li>
71
 *
72
 *      <li> Make SAX2 DeclHandler callbacks, with default
73
 *      attribute values already normalized (and without "&lt;").</li>
74
 *
75
 *      <li> Make SAX2 LexicalHandler startDTD() and endDTD ()
76
 *      callbacks. </li>
77
 *
78
 *      <li> Act as if the <em>(URI)/namespace-prefixes</em> property were
79
 *      set to true, by providing XML 1.0 names and all <code>xmlns*</code>
80
 *      attributes (rather than omitting either or both). </li>
81
 *
82
 *      </ol>
83
 *
84
 * <p> At this writing, the major SAX2 parsers (such as &AElig;lfred2,
85
 * Crimson, and Xerces) meet these requirements, and this validation
86
 * module is used by the optional &AElig;lfred2 validation support.
87
 * </p>
88
 *
89
 * <p> Note that because this is a layered validator, it has to duplicate some
90
 * work that the parser is doing; there are also other costs to layering.
91
 * However, <em>because of layering it doesn't need a parser</em> in order
92
 * to work! You can use it with anything that generates SAX events, such
93
 * as an application component that wants to detect invalid content in
94
 * a changed area without validating an entire document, or which wants to
95
 * ensure that it doesn't write invalid data to a communications partner.</p>
96
 *
97
 * <p> Also, note that because this is a layered validator, the line numbers
98
 * reported for some errors may seem strange.  For example, if an element does
99
 * not permit character content, the validator
100
 * will use the locator provided to it.
101
 * That might reflect the last character of a <em>characters</em> event
102
 * callback, rather than the first non-whitespace character. </p>
103
 *
104
 * <hr />
105
 *
106
 * <!--
107
 * <p> Of interest is the fact that unlike most currently known XML validators,
108
 * this one can report some cases of non-determinism in element content models.
109
 * It is a compile-time option, enabled by default.  This will only report
110
 * such XML errors if they relate to content actually appearing in a document;
111
 * content models aren't aggressively scanned for non-deterministic structure.
112
 * Documents which trigger such non-deterministic transitions may be handled
113
 * differently by different validating parsers, without losing conformance
114
 * to the XML specification. </p>
115
 * -->
116
 *
117
 * <p> Current limitations of the validation performed are in roughly three
118
 * categories.  </p>
119
 *
120
 * <p> The first category represents constraints which demand violations
121
 * of software layering:  exposing lexical details, one of the first things
122
 * that <em>application</em> programming interfaces (APIs) hide.  These
123
 * invariably relate to XML entity handling, and to historical oddities
124
 * of the XML validation semantics.  Curiously,
125
 * recent (Autumn 1999) conformance testing showed that these constraints are
126
 * among those handled worst by existing XML validating parsers.  Arguments
127
 * have been made that each of these VCs should be turned into WFCs (most
128
 * of them) or discarded (popular for the standalone declaration); in short,
129
 * that these are bugs in the XML specification (not all via SGML): </p><ul>
130
 *
131
 *      <li> The <em>Proper Declaration/PE Nesting</em> and
132
 *      <em>Proper Group/PE Nesting</em> VCs can't be tested because they
133
 *      require access to particularly low level lexical level information.
134
 *      In essence, the reason XML isn't a simple thing to parse is that
135
 *      it's not a context free grammar, and these constraints elevate that
136
 *      SGML-derived context sensitivity to the level of a semantic rule.
137
 *
138
 *      <li> The <em>Standalone Document Declaration</em> VC can't be
139
 *      tested.  This is for two reasons.  First, this flag isn't made
140
 *      available through SAX2.  Second, it also requires breaking that
141
 *      lexical layering boundary.  (If you ever wondered why classes
142
 *      in compiler construction or language design barely mention the
143
 *      existence of context-sensitive grammars, it's because of messy
144
 *      issues like these.)
145
 *
146
 *      <li> The <em>Entity Declared</em> VC can't be tested, because it
147
 *      also requires breaking that lexical layering boundary!  There's also
148
 *      another issue: the VC wording (and seemingly intent) is ambiguous.
149
 *      (This is still true in the "Second edition" XML spec.)
150
 *      Since there is a WFC of the same name, everyone's life would be
151
 *      easier if references to undeclared parsed entities were always well
152
 *      formedness errors, regardless of whether they're parameter entities
153
 *      or not.  (Note that nonvalidating parsers are not required
154
 *      to report all such well formedness errors if they don't read external
155
 *      parameter entities, although currently most XML parsers read them
156
 *      in an attempt to avoid problems from inconsistent parser behavior.)
157
 *
158
 *      </ul>
159
 *
160
 * <p> The second category of limitations on this validation represent
161
 * constraints associated with information that is not guaranteed to be
162
 * available (or in one case, <em>is guaranteed not to be available</em>)
163
 * through the SAX2 API: </p><ul>
164
 *
165
 *      <li> The <em>Unique Element Type Declaration</em> VC may not be
166
 *      reportable, if the underlying parser happens not to expose
167
 *      multiple declarations.   (&AElig;lfred2 reports these validity
168
 *      errors directly.)</li>
169
 *
170
 *      <li> Similarly, the <em>Unique Notation Name</em> VC, added in the
171
 *      14-January-2000 XML spec errata to restrict typing models used by
172
 *      elements, may not be reportable.  (&AElig;lfred2 reports these
173
 *      validity errors directly.) </li>
174
 *
175
 *      </ul>
176
 *
177
 * <p> A third category relates to ease of implementation.  (Think of this
178
 * as "bugs".)  The most notable issue here is character handling.  Rather
179
 * than attempting to implement the voluminous character tables in the XML
180
 * specification (Appendix B), Unicode rules are used directly from
181
 * the java.lang.Character class.  Recent JVMs have begun to diverge from
182
 * the original specification for that class (Unicode 2.0), meaning that
183
 * different JVMs may handle that aspect of conformance differently.
184
 * </p>
185
 *
186
 * <p> Note that for some of the validity errors that SAX2 does not
187
 * expose, a nonvalidating parser is permitted (by the XML specification)
188
 * to report validity errors.  When used with a parser that does so for
189
 * the validity constraints mentioned above (or any other SAX2 event
190
 * stream producer that does the same thing), overall conformance is
191
 * substantially improved.
192
 *
193
 * @see gnu.xml.aelfred2.SAXDriver
194
 * @see gnu.xml.aelfred2.XmlReader
195
 *
196
 * @author David Brownell
197
 */
198
public final class ValidationConsumer extends EventFilter
199
{
200
    // report error if we happen to notice a non-deterministic choice?
201
    // we won't report buggy content models; just buggy instances
202
    private static final boolean        warnNonDeterministic = false;
203
 
204
    // for tracking active content models
205
    private String              rootName;
206
    private Stack               contentStack = new Stack ();
207
 
208
    // flags for "saved DTD" processing
209
    private boolean             disableDeclarations;
210
    private boolean             disableReset;
211
 
212
    //
213
    // most VCs get tested when we see element start tags.  the per-element
214
    // info (including attributes) recorded here duplicates that found inside
215
    // many nonvalidating parsers, hence dual lookups etc ... that's why a
216
    // layered validator isn't going to be as fast as a non-layered one.
217
    //
218
 
219
    // key = element name; value = ElementInfo
220
    private Hashtable           elements = new Hashtable ();
221
 
222
    // some VCs relate to ID/IDREF/IDREFS attributes
223
    // key = id; value = boolean true (defd) or false (refd)
224
    private Hashtable           ids = new Hashtable ();
225
 
226
    // we just record declared notation and unparsed entity names.
227
    // the implementation here is simple/slow; these features
228
    // are seldom used, one hopes they'll wither away soon
229
    private Vector              notations = new Vector (5, 5);
230
    private Vector              nDeferred = new Vector (5, 5);
231
    private Vector              unparsed = new Vector (5, 5);
232
    private Vector              uDeferred = new Vector (5, 5);
233
 
234
        // note: DocBk 3.1.7 XML defines over 2 dozen notations,
235
        // used when defining unparsed entities for graphics
236
        // (and maybe in other places)
237
 
238
 
239
 
240
    /**
241
     * Creates a pipeline terminus which consumes all events passed to
242
     * it; this will report validity errors as if they were fatal errors,
243
     * unless an error handler is assigned.
244
     *
245
     * @see #setErrorHandler
246
     */
247
        // constructor used by PipelineFactory
248
            // ... and want one taking system ID of an external subset
249
    public ValidationConsumer ()
250
    {
251
        this (null);
252
    }
253
 
254
    /**
255
     * Creates a pipeline filter which reports validity errors and then
256
     * passes events on to the next consumer if they were not fatal.
257
     *
258
     * @see #setErrorHandler
259
     */
260
        // constructor used by PipelineFactory
261
            // ... and want one taking system ID of an external subset
262
            // (which won't send declaration events)
263
    public ValidationConsumer (EventConsumer next)
264
    {
265
        super (next);
266
 
267
        setContentHandler (this);
268
        setDTDHandler (this);
269
        try { setProperty (DECL_HANDLER, this); }
270
        catch (Exception e) { /* "can't happen" */ }
271
        try { setProperty (LEXICAL_HANDLER, this); }
272
        catch (Exception e) { /* "can't happen" */ }
273
    }
274
 
275
 
276
    private static final String fakeRootName
277
        = ":Nobody:in:their_Right.Mind_would:use:this-name:1x:";
278
 
279
    /**
280
     * Creates a validation consumer which is preloaded with the DTD provided.
281
     * It does this by constructing a document with that DTD, then parsing
282
     * that document and recording its DTD declarations.  Then it arranges
283
     * not to modify that information.
284
     *
285
     * <p> The resulting validation consumer will only validate against
286
     * the specified DTD, regardless of whether some other DTD is found
287
     * in a document being parsed.
288
     *
289
     * @param rootName The name of the required root element; if this is
290
     *  null, any root element name will be accepted.
291
     * @param publicId If non-null and there is a non-null systemId, this
292
     *  identifier provides an alternate access identifier for the DTD's
293
     *  external subset.
294
     * @param systemId If non-null, this is a URI (normally URL) that
295
     *  may be used to access the DTD's external subset.
296
     * @param internalSubset If non-null, holds literal markup declarations
297
     *  comprising the DTD's internal subset.
298
     * @param resolver If non-null, this will be provided to the parser for
299
     *  use when resolving parameter entities (including any external subset).
300
     * @param resolver If non-null, this will be provided to the parser for
301
     *  use when resolving parameter entities (including any external subset).
302
     * @param minimalElement If non-null, a minimal valid document.
303
     *
304
     * @exception SAXNotSupportedException If the default SAX parser does
305
     *  not support the standard lexical or declaration handlers.
306
     * @exception SAXParseException If the specified DTD has either
307
     *  well-formedness or validity errors
308
     * @exception IOException If the specified DTD can't be read for
309
     *  some reason
310
     */
311
    public ValidationConsumer (
312
        String          rootName,
313
        String          publicId,
314
        String          systemId,
315
        String          internalSubset,
316
        EntityResolver  resolver,
317
        String          minimalDocument
318
    ) throws SAXException, IOException
319
    {
320
        this (null);
321
 
322
        disableReset = true;
323
        if (rootName == null)
324
            rootName = fakeRootName;
325
 
326
        //
327
        // Synthesize document with that DTD; is it possible to do
328
        // better for the declaration of the root element?
329
        //
330
        // NOTE:  can't use SAX2 to write internal subsets.
331
        //
332
        StringWriter    writer = new StringWriter ();
333
 
334
        writer.write ("<!DOCTYPE ");
335
        writer.write (rootName);
336
        if (systemId != null) {
337
            writer.write ("\n  ");
338
            if (publicId != null) {
339
                writer.write ("PUBLIC '");
340
                writer.write (publicId);
341
                writer.write ("'\n\t'");
342
            } else
343
                writer.write ("SYSTEM '");
344
            writer.write (systemId);
345
            writer.write ("'");
346
        }
347
        writer.write (" [ ");
348
        if (rootName == fakeRootName) {
349
            writer.write ("\n<!ELEMENT ");
350
            writer.write (rootName);
351
            writer.write (" EMPTY>");
352
        }
353
        if (internalSubset != null)
354
            writer.write (internalSubset);
355
        writer.write ("\n ]>");
356
 
357
        if (minimalDocument != null) {
358
            writer.write ("\n");
359
            writer.write (minimalDocument);
360
            writer.write ("\n");
361
        } else {
362
            writer.write (" <");
363
            writer.write (rootName);
364
            writer.write ("/>\n");
365
        }
366
        minimalDocument = writer.toString ();
367
 
368
        //
369
        // OK, load it
370
        //
371
        XMLReader       producer;
372
 
373
        producer = XMLReaderFactory.createXMLReader ();
374
        bind (producer, this);
375
 
376
        if (resolver != null)
377
            producer.setEntityResolver (resolver);
378
 
379
        InputSource     in;
380
 
381
        in = new InputSource (new StringReader (minimalDocument));
382
        producer.parse (in);
383
 
384
        disableDeclarations = true;
385
        if (rootName == fakeRootName)
386
            this.rootName = null;
387
    }
388
 
389
    private void resetState ()
390
    {
391
        if (!disableReset) {
392
            rootName = null;
393
            contentStack.removeAllElements ();
394
            elements.clear ();
395
            ids.clear ();
396
 
397
            notations.removeAllElements ();
398
            nDeferred.removeAllElements ();
399
            unparsed.removeAllElements ();
400
            uDeferred.removeAllElements ();
401
        }
402
    }
403
 
404
 
405
    private void warning (String description)
406
    throws SAXException
407
    {
408
        ErrorHandler            errHandler = getErrorHandler ();
409
        Locator                 locator = getDocumentLocator ();
410
        SAXParseException       err;
411
 
412
        if (errHandler == null)
413
            return;
414
 
415
        if (locator == null)
416
            err = new SAXParseException (description, null, null, -1, -1);
417
        else
418
            err = new SAXParseException (description, locator);
419
        errHandler.warning (err);
420
    }
421
 
422
    // package private (for ChildrenRecognizer)
423
    private void error (String description)
424
    throws SAXException
425
    {
426
        ErrorHandler            errHandler = getErrorHandler ();
427
        Locator                 locator = getDocumentLocator ();
428
        SAXParseException       err;
429
 
430
        if (locator == null)
431
            err = new SAXParseException (description, null, null, -1, -1);
432
        else
433
            err = new SAXParseException (description, locator);
434
        if (errHandler != null)
435
            errHandler.error (err);
436
        else    // else we always treat it as fatal!
437
            throw err;
438
    }
439
 
440
    private void fatalError (String description)
441
    throws SAXException
442
    {
443
        ErrorHandler            errHandler = getErrorHandler ();
444
        Locator                 locator = getDocumentLocator ();
445
        SAXParseException       err;
446
 
447
        if (locator != null)
448
            err = new SAXParseException (description, locator);
449
        else
450
            err = new SAXParseException (description, null, null, -1, -1);
451
        if (errHandler != null)
452
            errHandler.fatalError (err);
453
        // we always treat this as fatal, regardless of the handler
454
        throw err;
455
    }
456
 
457
 
458
    private static boolean isExtender (char c)
459
    {
460
        // [88] Extender ::= ...
461
        return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
462
               || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005
463
               || (c >= 0x3031 && c <= 0x3035)
464
               || (c >= 0x309d && c <= 0x309e)
465
               || (c >= 0x30fc && c <= 0x30fe);
466
    }
467
 
468
 
469
    // use augmented Unicode rules, not full XML rules
470
    private boolean isName (String name, String context, String id)
471
    throws SAXException
472
    {
473
        char    buf [] = name.toCharArray ();
474
        boolean pass = true;
475
 
476
        if (!Character.isUnicodeIdentifierStart (buf [0])
477
                && ":_".indexOf (buf [0]) == -1)
478
            pass = false;
479
        else {
480
            int max = buf.length;
481
            for (int i = 1; pass && i < max; i++) {
482
                char c = buf [i];
483
                if (!Character.isUnicodeIdentifierPart (c)
484
                        && ":-_.".indexOf (c) == -1
485
                        && !isExtender (c))
486
                    pass = false;
487
            }
488
        }
489
 
490
        if (!pass)
491
            error ("In " + context + " for " + id
492
                + ", '" + name + "' is not a name");
493
        return pass;    // true == OK
494
    }
495
 
496
    // use augmented Unicode rules, not full XML rules
497
    private boolean isNmtoken (String nmtoken, String context, String id)
498
    throws SAXException
499
    {
500
        char    buf [] = nmtoken.toCharArray ();
501
        boolean pass = true;
502
        int     max = buf.length;
503
 
504
        // XXX make this share code with isName
505
 
506
        for (int i = 0; pass && i < max; i++) {
507
                char c = buf [i];
508
            if (!Character.isUnicodeIdentifierPart (c)
509
                    && ":-_.".indexOf (c) == -1
510
                    && !isExtender (c))
511
                pass = false;
512
        }
513
 
514
        if (!pass)
515
            error ("In " + context + " for " + id
516
                + ", '" + nmtoken + "' is not a name token");
517
        return pass;    // true == OK
518
    }
519
 
520
    private void checkEnumeration (String value, String type, String name)
521
    throws SAXException
522
    {
523
        if (!hasMatch (value, type))
524
            // VC: Enumeration
525
            error ("Value '" + value
526
                + "' for attribute '" + name
527
                + "' is not permitted: " + type);
528
    }
529
 
530
    // used to test enumerated attributes and mixed content models
531
    // package private
532
    static boolean hasMatch (String value, String orList)
533
    {
534
        int len = value.length ();
535
        int max = orList.length () - len;
536
 
537
        for (int start = 0;
538
                (start = orList.indexOf (value, start)) != -1;
539
                start++) {
540
            char c;
541
 
542
            if (start > max)
543
                break;
544
            c = orList.charAt (start - 1);
545
            if (c != '|' && c != '('/*)*/)
546
                continue;
547
            c = orList.charAt (start + len);
548
            if (c != '|' && /*(*/ c != ')')
549
                continue;
550
            return true;
551
        }
552
        return false;
553
    }
554
 
555
    /**
556
     * <b>LexicalHandler</b> Records the declaration of the root
557
     * element, so it can be verified later.
558
     * Passed to the next consumer, unless this one was
559
     * preloaded with a particular DTD.
560
     */
561
    public void startDTD (String name, String publicId, String systemId)
562
    throws SAXException
563
    {
564
        if (disableDeclarations)
565
            return;
566
 
567
        rootName = name;
568
        super.startDTD (name, publicId, systemId);
569
    }
570
 
571
    /**
572
     * <b>LexicalHandler</b> Verifies that all referenced notations
573
     * and unparsed entities have been declared.
574
     * Passed to the next consumer, unless this one was
575
     * preloaded with a particular DTD.
576
     */
577
    public void endDTD ()
578
    throws SAXException
579
    {
580
        if (disableDeclarations)
581
            return;
582
 
583
        // this is a convenient hook for end-of-dtd checks, but we
584
        // could also trigger it in the first startElement call.
585
        // locator info is more appropriate here though.
586
 
587
        // VC: Notation Declared (NDATA can refer to them before decls,
588
        //      as can NOTATION attribute enumerations and defaults)
589
        int length = nDeferred.size ();
590
        for (int i = 0; i < length; i++) {
591
            String notation = (String) nDeferred.elementAt (i);
592
            if (!notations.contains (notation)) {
593
                error ("A declaration referred to notation '" + notation
594
                        + "' which was never declared");
595
            }
596
        }
597
        nDeferred.removeAllElements ();
598
 
599
        // VC: Entity Name (attribute values can refer to them
600
        //      before they're declared); VC Attribute Default Legal
601
        length = uDeferred.size ();
602
        for (int i = 0; i < length; i++) {
603
            String entity = (String) uDeferred.elementAt (i);
604
            if (!unparsed.contains (entity)) {
605
                error ("An attribute default referred to entity '" + entity
606
                        + "' which was never declared");
607
            }
608
        }
609
        uDeferred.removeAllElements ();
610
        super.endDTD ();
611
    }
612
 
613
 
614
    // These are interned, so we can rely on "==" to find the type of
615
    // all attributes except enumerations ...
616
    // "(this|or|that|...)" and "NOTATION (this|or|that|...)"
617
    static final String types [] = {
618
        "CDATA",
619
        "ID", "IDREF", "IDREFS",
620
        "NMTOKEN", "NMTOKENS",
621
        "ENTITY", "ENTITIES"
622
    };
623
 
624
 
625
    /**
626
     * <b>DeclHandler</b> Records attribute declaration for later use
627
     * in validating document content, and checks validity constraints
628
     * that are applicable to attribute declarations.
629
     * Passed to the next consumer, unless this one was
630
     * preloaded with a particular DTD.
631
     */
632
    public void attributeDecl (
633
        String eName,
634
        String aName,
635
        String type,
636
        String mode,
637
        String value
638
    ) throws SAXException
639
    {
640
        if (disableDeclarations)
641
            return;
642
 
643
        ElementInfo     info = (ElementInfo) elements.get (eName);
644
        AttributeInfo   ainfo = new AttributeInfo ();
645
        boolean         checkOne = false;
646
        boolean         interned = false;
647
 
648
        // cheap interning of type names and #FIXED, #REQUIRED
649
        // for faster startElement (we can use "==")
650
        for (int i = 0; i < types.length; i++) {
651
            if (types [i].equals (type)) {
652
                type = types [i];
653
                interned = true;
654
                break;
655
            }
656
        }
657
        if ("#FIXED".equals (mode))
658
            mode = "#FIXED";
659
        else if ("#REQUIRED".equals (mode))
660
            mode = "#REQUIRED";
661
 
662
        ainfo.type = type;
663
        ainfo.mode = mode;
664
        ainfo.value = value;
665
 
666
        // we might not have seen the content model yet
667
        if (info == null) {
668
            info = new ElementInfo (eName);
669
            elements.put (eName, info);
670
        }
671
        if ("ID" == type) {
672
            checkOne = true;
673
            if (!("#REQUIRED" == mode || "#IMPLIED".equals (mode))) {
674
                // VC: ID Attribute Default
675
                error ("ID attribute '" + aName
676
                    + "' must be #IMPLIED or #REQUIRED");
677
            }
678
 
679
        } else if (!interned && type.startsWith ("NOTATION ")) {
680
            checkOne = true;
681
 
682
            // VC: Notation Attributes (notations must be declared)
683
            StringTokenizer     tokens = new StringTokenizer (
684
                type.substring (10, type.lastIndexOf (')')),
685
                "|");
686
            while (tokens.hasMoreTokens ()) {
687
                String  token = tokens.nextToken ();
688
                if (!notations.contains (token))
689
                    nDeferred.addElement (token);
690
            }
691
        }
692
        if (checkOne) {
693
            for (Enumeration e = info.attributes.keys ();
694
                    e.hasMoreElements ();
695
                    /* NOP */) {
696
                String          name;
697
                AttributeInfo   ainfo2;
698
 
699
                name = (String) e.nextElement ();
700
                ainfo2 = (AttributeInfo) info.attributes.get (name);
701
                if (type == ainfo2.type || !interned /* NOTATION */) {
702
                    // VC: One ID per Element Type
703
                    // VC: One Notation per Element TYpe
704
                    error ("Element '" + eName
705
                        + "' already has an attribute of type "
706
                        + (interned ? "NOTATION" : type)
707
                        + " ('" + name
708
                        + "') so '" + aName
709
                        + "' is a validity error");
710
                }
711
            }
712
        }
713
 
714
        // VC: Attribute Default Legal
715
        if (value != null) {
716
 
717
            if ("CDATA" == type) {
718
                // event source rejected '<'
719
 
720
            } else if ("NMTOKEN" == type) {
721
                // VC: Name Token (is a nmtoken)
722
                isNmtoken (value, "attribute default", aName);
723
 
724
            } else if ("NMTOKENS" == type) {
725
                // VC: Name Token (is a nmtoken; at least one value)
726
                StringTokenizer tokens = new StringTokenizer (value);
727
                if (!tokens.hasMoreTokens ())
728
                    error ("Default for attribute '" + aName
729
                        + "' must have at least one name token.");
730
                else do {
731
                    String token = tokens.nextToken ();
732
                    isNmtoken (token, "attribute default", aName);
733
                } while (tokens.hasMoreTokens ());
734
 
735
            } else if ("IDREF" == type || "ENTITY" == type) {
736
                // VC: Entity Name (is a name)
737
                // VC: IDREF (is a name) (is declared)
738
                isName (value, "attribute default", aName);
739
                if ("ENTITY" == type && !unparsed.contains (value))
740
                    uDeferred.addElement (value);
741
 
742
            } else if ("IDREFS" == type || "ENTITIES" == type) {
743
                // VC: Entity Name (is a name; at least one value)
744
                // VC: IDREF (is a name; at least one value)
745
                StringTokenizer names = new StringTokenizer (value);
746
                if (!names.hasMoreTokens ())
747
                    error ("Default for attribute '" + aName
748
                        + "' must have at least one name.");
749
                else do {
750
                    String name = names.nextToken ();
751
                    isName (name, "attribute default", aName);
752
                    if ("ENTITIES" == type && !unparsed.contains (name))
753
                        uDeferred.addElement (value);
754
                } while (names.hasMoreTokens ());
755
 
756
            } else if (type.charAt (0) == '(' /*)*/ ) {
757
                // VC: Enumeration (must match)
758
                checkEnumeration (value, type, aName);
759
 
760
            } else if (!interned && checkOne) { /* NOTATION */
761
                // VC: Notation attributes (must be names)
762
                isName (value, "attribute default", aName);
763
 
764
                // VC: Notation attributes (must be declared)
765
                if (!notations.contains (value))
766
                    nDeferred.addElement (value);
767
 
768
                // VC: Enumeration (must match)
769
                checkEnumeration (value, type, aName);
770
 
771
            } else if ("ID" != type)
772
                throw new RuntimeException ("illegal attribute type: " + type);
773
        }
774
 
775
        if (info.attributes.get (aName) == null)
776
            info.attributes.put (aName, ainfo);
777
        /*
778
        else
779
            warning ("Element '" + eName
780
                + "' already has an attribute named '" + aName + "'");
781
        */
782
 
783
        if ("xml:space".equals (aName)) {
784
            if (!("(default|preserve)".equals (type)
785
                    || "(preserve|default)".equals (type)
786
                        // these next two are arguable; XHTML's DTD doesn't
787
                        // deserve errors.  After all, it's not like any
788
                        // illegal _value_ could pass ...
789
                    || "(preserve)".equals (type)
790
                    || "(default)".equals (type)
791
                    ))
792
                error (
793
                    "xml:space attribute type must be like '(default|preserve)'"
794
                    + " not '" + type + "'"
795
                    );
796
 
797
        }
798
        super.attributeDecl (eName, aName, type, mode, value);
799
    }
800
 
801
    /**
802
     * <b>DecllHandler</b> Records the element declaration for later use
803
     * when checking document content, and checks validity constraints that
804
     * apply to element declarations.  Passed to the next consumer, unless
805
     * this one was preloaded with a particular DTD.
806
     */
807
    public void elementDecl (String name, String model)
808
    throws SAXException
809
    {
810
        if (disableDeclarations)
811
            return;
812
 
813
        ElementInfo     info = (ElementInfo) elements.get (name);
814
 
815
        // we might have seen an attribute decl already
816
        if (info == null) {
817
            info = new ElementInfo (name);
818
            elements.put (name, info);
819
        }
820
        if (info.model != null) {
821
            // NOTE:  not all parsers can report such duplicates.
822
            // VC: Unique Element Type Declaration
823
            error ("Element type '" + name
824
                + "' was already declared.");
825
        } else {
826
            info.model = model;
827
 
828
            // VC: No Duplicate Types (in mixed content models)
829
            if (model.charAt (1) == '#')        // (#PCDATA...
830
                info.getRecognizer (this);
831
        }
832
        super.elementDecl (name, model);
833
    }
834
 
835
    /**
836
     * <b>DecllHandler</b> passed to the next consumer, unless this
837
     * one was preloaded with a particular DTD
838
     */
839
    public void internalEntityDecl (String name, String value)
840
    throws SAXException
841
    {
842
        if (!disableDeclarations)
843
            super.internalEntityDecl (name, value);
844
    }
845
 
846
    /**
847
     * <b>DecllHandler</b> passed to the next consumer, unless this
848
     * one was preloaded with a particular DTD
849
     */
850
    public void externalEntityDecl (String name,
851
        String publicId, String systemId)
852
    throws SAXException
853
    {
854
        if (!disableDeclarations)
855
            super.externalEntityDecl (name, publicId, systemId);
856
    }
857
 
858
 
859
    /**
860
     * <b>DTDHandler</b> Records the notation name, for checking
861
     * NOTATIONS attribute values and declararations of unparsed
862
     * entities.  Passed to the next consumer, unless this one was
863
     * preloaded with a particular DTD.
864
     */
865
    public void notationDecl (String name, String publicId, String systemId)
866
    throws SAXException
867
    {
868
        if (disableDeclarations)
869
            return;
870
 
871
        notations.addElement (name);
872
        super.notationDecl (name, publicId, systemId);
873
    }
874
 
875
    /**
876
     * <b>DTDHandler</b> Records the entity name, for checking
877
     * ENTITY and ENTITIES attribute values; records the notation
878
     * name if it hasn't yet been declared.  Passed to the next consumer,
879
     * unless this one was preloaded with a particular DTD.
880
     */
881
    public void unparsedEntityDecl (
882
        String name,
883
        String publicId,
884
        String systemId,
885
        String notationName
886
    ) throws SAXException
887
    {
888
        if (disableDeclarations)
889
            return;
890
 
891
        unparsed.addElement (name);
892
        if (!notations.contains (notationName))
893
            nDeferred.addElement (notationName);
894
        super.unparsedEntityDecl (name, publicId, systemId, notationName);
895
    }
896
 
897
 
898
    /**
899
     * <b>ContentHandler</b> Ensures that state from any previous parse
900
     * has been deleted.
901
     * Passed to the next consumer.
902
     */
903
    public void startDocument ()
904
    throws SAXException
905
    {
906
        resetState ();
907
        super.startDocument ();
908
    }
909
 
910
 
911
    private static boolean isAsciiLetter (char c)
912
    {
913
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
914
    }
915
 
916
 
917
    /**
918
     * <b>ContentHandler</b> Reports a fatal exception.  Validating
919
     * XML processors may not skip any entities.
920
     */
921
    public void skippedEntity (String name)
922
    throws SAXException
923
    {
924
        fatalError ("may not skip entities");
925
    }
926
 
927
    /*
928
     * SAX2 doesn't expand non-PE refs in attribute defaults...
929
     */
930
    private String expandDefaultRefs (String s)
931
    throws SAXException
932
    {
933
        if (s.indexOf ('&') < 0)
934
            return s;
935
 
936
// FIXME: handle &#nn; &#xnn; &name;
937
        String message = "Can't expand refs in attribute default: " + s;
938
        warning (message);
939
 
940
        return s;
941
    }
942
 
943
    /**
944
     * <b>ContentHandler</b> Performs validity checks against element
945
     * (and document) content models, and attribute values.
946
     * Passed to the next consumer.
947
     */
948
    public void startElement (
949
        String          uri,
950
        String          localName,
951
        String          qName,
952
        Attributes      atts
953
    ) throws SAXException
954
    {
955
        //
956
        // First check content model for the enclosing scope.
957
        //
958
        if (contentStack.isEmpty ()) {
959
            // VC:  Root Element Type
960
            if (!qName.equals (rootName)) {
961
                if (rootName == null)
962
                    warning ("This document has no DTD, can't be valid");
963
                else
964
                    error ("Root element type '" + qName
965
                        + "' was declared to be '" + rootName + "'");
966
            }
967
        } else {
968
            Recognizer state = (Recognizer) contentStack.peek ();
969
 
970
            if (state != null) {
971
                Recognizer newstate = state.acceptElement (qName);
972
 
973
                if (newstate == null)
974
                    error ("Element type '" + qName
975
                        + "' in element '" + state.type.name
976
                        + "' violates content model " + state.type.model
977
                        );
978
                if (newstate != state) {
979
                    contentStack.pop ();
980
                    contentStack.push (newstate);
981
                }
982
            }
983
        }
984
 
985
        //
986
        // Then check that this element was declared, and push the
987
        // object used to validate its content model onto our stack.
988
        //
989
        // This is where the recognizer gets created, if needed; if
990
        // it's a "children" (elements) content model, an NDFA is
991
        // created.  (One recognizer is used per content type, no
992
        // matter how complex that recognizer is.)
993
        //
994
        ElementInfo             info;
995
 
996
        info = (ElementInfo) elements.get (qName);
997
        if (info == null || info.model == null) {
998
            // VC: Element Valid (base clause)
999
            error ("Element type '" + qName + "' was not declared");
1000
            contentStack.push (null);
1001
 
1002
            // for less diagnostic noise, fake a declaration.
1003
            elementDecl (qName, "ANY");
1004
        } else
1005
            contentStack.push (info.getRecognizer (this));
1006
 
1007
        //
1008
        // Then check each attribute present
1009
        //
1010
        int                     len;
1011
        String                  aname;
1012
        AttributeInfo           ainfo;
1013
 
1014
        if (atts != null)
1015
            len = atts.getLength ();
1016
        else
1017
            len = 0;
1018
 
1019
        for (int i = 0; i < len; i++) {
1020
            aname = atts.getQName (i);
1021
 
1022
            if (info == null
1023
                    || (ainfo = (AttributeInfo) info.attributes.get (aname))
1024
                            == null) {
1025
                // VC: Attribute Value Type
1026
                error ("Attribute '" + aname
1027
                    + "' was not declared for element type " + qName);
1028
                continue;
1029
            }
1030
 
1031
            String value = atts.getValue (i);
1032
 
1033
            // note that "==" for type names and "#FIXED" is correct
1034
            // (and fast) since we've interned those literals.
1035
 
1036
            if ("#FIXED" == ainfo.mode) {
1037
                String expanded = expandDefaultRefs (ainfo.value);
1038
 
1039
                // VC: Fixed Attribute Default
1040
                if (!value.equals (expanded)) {
1041
                    error ("Attribute '" + aname
1042
                        + "' must match " + expanded
1043
                        );
1044
                    continue;
1045
                }
1046
            }
1047
 
1048
            if ("CDATA" == ainfo.type)
1049
                continue;
1050
 
1051
            //
1052
            // For all other attribute types, there are various
1053
            // rules to follow.
1054
            //
1055
 
1056
            if ("ID" == ainfo.type) {
1057
                // VC: ID (must be a name)
1058
                if (isName (value, "ID attribute", aname)) {
1059
                    if (Boolean.TRUE == ids.get (value))
1060
                        // VC: ID (appears once)
1061
                        error ("ID attribute " + aname
1062
                            + " uses an ID value '" + value
1063
                            + "' which was already declared.");
1064
                    else
1065
                        // any forward refs are no longer problems
1066
                        ids.put (value, Boolean.TRUE);
1067
                }
1068
                continue;
1069
            }
1070
 
1071
            if ("IDREF" == ainfo.type) {
1072
                // VC: IDREF (value must be a name)
1073
                if (isName (value, "IDREF attribute", aname)) {
1074
                    // VC: IDREF (must match some ID attribute)
1075
                    if (ids.get (value) == null)
1076
                        // new -- assume it's a forward ref
1077
                        ids.put (value, Boolean.FALSE);
1078
                }
1079
                continue;
1080
            }
1081
 
1082
            if ("IDREFS" == ainfo.type) {
1083
                StringTokenizer tokens = new StringTokenizer (value, " ");
1084
 
1085
                if (!tokens.hasMoreTokens ()) {
1086
                    // VC: IDREF (one or more values)
1087
                    error ("IDREFS attribute " + aname
1088
                        + " must have at least one ID ref");
1089
                } else do {
1090
                    String id = tokens.nextToken ();
1091
 
1092
                    // VC: IDREF (value must be a name)
1093
                    if (isName (id, "IDREFS attribute", aname)) {
1094
                        // VC: IDREF (must match some ID attribute)
1095
                        if (ids.get (id) == null)
1096
                            // new -- assume it's a forward ref
1097
                            ids.put (id, Boolean.FALSE);
1098
                    }
1099
                } while (tokens.hasMoreTokens ());
1100
                continue;
1101
            }
1102
 
1103
            if ("NMTOKEN" == ainfo.type) {
1104
                // VC: Name Token (is a name token)
1105
                isNmtoken (value, "NMTOKEN attribute", aname);
1106
                continue;
1107
            }
1108
 
1109
            if ("NMTOKENS" == ainfo.type) {
1110
                StringTokenizer tokens = new StringTokenizer (value, " ");
1111
 
1112
                if (!tokens.hasMoreTokens ()) {
1113
                    // VC: Name Token (one or more values)
1114
                    error ("NMTOKENS attribute " + aname
1115
                        + " must have at least one name token");
1116
                } else do {
1117
                    String token = tokens.nextToken ();
1118
 
1119
                    // VC: Name Token (is a name token)
1120
                    isNmtoken (token, "NMTOKENS attribute", aname);
1121
                } while (tokens.hasMoreTokens ());
1122
                continue;
1123
            }
1124
 
1125
            if ("ENTITY" == ainfo.type) {
1126
                if (!unparsed.contains (value))
1127
                    // VC: Entity Name
1128
                    error ("Value of attribute '" + aname
1129
                        + "' refers to unparsed entity '" + value
1130
                        + "' which was not declared.");
1131
                continue;
1132
            }
1133
 
1134
            if ("ENTITIES" == ainfo.type) {
1135
                StringTokenizer tokens = new StringTokenizer (value, " ");
1136
 
1137
                if (!tokens.hasMoreTokens ()) {
1138
                    // VC: Entity Name (one or more values)
1139
                    error ("ENTITIES attribute " + aname
1140
                        + " must have at least one name token");
1141
                } else do {
1142
                    String entity = tokens.nextToken ();
1143
 
1144
                    if (!unparsed.contains (entity))
1145
                        // VC: Entity Name
1146
                        error ("Value of attribute '" + aname
1147
                            + "' refers to unparsed entity '" + entity
1148
                            + "' which was not declared.");
1149
                } while (tokens.hasMoreTokens ());
1150
                continue;
1151
            }
1152
 
1153
            //
1154
            // check for enumerations last; more expensive
1155
            //
1156
            if (ainfo.type.charAt (0) == '(' /*)*/
1157
                    || ainfo.type.startsWith ("NOTATION ")
1158
                    ) {
1159
                // VC: Enumeration (value must be defined)
1160
                checkEnumeration (value, ainfo.type, aname);
1161
                continue;
1162
            }
1163
        }
1164
 
1165
        //
1166
        // Last, check that all #REQUIRED attributes were provided
1167
        //
1168
        if (info != null) {
1169
            Hashtable   table = info.attributes;
1170
 
1171
            if (table.size () != 0) {
1172
                Enumeration     e = table.keys ();
1173
 
1174
                // XXX table.keys uses the heap, bleech -- slows things
1175
 
1176
                while (e.hasMoreElements ()) {
1177
                    aname = (String) e.nextElement ();
1178
                    ainfo = (AttributeInfo) table.get (aname);
1179
 
1180
                    // "#REQUIRED" mode was interned in attributeDecl
1181
                    if ("#REQUIRED" == ainfo.mode
1182
                            && atts.getValue (aname) == null) {
1183
                        // VC: Required Attribute
1184
                        error ("Attribute '" + aname + "' must be specified "
1185
                            + "for element type " + qName);
1186
                    }
1187
                }
1188
            }
1189
        }
1190
        super.startElement (uri, localName, qName, atts);
1191
    }
1192
 
1193
    /**
1194
     * <b>ContentHandler</b> Reports a validity error if the element's content
1195
     * model does not permit character data.
1196
     * Passed to the next consumer.
1197
     */
1198
    public void characters (char ch [], int start, int length)
1199
    throws SAXException
1200
    {
1201
        Recognizer state;
1202
 
1203
        if (contentStack.empty ())
1204
            state = null;
1205
        else
1206
            state = (Recognizer) contentStack.peek ();
1207
 
1208
        // NOTE:  if this ever supports with SAX parsers that don't
1209
        // report ignorable whitespace as such (only XP?), this class
1210
        // needs to morph it into ignorableWhitespace() as needed ...
1211
 
1212
        if (state != null && !state.acceptCharacters ())
1213
            // VC: Element Valid (clauses three, four -- see recognizer)
1214
            error ("Character content not allowed in element "
1215
                + state.type.name);
1216
 
1217
        super.characters (ch, start, length);
1218
    }
1219
 
1220
 
1221
    /**
1222
     * <b>ContentHandler</b> Reports a validity error if the element's content
1223
     * model does not permit end-of-element yet, or a well formedness error
1224
     * if there was no matching startElement call.
1225
     * Passed to the next consumer.
1226
     */
1227
    public void endElement (String uri, String localName, String qName)
1228
    throws SAXException
1229
    {
1230
        try {
1231
            Recognizer state = (Recognizer) contentStack.pop ();
1232
 
1233
            if (state != null && !state.completed ())
1234
                // VC: Element valid (clauses two, three, four; see Recognizer)
1235
                error ("Premature end for element '"
1236
                    + state.type.name
1237
                    + "', content model "
1238
                    + state.type.model);
1239
 
1240
            // could insist on match of start element, but that's
1241
            // something the input stream must to guarantee.
1242
 
1243
        } catch (EmptyStackException e) {
1244
            fatalError ("endElement without startElement: " + qName
1245
                + ((uri == null)
1246
                    ? ""
1247
                    : ( " { '" + uri + "', " + localName + " }")));
1248
        }
1249
        super.endElement (uri, localName, qName);
1250
    }
1251
 
1252
    /**
1253
     * <b>ContentHandler</b> Checks whether all ID values that were
1254
     * referenced have been declared, and releases all resources.
1255
     * Passed to the next consumer.
1256
     *
1257
     * @see #setDocumentLocator
1258
     */
1259
    public void endDocument ()
1260
    throws SAXException
1261
    {
1262
        for (Enumeration idNames = ids.keys ();
1263
                idNames.hasMoreElements ();
1264
                /* NOP */) {
1265
            String id = (String) idNames.nextElement ();
1266
 
1267
            if (Boolean.FALSE == ids.get (id)) {
1268
                // VC: IDREF (must match ID)
1269
                error ("Undeclared ID value '" + id
1270
                    + "' was referred to by an IDREF/IDREFS attribute");
1271
            }
1272
        }
1273
 
1274
        resetState ();
1275
        super.endDocument ();
1276
    }
1277
 
1278
 
1279
    /** Holds per-element declarations */
1280
    static private final class ElementInfo
1281
    {
1282
        String                  name;
1283
        String                  model;
1284
 
1285
        // key = attribute name; value = AttributeInfo
1286
        Hashtable               attributes = new Hashtable (11);
1287
 
1288
        ElementInfo (String n) { name = n; }
1289
 
1290
        private Recognizer      recognizer;
1291
 
1292
        // for validating content models:  one per type, shared,
1293
        // and constructed only on demand ... so unused elements do
1294
        // not need to consume resources.
1295
        Recognizer      getRecognizer (ValidationConsumer consumer)
1296
        throws SAXException
1297
        {
1298
            if (recognizer == null) {
1299
                if ("ANY".equals (model))
1300
                    recognizer = ANY;
1301
                else if ("EMPTY".equals (model))
1302
                    recognizer = new EmptyRecognizer (this);
1303
                else if ('#' == model.charAt (1))
1304
                    // n.b. this constructor does a validity check
1305
                    recognizer = new MixedRecognizer (this, consumer);
1306
                else
1307
                    recognizer = new ChildrenRecognizer (this, consumer);
1308
            }
1309
            return recognizer;
1310
        }
1311
    }
1312
 
1313
    /** Holds per-attribute declarations */
1314
    static private final class AttributeInfo
1315
    {
1316
        String  type;
1317
        String  mode;           // #REQUIRED, etc (or null)
1318
        String  value;          // or null
1319
    }
1320
 
1321
 
1322
    //
1323
    // Content model validation
1324
    //
1325
 
1326
    static private final Recognizer     ANY = new Recognizer (null);
1327
 
1328
 
1329
    // Base class defines the calls used to validate content,
1330
    // and supports the "ANY" content model
1331
    static private class Recognizer
1332
    {
1333
        final ElementInfo       type;
1334
 
1335
        Recognizer (ElementInfo t) { type = t; }
1336
 
1337
        // return true iff character data is legal here
1338
        boolean acceptCharacters ()
1339
        throws SAXException
1340
            // VC: Element Valid (third and fourth clauses)
1341
            { return true; }
1342
 
1343
        // null return = failure
1344
        // otherwise, next state (like an FSM)
1345
        // prerequisite: tested that name was declared
1346
        Recognizer acceptElement (String name)
1347
        throws SAXException
1348
            // VC: Element Valid (fourth clause)
1349
            { return this; }
1350
 
1351
        // return true iff model is completed, can finish
1352
        boolean completed ()
1353
        throws SAXException
1354
            // VC: Element Valid (fourth clause)
1355
            { return true; }
1356
 
1357
        public String toString ()
1358
            // n.b. "children" is the interesting case!
1359
            { return (type == null) ? "ANY" : type.model; }
1360
    }
1361
 
1362
    // "EMPTY" content model -- no characters or elements
1363
    private static final class EmptyRecognizer extends Recognizer
1364
    {
1365
        public EmptyRecognizer (ElementInfo type)
1366
            { super (type); }
1367
 
1368
        // VC: Element Valid (first clause)
1369
        boolean acceptCharacters ()
1370
            { return false; }
1371
 
1372
        // VC: Element Valid (first clause)
1373
        Recognizer acceptElement (String name)
1374
            { return null; }
1375
    }
1376
 
1377
    // "Mixed" content model -- ANY, but restricts elements
1378
    private static final class MixedRecognizer extends Recognizer
1379
    {
1380
        private String  permitted [];
1381
 
1382
        // N.B. constructor tests for duplicated element names (VC)
1383
        public MixedRecognizer (ElementInfo t, ValidationConsumer v)
1384
        throws SAXException
1385
        {
1386
            super (t);
1387
 
1388
            // (#PCDATA...)* or (#PCDATA) ==> ... or empty
1389
            // with the "..." being "|elname|..."
1390
            StringTokenizer     tokens = new StringTokenizer (
1391
                t.model.substring (8, t.model.lastIndexOf (')')),
1392
                "|");
1393
            Vector              vec = new Vector ();
1394
 
1395
            while (tokens.hasMoreTokens ()) {
1396
                String token = tokens.nextToken ();
1397
 
1398
                if (vec.contains (token))
1399
                    v.error ("element " + token
1400
                        + " is repeated in mixed content model: "
1401
                        + t.model);
1402
                else
1403
                    vec.addElement (token.intern ());
1404
            }
1405
            permitted = new String [vec.size ()];
1406
            for (int i = 0; i < permitted.length; i++)
1407
                permitted [i] = (String) vec.elementAt (i);
1408
 
1409
            // in one large machine-derived DTD sample, most of about
1410
            // 250 mixed content models were empty, and 25 had ten or
1411
            // more entries.  2 had over a hundred elements.  Linear
1412
            // search isn't obviously wrong.
1413
        }
1414
 
1415
        // VC: Element Valid (third clause)
1416
        Recognizer acceptElement (String name)
1417
        {
1418
            int         length = permitted.length;
1419
 
1420
            // first pass -- optimistic w.r.t. event source interning
1421
            // (and document validity)
1422
            for (int i = 0; i < length; i++)
1423
                if (permitted [i] == name)
1424
                    return this;
1425
            // second pass -- pessimistic w.r.t. event source interning
1426
            for (int i = 0; i < length; i++)
1427
                if (permitted [i].equals (name))
1428
                    return this;
1429
            return null;
1430
        }
1431
    }
1432
 
1433
 
1434
    // recognizer loop flags, see later
1435
    private static final int            F_LOOPHEAD = 0x01;
1436
    private static final int            F_LOOPNEXT = 0x02;
1437
 
1438
    // for debugging -- used to label/count nodes in toString()
1439
    private static int                  nodeCount;
1440
 
1441
    /**
1442
     * "Children" content model -- these are nodes in NDFA state graphs.
1443
     * They work in fixed space.  Note that these graphs commonly have
1444
     * cycles, handling features such as zero-or-more and one-or-more.
1445
     *
1446
     * <p>It's readonly, so only one copy is ever needed.  The content model
1447
     * stack may have any number of pointers into each graph, when a model
1448
     * happens to be needed more than once due to element nesting.  Since
1449
     * traversing the graph just moves to another node, and never changes
1450
     * it, traversals never interfere with each other.
1451
     *
1452
     * <p>There is an option to report non-deterministic models.  These are
1453
     * always XML errors, but ones which are not often reported despite the
1454
     * fact that they can lead to different validating parsers giving
1455
     * different results for the same input.  (The XML spec doesn't require
1456
     * them to be reported.)
1457
     *
1458
     * <p><b>FIXME</b> There's currently at least one known bug here, in that
1459
     * it's not actually detecting the non-determinism it tries to detect.
1460
     * (Of the "optional.xml" test, the once-or-twice-2* tests are all non-D;
1461
     * maybe some others.)  This may relate to the issue flagged below as
1462
     * "should not" happen (but it was), which showed up when patching the
1463
     * graph to have one exit node (or more EMPTY nodes).
1464
     */
1465
    private static final class ChildrenRecognizer extends Recognizer
1466
        implements Cloneable
1467
    {
1468
        // for reporting non-deterministic content models
1469
        // ... a waste of space if we're not reporting those!
1470
        // ... along with the 'model' member (in base class)
1471
        private ValidationConsumer      consumer;
1472
 
1473
        // for CHOICE nodes -- each component is an arc that
1474
        // accepts a different NAME (or is EMPTY indicating
1475
        // NDFA termination).
1476
        private Recognizer              components [];
1477
 
1478
        // for NAME/SEQUENCE nodes -- accepts that NAME and
1479
        // then goes to the next node (CHOICE, NAME, EMPTY).
1480
        private String                  name;
1481
        private Recognizer              next;
1482
 
1483
        // loops always point back to a CHOICE node. we mark such choice
1484
        // nodes (F_LOOPHEAD) for diagnostics and faster deep cloning.
1485
        // We also mark nodes before back pointers (F_LOOPNEXT), to ensure
1486
        // termination when we patch sequences and loops.
1487
        private int                     flags;
1488
 
1489
 
1490
        // prevent a needless indirection between 'this' and 'node'
1491
        private void copyIn (ChildrenRecognizer node)
1492
        {
1493
            // model & consumer are already set
1494
            components = node.components;
1495
            name = node.name;
1496
            next = node.next;
1497
            flags = node.flags;
1498
        }
1499
 
1500
        // used to construct top level "children" content models,
1501
        public ChildrenRecognizer (ElementInfo type, ValidationConsumer vc)
1502
        {
1503
            this (vc, type);
1504
            populate (type.model.toCharArray (), 0);
1505
            patchNext (new EmptyRecognizer (type), null);
1506
        }
1507
 
1508
        // used internally; creates an unpopulated node (populating
        // is separate)
        private ChildrenRecognizer (ValidationConsumer vc, ElementInfo type)
        {
            super (type);
            this.consumer = vc;
        }
1514
 
1515
 
1516
        //
1517
        // When rewriting some graph nodes we need deep clones in one case;
1518
        // mostly shallow clones (what the JVM handles for us) are fine.
1519
        //
1520
        private ChildrenRecognizer shallowClone ()
1521
        {
1522
            try {
1523
                return (ChildrenRecognizer) clone ();
1524
            } catch (CloneNotSupportedException e) {
1525
                throw new Error ("clone");
1526
            }
1527
        }
1528
 
1529
        private ChildrenRecognizer deepClone ()
1530
        {
1531
            return deepClone (new Hashtable (37));
1532
        }
1533
 
1534
        private ChildrenRecognizer deepClone (Hashtable table)
1535
        {
1536
            ChildrenRecognizer retval;
1537
 
1538
            if ((flags & F_LOOPHEAD) != 0) {
1539
                retval = (ChildrenRecognizer) table.get (this);
1540
                if (retval != null)
1541
                    return this;
1542
 
1543
                retval = shallowClone ();
1544
                table.put (this, retval);
1545
            } else
1546
                retval = shallowClone ();
1547
 
1548
            if (next != null) {
1549
                if (next instanceof ChildrenRecognizer)
1550
                    retval.next = ((ChildrenRecognizer)next)
1551
                            .deepClone (table);
1552
                else if (!(next instanceof EmptyRecognizer))
1553
                    throw new RuntimeException ("deepClone");
1554
            }
1555
 
1556
            if (components != null) {
1557
                retval.components = new Recognizer [components.length];
1558
                for (int i = 0; i < components.length; i++) {
1559
                    Recognizer temp = components [i];
1560
 
1561
                    if (temp == null)
1562
                        retval.components [i] = null;
1563
                    else if (temp instanceof ChildrenRecognizer)
1564
                        retval.components [i] = ((ChildrenRecognizer)temp)
1565
                                .deepClone (table);
1566
                    else if (!(temp instanceof EmptyRecognizer))
1567
                        throw new RuntimeException ("deepClone");
1568
                }
1569
            }
1570
 
1571
            return retval;
1572
        }
1573
 
1574
        // connect subgraphs, first to next (sequencing)
1575
        private void patchNext (Recognizer theNext, Hashtable table)
1576
        {
1577
            // backpointers must not be repatched or followed
1578
            if ((flags & F_LOOPNEXT) != 0)
1579
                return;
1580
 
1581
            // XXX this table "shouldn't" be needed, right?
1582
            // but some choice nodes looped if it isn't there.
1583
            if (table != null && table.get (this) != null)
1584
                return;
1585
            if (table == null)
1586
                table = new Hashtable ();
1587
 
1588
            // NAME/SEQUENCE
1589
            if (name != null) {
1590
                if (next == null)
1591
                    next = theNext;
1592
                else if (next instanceof ChildrenRecognizer) {
1593
                    ((ChildrenRecognizer)next).patchNext (theNext, table);
1594
                } else if (!(next instanceof EmptyRecognizer))
1595
                    throw new RuntimeException ("patchNext");
1596
                return;
1597
            }
1598
 
1599
            // CHOICE
1600
            for (int i = 0; i < components.length; i++) {
1601
                if (components [i] == null)
1602
                    components [i] = theNext;
1603
                else if (components [i] instanceof ChildrenRecognizer) {
1604
                    ((ChildrenRecognizer)components [i])
1605
                            .patchNext (theNext, table);
1606
                } else if (!(components [i] instanceof EmptyRecognizer))
1607
                    throw new RuntimeException ("patchNext");
1608
            }
1609
 
1610
            if (table != null && (flags & F_LOOPHEAD) != 0)
1611
                table.put (this, this);
1612
        }
1613
 
1614
        /**
1615
         * Parses a 'children' spec (or recursively 'cp') and makes this
1616
         * become a regular graph node.
1617
         *
1618
         * @return index after this particle
1619
         */
1620
        private int populate (char parseBuf [], int startPos)
1621
        {
1622
            int         nextPos = startPos + 1;
1623
            char        c;
1624
 
1625
            if (nextPos < 0 || nextPos >= parseBuf.length)
1626
                throw new IndexOutOfBoundsException ();
1627
 
1628
            // Grammar of the string is from the XML spec, but
1629
            // with whitespace removed by the SAX parser.
1630
 
1631
            // children ::= (choice | seq) ('?' | '*' | '+')?
1632
            // cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1633
            // choice ::= '(' cp ('|' choice)* ')'
1634
            // seq ::= '(' cp (',' choice)* ')'
1635
 
1636
            // interior nodes only
1637
            //   cp ::= name ...
1638
            if (parseBuf [startPos] != '('/*)*/) {
1639
                boolean         done = false;
1640
                do {
1641
                    switch (c = parseBuf [nextPos]) {
1642
                        case '?': case '*': case '+':
1643
                        case '|': case ',':
1644
                        case /*(*/ ')':
1645
                            done = true;
1646
                            continue;
1647
                        default:
1648
                            nextPos++;
1649
                            continue;
1650
                    }
1651
                } while (!done);
1652
                name = new String (parseBuf, startPos, nextPos - startPos);
1653
 
1654
            // interior OR toplevel nodes
1655
            //   cp ::= choice ..
1656
            //   cp ::= seq ..
1657
            } else {
1658
                // collect everything as a separate list, and merge it
1659
                // into "this" later if we can (SEQUENCE or singleton)
1660
                ChildrenRecognizer      first;
1661
 
1662
                first = new ChildrenRecognizer (consumer, type);
1663
                nextPos = first.populate (parseBuf, nextPos);
1664
                c = parseBuf [nextPos++];
1665
 
1666
                if (c == ',' || c == '|') {
1667
                    ChildrenRecognizer  current = first;
1668
                    char                separator = c;
1669
                    Vector              v = null;
1670
 
1671
                    if (separator == '|') {
1672
                        v = new Vector ();
1673
                        v.addElement (first);
1674
                    }
1675
 
1676
                    do {
1677
                        ChildrenRecognizer link;
1678
 
1679
                        link = new ChildrenRecognizer (consumer, type);
1680
                        nextPos = link.populate (parseBuf, nextPos);
1681
 
1682
                        if (separator == ',') {
1683
                            current.patchNext (link, null);
1684
                            current = link;
1685
                        } else
1686
                            v.addElement (link);
1687
 
1688
                        c = parseBuf [nextPos++];
1689
                    } while (c == separator);
1690
 
1691
                    // choice ... collect everything into one array.
1692
                    if (separator == '|') {
1693
                        // assert v.size() > 1
1694
                        components = new Recognizer [v.size ()];
1695
                        for (int i = 0; i < components.length; i++) {
1696
                            components [i] = (Recognizer)
1697
                                    v.elementAt (i);
1698
                        }
1699
                        // assert flags == 0
1700
 
1701
                    // sequence ... merge into "this" to be smaller.
1702
                    } else
1703
                        copyIn (first);
1704
 
1705
                // treat singletons like one-node sequences.
1706
                } else
1707
                    copyIn (first);
1708
 
1709
                if (c != /*(*/ ')')
1710
                    throw new RuntimeException ("corrupt content model");
1711
            }
1712
 
1713
            //
1714
            // Arity is optional, and the root of all fun.  We keep the
1715
            // FSM state graph simple by only having NAME/SEQUENCE and
1716
            // CHOICE nodes (or EMPTY to terminate a model), easily
1717
            // evaluated.  So we rewrite each node that has arity, using
1718
            // those primitives.  We create loops here, if needed.
1719
            //
1720
            if (nextPos < parseBuf.length) {
1721
                c = parseBuf [nextPos];
1722
                if (c == '?' || c == '*' || c == '+') {
1723
                    nextPos++;
1724
 
1725
                    // Rewrite 'zero-or-one' "?" arity to a CHOICE:
1726
                    //   - SEQUENCE (clone, what's next)
1727
                    //   - or, what's next
1728
                    // Size cost: N --> N + 1
1729
                    if (c == '?') {
1730
                        Recognizer              once = shallowClone ();
1731
 
1732
                        components = new Recognizer [2];
1733
                        components [0] = once;
1734
                        // components [1] initted to null
1735
                        name = null;
1736
                        next = null;
1737
                        flags = 0;
1738
 
1739
 
1740
                    // Rewrite 'zero-or-more' "*" arity to a CHOICE.
1741
                    //   - LOOP (clone, back to this CHOICE)
1742
                    //   - or, what's next
1743
                    // Size cost: N --> N + 1
1744
                    } else if (c == '*') {
1745
                        ChildrenRecognizer      loop = shallowClone ();
1746
 
1747
                        loop.patchNext (this, null);
1748
                        loop.flags |= F_LOOPNEXT;
1749
                        flags = F_LOOPHEAD;
1750
 
1751
                        components = new Recognizer [2];
1752
                        components [0] = loop;
1753
                        // components [1] initted to null
1754
                        name = null;
1755
                        next = null;
1756
 
1757
 
1758
                    // Rewrite 'one-or-more' "+" arity to a SEQUENCE.
1759
                    // Basically (a)+ --> ((a),(a)*).
1760
                    //   - this
1761
                    //   - CHOICE
1762
                    //      * LOOP (clone, back to the CHOICE)
1763
                    //      * or, whatever's next
1764
                    // Size cost: N --> 2N + 1
1765
                    } else if (c == '+') {
1766
                        ChildrenRecognizer loop = deepClone ();
1767
                        ChildrenRecognizer choice;
1768
 
1769
                        choice = new ChildrenRecognizer (consumer, type);
1770
                        loop.patchNext (choice, null);
1771
                        loop.flags |= F_LOOPNEXT;
1772
                        choice.flags = F_LOOPHEAD;
1773
 
1774
                        choice.components = new Recognizer [2];
1775
                        choice.components [0] = loop;
1776
                        // choice.components [1] initted to null
1777
                        // choice.name, choice.next initted to null
1778
 
1779
                        patchNext (choice, null);
1780
                    }
1781
                }
1782
            }
1783
 
1784
            return nextPos;
1785
        }
1786
 
1787
        // VC: Element Valid (second clause)
1788
        boolean acceptCharacters ()
1789
            { return false; }
1790
 
1791
        // VC: Element Valid (second clause)
1792
        Recognizer acceptElement (String type)
1793
        throws SAXException
1794
        {
1795
            // NAME/SEQUENCE
1796
            if (name != null) {
1797
                if (name.equals (type))
1798
                    return next;
1799
                return null;
1800
            }
1801
 
1802
            // CHOICE ... optionally reporting nondeterminism we
1803
            // run across.  we won't check out every transition
1804
            // for nondeterminism; only the ones we follow.
1805
            Recognizer  retval = null;
1806
 
1807
            for (int i = 0; i < components.length; i++) {
1808
                Recognizer temp = components [i].acceptElement (type);
1809
 
1810
                if (temp == null)
1811
                    continue;
1812
                else if (!warnNonDeterministic)
1813
                    return temp;
1814
                else if (retval == null)
1815
                    retval = temp;
1816
                else if (retval != temp)
1817
                    consumer.error ("Content model " + this.type.model
1818
                        + " is non-deterministic for " + type);
1819
            }
1820
            return retval;
1821
        }
1822
 
1823
        // VC: Element Valid (second clause)
1824
        boolean completed ()
1825
        throws SAXException
1826
        {
1827
            // expecting a specific element
1828
            if (name != null)
1829
                return false;
1830
 
1831
            // choice, some sequences
1832
            for (int i = 0; i < components.length; i++) {
1833
                if (components [i].completed ())
1834
                    return true;
1835
            }
1836
 
1837
            return false;
1838
        }
1839
 
1840
/** /
1841
        // FOR DEBUGGING ... flattens the graph for printing.
1842
 
1843
        public String toString ()
1844
        {
1845
            StringBuffer buf = new StringBuffer ();
1846
 
1847
            // only one set of loop labels can be generated
1848
            // at a time...
1849
            synchronized (ANY) {
1850
                nodeCount = 0;
1851
 
1852
                toString (buf, new Hashtable ());
1853
                return buf.toString ();
1854
            }
1855
        }
1856
 
1857
        private void toString (StringBuffer buf, Hashtable table)
1858
        {
1859
            // When we visit a node, label and count it.
1860
            // Nodes are never visited/counted more than once.
1861
            // For small models labels waste space, but if arity
1862
            // mappings were used the savings are substantial.
1863
            // (Plus, the output can be more readily understood.)
1864
            String temp = (String) table.get (this);
1865
 
1866
            if (temp != null) {
1867
                buf.append ('{');
1868
                buf.append (temp);
1869
                buf.append ('}');
1870
                return;
1871
            } else {
1872
                StringBuffer scratch = new StringBuffer (15);
1873
 
1874
                if ((flags & F_LOOPHEAD) != 0)
1875
                    scratch.append ("loop");
1876
                else
1877
                    scratch.append ("node");
1878
                scratch.append ('-');
1879
                scratch.append (++nodeCount);
1880
                temp = scratch.toString ();
1881
 
1882
                table.put (this, temp);
1883
                buf.append ('[');
1884
                buf.append (temp);
1885
                buf.append (']');
1886
                buf.append (':');
1887
            }
1888
 
1889
            // NAME/SEQUENCE
1890
            if (name != null) {
1891
                // n.b. some output encodings turn some name chars into '?'
1892
                // e.g. with Japanese names and ASCII output
1893
                buf.append (name);
1894
                if (components != null)         // bug!
1895
                    buf.append ('$');
1896
                if (next == null)
1897
                    buf.append (",*");
1898
                else if (next instanceof EmptyRecognizer) // patch-to-next
1899
                    buf.append (",{}");
1900
                else if (next instanceof ChildrenRecognizer) {
1901
                    buf.append (',');
1902
                    ((ChildrenRecognizer)next).toString (buf, table);
1903
                } else                          // bug!
1904
                    buf.append (",+");
1905
                return;
1906
            }
1907
 
1908
            // CHOICE
1909
            buf.append ("<");
1910
            for (int i = 0; i < components.length; i++) {
1911
                if (i != 0)
1912
                    buf.append ("|");
1913
                if (components [i] instanceof EmptyRecognizer) {
1914
                    buf.append ("{}");
1915
                } else if (components [i] == null) {    // patch-to-next
1916
                    buf.append ('*');
1917
                } else {
1918
                    ChildrenRecognizer r;
1919
 
1920
                    r = (ChildrenRecognizer) components [i];
1921
                    r.toString (buf, table);
1922
                }
1923
            }
1924
            buf.append (">");
1925
        }
1926
/**/
1927
    }
1928
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.