OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [gnu/] [xml/] [util/] [XMLWriter.java] - Blame information for rev 769

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 769 jeremybenn
/* XMLWriter.java --
2
   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
package gnu.xml.util;
39
 
40
import gnu.java.lang.CPStringBuilder;
41
 
42
import java.io.BufferedWriter;
43
import java.io.CharConversionException;
44
import java.io.IOException;
45
import java.io.OutputStream;
46
import java.io.OutputStreamWriter;
47
import java.io.Writer;
48
import java.util.Stack;
49
 
50
import org.xml.sax.*;
51
import org.xml.sax.ext.*;
52
import org.xml.sax.helpers.*;
53
 
54
 
55
/**
56
 * This class is a SAX handler which writes all its input as a well formed
57
 * XML or XHTML document.  If driven using SAX2 events, this output may
58
 * include a recreated document type declaration, subject to limitations
59
 * of SAX (no internal subset exposed) or DOM (the important declarations,
60
 * with their documentation, are discarded).
61
 *
62
 * <p> By default, text is generated "as-is", but some optional modes
63
 * are supported.  Pretty-printing is supported, to make life easier
64
 * for people reading the output.  XHTML (1.0) output can be made
65
 * particularly pretty; all the built-in character entities are known.
66
 * Canonical XML can also be generated, assuming the input is properly
67
 * formed.
68
 *
69
 * <hr>
70
 *
71
 * <p> Some of the methods on this class are intended for applications to
72
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
73
 * methods access the JavaBeans properties (used to tweak output formats,
74
 * for example canonicalization and pretty printing).  Subclasses
75
 * are expected to add new behaviors, not to modify current behavior, so
76
 * many such methods are final.</p>
77
 *
78
 * <p> The <em>write*()</em> methods may be slightly simpler for some
79
 * applications to use than direct callbacks.  For example, they support
80
 * a simple policy for encoding data items as the content of a single element.
81
 *
82
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
83
 * this handler closes the writer it was given as part of its endDocument()
84
 * handling.  (XML documents have an end of input, and the way to encode
85
 * that on a stream is to close it.) </p>
86
 *
87
 * <hr>
88
 *
89
 * <p> Note that any relative URIs in the source document, as found in
90
 * entity and notation declarations, ought to have been fully resolved by
91
 * the parser providing events to this handler.  This means that the
92
 * output text should only have fully resolved URIs, which may not be
93
 * the desired behavior in cases where later binding is desired. </p>
94
 *
95
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
96
 * ensure that the input events are XML-conformant with respect to namespace
97
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
98
 * one solution to this problem, in the context of processing pipelines.</em>
99
 * Something as simple as connecting this handler to a parser might not
100
 * generate the correct output.  Another workaround is to ensure that the
101
 * <em>namespace-prefixes</em> feature is always set to true, if you're
102
 * hooking this directly up to some XMLReader implementation.
103
 *
104
 * @see gnu.xml.pipeline.TextConsumer
105
 *
106
 * @author David Brownell
107
 *
108
 * @deprecated Please use the javax.xml.stream APIs instead
109
 */
110
public class XMLWriter
111
    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
112
{
113
    // text prints/escapes differently depending on context
114
    //  CTX_ENTITY ... entity literal value
115
    //  CTX_ATTRIBUTE ... attribute literal value
116
    //  CTX_CONTENT ... content of an element
117
    //  CTX_UNPARSED ... CDATA, comment, PI, names, etc
118
    //  CTX_NAME ... name or nmtoken, no escapes possible
119
    private static final int    CTX_ENTITY = 1;
120
    private static final int    CTX_ATTRIBUTE = 2;
121
    private static final int    CTX_CONTENT = 3;
122
    private static final int    CTX_UNPARSED = 4;
123
    private static final int    CTX_NAME = 5;
124
 
125
// FIXME: names (element, attribute, PI, notation, etc) are not
126
// currently written out with range checks (escapeChars).
127
// In non-XHTML, some names can't be directly written; panic!
128
 
129
    // Line separator used whenever the application has not chosen one.
    private static String       sysEOL;

    static {
        String platformEOL;

        try {
            platformEOL = System.getProperty ("line.separator", "\n");
        } catch (SecurityException e) {
            // not allowed to read system properties; use a plain newline
            platformEOL = "\n";
        }

        // don't use the system's EOL if it's illegal XML.
        sysEOL = isLineEnd (platformEOL) ? platformEOL : "\n";
    }
143
 
144
    private static boolean isLineEnd (String eol)
145
    {
146
        return "\n".equals (eol)
147
                    || "\r".equals (eol)
148
                    || "\r\n".equals (eol);
149
    }
150
 
151
    private Writer              out;
152
    private boolean             inCDATA;
153
    private int                 elementNestLevel;
154
    private String              eol = sysEOL;
155
 
156
    private short               dangerMask;
157
    private CPStringBuilder     stringBuf;
158
    private Locator             locator;
159
    private ErrorHandler        errHandler;
160
 
161
    private boolean             expandingEntities = false;
162
    private int                 entityNestLevel;
163
    private boolean             xhtml;
164
    private boolean             startedDoctype;
165
    private String              encoding;
166
 
167
    private boolean             canonical;
168
    private boolean             inDoctype;
169
    private boolean             inEpilogue;
170
 
171
    // pretty printing controls
172
    private boolean             prettyPrinting;
173
    private int                 column;
174
    private boolean             noWrap;
175
    private Stack               space = new Stack ();
176
 
177
    // this is not a hard'n'fast rule -- longer lines are OK,
178
    // but are to be avoided.  Here, prettyprinting is more to
179
    // show structure "cleanly" than to be precise about it.
180
    // better to have ragged layout than one line 24Kb long.
181
    private static final int    lineLength = 75;
182
 
183
 
184
    /**
     * Creates a handler that writes its SAX events to System.out,
     * encoded as UTF-8.  Since the output is closed when the document
     * ends, use this only when closing System.out is acceptable.
     */
    public XMLWriter () throws IOException
    {
        this (System.out);
    }
191
 
192
    /**
     * Creates a handler that writes everything it is given to the
     * output stream, encoded as UTF-8; the stream is closed when
     * endDocument is called.  (The exception is declared only because
     * some JDK, somewhere, might conceivably be unable to emit UTF-8.)
     *
     * @param out the stream which receives the UTF-8 encoded XML text.
     */
    public XMLWriter (OutputStream out) throws IOException
    {
        // "UTF8" is the Java-specific name for the UTF-8 encoding
        this (new OutputStreamWriter (out, "UTF8"));
    }
203
 
204
    /**
     * Creates a handler that writes everything it is given to the
     * writer, closing that writer when the document ends.  When an XML
     * declaration is written and the name of the writer's character
     * encoding can be determined, the declaration includes that name.
     *
     * <P> See the description of the constructor which takes an encoding
     * name for important information about selection of encodings.
     *
     * @param writer XML text is written to this writer.
     */
    public XMLWriter (Writer writer)
    {
        // no explicit encoding name: it is inferred from the writer if possible
        this (writer, null);
    }
220
 
221
    /**
222
     * Constructs a handler which writes all input to the writer, and then
223
     * closes the writer when the document ends.  If an XML declaration is
224
     * written onto the output, this class will use the specified encoding
225
     * name in that declaration.  If no encoding name is specified, no
226
     * encoding name will be declared unless this class can otherwise
227
     * determine the name of the character encoding for this writer.
228
     *
229
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
230
     * output encodings are fully lossless with respect to XML data.  If you
231
     * use any other encoding you risk having your data be silently mangled
232
     * on output, as the standard Java character encoding subsystem silently
233
     * maps non-encodable characters to a question mark ("?") and will not
234
     * report such errors to applications.
235
     *
236
     * <p> For a few other encodings the risk can be reduced. If the writer is
237
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
238
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
239
     * can't be encoded in those encodings will be written safely.  Where
240
     * relevant, the XHTML entity names will be used; otherwise, numeric
241
     * character references will be emitted.
242
     *
243
     * <P> However, there remain a number of cases where substituting such
244
     * entity or character references is not an option.  Such references are
245
     * not usable within a DTD, comment, PI, or CDATA section.  Neither may
246
     * they be used when element, attribute, entity, or notation names have
247
     * the problematic characters.
248
     *
249
     * @param writer XML text is written to this writer.
250
     * @param encoding if non-null, and an XML declaration is written,
251
     *  this is the name that will be used for the character encoding.
252
     */
253
    public XMLWriter (Writer writer, String encoding)
    {
        // all the setup is shared with handlers that get reused
        this.setWriter (writer, encoding);
    }
257
 
258
    private void setEncoding (String encoding)
259
    {
260
        if (encoding == null && out instanceof OutputStreamWriter)
261
            encoding = ((OutputStreamWriter)out).getEncoding ();
262
 
263
        if (encoding != null) {
264
            encoding = encoding.toUpperCase ();
265
 
266
            // Use official encoding names where we know them,
267
            // avoiding the Java-only names.  When using common
268
            // encodings where we can easily tell if characters
269
            // are out of range, we'll escape out-of-range
270
            // characters using character refs for safety.
271
 
272
            // I _think_ these are all the main synonyms for these!
273
            if ("UTF8".equals (encoding)) {
274
                encoding = "UTF-8";
275
            } else if ("US-ASCII".equals (encoding)
276
                    || "ASCII".equals (encoding)) {
277
                dangerMask = (short) 0xff80;
278
                encoding = "US-ASCII";
279
            } else if ("ISO-8859-1".equals (encoding)
280
                    || "8859_1".equals (encoding)
281
                    || "ISO8859_1".equals (encoding)) {
282
                dangerMask = (short) 0xff00;
283
                encoding = "ISO-8859-1";
284
            } else if ("UNICODE".equals (encoding)
285
                    || "UNICODE-BIG".equals (encoding)
286
                    || "UNICODE-LITTLE".equals (encoding)) {
287
                encoding = "UTF-16";
288
 
289
                // TODO: UTF-16BE, UTF-16LE ... no BOM; what
290
                // release of JDK supports those Unicode names?
291
            }
292
 
293
            if (dangerMask != 0)
294
                stringBuf = new CPStringBuilder ();
295
        }
296
 
297
        this.encoding = encoding;
298
    }
299
 
300
 
301
    /**
302
     * Resets the handler to write a new text document.
303
     *
304
     * @param writer XML text is written to this writer.
305
     * @param encoding if non-null, and an XML declaration is written,
306
     *  this is the name that will be used for the character encoding.
307
     *
308
     * @exception IllegalStateException if the current
309
     *  document hasn't yet ended (with {@link #endDocument})
310
     */
311
    final public void setWriter (Writer writer, String encoding)
312
    {
313
        if (out != null)
314
            throw new IllegalStateException (
315
                "can't change stream in mid course");
316
        out = writer;
317
        if (out != null)
318
            setEncoding (encoding);
319
        if (!(out instanceof BufferedWriter))
320
            out = new BufferedWriter (out);
321
        space.push ("default");
322
    }
323
 
324
    /**
325
     * Assigns the line ending style to be used on output.
326
     * @param eolString null to use the system default; else
327
     *  "\n", "\r", or "\r\n".
328
     */
329
    final public void setEOL (String eolString)
330
    {
331
        if (eolString == null)
332
            eol = sysEOL;
333
        else if (!isLineEnd (eolString))
334
            eol = eolString;
335
        else
336
            throw new IllegalArgumentException (eolString);
337
    }
338
 
339
    /**
340
     * Assigns the error handler to be used to present most fatal
341
     * errors.
342
     */
343
    public void setErrorHandler (ErrorHandler handler)
344
    {
345
        errHandler = handler;
346
    }
347
 
348
    /**
349
     * Used internally and by subclasses, this encapsulates the logic
350
     * involved in reporting fatal errors.  It uses locator information
351
     * for good diagnostics, if available, and gives the application's
352
     * ErrorHandler the opportunity to handle the error before throwing
353
     * an exception.
354
     */
355
    protected void fatal (String message, Exception e)
356
    throws SAXException
357
    {
358
        SAXParseException       x;
359
 
360
        if (locator == null)
361
            x = new SAXParseException (message, null, null, -1, -1, e);
362
        else
363
            x = new SAXParseException (message, locator, e);
364
        if (errHandler != null)
365
            errHandler.fatalError (x);
366
        throw x;
367
    }
368
 
369
 
370
    // JavaBeans properties
371
 
372
    /**
373
     * Controls whether the output should attempt to follow the "transitional"
374
     * XHTML rules so that it meets the "HTML Compatibility Guidelines"
375
     * appendix in the XHTML specification.  A "transitional" Document Type
376
     * Declaration (DTD) is placed near the beginning of the output document,
377
     * instead of whatever DTD would otherwise have been placed there, and
378
     * XHTML empty elements are printed specially.  When writing text in
379
     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
380
     * entity names are used (in preference to character references) when
381
     * writing content characters which can't be expressed in those encodings.
382
     *
383
     * <p> When this option is enabled, it is the caller's responsibility
384
     * to ensure that the input is otherwise valid as XHTML.  Things to
385
     * be careful of in all cases, as described in the appendix referenced
386
     * above, include:  <ul>
387
     *
388
     *  <li> Element and attribute names must be in lower case, both
389
     *          in the document and in any CSS style sheet.
390
     *  <li> All XML constructs must be valid as defined by the XHTML
391
     *          "transitional" DTD (including all familiar constructs,
392
     *          even deprecated ones).
393
     *  <li> The root element must be "html".
394
     *  <li> Elements that must be empty (such as <em>&lt;br&gt;</em>
395
     *          must have no content.
396
     *  <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
397
     *          when specifying language.
398
     *  <li> Similarly, use both <em>id</em> and <em>name</em> attributes
399
     *          when defining elements that may be referred to through
400
     *          URI fragment identifiers ... and make sure that the
401
     *          value is a legal NMTOKEN, since not all such HTML 4.0
402
     *          identifiers are valid in XML.
403
     *  <li> Be careful with character encodings; make sure you provide
404
     *          a <em>&lt;meta http-equiv="Content-type"
405
     *          content="text/xml;charset=..." /&gt;</em> element in
406
     *          the HTML "head" element, naming the same encoding
407
     *          used to create this handler.  Also, if that encoding
408
     *          is anything other than US-ASCII, make sure that if
409
     *          the document is given a MIME content type, it has
410
     *          a <em>charset=...</em> attribute with that encoding.
411
     *  </ul>
412
     *
413
     * <p> Additionally, some of the oldest browsers have additional
414
     * quirks, to address with guidelines such as: <ul>
415
     *
416
     *  <li> Processing instructions may be rendered, so avoid them.
417
     *          (Similarly for an XML declaration.)
418
     *  <li> Embedded style sheets and scripts should not contain XML
419
     *          markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
420
     *  <li> Attribute values should not have line breaks or multiple
421
     *          consecutive white space characters.
422
     *  <li> Use no more than one of the deprecated (transitional)
423
     *          <em>&lt;isindex&gt;</em> elements.
424
     *  <li> Some boolean attributes (such as <em>compact, checked,
425
     *          disabled, readonly, selected,</em> and more) confuse
426
     *          some browsers, since they only understand minimized
427
     *          versions which are illegal in XML.
428
     *  </ul>
429
     *
430
     * <p> Also, some characteristics of the resulting output may be
431
     * a function of whether the document is later given a MIME
432
     * content type of <em>text/html</em> rather than one indicating
433
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
434
     * some browsers ignore MIME content types and prefer to rely on URI
435
     * name suffixes -- so an "index.xml" could always be XML, never
436
     * XHTML, no matter its MIME type.
437
     */
438
    final public void setXhtml (boolean value)
439
    {
440
        if (locator != null)
441
            throw new IllegalStateException ("started parsing");
442
        xhtml = value;
443
        if (xhtml)
444
            canonical = false;
445
    }
446
 
447
    /**
448
     * Returns true if the output attempts to echo the input following
449
     * "transitional" XHTML rules and matching the "HTML Compatibility
450
     * Guidelines" so that an HTML version 3 browser can read the output
451
     * as HTML; returns false (the default) othewise.
452
     */
453
    final public boolean isXhtml ()
454
    {
455
        return xhtml;
456
    }
457
 
458
    /**
459
     * Controls whether the output text contains references to
460
     * entities (the default), or instead contains the expanded
461
     * values of those entities.
462
     */
463
    final public void setExpandingEntities (boolean value)
464
    {
465
        if (locator != null)
466
            throw new IllegalStateException ("started parsing");
467
        expandingEntities = value;
468
        if (!expandingEntities)
469
            canonical = false;
470
    }
471
 
472
    /**
473
     * Returns true if the output will have no entity references;
474
     * returns false (the default) otherwise.
475
     */
476
    final public boolean isExpandingEntities ()
477
    {
478
        return expandingEntities;
479
    }
480
 
481
    /**
482
     * Controls pretty-printing, which by default is not enabled
483
     * (and currently is most useful for XHTML output).
484
     * Pretty printing enables structural indentation, sorting of attributes
485
     * by name, line wrapping, and potentially other mechanisms for making
486
     * output more or less readable.
487
     *
488
     * <p> At this writing, structural indentation and line wrapping are
489
     * enabled when pretty printing is enabled and the <em>xml:space</em>
490
     * attribute has the value <em>default</em> (its other legal value is
491
     * <em>preserve</em>, as defined in the XML specification).  The three
492
     * XHTML element types which use another value are recognized by their
493
     * names (namespaces are ignored).
494
     *
495
     * <p> Also, for the record, the "pretty" aspect of printing here
496
     * is more to provide basic structure on outputs that would otherwise
497
     * risk being a single long line of text.  For now, expect the
498
     * structure to be ragged ... unless you'd like to submit a patch
499
     * to make this be more strictly formatted!
500
     *
501
     * @exception IllegalStateException thrown if this method is invoked
502
     *  after output has begun.
503
     */
504
    final public void setPrettyPrinting (boolean value)
505
    {
506
        if (locator != null)
507
            throw new IllegalStateException ("started parsing");
508
        prettyPrinting = value;
509
        if (prettyPrinting)
510
            canonical = false;
511
    }
512
 
513
    /**
514
     * Returns value of flag controlling pretty printing.
515
     */
516
    final public boolean isPrettyPrinting ()
517
    {
518
        return prettyPrinting;
519
    }
520
 
521
 
522
    /**
523
     * Sets the output style to be canonicalized.  Input events must
524
     * meet requirements that are slightly more stringent than the
525
     * basic well-formedness ones, and include:  <ul>
526
     *
527
     *  <li> Namespace prefixes must not have been changed from those
528
     *  in the original document.  (This may only be ensured by setting
529
     *  the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
530
     *  by default, it is cleared.)
531
     *
532
     *  <li> Redundant namespace declaration attributes have been
533
     *  removed.  (If an ancestor element defines a namespace prefix
534
     *  and that declaration hasn't been overridden, an element must
535
     *  not redeclare it.)
536
     *
537
     *  <li> If comments are not to be included in the canonical output,
538
     *  they must first be removed from the input event stream; this is
539
     *  <em>Canonical XML with comments</em> by default.
540
     *
541
     *  <li> If the input character encoding was not UCS-based, the
542
     *  character data must have been normalized using Unicode
543
     *  Normalization Form C.  (UTF-8 and UTF-16 are UCS-based.)
544
     *
545
     *  <li> Attribute values must have been normalized, as is done
546
     *  by any conformant XML processor which processes all external
547
     *  parameter entities.
548
     *
549
     *  <li> Similarly, attribute value defaulting has been performed.
550
     *
551
     *  </ul>
552
     *
553
     * <p> Note that fragments of XML documents, as specified by an XPath
554
     * node set, may be canonicalized.  In such cases, elements may need
555
     * some fixup (for <em>xml:*</em> attributes and application-specific
556
     * context).
557
     *
558
     * @exception IllegalArgumentException if the output encoding
559
     *  is anything other than UTF-8.
560
     */
561
    final public void setCanonical (boolean value)
562
    {
563
        if (value && !"UTF-8".equals (encoding))
564
            throw new IllegalArgumentException ("encoding != UTF-8");
565
        canonical = value;
566
        if (canonical) {
567
            prettyPrinting = xhtml = false;
568
            expandingEntities = true;
569
            eol = "\n";
570
        }
571
    }
572
 
573
 
574
    /**
575
     * Returns value of flag controlling canonical output.
576
     */
577
    final public boolean isCanonical ()
578
    {
579
        return canonical;
580
    }
581
 
582
 
583
    /**
584
     * Flushes the output stream.  When this handler is used in long lived
585
     * pipelines, it can be important to flush buffered state, for example
586
     * so that it can reach the disk as part of a state checkpoint.
587
     */
588
    final public void flush ()
589
    throws IOException
590
    {
591
        if (out != null)
592
            out.flush ();
593
    }
594
 
595
 
596
    // convenience routines
597
 
598
// FIXME:  probably want a subclass that holds a lot of these...
599
// and maybe more!
600
 
601
    /**
602
     * Writes the string as if characters() had been called on the contents
603
     * of the string.  This is particularly useful when applications act as
604
     * producers and write data directly to event consumers.
605
     */
606
    final public void write (String data)
607
    throws SAXException
608
    {
609
        char    buf [] = data.toCharArray ();
610
        characters (buf, 0, buf.length);
611
    }
612
 
613
 
614
    /**
615
     * Writes an element that has content consisting of a single string.
616
     * @see #writeEmptyElement
617
     * @see #startElement
618
     */
619
    public void writeElement (
620
        String uri,
621
        String localName,
622
        String qName,
623
        Attributes atts,
624
        String content
625
    ) throws SAXException
626
    {
627
        if (content == null || content.length () == 0) {
628
            writeEmptyElement (uri, localName, qName, atts);
629
            return;
630
        }
631
        startElement (uri, localName, qName, atts);
632
        char chars [] = content.toCharArray ();
633
        characters (chars, 0, chars.length);
634
        endElement (uri, localName, qName);
635
    }
636
 
637
 
638
    /**
639
     * Writes an element that has content consisting of a single integer,
640
     * encoded as a decimal string.
641
     * @see #writeEmptyElement
642
     * @see #startElement
643
     */
644
    public void writeElement (
645
        String uri,
646
        String localName,
647
        String qName,
648
        Attributes atts,
649
        int content
650
    ) throws SAXException
651
    {
652
        writeElement (uri, localName, qName, atts, Integer.toString (content));
653
    }
654
 
655
 
656
    // SAX1 ContentHandler
657
    /** <b>SAX1</b>:  provides parser status information */
658
    final public void setDocumentLocator (Locator l)
659
    {
660
        locator = l;
661
    }
662
 
663
 
664
    // URL for dtd that validates against all normal HTML constructs
665
    private static final String xhtmlFullDTD =
666
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
667
 
668
 
669
    /**
670
     * <b>SAX1</b>:  indicates the beginning of a document parse.
671
     * If you're writing (well formed) fragments of XML, neither
672
     * this nor endDocument should be called.
673
     */
674
    // NOT final
675
    public void startDocument ()
676
    throws SAXException
677
    {
678
        try {
679
            if (out == null)
680
                throw new IllegalStateException (
681
                    "null Writer given to XMLWriter");
682
 
683
            // Not all parsers provide the locator we want; this also
684
            // flags whether events are being sent to this object yet.
685
            // We could only have this one call if we only printed whole
686
            // documents ... but we also print fragments, so most of the
687
            // callbacks here replicate this test.
688
 
689
            if (locator == null)
690
                locator = new LocatorImpl ();
691
 
692
            // Unless the data is in US-ASCII or we're canonicalizing, write
693
            // the XML declaration if we know the encoding.  US-ASCII won't
694
            // normally get mangled by web server confusion about the
695
            // character encodings used.  Plus, it's an easy way to
696
            // ensure we can write ASCII that's unlikely to confuse
697
            // elderly HTML parsers.
698
 
699
            if (!canonical
700
                    && dangerMask != (short) 0xff80
701
                    && encoding != null) {
702
                rawWrite ("<?xml version='1.0'");
703
                rawWrite (" encoding='" + encoding + "'");
704
                rawWrite ("?>");
705
                newline ();
706
            }
707
 
708
            if (xhtml) {
709
 
710
                rawWrite ("<!DOCTYPE html PUBLIC");
711
                newline ();
712
                rawWrite ("  '-//W3C//DTD XHTML 1.0 Transitional//EN'");
713
                newline ();
714
                rawWrite ("  '");
715
                    // NOTE:  URL (above) matches the REC
716
                rawWrite (xhtmlFullDTD);
717
                rawWrite ("'>");
718
                newline ();
719
                newline ();
720
 
721
                // fake the rest of the handler into ignoring
722
                // everything until the root element, so any
723
                // XHTML DTD comments, PIs, etc are ignored
724
                startedDoctype = true;
725
            }
726
 
727
            entityNestLevel = 0;
728
 
729
        } catch (IOException e) {
730
            fatal ("can't write", e);
731
        }
732
    }
733
 
734
    /**
735
     * <b>SAX1</b>:  indicates the completion of a parse.
736
     * Note that all complete SAX event streams make this call, even
737
     * if an error is reported during a parse.
738
     */
739
    // NOT final
740
    public void endDocument ()
741
    throws SAXException
742
    {
743
        try {
744
            if (!canonical) {
745
                newline ();
746
                newline ();
747
            }
748
            out.close ();
749
            out = null;
750
            locator = null;
751
        } catch (IOException e) {
752
            fatal ("can't write", e);
753
        }
754
    }
755
 
756
    // XHTML elements declared as EMPTY print differently
757
    final private static boolean isEmptyElementTag (String tag)
758
    {
759
        switch (tag.charAt (0)) {
760
          case 'a':     return "area".equals (tag);
761
          case 'b':     return "base".equals (tag)
762
                            || "basefont".equals (tag)
763
                            || "br".equals (tag);
764
          case 'c':     return "col".equals (tag);
765
          case 'f':     return "frame".equals (tag);
766
          case 'h':     return "hr".equals (tag);
767
          case 'i':     return "img".equals (tag)
768
                            || "input".equals (tag)
769
                            || "isindex".equals (tag);
770
          case 'l':     return "link".equals (tag);
771
          case 'm':     return "meta".equals (tag);
772
          case 'p':     return "param".equals (tag);
773
        }
774
        return false;
775
    }
776
 
777
    private static boolean indentBefore (String tag)
778
    {
779
        // basically indent before block content
780
        // and within structure like tables, lists
781
        switch (tag.charAt (0)) {
782
          case 'a':     return "applet".equals (tag);
783
          case 'b':     return "body".equals (tag)
784
                            || "blockquote".equals (tag);
785
          case 'c':     return "center".equals (tag);
786
          case 'f':     return "frame".equals (tag)
787
                            || "frameset".equals (tag);
788
          case 'h':     return "head".equals (tag);
789
          case 'm':     return "meta".equals (tag);
790
          case 'o':     return "object".equals (tag);
791
          case 'p':     return "param".equals (tag)
792
                            || "pre".equals (tag);
793
          case 's':     return "style".equals (tag);
794
          case 't':     return "title".equals (tag)
795
                            || "td".equals (tag)
796
                            || "th".equals (tag);
797
        }
798
        // ... but not inline elements like "em", "b", "font"
799
        return false;
800
    }
801
 
802
    private static boolean spaceBefore (String tag)
803
    {
804
        // blank line AND INDENT before certain structural content
805
        switch (tag.charAt (0)) {
806
          case 'h':     return "h1".equals (tag)
807
                            || "h2".equals (tag)
808
                            || "h3".equals (tag)
809
                            || "h4".equals (tag)
810
                            || "h5".equals (tag)
811
                            || "h6".equals (tag)
812
                            || "hr".equals (tag);
813
          case 'l':     return "li".equals (tag);
814
          case 'o':     return "ol".equals (tag);
815
          case 'p':     return "p".equals (tag);
816
          case 't':     return "table".equals (tag)
817
                            || "tr".equals (tag);
818
          case 'u':     return "ul".equals (tag);
819
        }
820
        return false;
821
    }
822
 
823
    // XHTML DTDs say these three have xml:space="preserve"
824
    private static boolean spacePreserve (String tag)
825
    {
826
        return "pre".equals (tag)
827
                || "style".equals (tag)
828
                || "script".equals (tag);
829
    }
830
 
831
    /**
832
     * <b>SAX2</b>:  ignored.
833
     */
834
    final public void startPrefixMapping (String prefix, String uri)
835
        {}
836
 
837
    /**
838
     * <b>SAX2</b>:  ignored.
839
     */
840
    final public void endPrefixMapping (String prefix)
841
        {}
842
 
843
    private void writeStartTag (
844
        String name,
845
        Attributes atts,
846
        boolean isEmpty
847
    ) throws SAXException, IOException
848
    {
849
        rawWrite ('<');
850
        rawWrite (name);
851
 
852
        // write out attributes ... sorting is particularly useful
853
        // with output that's been heavily defaulted.
854
        if (atts != null && atts.getLength () != 0) {
855
 
856
            // Set up to write, with optional sorting
857
            int         indices [] = new int [atts.getLength ()];
858
 
859
            for (int i= 0; i < indices.length; i++)
860
                indices [i] = i;
861
 
862
            // optionally sort
863
 
864
// FIXME:  canon xml demands xmlns nodes go first,
865
// and sorting by URI first (empty first) then localname
866
// it should maybe use a different sort
867
 
868
            if (canonical || prettyPrinting) {
869
 
870
                // insertion sort by attribute name
871
                for (int i = 1; i < indices.length; i++) {
872
                    int n = indices [i], j;
873
                    String      s = atts.getQName (n);
874
 
875
                    for (j = i - 1; j >= 0; j--) {
876
                        if (s.compareTo (atts.getQName (indices [j]))
877
                                >= 0)
878
                            break;
879
                        indices [j + 1] = indices [j];
880
                    }
881
                    indices [j + 1] = n;
882
                }
883
            }
884
 
885
            // write, sorted or no
886
            for (int i= 0; i < indices.length; i++) {
887
                String  s = atts.getQName (indices [i]);
888
 
889
                    if (s == null || "".equals (s))
890
                        throw new IllegalArgumentException ("no XML name");
891
                rawWrite (" ");
892
                rawWrite (s);
893
                rawWrite ("=");
894
                writeQuotedValue (atts.getValue (indices [i]),
895
                    CTX_ATTRIBUTE);
896
            }
897
        }
898
        if (isEmpty)
899
            rawWrite (" /");
900
        rawWrite ('>');
901
    }
902
 
903
    /**
904
     * <b>SAX2</b>:  indicates the start of an element.
905
     * When XHTML is in use, avoid attribute values with
906
     * line breaks or multiple whitespace characters, since
907
     * not all user agents handle them correctly.
908
     */
909
    final public void startElement (
910
        String uri,
911
        String localName,
912
        String qName,
913
        Attributes atts
914
    ) throws SAXException
915
    {
916
        startedDoctype = false;
917
 
918
        if (locator == null)
919
            locator = new LocatorImpl ();
920
 
921
        if (qName == null || "".equals (qName))
922
            throw new IllegalArgumentException ("no XML name");
923
 
924
        try {
925
            if (entityNestLevel != 0)
926
                return;
927
            if (prettyPrinting) {
928
                String whitespace = null;
929
 
930
                if (xhtml && spacePreserve (qName))
931
                    whitespace = "preserve";
932
                else if (atts != null)
933
                    whitespace = atts.getValue ("xml:space");
934
                if (whitespace == null)
935
                    whitespace = (String) space.peek ();
936
                space.push (whitespace);
937
 
938
                if ("default".equals (whitespace)) {
939
                    if (xhtml) {
940
                        if (spaceBefore (qName)) {
941
                            newline ();
942
                            doIndent ();
943
                        } else if (indentBefore (qName))
944
                            doIndent ();
945
                        // else it's inlined, modulo line length
946
                        // FIXME: incrementing element nest level
947
                        // for inlined elements causes ugliness
948
                    } else
949
                        doIndent ();
950
                }
951
            }
952
            elementNestLevel++;
953
            writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
954
 
955
            if (xhtml) {
956
// FIXME: if this is an XHTML "pre" element, turn
957
// off automatic wrapping.
958
            }
959
 
960
        } catch (IOException e) {
961
            fatal ("can't write", e);
962
        }
963
    }
964
 
965
    /**
966
     * Writes an empty element.
967
     * @see #startElement
968
     */
969
    public void writeEmptyElement (
970
        String uri,
971
        String localName,
972
        String qName,
973
        Attributes atts
974
    ) throws SAXException
975
    {
976
        if (canonical) {
977
            startElement (uri, localName, qName, atts);
978
            endElement (uri, localName, qName);
979
        } else {
980
            try {
981
                writeStartTag (qName, atts, true);
982
            } catch (IOException e) {
983
                fatal ("can't write", e);
984
            }
985
        }
986
    }
987
 
988
 
989
    /** <b>SAX2</b>:  indicates the end of an element */
990
    final public void endElement (String uri, String localName, String qName)
991
    throws SAXException
992
    {
993
        if (qName == null || "".equals (qName))
994
            throw new IllegalArgumentException ("no XML name");
995
 
996
        try {
997
            elementNestLevel--;
998
            if (entityNestLevel != 0)
999
                return;
1000
            if (xhtml && isEmptyElementTag (qName))
1001
                return;
1002
            rawWrite ("</");
1003
            rawWrite (qName);
1004
            rawWrite ('>');
1005
 
1006
            if (prettyPrinting) {
1007
                if (!space.empty ())
1008
                    space.pop ();
1009
                else
1010
                    fatal ("stack discipline", null);
1011
            }
1012
            if (elementNestLevel == 0)
1013
                inEpilogue = true;
1014
 
1015
        } catch (IOException e) {
1016
            fatal ("can't write", e);
1017
        }
1018
    }
1019
 
1020
    /** <b>SAX1</b>:  reports content characters */
1021
    final public void characters (char ch [], int start, int length)
1022
    throws SAXException
1023
    {
1024
        if (locator == null)
1025
            locator = new LocatorImpl ();
1026
 
1027
        try {
1028
            if (entityNestLevel != 0)
1029
                return;
1030
            if (inCDATA) {
1031
                escapeChars (ch, start, length, CTX_UNPARSED);
1032
            } else {
1033
                escapeChars (ch, start, length, CTX_CONTENT);
1034
            }
1035
        } catch (IOException e) {
1036
            fatal ("can't write", e);
1037
        }
1038
    }
1039
 
1040
    /** <b>SAX1</b>:  reports ignorable whitespace */
1041
    final public void ignorableWhitespace (char ch [], int start, int length)
1042
    throws SAXException
1043
    {
1044
        if (locator == null)
1045
            locator = new LocatorImpl ();
1046
 
1047
        try {
1048
            if (entityNestLevel != 0)
1049
                return;
1050
            // don't forget to map NL to CRLF, CR, etc
1051
            escapeChars (ch, start, length, CTX_CONTENT);
1052
        } catch (IOException e) {
1053
            fatal ("can't write", e);
1054
        }
1055
    }
1056
 
1057
    /**
1058
     * <b>SAX1</b>:  reports a PI.
1059
     * This doesn't check for illegal target names, such as "xml" or "XML",
1060
     * or namespace-incompatible ones like "big:dog"; the caller is
1061
     * responsible for ensuring those names are legal.
1062
     */
1063
    final public void processingInstruction (String target, String data)
1064
    throws SAXException
1065
    {
1066
        if (locator == null)
1067
            locator = new LocatorImpl ();
1068
 
1069
        // don't print internal subset for XHTML
1070
        if (xhtml && startedDoctype)
1071
            return;
1072
 
1073
        // ancient HTML browsers might render these ... their loss.
1074
        // to prevent:  "if (xhtml) return;".
1075
 
1076
        try {
1077
            if (entityNestLevel != 0)
1078
                return;
1079
            if (canonical && inEpilogue)
1080
                newline ();
1081
            rawWrite ("<?");
1082
            rawWrite (target);
1083
            rawWrite (' ');
1084
            escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
1085
            rawWrite ("?>");
1086
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
1087
                newline ();
1088
        } catch (IOException e) {
1089
            fatal ("can't write", e);
1090
        }
1091
    }
1092
 
1093
    /** <b>SAX1</b>: indicates a non-expanded entity reference */
1094
    public void skippedEntity (String name)
1095
    throws SAXException
1096
    {
1097
        try {
1098
            rawWrite ("&");
1099
            rawWrite (name);
1100
            rawWrite (";");
1101
        } catch (IOException e) {
1102
            fatal ("can't write", e);
1103
        }
1104
    }
1105
 
1106
    // SAX2 LexicalHandler
1107
 
1108
    /** <b>SAX2</b>:  called before parsing CDATA characters */
1109
    final public void startCDATA ()
1110
    throws SAXException
1111
    {
1112
        if (locator == null)
1113
            locator = new LocatorImpl ();
1114
 
1115
        if (canonical)
1116
            return;
1117
 
1118
        try {
1119
            inCDATA = true;
1120
            if (entityNestLevel == 0)
1121
                rawWrite ("<![CDATA[");
1122
        } catch (IOException e) {
1123
            fatal ("can't write", e);
1124
        }
1125
    }
1126
 
1127
    /** <b>SAX2</b>:  called after parsing CDATA characters */
1128
    final public void endCDATA ()
1129
    throws SAXException
1130
    {
1131
        if (canonical)
1132
            return;
1133
 
1134
        try {
1135
            inCDATA = false;
1136
            if (entityNestLevel == 0)
1137
                rawWrite ("]]>");
1138
        } catch (IOException e) {
1139
            fatal ("can't write", e);
1140
        }
1141
    }
1142
 
1143
    /**
1144
     * <b>SAX2</b>:  called when the doctype is partially parsed
1145
     * Note that this, like other doctype related calls, is ignored
1146
     * when XHTML is in use.
1147
     */
1148
    final public void startDTD (String name, String publicId, String systemId)
1149
    throws SAXException
1150
    {
1151
        if (locator == null)
1152
            locator = new LocatorImpl ();
1153
        if (xhtml)
1154
            return;
1155
        try {
1156
            inDoctype = startedDoctype = true;
1157
            if (canonical)
1158
                return;
1159
            rawWrite ("<!DOCTYPE ");
1160
            rawWrite (name);
1161
            rawWrite (' ');
1162
 
1163
            if (!expandingEntities) {
1164
                if (publicId != null)
1165
                    rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
1166
                else if (systemId != null)
1167
                    rawWrite ("SYSTEM '" + systemId + "' ");
1168
            }
1169
 
1170
            rawWrite ('[');
1171
            newline ();
1172
        } catch (IOException e) {
1173
            fatal ("can't write", e);
1174
        }
1175
    }
1176
 
1177
    /** <b>SAX2</b>:  called after the doctype is parsed */
1178
    final public void endDTD ()
1179
    throws SAXException
1180
    {
1181
        inDoctype = false;
1182
        if (canonical || xhtml)
1183
            return;
1184
        try {
1185
            rawWrite ("]>");
1186
            newline ();
1187
        } catch (IOException e) {
1188
            fatal ("can't write", e);
1189
        }
1190
    }
1191
 
1192
    /**
1193
     * <b>SAX2</b>:  called before parsing a general entity in content
1194
     */
1195
    final public void startEntity (String name)
1196
    throws SAXException
1197
    {
1198
        try {
1199
            boolean     writeEOL = true;
1200
 
1201
            // Predefined XHTML entities (for characters) will get
1202
            // mapped back later.
1203
            if (xhtml || expandingEntities)
1204
                return;
1205
 
1206
            entityNestLevel++;
1207
            if (name.equals ("[dtd]"))
1208
                return;
1209
            if (entityNestLevel != 1)
1210
                return;
1211
            if (!name.startsWith ("%")) {
1212
                writeEOL = false;
1213
                rawWrite ('&');
1214
            }
1215
            rawWrite (name);
1216
            rawWrite (';');
1217
            if (writeEOL)
1218
                newline ();
1219
        } catch (IOException e) {
1220
            fatal ("can't write", e);
1221
        }
1222
    }
1223
 
1224
    /**
1225
     * <b>SAX2</b>:  called after parsing a general entity in content
1226
     */
1227
    final public void endEntity (String name)
1228
    throws SAXException
1229
    {
1230
        if (xhtml || expandingEntities)
1231
            return;
1232
        entityNestLevel--;
1233
    }
1234
 
1235
    /**
1236
     * <b>SAX2</b>:  called when comments are parsed.
1237
     * When XHTML is used, the old HTML tradition of using comments
1238
     * to for inline CSS, or for JavaScript code is  discouraged.
1239
     * This is because XML processors are encouraged to discard, on
1240
     * the grounds that comments are for users (and perhaps text
1241
     * editors) not programs.  Instead, use external scripts
1242
     */
1243
    final public void comment (char ch [], int start, int length)
1244
    throws SAXException
1245
    {
1246
        if (locator == null)
1247
            locator = new LocatorImpl ();
1248
 
1249
        // don't print internal subset for XHTML
1250
        if (xhtml && startedDoctype)
1251
            return;
1252
        // don't print comment in doctype for canon xml
1253
        if (canonical && inDoctype)
1254
            return;
1255
 
1256
        try {
1257
            boolean indent;
1258
 
1259
            if (prettyPrinting && space.empty ())
1260
                fatal ("stack discipline", null);
1261
            indent = prettyPrinting && "default".equals (space.peek ());
1262
            if (entityNestLevel != 0)
1263
                return;
1264
            if (indent)
1265
                doIndent ();
1266
            if (canonical && inEpilogue)
1267
                newline ();
1268
            rawWrite ("<!--");
1269
            escapeChars (ch, start, length, CTX_UNPARSED);
1270
            rawWrite ("-->");
1271
            if (indent)
1272
                doIndent ();
1273
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
1274
                newline ();
1275
        } catch (IOException e) {
1276
            fatal ("can't write", e);
1277
        }
1278
    }
1279
 
1280
    // SAX1 DTDHandler
1281
 
1282
    /** <b>SAX1</b>:  called on notation declarations */
1283
    final public void notationDecl (String name,
1284
        String publicId, String systemId)
1285
    throws SAXException
1286
    {
1287
        if (xhtml)
1288
            return;
1289
        try {
1290
            // At this time, only SAX2 callbacks start these.
1291
            if (!startedDoctype)
1292
                return;
1293
 
1294
            if (entityNestLevel != 0)
1295
                return;
1296
            rawWrite ("<!NOTATION " + name + " ");
1297
            if (publicId != null)
1298
                rawWrite ("PUBLIC \"" + publicId + '"');
1299
            else
1300
                rawWrite ("SYSTEM ");
1301
            if (systemId != null)
1302
                rawWrite ('"' + systemId + '"');
1303
            rawWrite (">");
1304
            newline ();
1305
        } catch (IOException e) {
1306
            fatal ("can't write", e);
1307
        }
1308
    }
1309
 
1310
    /** <b>SAX1</b>:  called on unparsed entity declarations */
1311
    final public void unparsedEntityDecl (String name,
1312
        String publicId, String systemId,
1313
        String notationName)
1314
    throws SAXException
1315
    {
1316
        if (xhtml)
1317
            return;
1318
        try {
1319
            // At this time, only SAX2 callbacks start these.
1320
            if (!startedDoctype)  {
1321
                // FIXME: write to temporary buffer, and make the start
1322
                // of the root element write these declarations.
1323
                return;
1324
            }
1325
 
1326
            if (entityNestLevel != 0)
1327
                return;
1328
            rawWrite ("<!ENTITY " + name + " ");
1329
            if (publicId != null)
1330
                rawWrite ("PUBLIC \"" + publicId + '"');
1331
            else
1332
                rawWrite ("SYSTEM ");
1333
            rawWrite ('"' + systemId + '"');
1334
            rawWrite (" NDATA " + notationName + ">");
1335
            newline ();
1336
        } catch (IOException e) {
1337
            fatal ("can't write", e);
1338
        }
1339
    }
1340
 
1341
    // SAX2 DeclHandler
1342
 
1343
    /** <b>SAX2</b>:  called on attribute declarations */
1344
    final public void attributeDecl (String eName, String aName,
1345
            String type, String mode, String value)
1346
    throws SAXException
1347
    {
1348
        if (xhtml)
1349
            return;
1350
        try {
1351
            // At this time, only SAX2 callbacks start these.
1352
            if (!startedDoctype)
1353
                return;
1354
            if (entityNestLevel != 0)
1355
                return;
1356
            rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
1357
            rawWrite (type);
1358
            rawWrite (' ');
1359
            if (mode != null)
1360
                rawWrite (mode + ' ');
1361
            if (value != null)
1362
                writeQuotedValue (value, CTX_ATTRIBUTE);
1363
            rawWrite ('>');
1364
            newline ();
1365
        } catch (IOException e) {
1366
            fatal ("can't write", e);
1367
        }
1368
    }
1369
 
1370
    /** <b>SAX2</b>:  called on element declarations */
1371
    final public void elementDecl (String name, String model)
1372
    throws SAXException
1373
    {
1374
        if (xhtml)
1375
            return;
1376
        try {
1377
            // At this time, only SAX2 callbacks start these.
1378
            if (!startedDoctype)
1379
                return;
1380
            if (entityNestLevel != 0)
1381
                return;
1382
            rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
1383
            newline ();
1384
        } catch (IOException e) {
1385
            fatal ("can't write", e);
1386
        }
1387
    }
1388
 
1389
    /** <b>SAX2</b>:  called on external entity declarations */
1390
    final public void externalEntityDecl (
1391
        String name,
1392
        String publicId,
1393
        String systemId)
1394
    throws SAXException
1395
    {
1396
        if (xhtml)
1397
            return;
1398
        try {
1399
            // At this time, only SAX2 callbacks start these.
1400
            if (!startedDoctype)
1401
                return;
1402
            if (entityNestLevel != 0)
1403
                return;
1404
            rawWrite ("<!ENTITY ");
1405
            if (name.startsWith ("%")) {
1406
                rawWrite ("% ");
1407
                rawWrite (name.substring (1));
1408
            } else
1409
                rawWrite (name);
1410
            if (publicId != null)
1411
                rawWrite (" PUBLIC \"" + publicId + '"');
1412
            else
1413
                rawWrite (" SYSTEM ");
1414
            rawWrite ('"' + systemId + "\">");
1415
            newline ();
1416
        } catch (IOException e) {
1417
            fatal ("can't write", e);
1418
        }
1419
    }
1420
 
1421
    /** <b>SAX2</b>:  called on internal entity declarations */
1422
    final public void internalEntityDecl (String name, String value)
1423
    throws SAXException
1424
    {
1425
        if (xhtml)
1426
            return;
1427
        try {
1428
            // At this time, only SAX2 callbacks start these.
1429
            if (!startedDoctype)
1430
                return;
1431
            if (entityNestLevel != 0)
1432
                return;
1433
            rawWrite ("<!ENTITY ");
1434
            if (name.startsWith ("%")) {
1435
                rawWrite ("% ");
1436
                rawWrite (name.substring (1));
1437
            } else
1438
                rawWrite (name);
1439
            rawWrite (' ');
1440
            writeQuotedValue (value, CTX_ENTITY);
1441
            rawWrite ('>');
1442
            newline ();
1443
        } catch (IOException e) {
1444
            fatal ("can't write", e);
1445
        }
1446
    }
1447
 
1448
    private void writeQuotedValue (String value, int code)
1449
    throws SAXException, IOException
1450
    {
1451
        char    buf [] = value.toCharArray ();
1452
        int     off = 0, len = buf.length;
1453
 
1454
        // we can't add line breaks to attribute/entity/... values
1455
        noWrap = true;
1456
        rawWrite ('"');
1457
        escapeChars (buf, off, len, code);
1458
        rawWrite ('"');
1459
        noWrap = false;
1460
    }
1461
 
1462
    // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
1463
    // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
1464
    // Codes 128-159 have no assigned values.
1465
    private static final String HTMLlat1x [] = {
1466
        // 160
1467
        "nbsp", "iexcl", "cent", "pound", "curren",
1468
        "yen", "brvbar", "sect", "uml", "copy",
1469
 
1470
        // 170
1471
        "ordf", "laquo", "not", "shy", "reg",
1472
        "macr", "deg", "plusmn", "sup2", "sup3",
1473
 
1474
        // 180
1475
        "acute", "micro", "para", "middot", "cedil",
1476
        "sup1", "ordm", "raquo", "frac14", "frac12",
1477
 
1478
        // 190
1479
        "frac34", "iquest", "Agrave", "Aacute", "Acirc",
1480
        "Atilde", "Auml", "Aring", "AElig", "Ccedil",
1481
 
1482
        // 200
1483
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
1484
        "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
1485
 
1486
        // 210
1487
        "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
1488
        "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
1489
 
1490
        // 220
1491
        "Uuml", "Yacute", "THORN", "szlig", "agrave",
1492
        "aacute", "acirc", "atilde", "auml", "aring",
1493
 
1494
        // 230
1495
        "aelig", "ccedil", "egrave", "eacute", "ecirc",
1496
        "euml", "igrave", "iacute", "icirc", "iuml",
1497
 
1498
        // 240
1499
        "eth", "ntilde", "ograve", "oacute", "ocirc",
1500
        "otilde", "ouml", "divide", "oslash", "ugrave",
1501
 
1502
        // 250
1503
        "uacute", "ucirc", "uuml", "yacute", "thorn",
1504
        "yuml"
1505
    };
1506
 
1507
    // From "HTMLsymbolx.ent" ... some of the symbols that
1508
    // we can conveniently handle.  Entities for the Greek.
1509
    // alphabet (upper and lower cases) are compact.
1510
    private static final String HTMLsymbolx_GR [] = {
1511
        // 913
1512
        "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
1513
        "Zeta", "Eta", "Theta", "Iota", "Kappa",
1514
 
1515
        // 923
1516
        "Lambda", "Mu", "Nu", "Xi", "Omicron",
1517
        "Pi", "Rho", null, "Sigma", "Tau",
1518
 
1519
        // 933
1520
        "Upsilon", "Phi", "Chi", "Psi", "Omega"
1521
    };
1522
 
1523
    private static final String HTMLsymbolx_gr [] = {
1524
        // 945
1525
        "alpha", "beta", "gamma", "delta", "epsilon",
1526
        "zeta", "eta", "theta", "iota", "kappa",
1527
 
1528
        // 955
1529
        "lambda", "mu", "nu", "xi", "omicron",
1530
        "pi", "rho", "sigmaf", "sigma", "tau",
1531
 
1532
        // 965
1533
        "upsilon", "phi", "chi", "psi", "omega"
1534
    };
1535
 
1536
 
1537
    // General routine to write text and substitute predefined
1538
    // entities (XML, and a special case for XHTML) as needed.
1539
    private void escapeChars (char buf [], int off, int len, int code)
1540
    throws SAXException, IOException
1541
    {
1542
        int     first = 0;
1543
 
1544
        if (off < 0) {
1545
            off = 0;
1546
            len = buf.length;
1547
        }
1548
        for (int i = 0; i < len; i++) {
1549
            String      esc;
1550
            char        c = buf [off + i];
1551
 
1552
            switch (c) {
1553
              // Note that CTX_ATTRIBUTE isn't explicitly tested here;
1554
              // all syntax delimiters are escaped in CTX_ATTRIBUTE,
1555
              // otherwise it's similar to CTX_CONTENT
1556
 
1557
              // ampersand flags entity references; entity replacement
1558
              // text has unexpanded references, other text doesn't.
1559
              case '&':
1560
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
1561
                    continue;
1562
                esc = "amp";
1563
                break;
1564
 
1565
              // attributes and text may NOT have literal '<', but
1566
              // entities may have markup constructs
1567
              case '<':
1568
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
1569
                    continue;
1570
                esc = "lt";
1571
                break;
1572
 
1573
              // as above re markup constructs; but otherwise
1574
              // except when canonicalizing, this is for consistency
1575
              case '>':
1576
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
1577
                    continue;
1578
                esc = "gt";
1579
                break;
1580
              case '\'':
1581
                if (code == CTX_CONTENT || code == CTX_UNPARSED)
1582
                    continue;
1583
                if (canonical)
1584
                    continue;
1585
                esc = "apos";
1586
                break;
1587
 
1588
              // needed when printing quoted attribute/entity values
1589
              case '"':
1590
                if (code == CTX_CONTENT || code == CTX_UNPARSED)
1591
                    continue;
1592
                esc = "quot";
1593
                break;
1594
 
1595
              // make line ends work per host OS convention
1596
              case '\n':
1597
                esc = eol;
1598
                break;
1599
 
1600
              //
1601
              // No other characters NEED special treatment ... except
1602
              // for encoding-specific issues, like whether the character
1603
              // can really be represented in that encoding.
1604
              //
1605
              default:
1606
                //
1607
                // There are characters we can never write safely; getting
1608
                // them is an error.
1609
                //
1610
                //   (a) They're never legal in XML ... detected by range
1611
                //      checks, and (eventually) by remerging surrogate
1612
                //      pairs on output.  (Easy error for apps to prevent.)
1613
                //
1614
                //   (b) This encoding can't represent them, and we
1615
                //      can't make reference substitution (e.g. inside
1616
                //      CDATA sections, names, PI data, etc).  (Hard for
1617
                //      apps to prevent, except by using UTF-8 or UTF-16
1618
                //      as their output encoding.)
1619
                //
1620
                // We know a very little bit about what characters
1621
                // the US-ASCII and ISO-8859-1 encodings support.  For
1622
                // other encodings we can't detect the second type of
1623
                // error at all.  (Never an issue for UTF-8 or UTF-16.)
1624
                //
1625
 
1626
// FIXME:  CR in CDATA is an error; in text, turn to a char ref
1627
 
1628
// FIXME:  CR/LF/TAB in attributes should become char refs
1629
 
1630
                if ((c > 0xfffd)
1631
                        || ((c < 0x0020) && !((c == 0x0009)
1632
                                || (c == 0x000A) || (c == 0x000D)))
1633
                        || (((c & dangerMask) != 0)
1634
                            && (code == CTX_UNPARSED))) {
1635
 
1636
                    // if case (b) in CDATA, we might end the section,
1637
                    // write a reference, then restart ... possible
1638
                    // in one DOM L3 draft.
1639
 
1640
                    throw new CharConversionException (
1641
                            "Illegal or non-writable character: U+"
1642
                            + Integer.toHexString (c));
1643
                }
1644
 
1645
                //
1646
                // If the output encoding represents the character
1647
                // directly, let it do so!  Else we'll escape it.
1648
                //
1649
                if ((c & dangerMask) == 0)
1650
                    continue;
1651
                esc = null;
1652
 
1653
                // Avoid numeric refs where symbolic ones exist, as
1654
                // symbolic ones make more sense to humans reading!
1655
                if (xhtml) {
1656
                    // all the HTMLlat1x.ent entities
1657
                    // (all the "ISO-8859-1" characters)
1658
                    if (c >= 160 && c <= 255)
1659
                        esc = HTMLlat1x [c - 160];
1660
 
1661
                    // not quite half the HTMLsymbolx.ent entities
1662
                    else if (c >= 913 && c <= 937)
1663
                        esc = HTMLsymbolx_GR [c - 913];
1664
                    else if (c >= 945 && c <= 969)
1665
                        esc = HTMLsymbolx_gr [c - 945];
1666
 
1667
                    else switch (c) {
1668
                        // all of the HTMLspecialx.ent entities
1669
                        case  338: esc = "OElig";       break;
1670
                        case  339: esc = "oelig";       break;
1671
                        case  352: esc = "Scaron";      break;
1672
                        case  353: esc = "scaron";      break;
1673
                        case  376: esc = "Yuml";        break;
1674
                        case  710: esc = "circ";        break;
1675
                        case  732: esc = "tilde";       break;
1676
                        case 8194: esc = "ensp";        break;
1677
                        case 8195: esc = "emsp";        break;
1678
                        case 8201: esc = "thinsp";      break;
1679
                        case 8204: esc = "zwnj";        break;
1680
                        case 8205: esc = "zwj";         break;
1681
                        case 8206: esc = "lrm";         break;
1682
                        case 8207: esc = "rlm";         break;
1683
                        case 8211: esc = "ndash";       break;
1684
                        case 8212: esc = "mdash";       break;
1685
                        case 8216: esc = "lsquo";       break;
1686
                        case 8217: esc = "rsquo";       break;
1687
                        case 8218: esc = "sbquo";       break;
1688
                        case 8220: esc = "ldquo";       break;
1689
                        case 8221: esc = "rdquo";       break;
1690
                        case 8222: esc = "bdquo";       break;
1691
                        case 8224: esc = "dagger";      break;
1692
                        case 8225: esc = "Dagger";      break;
1693
                        case 8240: esc = "permil";      break;
1694
                        case 8249: esc = "lsaquo";      break;
1695
                        case 8250: esc = "rsaquo";      break;
1696
                        case 8364: esc = "euro";        break;
1697
 
1698
                        // the other HTMLsymbolx.ent entities
1699
                        case  402: esc = "fnof";        break;
1700
                        case  977: esc = "thetasym";    break;
1701
                        case  978: esc = "upsih";       break;
1702
                        case  982: esc = "piv";         break;
1703
                        case 8226: esc = "bull";        break;
1704
                        case 8230: esc = "hellip";      break;
1705
                        case 8242: esc = "prime";       break;
1706
                        case 8243: esc = "Prime";       break;
1707
                        case 8254: esc = "oline";       break;
1708
                        case 8260: esc = "frasl";       break;
1709
                        case 8472: esc = "weierp";      break;
1710
                        case 8465: esc = "image";       break;
1711
                        case 8476: esc = "real";        break;
1712
                        case 8482: esc = "trade";       break;
1713
                        case 8501: esc = "alefsym";     break;
1714
                        case 8592: esc = "larr";        break;
1715
                        case 8593: esc = "uarr";        break;
1716
                        case 8594: esc = "rarr";        break;
1717
                        case 8595: esc = "darr";        break;
1718
                        case 8596: esc = "harr";        break;
1719
                        case 8629: esc = "crarr";       break;
1720
                        case 8656: esc = "lArr";        break;
1721
                        case 8657: esc = "uArr";        break;
1722
                        case 8658: esc = "rArr";        break;
1723
                        case 8659: esc = "dArr";        break;
1724
                        case 8660: esc = "hArr";        break;
1725
                        case 8704: esc = "forall";      break;
1726
                        case 8706: esc = "part";        break;
1727
                        case 8707: esc = "exist";       break;
1728
                        case 8709: esc = "empty";       break;
1729
                        case 8711: esc = "nabla";       break;
1730
                        case 8712: esc = "isin";        break;
1731
                        case 8713: esc = "notin";       break;
1732
                        case 8715: esc = "ni";          break;
1733
                        case 8719: esc = "prod";        break;
1734
                        case 8721: esc = "sum";         break;
1735
                        case 8722: esc = "minus";       break;
1736
                        case 8727: esc = "lowast";      break;
1737
                        case 8730: esc = "radic";       break;
1738
                        case 8733: esc = "prop";        break;
1739
                        case 8734: esc = "infin";       break;
1740
                        case 8736: esc = "ang";         break;
1741
                        case 8743: esc = "and";         break;
1742
                        case 8744: esc = "or";          break;
1743
                        case 8745: esc = "cap";         break;
1744
                        case 8746: esc = "cup";         break;
1745
                        case 8747: esc = "int";         break;
1746
                        case 8756: esc = "there4";      break;
1747
                        case 8764: esc = "sim";         break;
1748
                        case 8773: esc = "cong";        break;
1749
                        case 8776: esc = "asymp";       break;
1750
                        case 8800: esc = "ne";          break;
1751
                        case 8801: esc = "equiv";       break;
1752
                        case 8804: esc = "le";          break;
1753
                        case 8805: esc = "ge";          break;
1754
                        case 8834: esc = "sub";         break;
1755
                        case 8835: esc = "sup";         break;
1756
                        case 8836: esc = "nsub";        break;
1757
                        case 8838: esc = "sube";        break;
1758
                        case 8839: esc = "supe";        break;
1759
                        case 8853: esc = "oplus";       break;
1760
                        case 8855: esc = "otimes";      break;
1761
                        case 8869: esc = "perp";        break;
1762
                        case 8901: esc = "sdot";        break;
1763
                        case 8968: esc = "lceil";       break;
1764
                        case 8969: esc = "rceil";       break;
1765
                        case 8970: esc = "lfloor";      break;
1766
                        case 8971: esc = "rfloor";      break;
1767
                        case 9001: esc = "lang";        break;
1768
                        case 9002: esc = "rang";        break;
1769
                        case 9674: esc = "loz";         break;
1770
                        case 9824: esc = "spades";      break;
1771
                        case 9827: esc = "clubs";       break;
1772
                        case 9829: esc = "hearts";      break;
1773
                        case 9830: esc = "diams";       break;
1774
                    }
1775
                }
1776
 
1777
                // else escape with numeric char refs
1778
                if (esc == null) {
1779
                    stringBuf.setLength (0);
1780
                    stringBuf.append ("#x");
1781
                    stringBuf.append (Integer.toHexString (c).toUpperCase ());
1782
                    esc = stringBuf.toString ();
1783
 
1784
                    // FIXME:  We don't write surrogate pairs correctly.
1785
                    // They should work as one ref per character, since
1786
                    // each pair is one character.  For reading back into
1787
                    // Unicode, it matters beginning in Unicode 3.1 ...
1788
                }
1789
                break;
1790
            }
1791
            if (i != first)
1792
                rawWrite (buf, off + first, i - first);
1793
            first = i + 1;
1794
            if (esc == eol)
1795
                newline ();
1796
            else {
1797
                rawWrite ('&');
1798
                rawWrite (esc);
1799
                rawWrite (';');
1800
            }
1801
        }
1802
        if (first < len)
1803
            rawWrite (buf, off + first, len - first);
1804
    }
1805
 
1806
 
1807
 
1808
    private void newline ()
1809
    throws SAXException, IOException
1810
    {
1811
        out.write (eol);
1812
        column = 0;
1813
    }
1814
 
1815
    private void doIndent ()
1816
    throws SAXException, IOException
1817
    {
1818
        int     space = elementNestLevel * 2;
1819
 
1820
        newline ();
1821
        column = space;
1822
        // track tabs only at line starts
1823
        while (space > 8) {
1824
            out.write ("\t");
1825
            space -= 8;
1826
        }
1827
        while (space > 0) {
1828
            out.write ("  ");
1829
            space -= 2;
1830
        }
1831
    }
1832
 
1833
    private void rawWrite (char c)
1834
    throws IOException
1835
    {
1836
        out.write (c);
1837
        column++;
1838
    }
1839
 
1840
    private void rawWrite (String s)
1841
    throws SAXException, IOException
1842
    {
1843
        if (prettyPrinting && "default".equals (space.peek ())) {
1844
            char data [] = s.toCharArray ();
1845
            rawWrite (data, 0, data.length);
1846
        } else {
1847
            out.write (s);
1848
            column += s.length ();
1849
        }
1850
    }
1851
 
1852
    // NOTE:  if xhtml, the REC gives some rules about whitespace
1853
    // which we could follow ... notably, many places where conformant
1854
    // agents "must" consolidate/normalize whitespace.  Line ends can
1855
    // be removed there, etc.  This may not be the right place to do
1856
    // such mappings though.
1857
 
1858
    // Line buffering may help clarify algorithms and improve results.
1859
 
1860
    // It's likely xml:space needs more attention.
1861
 
1862
    private void rawWrite (char buf [], int offset, int length)
1863
    throws SAXException, IOException
1864
    {
1865
        boolean         wrap;
1866
 
1867
        if (prettyPrinting && space.empty ())
1868
            fatal ("stack discipline", null);
1869
 
1870
        wrap = prettyPrinting && "default".equals (space.peek ());
1871
        if (!wrap) {
1872
            out.write (buf, offset, length);
1873
            column += length;
1874
            return;
1875
        }
1876
 
1877
        // we're pretty printing and want to fill lines out only
1878
        // to the desired line length.
1879
        while (length > 0) {
1880
            int         target = lineLength - column;
1881
            boolean     wrote = false;
1882
 
1883
            // Do we even have a problem?
1884
            if (target > length || noWrap) {
1885
                out.write (buf, offset, length);
1886
                column += length;
1887
                return;
1888
            }
1889
 
1890
            // break the line at a space character, trying to fill
1891
            // as much of the line as possible.
1892
            char        c;
1893
 
1894
            for (int i = target - 1; i >= 0; i--) {
1895
                if ((c = buf [offset + i]) == ' ' || c == '\t') {
1896
                    i++;
1897
                    out.write (buf, offset, i);
1898
                    doIndent ();
1899
                    offset += i;
1900
                    length -= i;
1901
                    wrote = true;
1902
                    break;
1903
                }
1904
            }
1905
            if (wrote)
1906
                continue;
1907
 
1908
            // no space character permitting break before target
1909
            // line length is filled.  So, take the next one.
1910
            if (target < 0)
1911
                target = 0;
1912
            for (int i = target; i < length; i++)
1913
                if ((c = buf [offset + i]) == ' ' || c == '\t') {
1914
                    i++;
1915
                    out.write (buf, offset, i);
1916
                    doIndent ();
1917
                    offset += i;
1918
                    length -= i;
1919
                    wrote = true;
1920
                    break;
1921
                }
1922
            if (wrote)
1923
                continue;
1924
 
1925
            // no such luck.
1926
            out.write (buf, offset, length);
1927
            column += length;
1928
            break;
1929
        }
1930
    }
1931
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.