OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libjava/] [classpath/] [gnu/] [xml/] [util/] [XMLWriter.java] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
/* XMLWriter.java --
2
   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
package gnu.xml.util;
39
 
40
import java.io.BufferedWriter;
41
import java.io.CharConversionException;
42
import java.io.IOException;
43
import java.io.OutputStream;
44
import java.io.OutputStreamWriter;
45
import java.io.Writer;
46
import java.util.Stack;
47
 
48
import org.xml.sax.*;
49
import org.xml.sax.ext.*;
50
import org.xml.sax.helpers.*;
51
 
52
 
53
/**
54
 * This class is a SAX handler which writes all its input as a well formed
55
 * XML or XHTML document.  If driven using SAX2 events, this output may
56
 * include a recreated document type declaration, subject to limitations
57
 * of SAX (no internal subset exposed) or DOM (the important declarations,
58
 * with their documentation, are discarded).
59
 *
60
 * <p> By default, text is generated "as-is", but some optional modes
61
 * are supported.  Pretty-printing is supported, to make life easier
62
 * for people reading the output.  XHTML (1.0) output can be made
63
 * particularly pretty; all the built-in character entities are known.
64
 * Canonical XML can also be generated, assuming the input is properly
65
 * formed.
66
 *
67
 * <hr>
68
 *
69
 * <p> Some of the methods on this class are intended for applications to
70
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
71
 * methods access the JavaBeans properties (used to tweak output formats,
72
 * for example canonicalization and pretty printing).  Subclasses
73
 * are expected to add new behaviors, not to modify current behavior, so
74
 * many such methods are final.</p>
75
 *
76
 * <p> The <em>write*()</em> methods may be slightly simpler for some
77
 * applications to use than direct callbacks.  For example, they support
78
 * a simple policy for encoding data items as the content of a single element.
79
 *
80
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
81
 * this handler closes the writer it was given as part of its endDocument()
82
 * handling.  (XML documents have an end of input, and the way to encode
83
 * that on a stream is to close it.) </p>
84
 *
85
 * <hr>
86
 *
87
 * <p> Note that any relative URIs in the source document, as found in
88
 * entity and notation declarations, ought to have been fully resolved by
89
 * the parser providing events to this handler.  This means that the
90
 * output text should only have fully resolved URIs, which may not be
91
 * the desired behavior in cases where later binding is desired. </p>
92
 *
93
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
94
 * ensure that the input events are XML-conformant with respect to namespace
95
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
96
 * one solution to this problem, in the context of processing pipelines.</em>
97
 * Something as simple as connecting this handler to a parser might not
98
 * generate the correct output.  Another workaround is to ensure that the
99
 * <em>namespace-prefixes</em> feature is always set to true, if you're
100
 * hooking this directly up to some XMLReader implementation.
101
 *
102
 * @see gnu.xml.pipeline.TextConsumer
103
 *
104
 * @author David Brownell
105
 */
106
public class XMLWriter
107
    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
108
{
109
    // text prints/escapes differently depending on context
110
    //  CTX_ENTITY ... entity literal value
111
    //  CTX_ATTRIBUTE ... attribute literal value
112
    //  CTX_CONTENT ... content of an element
113
    //  CTX_UNPARSED ... CDATA, comment, PI, names, etc
114
    //  CTX_NAME ... name or nmtoken, no escapes possible
115
    private static final int    CTX_ENTITY = 1;
116
    private static final int    CTX_ATTRIBUTE = 2;
117
    private static final int    CTX_CONTENT = 3;
118
    private static final int    CTX_UNPARSED = 4;
119
    private static final int    CTX_NAME = 5;
120
 
121
// FIXME: names (element, attribute, PI, notation, etc) are not
122
// currently written out with range checks (escapeChars).
123
// In non-XHTML, some names can't be directly written; panic!
124
 
125
    /** Default line ending: the platform's, unless that isn't legal XML. */
    private static String sysEOL;

    static {
        String platformEol;

        try {
            platformEol = System.getProperty ("line.separator", "\n");
        } catch (SecurityException e) {
            // not allowed to read the property; use plain newlines
            platformEol = "\n";
        }

        // don't use the system's EOL if it's illegal XML.
        sysEOL = isLineEnd (platformEol) ? platformEol : "\n";
    }
139
 
140
    private static boolean isLineEnd (String eol)
141
    {
142
        return "\n".equals (eol)
143
                    || "\r".equals (eol)
144
                    || "\r\n".equals (eol);
145
    }
146
 
147
    private Writer              out;
148
    private boolean             inCDATA;
149
    private int                 elementNestLevel;
150
    private String              eol = sysEOL;
151
 
152
    private short               dangerMask;
153
    private StringBuffer        stringBuf;
154
    private Locator             locator;
155
    private ErrorHandler        errHandler;
156
 
157
    private boolean             expandingEntities = false;
158
    private int                 entityNestLevel;
159
    private boolean             xhtml;
160
    private boolean             startedDoctype;
161
    private String              encoding;
162
 
163
    private boolean             canonical;
164
    private boolean             inDoctype;
165
    private boolean             inEpilogue;
166
 
167
    // pretty printing controls
168
    private boolean             prettyPrinting;
169
    private int                 column;
170
    private boolean             noWrap;
171
    private Stack               space = new Stack ();
172
 
173
    // this is not a hard'n'fast rule -- longer lines are OK,
174
    // but are to be avoided.  Here, prettyprinting is more to
175
    // show structure "cleanly" than to be precise about it.
176
    // better to have ragged layout than one line 24Kb long.
177
    private static final int    lineLength = 75;
178
 
179
 
180
    /**
181
     * Constructs this handler with System.out used to write SAX events
182
     * using the UTF-8 encoding.  Avoid using this except when you know
183
     * it's safe to close System.out at the end of the document.
184
     */
185
    public XMLWriter ()
    throws IOException
    {
        // Same as the OutputStream constructor, aimed at System.out.
        this (System.out);
    }
187
 
188
    /**
189
     * Constructs a handler which writes all input to the output stream
190
     * in the UTF-8 encoding, and closes it when endDocument is called.
191
     * (Yes it's annoying that this throws an exception -- but there's
192
     * really no way around it, since it's barely possible a JDK may
193
     * exist somewhere that doesn't know how to emit UTF-8.)
194
     */
195
    public XMLWriter (OutputStream out)
    throws IOException
    {
        // "UTF8" is the Java-specific name; setEncoding() turns it into
        // the official "UTF-8" for use in the XML declaration.
        this (new OutputStreamWriter (out, "UTF8"));
    }
199
 
200
    /**
201
     * Constructs a handler which writes all input to the writer, and then
202
     * closes the writer when the document ends.  If an XML declaration is
203
     * written onto the output, and this class can determine the name of
204
     * the character encoding for this writer, that encoding name will be
205
     * included in the XML declaration.
206
     *
207
     * <P> See the description of the constructor which takes an encoding
208
     * name for important information about selection of encodings.
209
     *
210
     * @param writer XML text is written to this writer.
211
     */
212
    public XMLWriter (Writer writer)
    {
        // No explicit encoding name:  it is taken from the writer
        // itself when that is an OutputStreamWriter.
        this (writer, null);
    }
216
 
217
    /**
218
     * Constructs a handler which writes all input to the writer, and then
219
     * closes the writer when the document ends.  If an XML declaration is
220
     * written onto the output, this class will use the specified encoding
221
     * name in that declaration.  If no encoding name is specified, no
222
     * encoding name will be declared unless this class can otherwise
223
     * determine the name of the character encoding for this writer.
224
     *
225
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
226
     * output encodings are fully lossless with respect to XML data.  If you
227
     * use any other encoding you risk having your data be silently mangled
228
     * on output, as the standard Java character encoding subsystem silently
229
     * maps non-encodable characters to a question mark ("?") and will not
230
     * report such errors to applications.
231
     *
232
     * <p> For a few other encodings the risk can be reduced. If the writer is
233
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
234
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
235
     * can't be encoded in those encodings will be written safely.  Where
236
     * relevant, the XHTML entity names will be used; otherwise, numeric
237
     * character references will be emitted.
238
     *
239
     * <P> However, there remain a number of cases where substituting such
240
     * entity or character references is not an option.  Such references are
241
     * not usable within a DTD, comment, PI, or CDATA section.  Neither may
242
     * they be used when element, attribute, entity, or notation names have
243
     * the problematic characters.
244
     *
245
     * @param writer XML text is written to this writer.
246
     * @param encoding if non-null, and an XML declaration is written,
247
     *  this is the name that will be used for the character encoding.
248
     */
249
    public XMLWriter (Writer writer, String encoding)
    {
        // All constructors end up here; setWriter() does the real setup.
        setWriter (writer, encoding);
    }
253
 
254
    private void setEncoding (String encoding)
255
    {
256
        if (encoding == null && out instanceof OutputStreamWriter)
257
            encoding = ((OutputStreamWriter)out).getEncoding ();
258
 
259
        if (encoding != null) {
260
            encoding = encoding.toUpperCase ();
261
 
262
            // Use official encoding names where we know them,
263
            // avoiding the Java-only names.  When using common
264
            // encodings where we can easily tell if characters
265
            // are out of range, we'll escape out-of-range
266
            // characters using character refs for safety.
267
 
268
            // I _think_ these are all the main synonyms for these!
269
            if ("UTF8".equals (encoding)) {
270
                encoding = "UTF-8";
271
            } else if ("US-ASCII".equals (encoding)
272
                    || "ASCII".equals (encoding)) {
273
                dangerMask = (short) 0xff80;
274
                encoding = "US-ASCII";
275
            } else if ("ISO-8859-1".equals (encoding)
276
                    || "8859_1".equals (encoding)
277
                    || "ISO8859_1".equals (encoding)) {
278
                dangerMask = (short) 0xff00;
279
                encoding = "ISO-8859-1";
280
            } else if ("UNICODE".equals (encoding)
281
                    || "UNICODE-BIG".equals (encoding)
282
                    || "UNICODE-LITTLE".equals (encoding)) {
283
                encoding = "UTF-16";
284
 
285
                // TODO: UTF-16BE, UTF-16LE ... no BOM; what
286
                // release of JDK supports those Unicode names?
287
            }
288
 
289
            if (dangerMask != 0)
290
                stringBuf = new StringBuffer ();
291
        }
292
 
293
        this.encoding = encoding;
294
    }
295
 
296
 
297
    /**
298
     * Resets the handler to write a new text document.
299
     *
300
     * @param writer XML text is written to this writer.
301
     * @param encoding if non-null, and an XML declaration is written,
302
     *  this is the name that will be used for the character encoding.
303
     *
304
     * @exception IllegalStateException if the current
305
     *  document hasn't yet ended (with {@link #endDocument})
306
     */
307
    final public void setWriter (Writer writer, String encoding)
308
    {
309
        if (out != null)
310
            throw new IllegalStateException (
311
                "can't change stream in mid course");
312
        out = writer;
313
        if (out != null)
314
            setEncoding (encoding);
315
        if (!(out instanceof BufferedWriter))
316
            out = new BufferedWriter (out);
317
        space.push ("default");
318
    }
319
 
320
    /**
321
     * Assigns the line ending style to be used on output.
322
     * @param eolString null to use the system default; else
323
     *  "\n", "\r", or "\r\n".
324
     */
325
    final public void setEOL (String eolString)
326
    {
327
        if (eolString == null)
328
            eol = sysEOL;
329
        else if (!isLineEnd (eolString))
330
            eol = eolString;
331
        else
332
            throw new IllegalArgumentException (eolString);
333
    }
334
 
335
    /**
336
     * Assigns the error handler to be used to present most fatal
337
     * errors.
338
     */
339
    public void setErrorHandler (ErrorHandler handler)
340
    {
341
        errHandler = handler;
342
    }
343
 
344
    /**
345
     * Used internally and by subclasses, this encapsulates the logic
346
     * involved in reporting fatal errors.  It uses locator information
347
     * for good diagnostics, if available, and gives the application's
348
     * ErrorHandler the opportunity to handle the error before throwing
349
     * an exception.
350
     */
351
    protected void fatal (String message, Exception e)
352
    throws SAXException
353
    {
354
        SAXParseException       x;
355
 
356
        if (locator == null)
357
            x = new SAXParseException (message, null, null, -1, -1, e);
358
        else
359
            x = new SAXParseException (message, locator, e);
360
        if (errHandler != null)
361
            errHandler.fatalError (x);
362
        throw x;
363
    }
364
 
365
 
366
    // JavaBeans properties
367
 
368
    /**
369
     * Controls whether the output should attempt to follow the "transitional"
370
     * XHTML rules so that it meets the "HTML Compatibility Guidelines"
371
     * appendix in the XHTML specification.  A "transitional" Document Type
372
     * Declaration (DTD) is placed near the beginning of the output document,
373
     * instead of whatever DTD would otherwise have been placed there, and
374
     * XHTML empty elements are printed specially.  When writing text in
375
     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
376
     * entity names are used (in preference to character references) when
377
     * writing content characters which can't be expressed in those encodings.
378
     *
379
     * <p> When this option is enabled, it is the caller's responsibility
380
     * to ensure that the input is otherwise valid as XHTML.  Things to
381
     * be careful of in all cases, as described in the appendix referenced
382
     * above, include:  <ul>
383
     *
384
     *  <li> Element and attribute names must be in lower case, both
385
     *          in the document and in any CSS style sheet.
386
     *  <li> All XML constructs must be valid as defined by the XHTML
387
     *          "transitional" DTD (including all familiar constructs,
388
     *          even deprecated ones).
389
     *  <li> The root element must be "html".
390
     *  <li> Elements that must be empty (such as <em>&lt;br&gt;</em>)
391
     *          must have no content.
392
     *  <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
393
     *          when specifying language.
394
     *  <li> Similarly, use both <em>id</em> and <em>name</em> attributes
395
     *          when defining elements that may be referred to through
396
     *          URI fragment identifiers ... and make sure that the
397
     *          value is a legal NMTOKEN, since not all such HTML 4.0
398
     *          identifiers are valid in XML.
399
     *  <li> Be careful with character encodings; make sure you provide
400
     *          a <em>&lt;meta http-equiv="Content-type"
401
     *          content="text/xml;charset=..." /&gt;</em> element in
402
     *          the HTML "head" element, naming the same encoding
403
     *          used to create this handler.  Also, if that encoding
404
     *          is anything other than US-ASCII, make sure that if
405
     *          the document is given a MIME content type, it has
406
     *          a <em>charset=...</em> attribute with that encoding.
407
     *  </ul>
408
     *
409
     * <p> Additionally, some of the oldest browsers have additional
410
     * quirks, to address with guidelines such as: <ul>
411
     *
412
     *  <li> Processing instructions may be rendered, so avoid them.
413
     *          (Similarly for an XML declaration.)
414
     *  <li> Embedded style sheets and scripts should not contain XML
415
     *          markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
416
     *  <li> Attribute values should not have line breaks or multiple
417
     *          consecutive white space characters.
418
     *  <li> Use no more than one of the deprecated (transitional)
419
     *          <em>&lt;isindex&gt;</em> elements.
420
     *  <li> Some boolean attributes (such as <em>compact, checked,
421
     *          disabled, readonly, selected,</em> and more) confuse
422
     *          some browsers, since they only understand minimized
423
     *          versions which are illegal in XML.
424
     *  </ul>
425
     *
426
     * <p> Also, some characteristics of the resulting output may be
427
     * a function of whether the document is later given a MIME
428
     * content type of <em>text/html</em> rather than one indicating
429
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
430
     * some browsers ignore MIME content types and prefer to rely on URI
431
     * name suffixes -- so an "index.xml" could always be XML, never
432
     * XHTML, no matter its MIME type.
433
     */
434
    final public void setXhtml (boolean value)
435
    {
436
        if (locator != null)
437
            throw new IllegalStateException ("started parsing");
438
        xhtml = value;
439
        if (xhtml)
440
            canonical = false;
441
    }
442
 
443
    /**
444
     * Returns true if the output attempts to echo the input following
445
     * "transitional" XHTML rules and matching the "HTML Compatibility
446
     * Guidelines" so that an HTML version 3 browser can read the output
447
     * as HTML; returns false (the default) othewise.
448
     */
449
    final public boolean isXhtml ()
450
    {
451
        return xhtml;
452
    }
453
 
454
    /**
455
     * Controls whether the output text contains references to
456
     * entities (the default), or instead contains the expanded
457
     * values of those entities.
458
     */
459
    final public void setExpandingEntities (boolean value)
460
    {
461
        if (locator != null)
462
            throw new IllegalStateException ("started parsing");
463
        expandingEntities = value;
464
        if (!expandingEntities)
465
            canonical = false;
466
    }
467
 
468
    /**
469
     * Returns true if the output will have no entity references;
470
     * returns false (the default) otherwise.
471
     */
472
    final public boolean isExpandingEntities ()
473
    {
474
        return expandingEntities;
475
    }
476
 
477
    /**
478
     * Controls pretty-printing, which by default is not enabled
479
     * (and currently is most useful for XHTML output).
480
     * Pretty printing enables structural indentation, sorting of attributes
481
     * by name, line wrapping, and potentially other mechanisms for making
482
     * output more or less readable.
483
     *
484
     * <p> At this writing, structural indentation and line wrapping are
485
     * enabled when pretty printing is enabled and the <em>xml:space</em>
486
     * attribute has the value <em>default</em> (its other legal value is
487
     * <em>preserve</em>, as defined in the XML specification).  The three
488
     * XHTML element types which use another value are recognized by their
489
     * names (namespaces are ignored).
490
     *
491
     * <p> Also, for the record, the "pretty" aspect of printing here
492
     * is more to provide basic structure on outputs that would otherwise
493
     * risk being a single long line of text.  For now, expect the
494
     * structure to be ragged ... unless you'd like to submit a patch
495
     * to make this be more strictly formatted!
496
     *
497
     * @exception IllegalStateException thrown if this method is invoked
498
     *  after output has begun.
499
     */
500
    final public void setPrettyPrinting (boolean value)
501
    {
502
        if (locator != null)
503
            throw new IllegalStateException ("started parsing");
504
        prettyPrinting = value;
505
        if (prettyPrinting)
506
            canonical = false;
507
    }
508
 
509
    /**
510
     * Returns value of flag controlling pretty printing.
511
     */
512
    final public boolean isPrettyPrinting ()
513
    {
514
        return prettyPrinting;
515
    }
516
 
517
 
518
    /**
519
     * Sets the output style to be canonicalized.  Input events must
520
     * meet requirements that are slightly more stringent than the
521
     * basic well-formedness ones, and include:  <ul>
522
     *
523
     *  <li> Namespace prefixes must not have been changed from those
524
     *  in the original document.  (This may only be ensured by setting
525
     *  the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
526
     *  by default, it is cleared.)
527
     *
528
     *  <li> Redundant namespace declaration attributes have been
529
     *  removed.  (If an ancestor element defines a namespace prefix
530
     *  and that declaration hasn't been overridden, an element must
531
     *  not redeclare it.)
532
     *
533
     *  <li> If comments are not to be included in the canonical output,
534
     *  they must first be removed from the input event stream; this is
535
     *  <em>Canonical XML with comments</em> by default.
536
     *
537
     *  <li> If the input character encoding was not UCS-based, the
538
     *  character data must have been normalized using Unicode
539
     *  Normalization Form C.  (UTF-8 and UTF-16 are UCS-based.)
540
     *
541
     *  <li> Attribute values must have been normalized, as is done
542
     *  by any conformant XML processor which processes all external
543
     *  parameter entities.
544
     *
545
     *  <li> Similarly, attribute value defaulting has been performed.
546
     *
547
     *  </ul>
548
     *
549
     * <p> Note that fragments of XML documents, as specified by an XPath
550
     * node set, may be canonicalized.  In such cases, elements may need
551
     * some fixup (for <em>xml:*</em> attributes and application-specific
552
     * context).
553
     *
554
     * @exception IllegalArgumentException if the output encoding
555
     *  is anything other than UTF-8.
556
     */
557
    final public void setCanonical (boolean value)
558
    {
559
        if (value && !"UTF-8".equals (encoding))
560
            throw new IllegalArgumentException ("encoding != UTF-8");
561
        canonical = value;
562
        if (canonical) {
563
            prettyPrinting = xhtml = false;
564
            expandingEntities = true;
565
            eol = "\n";
566
        }
567
    }
568
 
569
 
570
    /**
571
     * Returns value of flag controlling canonical output.
572
     */
573
    final public boolean isCanonical ()
574
    {
575
        return canonical;
576
    }
577
 
578
 
579
    /**
580
     * Flushes the output stream.  When this handler is used in long lived
581
     * pipelines, it can be important to flush buffered state, for example
582
     * so that it can reach the disk as part of a state checkpoint.
583
     */
584
    final public void flush ()
585
    throws IOException
586
    {
587
        if (out != null)
588
            out.flush ();
589
    }
590
 
591
 
592
    // convenience routines
593
 
594
// FIXME:  probably want a subclass that holds a lot of these...
595
// and maybe more!
596
 
597
    /**
598
     * Writes the string as if characters() had been called on the contents
599
     * of the string.  This is particularly useful when applications act as
600
     * producers and write data directly to event consumers.
601
     */
602
    final public void write (String data)
603
    throws SAXException
604
    {
605
        char    buf [] = data.toCharArray ();
606
        characters (buf, 0, buf.length);
607
    }
608
 
609
 
610
    /**
611
     * Writes an element that has content consisting of a single string.
612
     * @see #writeEmptyElement
613
     * @see #startElement
614
     */
615
    public void writeElement (
616
        String uri,
617
        String localName,
618
        String qName,
619
        Attributes atts,
620
        String content
621
    ) throws SAXException
622
    {
623
        if (content == null || content.length () == 0) {
624
            writeEmptyElement (uri, localName, qName, atts);
625
            return;
626
        }
627
        startElement (uri, localName, qName, atts);
628
        char chars [] = content.toCharArray ();
629
        characters (chars, 0, chars.length);
630
        endElement (uri, localName, qName);
631
    }
632
 
633
 
634
    /**
635
     * Writes an element that has content consisting of a single integer,
636
     * encoded as a decimal string.
637
     * @see #writeEmptyElement
638
     * @see #startElement
639
     */
640
    public void writeElement (
641
        String uri,
642
        String localName,
643
        String qName,
644
        Attributes atts,
645
        int content
646
    ) throws SAXException
647
    {
648
        writeElement (uri, localName, qName, atts, Integer.toString (content));
649
    }
650
 
651
 
652
    // SAX1 ContentHandler
653
    /** <b>SAX1</b>:  provides parser status information */
654
    final public void setDocumentLocator (Locator l)
655
    {
656
        locator = l;
657
    }
658
 
659
 
660
    // URL for dtd that validates against all normal HTML constructs
661
    private static final String xhtmlFullDTD =
662
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
663
 
664
 
665
    /**
666
     * <b>SAX1</b>:  indicates the beginning of a document parse.
667
     * If you're writing (well formed) fragments of XML, neither
668
     * this nor endDocument should be called.
669
     */
670
    // NOT final
671
    public void startDocument ()
672
    throws SAXException
673
    {
674
        try {
675
            if (out == null)
676
                throw new IllegalStateException (
677
                    "null Writer given to XMLWriter");
678
 
679
            // Not all parsers provide the locator we want; this also
680
            // flags whether events are being sent to this object yet.
681
            // We could only have this one call if we only printed whole
682
            // documents ... but we also print fragments, so most of the
683
            // callbacks here replicate this test.
684
 
685
            if (locator == null)
686
                locator = new LocatorImpl ();
687
 
688
            // Unless the data is in US-ASCII or we're canonicalizing, write
689
            // the XML declaration if we know the encoding.  US-ASCII won't
690
            // normally get mangled by web server confusion about the
691
            // character encodings used.  Plus, it's an easy way to
692
            // ensure we can write ASCII that's unlikely to confuse
693
            // elderly HTML parsers.
694
 
695
            if (!canonical
696
                    && dangerMask != (short) 0xff80
697
                    && encoding != null) {
698
                rawWrite ("<?xml version='1.0'");
699
                rawWrite (" encoding='" + encoding + "'");
700
                rawWrite ("?>");
701
                newline ();
702
            }
703
 
704
            if (xhtml) {
705
 
706
                rawWrite ("<!DOCTYPE html PUBLIC");
707
                newline ();
708
                rawWrite ("  '-//W3C//DTD XHTML 1.0 Transitional//EN'");
709
                newline ();
710
                rawWrite ("  '");
711
                    // NOTE:  URL (above) matches the REC
712
                rawWrite (xhtmlFullDTD);
713
                rawWrite ("'>");
714
                newline ();
715
                newline ();
716
 
717
                // fake the rest of the handler into ignoring
718
                // everything until the root element, so any
719
                // XHTML DTD comments, PIs, etc are ignored
720
                startedDoctype = true;
721
            }
722
 
723
            entityNestLevel = 0;
724
 
725
        } catch (IOException e) {
726
            fatal ("can't write", e);
727
        }
728
    }
729
 
730
    /**
731
     * <b>SAX1</b>:  indicates the completion of a parse.
732
     * Note that all complete SAX event streams make this call, even
733
     * if an error is reported during a parse.
734
     */
735
    // NOT final
736
    public void endDocument ()
737
    throws SAXException
738
    {
739
        try {
740
            if (!canonical) {
741
                newline ();
742
                newline ();
743
            }
744
            out.close ();
745
            out = null;
746
            locator = null;
747
        } catch (IOException e) {
748
            fatal ("can't write", e);
749
        }
750
    }
751
 
752
    // XHTML elements declared as EMPTY print differently
753
    final private static boolean isEmptyElementTag (String tag)
754
    {
755
        switch (tag.charAt (0)) {
756
          case 'a':     return "area".equals (tag);
757
          case 'b':     return "base".equals (tag)
758
                            || "basefont".equals (tag)
759
                            || "br".equals (tag);
760
          case 'c':     return "col".equals (tag);
761
          case 'f':     return "frame".equals (tag);
762
          case 'h':     return "hr".equals (tag);
763
          case 'i':     return "img".equals (tag)
764
                            || "input".equals (tag)
765
                            || "isindex".equals (tag);
766
          case 'l':     return "link".equals (tag);
767
          case 'm':     return "meta".equals (tag);
768
          case 'p':     return "param".equals (tag);
769
        }
770
        return false;
771
    }
772
 
773
    private static boolean indentBefore (String tag)
774
    {
775
        // basically indent before block content
776
        // and within structure like tables, lists
777
        switch (tag.charAt (0)) {
778
          case 'a':     return "applet".equals (tag);
779
          case 'b':     return "body".equals (tag)
780
                            || "blockquote".equals (tag);
781
          case 'c':     return "center".equals (tag);
782
          case 'f':     return "frame".equals (tag)
783
                            || "frameset".equals (tag);
784
          case 'h':     return "head".equals (tag);
785
          case 'm':     return "meta".equals (tag);
786
          case 'o':     return "object".equals (tag);
787
          case 'p':     return "param".equals (tag)
788
                            || "pre".equals (tag);
789
          case 's':     return "style".equals (tag);
790
          case 't':     return "title".equals (tag)
791
                            || "td".equals (tag)
792
                            || "th".equals (tag);
793
        }
794
        // ... but not inline elements like "em", "b", "font"
795
        return false;
796
    }
797
 
798
    private static boolean spaceBefore (String tag)
799
    {
800
        // blank line AND INDENT before certain structural content
801
        switch (tag.charAt (0)) {
802
          case 'h':     return "h1".equals (tag)
803
                            || "h2".equals (tag)
804
                            || "h3".equals (tag)
805
                            || "h4".equals (tag)
806
                            || "h5".equals (tag)
807
                            || "h6".equals (tag)
808
                            || "hr".equals (tag);
809
          case 'l':     return "li".equals (tag);
810
          case 'o':     return "ol".equals (tag);
811
          case 'p':     return "p".equals (tag);
812
          case 't':     return "table".equals (tag)
813
                            || "tr".equals (tag);
814
          case 'u':     return "ul".equals (tag);
815
        }
816
        return false;
817
    }
818
 
819
    // XHTML DTDs say these three have xml:space="preserve"
820
    private static boolean spacePreserve (String tag)
821
    {
822
        return "pre".equals (tag)
823
                || "style".equals (tag)
824
                || "script".equals (tag);
825
    }
826
 
827
    /**
828
     * <b>SAX2</b>:  ignored.
829
     */
830
    final public void startPrefixMapping (String prefix, String uri)
831
        {}
832
 
833
    /**
834
     * <b>SAX2</b>:  ignored.
835
     */
836
    final public void endPrefixMapping (String prefix)
837
        {}
838
 
839
    private void writeStartTag (
840
        String name,
841
        Attributes atts,
842
        boolean isEmpty
843
    ) throws SAXException, IOException
844
    {
845
        rawWrite ('<');
846
        rawWrite (name);
847
 
848
        // write out attributes ... sorting is particularly useful
849
        // with output that's been heavily defaulted.
850
        if (atts != null && atts.getLength () != 0) {
851
 
852
            // Set up to write, with optional sorting
853
            int         indices [] = new int [atts.getLength ()];
854
 
855
            for (int i= 0; i < indices.length; i++)
856
                indices [i] = i;
857
 
858
            // optionally sort
859
 
860
// FIXME:  canon xml demands xmlns nodes go first,
861
// and sorting by URI first (empty first) then localname
862
// it should maybe use a different sort
863
 
864
            if (canonical || prettyPrinting) {
865
 
866
                // insertion sort by attribute name
867
                for (int i = 1; i < indices.length; i++) {
868
                    int n = indices [i], j;
869
                    String      s = atts.getQName (n);
870
 
871
                    for (j = i - 1; j >= 0; j--) {
872
                        if (s.compareTo (atts.getQName (indices [j]))
873
                                >= 0)
874
                            break;
875
                        indices [j + 1] = indices [j];
876
                    }
877
                    indices [j + 1] = n;
878
                }
879
            }
880
 
881
            // write, sorted or no
882
            for (int i= 0; i < indices.length; i++) {
883
                String  s = atts.getQName (indices [i]);
884
 
885
                    if (s == null || "".equals (s))
886
                        throw new IllegalArgumentException ("no XML name");
887
                rawWrite (" ");
888
                rawWrite (s);
889
                rawWrite ("=");
890
                writeQuotedValue (atts.getValue (indices [i]),
891
                    CTX_ATTRIBUTE);
892
            }
893
        }
894
        if (isEmpty)
895
            rawWrite (" /");
896
        rawWrite ('>');
897
    }
898
 
899
    /**
900
     * <b>SAX2</b>:  indicates the start of an element.
901
     * When XHTML is in use, avoid attribute values with
902
     * line breaks or multiple whitespace characters, since
903
     * not all user agents handle them correctly.
904
     */
905
    final public void startElement (
906
        String uri,
907
        String localName,
908
        String qName,
909
        Attributes atts
910
    ) throws SAXException
911
    {
912
        startedDoctype = false;
913
 
914
        if (locator == null)
915
            locator = new LocatorImpl ();
916
 
917
        if (qName == null || "".equals (qName))
918
            throw new IllegalArgumentException ("no XML name");
919
 
920
        try {
921
            if (entityNestLevel != 0)
922
                return;
923
            if (prettyPrinting) {
924
                String whitespace = null;
925
 
926
                if (xhtml && spacePreserve (qName))
927
                    whitespace = "preserve";
928
                else if (atts != null)
929
                    whitespace = atts.getValue ("xml:space");
930
                if (whitespace == null)
931
                    whitespace = (String) space.peek ();
932
                space.push (whitespace);
933
 
934
                if ("default".equals (whitespace)) {
935
                    if (xhtml) {
936
                        if (spaceBefore (qName)) {
937
                            newline ();
938
                            doIndent ();
939
                        } else if (indentBefore (qName))
940
                            doIndent ();
941
                        // else it's inlined, modulo line length
942
                        // FIXME: incrementing element nest level
943
                        // for inlined elements causes ugliness
944
                    } else
945
                        doIndent ();
946
                }
947
            }
948
            elementNestLevel++;
949
            writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
950
 
951
            if (xhtml) {
952
// FIXME: if this is an XHTML "pre" element, turn
953
// off automatic wrapping.
954
            }
955
 
956
        } catch (IOException e) {
957
            fatal ("can't write", e);
958
        }
959
    }
960
 
961
    /**
962
     * Writes an empty element.
963
     * @see #startElement
964
     */
965
    public void writeEmptyElement (
966
        String uri,
967
        String localName,
968
        String qName,
969
        Attributes atts
970
    ) throws SAXException
971
    {
972
        if (canonical) {
973
            startElement (uri, localName, qName, atts);
974
            endElement (uri, localName, qName);
975
        } else {
976
            try {
977
                writeStartTag (qName, atts, true);
978
            } catch (IOException e) {
979
                fatal ("can't write", e);
980
            }
981
        }
982
    }
983
 
984
 
985
    /** <b>SAX2</b>:  indicates the end of an element */
986
    final public void endElement (String uri, String localName, String qName)
987
    throws SAXException
988
    {
989
        if (qName == null || "".equals (qName))
990
            throw new IllegalArgumentException ("no XML name");
991
 
992
        try {
993
            elementNestLevel--;
994
            if (entityNestLevel != 0)
995
                return;
996
            if (xhtml && isEmptyElementTag (qName))
997
                return;
998
            rawWrite ("</");
999
            rawWrite (qName);
1000
            rawWrite ('>');
1001
 
1002
            if (prettyPrinting) {
1003
                if (!space.empty ())
1004
                    space.pop ();
1005
                else
1006
                    fatal ("stack discipline", null);
1007
            }
1008
            if (elementNestLevel == 0)
1009
                inEpilogue = true;
1010
 
1011
        } catch (IOException e) {
1012
            fatal ("can't write", e);
1013
        }
1014
    }
1015
 
1016
    /** <b>SAX1</b>:  reports content characters */
1017
    final public void characters (char ch [], int start, int length)
1018
    throws SAXException
1019
    {
1020
        if (locator == null)
1021
            locator = new LocatorImpl ();
1022
 
1023
        try {
1024
            if (entityNestLevel != 0)
1025
                return;
1026
            if (inCDATA) {
1027
                escapeChars (ch, start, length, CTX_UNPARSED);
1028
            } else {
1029
                escapeChars (ch, start, length, CTX_CONTENT);
1030
            }
1031
        } catch (IOException e) {
1032
            fatal ("can't write", e);
1033
        }
1034
    }
1035
 
1036
    /** <b>SAX1</b>:  reports ignorable whitespace */
1037
    final public void ignorableWhitespace (char ch [], int start, int length)
1038
    throws SAXException
1039
    {
1040
        if (locator == null)
1041
            locator = new LocatorImpl ();
1042
 
1043
        try {
1044
            if (entityNestLevel != 0)
1045
                return;
1046
            // don't forget to map NL to CRLF, CR, etc
1047
            escapeChars (ch, start, length, CTX_CONTENT);
1048
        } catch (IOException e) {
1049
            fatal ("can't write", e);
1050
        }
1051
    }
1052
 
1053
    /**
1054
     * <b>SAX1</b>:  reports a PI.
1055
     * This doesn't check for illegal target names, such as "xml" or "XML",
1056
     * or namespace-incompatible ones like "big:dog"; the caller is
1057
     * responsible for ensuring those names are legal.
1058
     */
1059
    final public void processingInstruction (String target, String data)
1060
    throws SAXException
1061
    {
1062
        if (locator == null)
1063
            locator = new LocatorImpl ();
1064
 
1065
        // don't print internal subset for XHTML
1066
        if (xhtml && startedDoctype)
1067
            return;
1068
 
1069
        // ancient HTML browsers might render these ... their loss.
1070
        // to prevent:  "if (xhtml) return;".
1071
 
1072
        try {
1073
            if (entityNestLevel != 0)
1074
                return;
1075
            if (canonical && inEpilogue)
1076
                newline ();
1077
            rawWrite ("<?");
1078
            rawWrite (target);
1079
            rawWrite (' ');
1080
            escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
1081
            rawWrite ("?>");
1082
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
1083
                newline ();
1084
        } catch (IOException e) {
1085
            fatal ("can't write", e);
1086
        }
1087
    }
1088
 
1089
    /** <b>SAX1</b>: indicates a non-expanded entity reference */
1090
    public void skippedEntity (String name)
1091
    throws SAXException
1092
    {
1093
        try {
1094
            rawWrite ("&");
1095
            rawWrite (name);
1096
            rawWrite (";");
1097
        } catch (IOException e) {
1098
            fatal ("can't write", e);
1099
        }
1100
    }
1101
 
1102
    // SAX2 LexicalHandler
1103
 
1104
    /** <b>SAX2</b>:  called before parsing CDATA characters */
1105
    final public void startCDATA ()
1106
    throws SAXException
1107
    {
1108
        if (locator == null)
1109
            locator = new LocatorImpl ();
1110
 
1111
        if (canonical)
1112
            return;
1113
 
1114
        try {
1115
            inCDATA = true;
1116
            if (entityNestLevel == 0)
1117
                rawWrite ("<![CDATA[");
1118
        } catch (IOException e) {
1119
            fatal ("can't write", e);
1120
        }
1121
    }
1122
 
1123
    /** <b>SAX2</b>:  called after parsing CDATA characters */
1124
    final public void endCDATA ()
1125
    throws SAXException
1126
    {
1127
        if (canonical)
1128
            return;
1129
 
1130
        try {
1131
            inCDATA = false;
1132
            if (entityNestLevel == 0)
1133
                rawWrite ("]]>");
1134
        } catch (IOException e) {
1135
            fatal ("can't write", e);
1136
        }
1137
    }
1138
 
1139
    /**
1140
     * <b>SAX2</b>:  called when the doctype is partially parsed
1141
     * Note that this, like other doctype related calls, is ignored
1142
     * when XHTML is in use.
1143
     */
1144
    final public void startDTD (String name, String publicId, String systemId)
1145
    throws SAXException
1146
    {
1147
        if (locator == null)
1148
            locator = new LocatorImpl ();
1149
        if (xhtml)
1150
            return;
1151
        try {
1152
            inDoctype = startedDoctype = true;
1153
            if (canonical)
1154
                return;
1155
            rawWrite ("<!DOCTYPE ");
1156
            rawWrite (name);
1157
            rawWrite (' ');
1158
 
1159
            if (!expandingEntities) {
1160
                if (publicId != null)
1161
                    rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
1162
                else if (systemId != null)
1163
                    rawWrite ("SYSTEM '" + systemId + "' ");
1164
            }
1165
 
1166
            rawWrite ('[');
1167
            newline ();
1168
        } catch (IOException e) {
1169
            fatal ("can't write", e);
1170
        }
1171
    }
1172
 
1173
    /** <b>SAX2</b>:  called after the doctype is parsed */
1174
    final public void endDTD ()
1175
    throws SAXException
1176
    {
1177
        inDoctype = false;
1178
        if (canonical || xhtml)
1179
            return;
1180
        try {
1181
            rawWrite ("]>");
1182
            newline ();
1183
        } catch (IOException e) {
1184
            fatal ("can't write", e);
1185
        }
1186
    }
1187
 
1188
    /**
1189
     * <b>SAX2</b>:  called before parsing a general entity in content
1190
     */
1191
    final public void startEntity (String name)
1192
    throws SAXException
1193
    {
1194
        try {
1195
            boolean     writeEOL = true;
1196
 
1197
            // Predefined XHTML entities (for characters) will get
1198
            // mapped back later.
1199
            if (xhtml || expandingEntities)
1200
                return;
1201
 
1202
            entityNestLevel++;
1203
            if (name.equals ("[dtd]"))
1204
                return;
1205
            if (entityNestLevel != 1)
1206
                return;
1207
            if (!name.startsWith ("%")) {
1208
                writeEOL = false;
1209
                rawWrite ('&');
1210
            }
1211
            rawWrite (name);
1212
            rawWrite (';');
1213
            if (writeEOL)
1214
                newline ();
1215
        } catch (IOException e) {
1216
            fatal ("can't write", e);
1217
        }
1218
    }
1219
 
1220
    /**
1221
     * <b>SAX2</b>:  called after parsing a general entity in content
1222
     */
1223
    final public void endEntity (String name)
1224
    throws SAXException
1225
    {
1226
        if (xhtml || expandingEntities)
1227
            return;
1228
        entityNestLevel--;
1229
    }
1230
 
1231
    /**
1232
     * <b>SAX2</b>:  called when comments are parsed.
1233
     * When XHTML is used, the old HTML tradition of using comments
1234
     * to for inline CSS, or for JavaScript code is  discouraged.
1235
     * This is because XML processors are encouraged to discard, on
1236
     * the grounds that comments are for users (and perhaps text
1237
     * editors) not programs.  Instead, use external scripts
1238
     */
1239
    final public void comment (char ch [], int start, int length)
1240
    throws SAXException
1241
    {
1242
        if (locator == null)
1243
            locator = new LocatorImpl ();
1244
 
1245
        // don't print internal subset for XHTML
1246
        if (xhtml && startedDoctype)
1247
            return;
1248
        // don't print comment in doctype for canon xml
1249
        if (canonical && inDoctype)
1250
            return;
1251
 
1252
        try {
1253
            boolean indent;
1254
 
1255
            if (prettyPrinting && space.empty ())
1256
                fatal ("stack discipline", null);
1257
            indent = prettyPrinting && "default".equals (space.peek ());
1258
            if (entityNestLevel != 0)
1259
                return;
1260
            if (indent)
1261
                doIndent ();
1262
            if (canonical && inEpilogue)
1263
                newline ();
1264
            rawWrite ("<!--");
1265
            escapeChars (ch, start, length, CTX_UNPARSED);
1266
            rawWrite ("-->");
1267
            if (indent)
1268
                doIndent ();
1269
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
1270
                newline ();
1271
        } catch (IOException e) {
1272
            fatal ("can't write", e);
1273
        }
1274
    }
1275
 
1276
    // SAX1 DTDHandler
1277
 
1278
    /** <b>SAX1</b>:  called on notation declarations */
1279
    final public void notationDecl (String name,
1280
        String publicId, String systemId)
1281
    throws SAXException
1282
    {
1283
        if (xhtml)
1284
            return;
1285
        try {
1286
            // At this time, only SAX2 callbacks start these.
1287
            if (!startedDoctype)
1288
                return;
1289
 
1290
            if (entityNestLevel != 0)
1291
                return;
1292
            rawWrite ("<!NOTATION " + name + " ");
1293
            if (publicId != null)
1294
                rawWrite ("PUBLIC \"" + publicId + '"');
1295
            else
1296
                rawWrite ("SYSTEM ");
1297
            if (systemId != null)
1298
                rawWrite ('"' + systemId + '"');
1299
            rawWrite (">");
1300
            newline ();
1301
        } catch (IOException e) {
1302
            fatal ("can't write", e);
1303
        }
1304
    }
1305
 
1306
    /** <b>SAX1</b>:  called on unparsed entity declarations */
1307
    final public void unparsedEntityDecl (String name,
1308
        String publicId, String systemId,
1309
        String notationName)
1310
    throws SAXException
1311
    {
1312
        if (xhtml)
1313
            return;
1314
        try {
1315
            // At this time, only SAX2 callbacks start these.
1316
            if (!startedDoctype)  {
1317
                // FIXME: write to temporary buffer, and make the start
1318
                // of the root element write these declarations.
1319
                return;
1320
            }
1321
 
1322
            if (entityNestLevel != 0)
1323
                return;
1324
            rawWrite ("<!ENTITY " + name + " ");
1325
            if (publicId != null)
1326
                rawWrite ("PUBLIC \"" + publicId + '"');
1327
            else
1328
                rawWrite ("SYSTEM ");
1329
            rawWrite ('"' + systemId + '"');
1330
            rawWrite (" NDATA " + notationName + ">");
1331
            newline ();
1332
        } catch (IOException e) {
1333
            fatal ("can't write", e);
1334
        }
1335
    }
1336
 
1337
    // SAX2 DeclHandler
1338
 
1339
    /** <b>SAX2</b>:  called on attribute declarations */
1340
    final public void attributeDecl (String eName, String aName,
1341
            String type, String mode, String value)
1342
    throws SAXException
1343
    {
1344
        if (xhtml)
1345
            return;
1346
        try {
1347
            // At this time, only SAX2 callbacks start these.
1348
            if (!startedDoctype)
1349
                return;
1350
            if (entityNestLevel != 0)
1351
                return;
1352
            rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
1353
            rawWrite (type);
1354
            rawWrite (' ');
1355
            if (mode != null)
1356
                rawWrite (mode + ' ');
1357
            if (value != null)
1358
                writeQuotedValue (value, CTX_ATTRIBUTE);
1359
            rawWrite ('>');
1360
            newline ();
1361
        } catch (IOException e) {
1362
            fatal ("can't write", e);
1363
        }
1364
    }
1365
 
1366
    /** <b>SAX2</b>:  called on element declarations */
1367
    final public void elementDecl (String name, String model)
1368
    throws SAXException
1369
    {
1370
        if (xhtml)
1371
            return;
1372
        try {
1373
            // At this time, only SAX2 callbacks start these.
1374
            if (!startedDoctype)
1375
                return;
1376
            if (entityNestLevel != 0)
1377
                return;
1378
            rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
1379
            newline ();
1380
        } catch (IOException e) {
1381
            fatal ("can't write", e);
1382
        }
1383
    }
1384
 
1385
    /** <b>SAX2</b>:  called on external entity declarations */
1386
    final public void externalEntityDecl (
1387
        String name,
1388
        String publicId,
1389
        String systemId)
1390
    throws SAXException
1391
    {
1392
        if (xhtml)
1393
            return;
1394
        try {
1395
            // At this time, only SAX2 callbacks start these.
1396
            if (!startedDoctype)
1397
                return;
1398
            if (entityNestLevel != 0)
1399
                return;
1400
            rawWrite ("<!ENTITY ");
1401
            if (name.startsWith ("%")) {
1402
                rawWrite ("% ");
1403
                rawWrite (name.substring (1));
1404
            } else
1405
                rawWrite (name);
1406
            if (publicId != null)
1407
                rawWrite (" PUBLIC \"" + publicId + '"');
1408
            else
1409
                rawWrite (" SYSTEM ");
1410
            rawWrite ('"' + systemId + "\">");
1411
            newline ();
1412
        } catch (IOException e) {
1413
            fatal ("can't write", e);
1414
        }
1415
    }
1416
 
1417
    /** <b>SAX2</b>:  called on internal entity declarations */
1418
    final public void internalEntityDecl (String name, String value)
1419
    throws SAXException
1420
    {
1421
        if (xhtml)
1422
            return;
1423
        try {
1424
            // At this time, only SAX2 callbacks start these.
1425
            if (!startedDoctype)
1426
                return;
1427
            if (entityNestLevel != 0)
1428
                return;
1429
            rawWrite ("<!ENTITY ");
1430
            if (name.startsWith ("%")) {
1431
                rawWrite ("% ");
1432
                rawWrite (name.substring (1));
1433
            } else
1434
                rawWrite (name);
1435
            rawWrite (' ');
1436
            writeQuotedValue (value, CTX_ENTITY);
1437
            rawWrite ('>');
1438
            newline ();
1439
        } catch (IOException e) {
1440
            fatal ("can't write", e);
1441
        }
1442
    }
1443
 
1444
    private void writeQuotedValue (String value, int code)
1445
    throws SAXException, IOException
1446
    {
1447
        char    buf [] = value.toCharArray ();
1448
        int     off = 0, len = buf.length;
1449
 
1450
        // we can't add line breaks to attribute/entity/... values
1451
        noWrap = true;
1452
        rawWrite ('"');
1453
        escapeChars (buf, off, len, code);
1454
        rawWrite ('"');
1455
        noWrap = false;
1456
    }
1457
 
1458
    // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
1459
    // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
1460
    // Codes 128-159 have no assigned values.
1461
    private static final String HTMLlat1x [] = {
1462
        // 160
1463
        "nbsp", "iexcl", "cent", "pound", "curren",
1464
        "yen", "brvbar", "sect", "uml", "copy",
1465
 
1466
        // 170
1467
        "ordf", "laquo", "not", "shy", "reg",
1468
        "macr", "deg", "plusmn", "sup2", "sup3",
1469
 
1470
        // 180
1471
        "acute", "micro", "para", "middot", "cedil",
1472
        "sup1", "ordm", "raquo", "frac14", "frac12",
1473
 
1474
        // 190
1475
        "frac34", "iquest", "Agrave", "Aacute", "Acirc",
1476
        "Atilde", "Auml", "Aring", "AElig", "Ccedil",
1477
 
1478
        // 200
1479
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
1480
        "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
1481
 
1482
        // 210
1483
        "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
1484
        "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
1485
 
1486
        // 220
1487
        "Uuml", "Yacute", "THORN", "szlig", "agrave",
1488
        "aacute", "acirc", "atilde", "auml", "aring",
1489
 
1490
        // 230
1491
        "aelig", "ccedil", "egrave", "eacute", "ecirc",
1492
        "euml", "igrave", "iacute", "icirc", "iuml",
1493
 
1494
        // 240
1495
        "eth", "ntilde", "ograve", "oacute", "ocirc",
1496
        "otilde", "ouml", "divide", "oslash", "ugrave",
1497
 
1498
        // 250
1499
        "uacute", "ucirc", "uuml", "yacute", "thorn",
1500
        "yuml"
1501
    };
1502
 
1503
    // From "HTMLsymbolx.ent" ... some of the symbols that
1504
    // we can conveniently handle.  Entities for the Greek.
1505
    // alphabet (upper and lower cases) are compact.
1506
    private static final String HTMLsymbolx_GR [] = {
1507
        // 913
1508
        "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
1509
        "Zeta", "Eta", "Theta", "Iota", "Kappa",
1510
 
1511
        // 923
1512
        "Lambda", "Mu", "Nu", "Xi", "Omicron",
1513
        "Pi", "Rho", null, "Sigma", "Tau",
1514
 
1515
        // 933
1516
        "Upsilon", "Phi", "Chi", "Psi", "Omega"
1517
    };
1518
 
1519
    private static final String HTMLsymbolx_gr [] = {
1520
        // 945
1521
        "alpha", "beta", "gamma", "delta", "epsilon",
1522
        "zeta", "eta", "theta", "iota", "kappa",
1523
 
1524
        // 955
1525
        "lambda", "mu", "nu", "xi", "omicron",
1526
        "pi", "rho", "sigmaf", "sigma", "tau",
1527
 
1528
        // 965
1529
        "upsilon", "phi", "chi", "psi", "omega"
1530
    };
1531
 
1532
 
1533
    // General routine to write text and substitute predefined
1534
    // entities (XML, and a special case for XHTML) as needed.
1535
    private void escapeChars (char buf [], int off, int len, int code)
1536
    throws SAXException, IOException
1537
    {
1538
        int     first = 0;
1539
 
1540
        if (off < 0) {
1541
            off = 0;
1542
            len = buf.length;
1543
        }
1544
        for (int i = 0; i < len; i++) {
1545
            String      esc;
1546
            char        c = buf [off + i];
1547
 
1548
            switch (c) {
1549
              // Note that CTX_ATTRIBUTE isn't explicitly tested here;
1550
              // all syntax delimiters are escaped in CTX_ATTRIBUTE,
1551
              // otherwise it's similar to CTX_CONTENT
1552
 
1553
              // ampersand flags entity references; entity replacement
1554
              // text has unexpanded references, other text doesn't.
1555
              case '&':
1556
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
1557
                    continue;
1558
                esc = "amp";
1559
                break;
1560
 
1561
              // attributes and text may NOT have literal '<', but
1562
              // entities may have markup constructs
1563
              case '<':
1564
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
1565
                    continue;
1566
                esc = "lt";
1567
                break;
1568
 
1569
              // as above re markup constructs; but otherwise
1570
              // except when canonicalizing, this is for consistency
1571
              case '>':
1572
                if (code == CTX_ENTITY || code == CTX_UNPARSED)
1573
                    continue;
1574
                esc = "gt";
1575
                break;
1576
              case '\'':
1577
                if (code == CTX_CONTENT || code == CTX_UNPARSED)
1578
                    continue;
1579
                if (canonical)
1580
                    continue;
1581
                esc = "apos";
1582
                break;
1583
 
1584
              // needed when printing quoted attribute/entity values
1585
              case '"':
1586
                if (code == CTX_CONTENT || code == CTX_UNPARSED)
1587
                    continue;
1588
                esc = "quot";
1589
                break;
1590
 
1591
              // make line ends work per host OS convention
1592
              case '\n':
1593
                esc = eol;
1594
                break;
1595
 
1596
              //
1597
              // No other characters NEED special treatment ... except
1598
              // for encoding-specific issues, like whether the character
1599
              // can really be represented in that encoding.
1600
              //
1601
              default:
1602
                //
1603
                // There are characters we can never write safely; getting
1604
                // them is an error.
1605
                //
1606
                //   (a) They're never legal in XML ... detected by range 
1607
                //      checks, and (eventually) by remerging surrogate
1608
                //      pairs on output.  (Easy error for apps to prevent.)
1609
                //
1610
                //   (b) This encoding can't represent them, and we
1611
                //      can't make reference substitution (e.g. inside
1612
                //      CDATA sections, names, PI data, etc).  (Hard for
1613
                //      apps to prevent, except by using UTF-8 or UTF-16
1614
                //      as their output encoding.)
1615
                //
1616
                // We know a very little bit about what characters
1617
                // the US-ASCII and ISO-8859-1 encodings support.  For
1618
                // other encodings we can't detect the second type of
1619
                // error at all.  (Never an issue for UTF-8 or UTF-16.)
1620
                //
1621
 
1622
// FIXME:  CR in CDATA is an error; in text, turn to a char ref
1623
 
1624
// FIXME:  CR/LF/TAB in attributes should become char refs
1625
 
1626
                if ((c > 0xfffd)
1627
                        || ((c < 0x0020) && !((c == 0x0009)
1628
                                || (c == 0x000A) || (c == 0x000D)))
1629
                        || (((c & dangerMask) != 0)
1630
                            && (code == CTX_UNPARSED))) {
1631
 
1632
                    // if case (b) in CDATA, we might end the section,
1633
                    // write a reference, then restart ... possible
1634
                    // in one DOM L3 draft.
1635
 
1636
                    throw new CharConversionException (
1637
                            "Illegal or non-writable character: U+"
1638
                            + Integer.toHexString (c));
1639
                }
1640
 
1641
                //
1642
                // If the output encoding represents the character
1643
                // directly, let it do so!  Else we'll escape it.
1644
                //
1645
                if ((c & dangerMask) == 0)
1646
                    continue;
1647
                esc = null;
1648
 
1649
                // Avoid numeric refs where symbolic ones exist, as
1650
                // symbolic ones make more sense to humans reading!
1651
                if (xhtml) {
1652
                    // all the HTMLlat1x.ent entities
1653
                    // (all the "ISO-8859-1" characters)
1654
                    if (c >= 160 && c <= 255)
1655
                        esc = HTMLlat1x [c - 160];
1656
 
1657
                    // not quite half the HTMLsymbolx.ent entities
1658
                    else if (c >= 913 && c <= 937)
1659
                        esc = HTMLsymbolx_GR [c - 913];
1660
                    else if (c >= 945 && c <= 969)
1661
                        esc = HTMLsymbolx_gr [c - 945];
1662
 
1663
                    else switch (c) {
1664
                        // all of the HTMLspecialx.ent entities
1665
                        case  338: esc = "OElig";       break;
1666
                        case  339: esc = "oelig";       break;
1667
                        case  352: esc = "Scaron";      break;
1668
                        case  353: esc = "scaron";      break;
1669
                        case  376: esc = "Yuml";        break;
1670
                        case  710: esc = "circ";        break;
1671
                        case  732: esc = "tilde";       break;
1672
                        case 8194: esc = "ensp";        break;
1673
                        case 8195: esc = "emsp";        break;
1674
                        case 8201: esc = "thinsp";      break;
1675
                        case 8204: esc = "zwnj";        break;
1676
                        case 8205: esc = "zwj";         break;
1677
                        case 8206: esc = "lrm";         break;
1678
                        case 8207: esc = "rlm";         break;
1679
                        case 8211: esc = "ndash";       break;
1680
                        case 8212: esc = "mdash";       break;
1681
                        case 8216: esc = "lsquo";       break;
1682
                        case 8217: esc = "rsquo";       break;
1683
                        case 8218: esc = "sbquo";       break;
1684
                        case 8220: esc = "ldquo";       break;
1685
                        case 8221: esc = "rdquo";       break;
1686
                        case 8222: esc = "bdquo";       break;
1687
                        case 8224: esc = "dagger";      break;
1688
                        case 8225: esc = "Dagger";      break;
1689
                        case 8240: esc = "permil";      break;
1690
                        case 8249: esc = "lsaquo";      break;
1691
                        case 8250: esc = "rsaquo";      break;
1692
                        case 8364: esc = "euro";        break;
1693
 
1694
                        // the other HTMLsymbolx.ent entities
1695
                        case  402: esc = "fnof";        break;
1696
                        case  977: esc = "thetasym";    break;
1697
                        case  978: esc = "upsih";       break;
1698
                        case  982: esc = "piv";         break;
1699
                        case 8226: esc = "bull";        break;
1700
                        case 8230: esc = "hellip";      break;
1701
                        case 8242: esc = "prime";       break;
1702
                        case 8243: esc = "Prime";       break;
1703
                        case 8254: esc = "oline";       break;
1704
                        case 8260: esc = "frasl";       break;
1705
                        case 8472: esc = "weierp";      break;
1706
                        case 8465: esc = "image";       break;
1707
                        case 8476: esc = "real";        break;
1708
                        case 8482: esc = "trade";       break;
1709
                        case 8501: esc = "alefsym";     break;
1710
                        case 8592: esc = "larr";        break;
1711
                        case 8593: esc = "uarr";        break;
1712
                        case 8594: esc = "rarr";        break;
1713
                        case 8595: esc = "darr";        break;
1714
                        case 8596: esc = "harr";        break;
1715
                        case 8629: esc = "crarr";       break;
1716
                        case 8656: esc = "lArr";        break;
1717
                        case 8657: esc = "uArr";        break;
1718
                        case 8658: esc = "rArr";        break;
1719
                        case 8659: esc = "dArr";        break;
1720
                        case 8660: esc = "hArr";        break;
1721
                        case 8704: esc = "forall";      break;
1722
                        case 8706: esc = "part";        break;
1723
                        case 8707: esc = "exist";       break;
1724
                        case 8709: esc = "empty";       break;
1725
                        case 8711: esc = "nabla";       break;
1726
                        case 8712: esc = "isin";        break;
1727
                        case 8713: esc = "notin";       break;
1728
                        case 8715: esc = "ni";          break;
1729
                        case 8719: esc = "prod";        break;
1730
                        case 8721: esc = "sum";         break;
1731
                        case 8722: esc = "minus";       break;
1732
                        case 8727: esc = "lowast";      break;
1733
                        case 8730: esc = "radic";       break;
1734
                        case 8733: esc = "prop";        break;
1735
                        case 8734: esc = "infin";       break;
1736
                        case 8736: esc = "ang";         break;
1737
                        case 8743: esc = "and";         break;
1738
                        case 8744: esc = "or";          break;
1739
                        case 8745: esc = "cap";         break;
1740
                        case 8746: esc = "cup";         break;
1741
                        case 8747: esc = "int";         break;
1742
                        case 8756: esc = "there4";      break;
1743
                        case 8764: esc = "sim";         break;
1744
                        case 8773: esc = "cong";        break;
1745
                        case 8776: esc = "asymp";       break;
1746
                        case 8800: esc = "ne";          break;
1747
                        case 8801: esc = "equiv";       break;
1748
                        case 8804: esc = "le";          break;
1749
                        case 8805: esc = "ge";          break;
1750
                        case 8834: esc = "sub";         break;
1751
                        case 8835: esc = "sup";         break;
1752
                        case 8836: esc = "nsub";        break;
1753
                        case 8838: esc = "sube";        break;
1754
                        case 8839: esc = "supe";        break;
1755
                        case 8853: esc = "oplus";       break;
1756
                        case 8855: esc = "otimes";      break;
1757
                        case 8869: esc = "perp";        break;
1758
                        case 8901: esc = "sdot";        break;
1759
                        case 8968: esc = "lceil";       break;
1760
                        case 8969: esc = "rceil";       break;
1761
                        case 8970: esc = "lfloor";      break;
1762
                        case 8971: esc = "rfloor";      break;
1763
                        case 9001: esc = "lang";        break;
1764
                        case 9002: esc = "rang";        break;
1765
                        case 9674: esc = "loz";         break;
1766
                        case 9824: esc = "spades";      break;
1767
                        case 9827: esc = "clubs";       break;
1768
                        case 9829: esc = "hearts";      break;
1769
                        case 9830: esc = "diams";       break;
1770
                    }
1771
                }
1772
 
1773
                // else escape with numeric char refs
1774
                if (esc == null) {
1775
                    stringBuf.setLength (0);
1776
                    stringBuf.append ("#x");
1777
                    stringBuf.append (Integer.toHexString (c).toUpperCase ());
1778
                    esc = stringBuf.toString ();
1779
 
1780
                    // FIXME:  We don't write surrogate pairs correctly.
1781
                    // They should work as one ref per character, since
1782
                    // each pair is one character.  For reading back into
1783
                    // Unicode, it matters beginning in Unicode 3.1 ...
1784
                }
1785
                break;
1786
            }
1787
            if (i != first)
1788
                rawWrite (buf, off + first, i - first);
1789
            first = i + 1;
1790
            if (esc == eol)
1791
                newline ();
1792
            else {
1793
                rawWrite ('&');
1794
                rawWrite (esc);
1795
                rawWrite (';');
1796
            }
1797
        }
1798
        if (first < len)
1799
            rawWrite (buf, off + first, len - first);
1800
    }
1801
 
1802
 
1803
 
1804
    private void newline ()
1805
    throws SAXException, IOException
1806
    {
1807
        out.write (eol);
1808
        column = 0;
1809
    }
1810
 
1811
    private void doIndent ()
1812
    throws SAXException, IOException
1813
    {
1814
        int     space = elementNestLevel * 2;
1815
 
1816
        newline ();
1817
        column = space;
1818
        // track tabs only at line starts
1819
        while (space > 8) {
1820
            out.write ("\t");
1821
            space -= 8;
1822
        }
1823
        while (space > 0) {
1824
            out.write ("  ");
1825
            space -= 2;
1826
        }
1827
    }
1828
 
1829
    private void rawWrite (char c)
1830
    throws IOException
1831
    {
1832
        out.write (c);
1833
        column++;
1834
    }
1835
 
1836
    private void rawWrite (String s)
1837
    throws SAXException, IOException
1838
    {
1839
        if (prettyPrinting && "default".equals (space.peek ())) {
1840
            char data [] = s.toCharArray ();
1841
            rawWrite (data, 0, data.length);
1842
        } else {
1843
            out.write (s);
1844
            column += s.length ();
1845
        }
1846
    }
1847
 
1848
    // NOTE:  if xhtml, the REC gives some rules about whitespace
1849
    // which we could follow ... notably, many places where conformant
1850
    // agents "must" consolidate/normalize whitespace.  Line ends can
1851
    // be removed there, etc.  This may not be the right place to do
1852
    // such mappings though.
1853
 
1854
    // Line buffering may help clarify algorithms and improve results.
1855
 
1856
    // It's likely xml:space needs more attention.
1857
 
1858
    private void rawWrite (char buf [], int offset, int length)
1859
    throws SAXException, IOException
1860
    {
1861
        boolean         wrap;
1862
 
1863
        if (prettyPrinting && space.empty ())
1864
            fatal ("stack discipline", null);
1865
 
1866
        wrap = prettyPrinting && "default".equals (space.peek ());
1867
        if (!wrap) {
1868
            out.write (buf, offset, length);
1869
            column += length;
1870
            return;
1871
        }
1872
 
1873
        // we're pretty printing and want to fill lines out only
1874
        // to the desired line length.
1875
        while (length > 0) {
1876
            int         target = lineLength - column;
1877
            boolean     wrote = false;
1878
 
1879
            // Do we even have a problem?
1880
            if (target > length || noWrap) {
1881
                out.write (buf, offset, length);
1882
                column += length;
1883
                return;
1884
            }
1885
 
1886
            // break the line at a space character, trying to fill
1887
            // as much of the line as possible.
1888
            char        c;
1889
 
1890
            for (int i = target - 1; i >= 0; i--) {
1891
                if ((c = buf [offset + i]) == ' ' || c == '\t') {
1892
                    i++;
1893
                    out.write (buf, offset, i);
1894
                    doIndent ();
1895
                    offset += i;
1896
                    length -= i;
1897
                    wrote = true;
1898
                    break;
1899
                }
1900
            }
1901
            if (wrote)
1902
                continue;
1903
 
1904
            // no space character permitting break before target
1905
            // line length is filled.  So, take the next one.
1906
            if (target < 0)
1907
                target = 0;
1908
            for (int i = target; i < length; i++)
1909
                if ((c = buf [offset + i]) == ' ' || c == '\t') {
1910
                    i++;
1911
                    out.write (buf, offset, i);
1912
                    doIndent ();
1913
                    offset += i;
1914
                    length -= i;
1915
                    wrote = true;
1916
                    break;
1917
                }
1918
            if (wrote)
1919
                continue;
1920
 
1921
            // no such luck.
1922
            out.write (buf, offset, length);
1923
            column += length;
1924
            break;
1925
        }
1926
    }
1927
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.