OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [javax/] [swing/] [text/] [html/] [parser/] [Parser.java] - Blame information for rev 775

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 772 jeremybenn
/* Parser.java -- HTML parser
2
   Copyright (C) 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package javax.swing.text.html.parser;
40
 
41
import java.io.IOException;
42
import java.io.Reader;
43
 
44
import javax.swing.text.ChangedCharSetException;
45
import javax.swing.text.SimpleAttributeSet;
46
 
47
/*
48
 * FOR DEVELOPERS: To avoid regression, please run the package test
49
 * textsuite/javax.swing.text.html.parser/AllParserTests after your
50
 * modifications.
51
 */
52
 
53
/**
54
 * <p>A simple error-tolerant HTML parser that uses a DTD document
55
 * to access data on the possible tokens, arguments and syntax.</p>
56
 * <p> The parser reads an HTML content from a Reader and calls various
57
 * notifying methods (which should be overridden in a subclass)
58
 * when tags or data are encountered.</p>
59
 * <p>Some HTML elements need no opening or closing tags. The
60
 * task of this parser is to invoke the tag handling methods also when
61
 * the tags are not explicitly specified and must be supposed using
62
 * information, stored in the DTD.
63
 * For  example, parsing the document
64
 * <p>&lt;table&gt;&lt;tr&gt;&lt;td&gt;a&lt;td&gt;b&lt;td&gt;c&lt;/tr&gt; <br>
65
 * will invoke the handling methods in exactly the same order
66
 * (and with the same parameters) as if parsing the document: <br>
67
 * <em>&lt;html&gt;&lt;head&gt;&lt;/head&gt;&lt;body&gt;&lt;table&gt;&lt;
68
 * tbody&gt;</em>&lt;tr&gt;&lt;td&gt;a<em>&lt;/td&gt;</em>&lt;td&gt;b<em>
69
 * &lt;/td&gt;</em>&lt;td&gt;c<em>&lt;/td&gt;&lt;/tr&gt;</em>&lt;
70
 * <em>/tbody&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;</em></p>
71
 * (supposed tags are given in italics). The parser also supports
72
 * obsolete elements of HTML syntax.<p>
73
 * </p>
74
 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
75
 */
76
public class Parser
77
  implements DTDConstants
78
{
79
  /**
80
   * The document template description that will be used to parse the documents.
81
   */
82
  protected DTD dtd;
83
 
84
  /**
85
   * The value of this field determines whether or not the Parser will be
86
   * strict in enforcing SGML compatibility. The default value is false,
87
   * stating that the parser should do everything to parse and get at least
88
   * some information even from the incorrectly written HTML input.
89
   */
90
  protected boolean strict;
91
 
92
  /**
93
   * The package level reference to the working HTML parser in this
94
   * implementation.
95
   */
96
  final gnu.javax.swing.text.html.parser.support.Parser gnu;
97
 
98
  /**
99
   * Creates a new parser that uses the given DTD to access data on the
100
   * possible tokens, arguments and syntax. There is no single - step way
101
   * to get a default DTD; you must either refer to the implementation -
102
   * specific packages, write your own DTD or obtain the working instance
103
   * of parser in other way, for example, by calling
104
   * {@link javax.swing.text.html.HTMLEditorKit#getParser() }.
105
   * @param a_dtd A DTD to use.
106
   */
107
  public Parser(DTD a_dtd)
108
  {
109
    dtd = a_dtd;
110
 
111
    final Parser j = this;
112
 
113
    gnu =
114
      new gnu.javax.swing.text.html.parser.support.Parser(dtd)
115
        {
116
          protected final void handleComment(char[] comment)
117
          {
118
            j.handleComment(comment);
119
          }
120
 
121
          protected final void handleEOFInComment()
122
          {
123
            j.handleEOFInComment();
124
          }
125
 
126
          protected final void handleEmptyTag(TagElement tag)
127
            throws javax.swing.text.ChangedCharSetException
128
          {
129
            j.handleEmptyTag(tag);
130
          }
131
 
132
          protected final void handleStartTag(TagElement tag)
133
          {
134
            j.handleStartTag(tag);
135
          }
136
 
137
          protected final void handleEndTag(TagElement tag)
138
          {
139
            j.handleEndTag(tag);
140
          }
141
 
142
          protected final void handleError(int line, String message)
143
          {
144
            j.handleError(line, message);
145
          }
146
 
147
          protected final void handleText(char[] text)
148
          {
149
            j.handleText(text);
150
          }
151
 
152
          protected final void handleTitle(char[] title)
153
          {
154
            j.handleTitle(title);
155
          }
156
 
157
          protected final void markFirstTime(Element element)
158
          {
159
            j.markFirstTime(element);
160
          }
161
 
162
          protected final void startTag(TagElement tag)
163
            throws ChangedCharSetException
164
          {
165
            j.startTag(tag);
166
          }
167
 
168
          protected final void endTag(boolean omitted)
169
          {
170
            j.endTag(omitted);
171
          }
172
 
173
          protected TagElement makeTag(Element element)
174
          {
175
            return j.makeTag(element);
176
          }
177
 
178
          protected TagElement makeTag(Element element, boolean isSupposed)
179
          {
180
            return j.makeTag(element, isSupposed);
181
          }
182
        };
183
  }
184
 
185
  /**
186
   * Parse the HTML text, calling various methods in response to the
187
   * occurence of the corresponding HTML constructions.
188
   * @param reader The reader to read the source HTML from.
189
   * @throws IOException If the reader throws one.
190
   */
191
  public synchronized void parse(Reader reader)
192
    throws IOException
193
  {
194
    gnu.parse(reader);
195
  }
196
 
197
  /**
198
   * Parses DTD markup declaration. Currently returns without action.
199
   * @return null.
200
   * @throws java.io.IOException
201
   */
202
  public String parseDTDMarkup()
203
    throws IOException
204
  {
205
    return gnu.parseDTDMarkup();
206
  }
207
 
208
  /**
209
   * Parse DTD document declarations. Currently only parses the document
210
   * type declaration markup.
211
   * @param strBuff
212
   * @return true if this is a valid DTD markup declaration.
213
   * @throws IOException
214
   */
215
  protected boolean parseMarkupDeclarations(StringBuffer strBuff)
216
    throws IOException
217
  {
218
    return gnu.parseMarkupDeclarations(strBuff);
219
  }
220
 
221
  /**
222
   * Get the attributes of the current tag.
223
   * @return The attribute set, representing the attributes of the current tag.
224
   */
225
  protected SimpleAttributeSet getAttributes()
226
  {
227
    return gnu.getAttributes();
228
  }
229
 
230
  /**
231
   * Get the number of the document line being parsed.
232
   * @return The current line.
233
   */
234
  protected int getCurrentLine()
235
  {
236
    return gnu.hTag.where.beginLine;
237
  }
238
 
239
  /**
240
   * Get the current position in the document being parsed.
241
   * @return The current position.
242
   */
243
  protected int getCurrentPos()
244
  {
245
    return gnu.hTag.where.startPosition;
246
  }
247
 
248
  /**
249
   * The method is called when the HTML end (closing) tag is found or if
250
   * the parser concludes that the one should be present in the
251
   * current position. The method is called immediatly
252
   * before calling the handleEndTag().
253
   * @param omitted True if the tag is no actually present in the document,
254
   * but is supposed by the parser (like &lt;/html&gt; at the end of the
255
   * document).
256
   */
257
  protected void endTag(boolean omitted)
258
  {
259
    // This default implementation does nothing.
260
  }
261
 
262
  /**
263
   * Invokes the error handler. The default method in this implementation
264
   * finally delegates the call to handleError, also providing the number of the
265
   * current line.
266
   */
267
  protected void error(String msg)
268
  {
269
    gnu.error(msg);
270
  }
271
 
272
  /**
273
   * Invokes the error handler. The default method in this implementation
274
   * finally delegates the call to error (msg+": '"+invalid+"'").
275
   */
276
  protected void error(String msg, String invalid)
277
  {
278
    gnu.error(msg, invalid);
279
  }
280
 
281
  /**
282
   * Invokes the error handler. The default method in this implementation
283
   * finally delegates the call to error (parm1+" "+ parm2+" "+ parm3).
284
   */
285
  protected void error(String parm1, String parm2, String parm3)
286
  {
287
    gnu.error(parm1, parm2, parm3);
288
  }
289
 
290
  /**
291
   * Invokes the error handler. The default method in this implementation
292
   * finally delegates the call to error
293
   * (parm1+" "+ parm2+" "+ parm3+" "+ parm4).
294
   */
295
  protected void error(String parm1, String parm2, String parm3, String parm4)
296
  {
297
    gnu.error(parm1, parm2, parm3, parm4);
298
  }
299
 
300
  /**
301
   * In this implementation, this is never called and returns without action.
302
   */
303
  protected void flushAttributes()
304
  {
305
    gnu.flushAttributes();
306
  }
307
 
308
  /**
309
   * Handle HTML comment. The default method returns without action.
310
   * @param comment The comment being handled
311
   */
312
  protected void handleComment(char[] comment)
313
  {
314
    // This default implementation does nothing.
315
  }
316
 
317
  /**
318
   * This is additionally called in when the HTML content terminates
319
   * without closing the HTML comment. This can only happen if the
320
   * HTML document contains errors (for example, the closing --;gt is
321
   * missing. The default method calls the error handler.
322
   */
323
  protected void handleEOFInComment()
324
  {
325
    gnu.error("Unclosed comment");
326
  }
327
 
328
  /**
329
   * Handle the tag with no content, like &lt;br&gt;. The method is
330
   * called for the elements that, in accordance with the current DTD,
331
   * has an empty content.
332
   * @param tag The tag being handled.
333
   * @throws javax.swing.text.ChangedCharSetException
334
   */
335
  protected void handleEmptyTag(TagElement tag)
336
    throws ChangedCharSetException
337
  {
338
    // This default implementation does nothing.
339
  }
340
 
341
  /**
342
   * The method is called when the HTML closing tag ((like &lt;/table&gt;)
343
   * is found or if the parser concludes that the one should be present
344
   * in the current position.
345
   * @param tag The tag being handled
346
   */
347
  protected void handleEndTag(TagElement tag)
348
  {
349
    // This default implementation does nothing.
350
  }
351
 
352
  /* Handle error that has occured in the given line. */
353
  protected void handleError(int line, String message)
354
  {
355
    // This default implementation does nothing.
356
  }
357
 
358
  /**
359
   * The method is called when the HTML opening tag ((like &lt;table&gt;)
360
   * is found or if the parser concludes that the one should be present
361
   * in the current position.
362
   * @param tag The tag being handled
363
   */
364
  protected void handleStartTag(TagElement tag)
365
  {
366
    // This default implementation does nothing.
367
  }
368
 
369
  /**
370
   * Handle the text section.
371
   * <p> For non-preformatted section, the parser replaces
372
   * \t, \r and \n by spaces and then multiple spaces
373
   * by a single space. Additionaly, all whitespace around
374
   * tags is discarded.
375
   * </p>
376
   * <p> For pre-formatted text (inside TEXAREA and PRE), the parser preserves
377
   * all tabs and spaces, but removes <b>one</b>  bounding \r, \n or \r\n,
378
   * if it is present. Additionally, it replaces each occurence of \r or \r\n
379
   * by a single \n.</p>
380
   *
381
   * @param text A section text.
382
   */
383
  protected void handleText(char[] text)
384
  {
385
    // This default implementation does nothing.
386
  }
387
 
388
  /**
389
   * Handle HTML &lt;title&gt; tag. This method is invoked when
390
   * both title starting and closing tags are already behind.
391
   * The passed argument contains the concatenation of all
392
   * title text sections.
393
   * @param title The title text.
394
   */
395
  protected void handleTitle(char[] title)
396
  {
397
    // This default implementation does nothing.
398
  }
399
 
400
  /**
401
   * Constructs the tag from the given element. In this implementation,
402
   * this is defined, but never called.
403
   * @param element the base element of the tag.
404
   * @return the tag
405
   */
406
  protected TagElement makeTag(Element element)
407
  {
408
    return makeTag(element, false);
409
  }
410
 
411
  /**
412
   * Constructs the tag from the given element.
413
   * @param element the tag base {@link javax.swing.text.html.parser.Element}
414
   * @param isSupposed true if the tag is not actually present in the
415
   * html input, but the parser supposes that it should to occur in
416
   * the current location.
417
   * @return the tag
418
   */
419
  protected TagElement makeTag(Element element, boolean isSupposed)
420
  {
421
    return new TagElement(element, isSupposed);
422
  }
423
 
424
  /**
425
   * This is called when the tag, representing the given element,
426
   * occurs first time in the document.
427
   * @param element
428
   */
429
  protected void markFirstTime(Element element)
430
  {
431
    // This default implementation does nothing.
432
  }
433
 
434
  /**
435
   * The method is called when the HTML opening tag ((like &lt;table&gt;)
436
   * is found or if the parser concludes that the one should be present
437
   * in the current position. The method is called immediately before
438
   * calling the handleStartTag.
439
   * @param tag The tag
440
   */
441
  protected void startTag(TagElement tag)
442
    throws ChangedCharSetException
443
  {
444
    // This default implementation does nothing.
445
  }
446
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.