OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [gnu/] [javax/] [swing/] [text/] [html/] [parser/] [support/] [low/] [Constants.java] - Blame information for rev 769

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 769 jeremybenn
/* Constants.java --
2
   Copyright (C) 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package gnu.javax.swing.text.html.parser.support.low;
40
 
41
import java.util.BitSet;
42
 
43
/**
44
 * The parser constants and operations, directly related to the parser
45
 * constants.
46
 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
47
 */
48
public class Constants
49
{
50
  /* Single character tokens are reflected into they ASCII codes. */
51
 
52
  /**
53
   * Start of HTML token.
54
   */
55
  public static final int BEGIN = '<';
56
 
57
  /**
58
   * End of HTML token.
59
   */
60
  public static final int END = '>';
61
 
62
  /**
63
   * Exclamation (indicates SGML or comment).
64
   */
65
  public static final int EXCLAMATION = '!';
66
 
67
  /**
68
   * Slash (indicates closing tag).
69
   */
70
  public static final int SLASH = '/';
71
 
72
  /**
73
   * Equals sign.
74
   */
75
  public static final int EQ = '=';
76
 
77
  /**
78
   * Quoting sign.
79
   */
80
  public static final int AP = '\'';
81
 
82
  /**
83
   * Quoting sign.
84
   */
85
  public static final int QUOT = '"';
86
 
87
  /* The numbers of other tokens start outside the ascii space. */
88
  /* String tokens */
89
 
90
  /**
91
   * Double dash (--)
92
   */
93
  public static final int DOUBLE_DASH = 1000;
94
 
95
  /**
96
   * The STYLE tag (needs special handling).
97
   */
98
  public static final int STYLE = 1001;
99
 
100
  /**
101
   * The SCRIPT tag (needs special handling).
102
   */
103
  public static final int SCRIPT = 1002;
104
 
105
  /* Pattern tokens */
106
 
107
  /**
108
   * HTML whitespace.
109
   */
110
  public static final int WS = 1003;
111
 
112
  /**
113
   * Named or numeric entity,
114
   */
115
  public static final int ENTITY = 1004;
116
 
117
  /**
118
   * Sequence of valid name characters (can start from digit).
119
   */
120
  public static final int NUMTOKEN = 1005;
121
 
122
  /* Complex tokens */
123
 
124
  /**
125
   * Comment opening sequence.
126
   */
127
  public static final pattern COMMENT_OPEN =
128
    new pattern(new node[]
129
                {
130
                  new node(BEGIN), new node(WS, true), new node(EXCLAMATION),
131
                  new node(WS, true), new node(DOUBLE_DASH),
132
                }
133
               );
134
 
135
  /**
136
   * Comment closing sequence
137
   */
138
  public static final pattern COMMENT_END =
139
    new pattern(new node[]
140
                {
141
                  new node(DOUBLE_DASH), new node(WS, true), new node(END)
142
                }
143
               );
144
 
145
  /**
146
   * Special case ---> (also is treated as end of comment).
147
   */
148
  public static final pattern COMMENT_TRIPLEDASH_END =
149
    new pattern(new node[]
150
                {
151
                  new node(DOUBLE_DASH), new node(NUMTOKEN), new node(END)
152
                }
153
               );
154
 
155
  /**
156
   * STYLE element heading pattern.
157
   */
158
  public static final pattern STYLE_OPEN =
159
    new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(STYLE) });
160
 
161
  /**
162
   * SCRIPT element heading pattern.
163
   */
164
  public static final pattern SCRIPT_OPEN =
165
    new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(SCRIPT) });
166
 
167
  /**
168
   * SGML element heading pattern.
169
   */
170
  public static final pattern SGML =
171
    new pattern(new node[]
172
                {
173
                  new node(BEGIN), new node(WS, true), new node(EXCLAMATION)
174
                }
175
               );
176
 
177
  /**
178
   * SCRIPT element closing pattern.
179
   */
180
  public static final pattern SCRIPT_CLOSE =
181
    new pattern(new node[]
182
                {
183
                  new node(BEGIN), new node(WS, true), new node(SLASH),
184
                  new node(WS, true), new node(SCRIPT), new node(WS, true),
185
                  new node(END)
186
                }
187
               );
188
 
189
  /**
190
   * STYLE element closing pattern.
191
   */
192
  public static final pattern STYLE_CLOSE =
193
    new pattern(new node[]
194
                {
195
                  new node(BEGIN), new node(WS, true), new node(SLASH),
196
                  new node(WS, true), new node(STYLE), new node(WS, true),
197
                  new node(END)
198
                }
199
               );
200
 
201
  /**
202
   * Ordinary HTML tag heading pattern.
203
   */
204
  public static final pattern TAG =
205
    new pattern(new node[]
206
                {
207
                  new node(BEGIN), new node(WS, true), new node(SLASH, true),
208
                  new node(WS, true), new node(NUMTOKEN)
209
                }
210
               );
211
 
212
  /**
213
   * Ordinary HTML tag closing pattern.
214
   */
215
  public static final pattern TAG_CLOSE =
216
    new pattern(new node[]
217
                {
218
                  new node(BEGIN), new node(WS, true), new node(SLASH),
219
                  new node(WS, true), new node(NUMTOKEN)
220
                }
221
               );
222
 
223
  /* Special tokens */
224
 
225
  /**
226
   * All other tokens.
227
   */
228
  public static final int OTHER = 1999;
229
 
230
  /**
231
   * The UNICODE "end of text" control code
232
   */
233
  static final char ETX = 3;
234
 
235
  /**
236
   * End of file.
237
   */
238
  public static final int EOF = ETX;
239
 
240
  /* Character categories */
241
 
242
  /**
243
   * All single char tokens.
244
   */
245
  public static final BitSet bSINGLE_CHAR_TOKEN = new BitSet();
246
 
247
  /**
248
   * Non letters and non numbers, allowed in HTML names.
249
   */
250
  public static final BitSet bSPECIAL = new BitSet();
251
 
252
  /**
253
   * All letters, used in HTML names.
254
   */
255
  public static final BitSet bLETTER = new BitSet();
256
 
257
  /**
258
   * Digits.
259
   */
260
  public static final BitSet bDIGIT = new BitSet();
261
 
262
  /**
263
   * Both line breaks.
264
   */
265
  public static final BitSet bLINEBREAK = new BitSet();
266
 
267
  /**
268
   * All whitespace.
269
   */
270
  public static final BitSet bWHITESPACE = new BitSet();
271
 
272
  /**
273
   * Both quoting characters.
274
   */
275
  public static final BitSet bQUOTING = new BitSet();
276
 
277
  /**
278
   * Valid name characters.
279
   */
280
  public static final BitSet bNAME = new BitSet();
281
 
282
  /* Entity subcategories */
283
 
284
  /**
285
   * Named entity.
286
   */
287
  public static final int ENTITY_NAMED = 1;
288
 
289
  /**
290
   * Numeric entity.
291
   */
292
  public static final int ENTITY_NUMERIC = 2;
293
 
294
  static
295
  {
296
    bQUOTING.set(AP);
297
    bQUOTING.set(QUOT);
298
 
299
    bSINGLE_CHAR_TOKEN.set(BEGIN);
300
    bSINGLE_CHAR_TOKEN.set(END);
301
    bSINGLE_CHAR_TOKEN.set(EXCLAMATION);
302
    bSINGLE_CHAR_TOKEN.set(SLASH);
303
    bSINGLE_CHAR_TOKEN.set(EQ);
304
    bSINGLE_CHAR_TOKEN.set(EOF);
305
 
306
    bSINGLE_CHAR_TOKEN.or(bQUOTING);
307
 
308
    bLINEBREAK.set('\r');
309
    bLINEBREAK.set('\n');
310
 
311
    bWHITESPACE.set(' ');
312
    bWHITESPACE.set('\t');
313
    bWHITESPACE.set(0xC);
314
    bWHITESPACE.or(bLINEBREAK);
315
 
316
    for (char i = '0'; i <= '9'; i++)
317
      {
318
        bDIGIT.set(i);
319
      }
320
 
321
    for (char i = 'a'; i <= 'z'; i++)
322
      {
323
        bLETTER.set(i);
324
      }
325
 
326
    for (char i = 'A'; i <= 'Z'; i++)
327
      {
328
        bLETTER.set(i);
329
      }
330
 
331
    bSPECIAL.set('-');
332
    bSPECIAL.set('_');
333
    bSPECIAL.set(':');
334
    bSPECIAL.set('.');
335
 
336
    bNAME.or(bLETTER);
337
    bNAME.or(bDIGIT);
338
    bNAME.or(bSPECIAL);
339
  }
340
 
341
  /**
342
   * Verifies if one of the tokens matches the end of string
343
   * buffer. The last character in the string buffer is the
344
   * "future character", some tokens needs to verify it the
345
   * token does not continue "towards the future". If the token
346
   * matches, it matches till "pre-last" character in the buffer.
347
   * @param b
348
   * @return
349
   */
350
  public Token endMatches(Buffer b)
351
  {
352
    if (b.length() < 2)
353
      return null;
354
 
355
    int p = b.length() - 2;
356
 
357
    if (b.length() > 2 && b.charAt(p) == '-' && b.charAt(p - 1) == '-')
358
      return new Token(DOUBLE_DASH, "--", b.getLocation(p - 1, p + 1));
359
 
360
    char last = b.charAt(p);
361
 
362
    if (bSINGLE_CHAR_TOKEN.get(last))
363
      return new Token(last, last, b.getLocation(p, p + 1));
364
 
365
    char future = b.charAt(p + 1);
366
 
367
    // Check for numtokens, script and style:
368
    if (bNAME.get(last) && !bNAME.get(future))
369
      {
370
        // Scan the history up:
371
        int u = p - 1;
372
        while (u >= 0 && bNAME.get(b.charAt(u)))
373
          u--;
374
        u++;
375
 
376
        char[] token = new char[ p - u + 1 ];
377
 
378
        // Found a numtoken
379
        b.getChars(u, p + 1, token, 0);
380
 
381
        // Verify for the built-in tokens:
382
        String e = new String(token);
383
 
384
        // found the entity reference
385
        if (u > 0 && b.charAt(u - 1) == '&')
386
          {
387
            // The subsequent semicolon may be the part of the token
388
            // as well. The semicolon must be ignored. This must be
389
            // handled elsewhere.
390
            return new Token(ENTITY, ENTITY_NAMED, "&" + e,
391
                             b.getLocation(u - 1, p + 1)
392
                            );
393
          }
394
 
395
        // found the numeric entity reference
396
        if (u > 1 && b.charAt(u - 1) == '#' && b.charAt(u - 2) == '&')
397
          {
398
            // The subsequent semicolon may be the part of the token
399
            // as well. The semicolon must be ignored. This must be
400
            // handled elsewhere.
401
            return new Token(ENTITY, ENTITY_NUMERIC, "&#" + e,
402
                             b.getLocation(u - 2, p + 2)
403
                            );
404
          }
405
 
406
        Location le = b.getLocation(u, p + 1);
407
 
408
        if (e.equalsIgnoreCase("SCRIPT"))
409
          return new Token(SCRIPT, e, le);
410
        else if (e.equalsIgnoreCase("STYLE"))
411
          return new Token(STYLE, e, le);
412
        else
413
          return new Token(NUMTOKEN, e, le);
414
      }
415
 
416
    // Check for whitespace
417
    if (bWHITESPACE.get(last) && !bWHITESPACE.get(future))
418
      {
419
        // Scan the history up:
420
        int u = p - 1;
421
        while (u >= 0 && bWHITESPACE.get(b.charAt(u)))
422
          u--;
423
        u++;
424
 
425
        char[] token = new char[ p - u + 1 ];
426
        b.getChars(u, p + 1, token, 0);
427
 
428
        return new Token(WS, new String(token), b.getLocation(u, p + 1));
429
      }
430
 
431
    return null;
432
  }
433
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.