OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libjava/] [classpath/] [gnu/] [javax/] [swing/] [text/] [html/] [parser/] [support/] [low/] [Constants.java] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
/* Constants.java --
2
   Copyright (C) 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package gnu.javax.swing.text.html.parser.support.low;
40
 
41
import java.util.BitSet;
42
 
43
/**
44
 * The parser constants and operations, directly related to the parser
45
 * constants.
46
 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
47
 */
48
public class Constants
49
{
50
  /* Single character tokens are reflected into they ASCII codes. */
51
 
52
  /**
53
   * Start of HTML token.
54
   */
55
  public static final int BEGIN = '<';
56
 
57
  /**
58
   * End of HTML token.
59
   */
60
  public static final int END = '>';
61
 
62
  /**
63
   * Exclamation (indicates SGML or comment).
64
   */
65
  public static final int EXCLAMATION = '!';
66
 
67
  /**
68
   * Slash (indicates closing tag).
69
   */
70
  public static final int SLASH = '/';
71
 
72
  /**
73
   * Equals sign.
74
   */
75
  public static final int EQ = '=';
76
 
77
  /**
78
   * Quoting sign.
79
   */
80
  public static final int AP = '\'';
81
 
82
  /**
83
   * Quoting sign.
84
   */
85
  public static final int QUOT = '"';
86
 
87
  /* The numbers of other tokens start outside the ascii space. */
88
  /* String tokens */
89
 
90
  /**
91
   * Double dash (--)
92
   */
93
  public static final int DOUBLE_DASH = 1000;
94
 
95
  /**
96
   * The STYLE tag (needs special handling).
97
   */
98
  public static final int STYLE = 1001;
99
 
100
  /**
101
   * The SCRIPT tag (needs special handling).
102
   */
103
  public static final int SCRIPT = 1002;
104
 
105
  /* Pattern tokens */
106
 
107
  /**
108
   * HTML whitespace.
109
   */
110
  public static final int WS = 1003;
111
 
112
  /**
113
   * Named or numeric entity,
114
   */
115
  public static final int ENTITY = 1004;
116
 
117
  /**
118
   * Sequence of valid name characters (can start from digit).
119
   */
120
  public static final int NUMTOKEN = 1005;
121
 
122
  /* Complex tokens */
123
 
124
  /**
125
   * Comment opening sequence.
126
   */
127
  public static final pattern COMMENT_OPEN =
128
    new pattern(new node[]
129
                {
130
                  new node(BEGIN), new node(WS, true), new node(EXCLAMATION),
131
                  new node(WS, true), new node(DOUBLE_DASH),
132
                }
133
               );
134
 
135
  /**
136
   * Comment closing sequence
137
   */
138
  public static final pattern COMMENT_END =
139
    new pattern(new node[]
140
                {
141
                  new node(DOUBLE_DASH), new node(WS, true), new node(END)
142
                }
143
               );
144
 
145
  /**
146
   * Special case ---> (also is treated as end of comment).
147
   */
148
  public static final pattern COMMENT_TRIPLEDASH_END =
149
    new pattern(new node[]
150
                {
151
                  new node(DOUBLE_DASH), new node(NUMTOKEN), new node(END)
152
                }
153
               );
154
 
155
  /**
156
   * STYLE element heading pattern.
157
   */
158
  public static final pattern STYLE_OPEN =
159
    new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(STYLE) });
160
 
161
  /**
162
   * SCRIPT element heading pattern.
163
   */
164
  public static final pattern SCRIPT_OPEN =
165
    new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(SCRIPT) });
166
 
167
  /**
168
   * SGML element heading pattern.
169
   */
170
  public static final pattern SGML =
171
    new pattern(new node[]
172
                {
173
                  new node(BEGIN), new node(WS, true), new node(EXCLAMATION)
174
                }
175
               );
176
 
177
  /**
178
   * SCRIPT element closing pattern.
179
   */
180
  public static final pattern SCRIPT_CLOSE =
181
    new pattern(new node[]
182
                {
183
                  new node(BEGIN), new node(WS, true), new node(SLASH),
184
                  new node(WS, true), new node(SCRIPT), new node(WS, true),
185
                  new node(END)
186
                }
187
               );
188
 
189
  /**
190
   * STYLE element closing pattern.
191
   */
192
  public static final pattern STYLE_CLOSE =
193
    new pattern(new node[]
194
                {
195
                  new node(BEGIN), new node(WS, true), new node(SLASH),
196
                  new node(WS, true), new node(STYLE), new node(WS, true),
197
                  new node(END)
198
                }
199
               );
200
 
201
  /**
202
   * Ordinary HTML tag heading pattern.
203
   */
204
  public static final pattern TAG =
205
    new pattern(new node[]
206
                {
207
                  new node(BEGIN), new node(WS, true), new node(SLASH, true),
208
                  new node(WS, true), new node(NUMTOKEN)
209
                }
210
               );
211
 
212
  /* Special tokens */
213
 
214
  /**
215
   * All other tokens.
216
   */
217
  public static final int OTHER = 1999;
218
 
219
  /**
220
   * The UNICODE "end of text" control code
221
   */
222
  static final char ETX = 3;
223
 
224
  /**
225
   * End of file.
226
   */
227
  public static final int EOF = ETX;
228
 
229
  /* Character categories */
230
 
231
  /**
232
   * All single char tokens.
233
   */
234
  public static final BitSet bSINGLE_CHAR_TOKEN = new BitSet();
235
 
236
  /**
237
   * Non letters and non numbers, allowed in HTML names.
238
   */
239
  public static final BitSet bSPECIAL = new BitSet();
240
 
241
  /**
242
   * All letters, used in HTML names.
243
   */
244
  public static final BitSet bLETTER = new BitSet();
245
 
246
  /**
247
   * Digits.
248
   */
249
  public static final BitSet bDIGIT = new BitSet();
250
 
251
  /**
252
   * Both line breaks.
253
   */
254
  public static final BitSet bLINEBREAK = new BitSet();
255
 
256
  /**
257
   * All whitespace.
258
   */
259
  public static final BitSet bWHITESPACE = new BitSet();
260
 
261
  /**
262
   * Both quoting characters.
263
   */
264
  public static final BitSet bQUOTING = new BitSet();
265
 
266
  /**
267
   * Valid name characters.
268
   */
269
  public static final BitSet bNAME = new BitSet();
270
 
271
  /* Entity subcategories */
272
 
273
  /**
274
   * Named entity.
275
   */
276
  public static final int ENTITY_NAMED = 1;
277
 
278
  /**
279
   * Numeric entity.
280
   */
281
  public static final int ENTITY_NUMERIC = 2;
282
 
283
  static
284
  {
285
    bQUOTING.set(AP);
286
    bQUOTING.set(QUOT);
287
 
288
    bSINGLE_CHAR_TOKEN.set(BEGIN);
289
    bSINGLE_CHAR_TOKEN.set(END);
290
    bSINGLE_CHAR_TOKEN.set(EXCLAMATION);
291
    bSINGLE_CHAR_TOKEN.set(SLASH);
292
    bSINGLE_CHAR_TOKEN.set(EQ);
293
    bSINGLE_CHAR_TOKEN.set(EOF);
294
 
295
    bSINGLE_CHAR_TOKEN.or(bQUOTING);
296
 
297
    bLINEBREAK.set('\r');
298
    bLINEBREAK.set('\n');
299
 
300
    bWHITESPACE.set(' ');
301
    bWHITESPACE.set('\t');
302
    bWHITESPACE.set(0xC);
303
    bWHITESPACE.or(bLINEBREAK);
304
 
305
    for (char i = '0'; i <= '9'; i++)
306
      {
307
        bDIGIT.set(i);
308
      }
309
 
310
    for (char i = 'a'; i <= 'z'; i++)
311
      {
312
        bLETTER.set(i);
313
      }
314
 
315
    for (char i = 'A'; i <= 'Z'; i++)
316
      {
317
        bLETTER.set(i);
318
      }
319
 
320
    bSPECIAL.set('-');
321
    bSPECIAL.set('_');
322
    bSPECIAL.set(':');
323
    bSPECIAL.set('.');
324
 
325
    bNAME.or(bLETTER);
326
    bNAME.or(bDIGIT);
327
    bNAME.or(bSPECIAL);
328
  }
329
 
330
  /**
331
   * Verifies if one of the tokens matches the end of string
332
   * buffer. The last character in the string buffer is the
333
   * "future character", some tokens needs to verify it the
334
   * token does not continue "towards the future". If the token
335
   * matches, it matches till "pre-last" character in the buffer.
336
   * @param b
337
   * @return
338
   */
339
  public Token endMatches(Buffer b)
340
  {
341
    if (b.length() < 2)
342
      return null;
343
 
344
    int p = b.length() - 2;
345
 
346
    if (b.length() > 2 && b.charAt(p) == '-' && b.charAt(p - 1) == '-')
347
      return new Token(DOUBLE_DASH, "--", b.getLocation(p - 1, p + 1));
348
 
349
    char last = b.charAt(p);
350
 
351
    if (bSINGLE_CHAR_TOKEN.get(last))
352
      return new Token(last, last, b.getLocation(p, p + 1));
353
 
354
    char future = b.charAt(p + 1);
355
 
356
    // Check for numtokens, script and style:
357
    if (bNAME.get(last) && !bNAME.get(future))
358
      {
359
        // Scan the history up:
360
        int u = p - 1;
361
        while (u >= 0 && bNAME.get(b.charAt(u)))
362
          u--;
363
        u++;
364
 
365
        char[] token = new char[ p - u + 1 ];
366
 
367
        // Found a numtoken
368
        b.getChars(u, p + 1, token, 0);
369
 
370
        // Verify for the built-in tokens:
371
        String e = new String(token);
372
 
373
        // found the entity reference
374
        if (u > 0 && b.charAt(u - 1) == '&')
375
          {
376
            // The subsequent semicolon may be the part of the token
377
            // as well. The semicolon must be ignored. This must be
378
            // handled elsewhere.
379
            return new Token(ENTITY, ENTITY_NAMED, "&" + e,
380
                             b.getLocation(u - 1, p + 1)
381
                            );
382
          }
383
 
384
        // found the numeric entity reference
385
        if (u > 1 && b.charAt(u - 1) == '#' && b.charAt(u - 2) == '&')
386
          {
387
            // The subsequent semicolon may be the part of the token
388
            // as well. The semicolon must be ignored. This must be
389
            // handled elsewhere.
390
            return new Token(ENTITY, ENTITY_NUMERIC, "&#" + e,
391
                             b.getLocation(u - 2, p + 2)
392
                            );
393
          }
394
 
395
        Location le = b.getLocation(u, p + 1);
396
 
397
        if (e.equalsIgnoreCase("SCRIPT"))
398
          return new Token(SCRIPT, e, le);
399
        else if (e.equalsIgnoreCase("STYLE"))
400
          return new Token(STYLE, e, le);
401
        else
402
          return new Token(NUMTOKEN, e, le);
403
      }
404
 
405
    // Check for whitespace
406
    if (bWHITESPACE.get(last) && !bWHITESPACE.get(future))
407
      {
408
        // Scan the history up:
409
        int u = p - 1;
410
        while (u >= 0 && bWHITESPACE.get(b.charAt(u)))
411
          u--;
412
        u++;
413
 
414
        char[] token = new char[ p - u + 1 ];
415
        b.getChars(u, p + 1, token, 0);
416
 
417
        return new Token(WS, new String(token), b.getLocation(u, p + 1));
418
      }
419
 
420
    return null;
421
  }
422
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.