OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [gnu/] [javax/] [swing/] [text/] [html/] [css/] [CSSScanner.java] - Blame information for rev 769

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 769 jeremybenn
/* CSSScanner.java -- A parser for CSS stylesheets
2
   Copyright (C) 2006 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package gnu.javax.swing.text.html.css;
40
 
41
import java.io.BufferedInputStream;
42
import java.io.IOException;
43
import java.io.InputStream;
44
import java.io.InputStreamReader;
45
import java.io.Reader;
46
 
47
/**
48
 * A tokenizer for CSS stylesheets. This is based on the scanner definition
49
 * from:
50
 *
51
 * http://www.w3.org/TR/CSS21/syndata.html#tokenization
52
 *
53
 * @author Roman Kennke (kennke@aicas.com)
54
 */
55
// TODO: Maybe implement more restrictive scanner:
56
// http://www.w3.org/TR/CSS21/grammar.html#q2
57
class CSSScanner
58
{
59
 
60
  // The tokens. This list is taken from:
61
  // http://www.w3.org/TR/CSS21/syndata.html#tokenization
62
  static final int IDENT = 1;
63
  static final int ATKEYWORD = 2;
64
  static final int STRING = 3;
65
  static final int INVALID = 4;
66
  static final int HASH = 5;
67
  static final int NUMBER = 6;
68
  static final int PERCENTAGE = 7;
69
  static final int DIMENSION = 8;
70
  static final int URI = 9;
71
  static final int UNICODE_RANGE = 10;
72
  static final int CDO = 11;
73
  static final int CDC = 12;
74
  static final int SEMICOLON = 13;
75
  static final int CURLY_LEFT = 14;
76
  static final int CURLY_RIGHT = 15;
77
  static final int PAREN_LEFT = 16;
78
  static final int PAREN_RIGHT = 17;
79
  static final int BRACE_LEFT = 16;
80
  static final int BRACE_RIGHT = 17;
81
  static final int S = 18;
82
  static final int COMMENT = 19;
83
  static final int FUNCTION = 20;
84
  static final int INCLUDES = 21;
85
  static final int DASHMATCH = 22;
86
  static final int DELIM = 23;
87
 
88
  // Additional tokens defined for convenience.
89
  static final int EOF = -1;
90
 
91
  /**
92
   * The input source.
93
   */
94
  private Reader in;
95
 
96
  /**
97
   * The parse buffer.
98
   */
99
  char[] parseBuffer;
100
 
101
  /**
102
   * The end index in the parseBuffer of the current token.
103
   */
104
  int tokenEnd;
105
 
106
  /**
107
   * The lookahead 'buffer'.
108
   */
109
  private int[] lookahead;
110
 
111
  CSSScanner(Reader r)
112
  {
113
    lookahead = new int[2];
114
    lookahead[0] = -1;
115
    lookahead[1] = -1;
116
    parseBuffer = new char[2048];
117
    in = r;
118
  }
119
 
120
  /**
121
   * Fetches the next token. The actual character data is in the parseBuffer
122
   * afterwards with the tokenStart at index 0 and the tokenEnd field
123
   * pointing to the end of the token.
124
   *
125
   * @return the next token
126
   */
127
  int nextToken()
128
    throws IOException
129
  {
130
    tokenEnd = 0;
131
    int token = -1;
132
    int next = read();
133
    if (next != -1)
134
      {
135
        switch (next)
136
        {
137
          case ';':
138
            parseBuffer[0] = (char) next;
139
            tokenEnd = 1;
140
            token = SEMICOLON;
141
            break;
142
          case '{':
143
            parseBuffer[0] = (char) next;
144
            tokenEnd = 1;
145
            token = CURLY_LEFT;
146
            break;
147
          case '}':
148
            parseBuffer[0] = (char) next;
149
            tokenEnd = 1;
150
            token = CURLY_RIGHT;
151
            break;
152
          case '(':
153
            parseBuffer[0] = (char) next;
154
            tokenEnd = 1;
155
            token = PAREN_LEFT;
156
            break;
157
          case ')':
158
            parseBuffer[0] = (char) next;
159
            tokenEnd = 1;
160
            token = PAREN_RIGHT;
161
            break;
162
          case '[':
163
            parseBuffer[0] = (char) next;
164
            tokenEnd = 1;
165
            token = BRACE_LEFT;
166
            break;
167
          case ']':
168
            parseBuffer[0] = (char) next;
169
            tokenEnd = 1;
170
            token = BRACE_RIGHT;
171
            break;
172
          case '@':
173
            parseBuffer[0] = (char) next;
174
            tokenEnd = 1;
175
            readIdent();
176
            token = ATKEYWORD;
177
            break;
178
          case '#':
179
            parseBuffer[0] = (char) next;
180
            tokenEnd = 1;
181
            readName();
182
            token = HASH;
183
            break;
184
          case '\'':
185
          case '"':
186
            lookahead[0] = next;
187
            readString();
188
            token = STRING;
189
            break;
190
          case ' ':
191
          case '\t':
192
          case '\r':
193
          case '\n':
194
          case '\f':
195
            lookahead[0] = next;
196
            readWhitespace();
197
            token = S;
198
            break;
199
            // FIXME: Detecting an URI involves several characters lookahead.
200
//          case 'u':
201
//            lookahead[0] = ch;
202
//            readURI();
203
//            token = URI;
204
//            break;
205
          case '<':
206
            parseBuffer[0] = (char) next;
207
            parseBuffer[1] = (char) read();
208
            parseBuffer[2] = (char) read();
209
            parseBuffer[3] = (char) read();
210
            if (parseBuffer[1] == '!' && parseBuffer[2] == '-'
211
              && parseBuffer[3] == '-')
212
              {
213
                token = CDO;
214
                tokenEnd = 4;
215
              }
216
            else
217
              throw new CSSLexicalException("expected CDO token");
218
            break;
219
          case '/':
220
            lookahead[0] = next;
221
            readComment();
222
            token = COMMENT;
223
            break;
224
          case '~':
225
            parseBuffer[0] = (char) next;
226
            parseBuffer[1] = (char) read();
227
            if (parseBuffer[1] == '=')
228
              token = INCLUDES;
229
            else
230
              throw new CSSLexicalException("expected INCLUDES token");
231
            break;
232
          case '|':
233
            parseBuffer[0] = (char) next;
234
            parseBuffer[1] = (char) read();
235
            if (parseBuffer[1] == '=')
236
              token = DASHMATCH;
237
            else
238
              throw new CSSLexicalException("expected DASHMATCH token");
239
            break;
240
          case '-':
241
            int ch2 = read();
242
            if (ch2 == '-')
243
              {
244
                int ch3 = read();
245
                if (ch3 == '>')
246
                  {
247
                    parseBuffer[0] = (char) next;
248
                    parseBuffer[1] = (char) ch2;
249
                    parseBuffer[2] = (char) ch3;
250
                    tokenEnd = 3;
251
                    token = CDC;
252
                  }
253
                else
254
                  throw new CSSLexicalException("expected CDC token");
255
              }
256
            else
257
              {
258
                lookahead[0] = next;
259
                lookahead[1] = ch2;
260
                readIdent();
261
                int ch3 = read();
262
                if (ch3 == -1 || ch3 != '(')
263
                  {
264
                    lookahead[0] = ch3;
265
                    token = IDENT;
266
                  }
267
                else
268
                  {
269
                    parseBuffer[tokenEnd] = (char) ch3;
270
                    tokenEnd++;
271
                    token = FUNCTION;
272
                  }
273
              }
274
            break;
275
          case '0':
276
          case '1':
277
          case '2':
278
          case '3':
279
          case '4':
280
          case '5':
281
          case '6':
282
          case '7':
283
          case '8':
284
          case '9':
285
            lookahead[0] = next;
286
            readNum();
287
            int ch3 = read();
288
            if (ch3 == '%')
289
              {
290
                parseBuffer[tokenEnd] = (char) ch3;
291
                tokenEnd++;
292
                token = PERCENTAGE;
293
              }
294
            else if (ch3 == -1 || (! (ch3 == '_'
295
                                      || (ch3 >= 'a' && ch3 <= 'z')
296
                                      || (ch3 >= 'A' && ch3 <= 'Z')
297
                                      || ch3 == '\\' || ch3 > 177)))
298
              {
299
                lookahead[0] = ch3;
300
                token = NUMBER;
301
              }
302
            else
303
              {
304
                lookahead[0] = ch3;
305
                readIdent();
306
                token = DIMENSION;
307
              }
308
            break;
309
          default:
310
            // Handle IDENT that don't begin with '-'.
311
            if (next == '_' || (next >= 'a' && next <= 'z')
312
                || (next >= 'A' && next <= 'Z') || next == '\\' || next > 177)
313
              {
314
                lookahead[0] = next;
315
                readIdent();
316
                int ch4 = read();
317
                if (ch4 == -1 || ch4 != '(')
318
                  {
319
                    lookahead[0] = ch4;
320
                    token = IDENT;
321
                  }
322
                else
323
                  {
324
                    parseBuffer[tokenEnd] = (char) ch4;
325
                    tokenEnd++;
326
                    token = FUNCTION;
327
                  }
328
              }
329
            else
330
              {
331
                parseBuffer[0] = (char) next;
332
                tokenEnd = 1;
333
                token = DELIM;
334
              }
335
          break;
336
        }
337
      }
338
    return token;
339
  }
340
 
341
  String currentTokenString()
342
  {
343
    return new String(parseBuffer, 0, tokenEnd);
344
  }
345
 
346
  /**
347
   * Reads one character from the input stream or from the lookahead
348
   * buffer, if it contains one character.
349
   *
350
   * @return the next character
351
   *
352
   * @throws IOException if problems occur on the input source
353
   */
354
  private int read()
355
    throws IOException
356
  {
357
    int ret;
358
    if (lookahead[0] != -1)
359
      {
360
        ret = lookahead[0];
361
        lookahead[0] = -1;
362
      }
363
    else if (lookahead[1] != -1)
364
      {
365
        ret = lookahead[1];
366
        lookahead[1] = -1;
367
      }
368
    else
369
      {
370
        ret = in.read();
371
      }
372
    return ret;
373
  }
374
 
375
  /**
376
   * Reads and identifier.
377
   *
378
   * @throws IOException if something goes wrong in the input source or if
379
   *         the lexical analyser fails to read an identifier
380
   */
381
  private void readIdent()
382
    throws IOException
383
  {
384
    int ch1 = read();
385
    // Read possibly leading '-'.
386
    if (ch1 == '-')
387
      {
388
        parseBuffer[tokenEnd] = (char) ch1;
389
        tokenEnd++;
390
        ch1 = read();
391
      }
392
    // What follows must be '_' or a-z or A-Z or nonascii (>177) or an
393
    // escape.
394
    if (ch1 == '_' || (ch1 >= 'a' && ch1 <= 'z')
395
        || (ch1 >= 'A' && ch1 <= 'Z') || ch1 > 177)
396
      {
397
        parseBuffer[tokenEnd] = (char) ch1;
398
        tokenEnd++;
399
      }
400
    else if (ch1 == '\\')
401
      {
402
        // Try to read an escape.
403
        lookahead[0] = ch1;
404
        readEscape();
405
      }
406
    else
407
      throw new CSSLexicalException("First character of identifier incorrect");
408
 
409
    // Read any number of [_a-zA-Z0-9-] chars.
410
    int ch = read();
411
    while (ch != -1 && (ch == '_' || ch == '-' || (ch >= 'a' && ch <= 'z')
412
           || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')))
413
      {
414
        parseBuffer[tokenEnd] = (char) ch;
415
        tokenEnd++;
416
        ch = read();
417
      }
418
 
419
    // Push back last read character since it doesn't belong to the IDENT.
420
    lookahead[0] = ch;
421
  }
422
 
423
  /**
424
   * Reads an escape.
425
   *
426
   * @throws IOException if something goes wrong in the input source or if
427
   *         the lexical analyser fails to read an escape
428
   */
429
  private void readEscape()
430
    throws IOException
431
  {
432
    int ch = read();
433
    if (ch != -1 && ch == '\\')
434
      {
435
        parseBuffer[tokenEnd] = (char) ch;
436
        tokenEnd++;
437
        ch = read();
438
        if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'))
439
          {
440
            // Read unicode escape.
441
            // Zero to five 0-9a-f chars can follow.
442
            int hexcount = 0;
443
            ch = read();
444
            while (((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'))
445
                   && hexcount < 5)
446
              {
447
                parseBuffer[tokenEnd] = (char) ch;
448
                tokenEnd++;
449
                hexcount++;
450
                ch = read();
451
              }
452
            // Now we can have a \r\n or any whitespace character following.
453
            if (ch == '\r')
454
              {
455
                parseBuffer[tokenEnd] = (char) ch;
456
                tokenEnd++;
457
                ch = read();
458
                if (ch == '\n')
459
                  {
460
                    parseBuffer[tokenEnd] = (char) ch;
461
                    tokenEnd++;
462
                  }
463
                else
464
                  {
465
                    lookahead[0] = ch;
466
                  }
467
              }
468
            else if (ch == ' ' || ch == '\n' || ch == '\f' || ch == '\t')
469
              {
470
                parseBuffer[tokenEnd] = (char) ch;
471
                tokenEnd++;
472
              }
473
            else
474
              {
475
                lookahead[0] = ch;
476
              }
477
          }
478
        else if (ch != '\n' && ch != '\r' && ch != '\f')
479
          {
480
            parseBuffer[tokenEnd] = (char) ch;
481
            tokenEnd++;
482
          }
483
        else
484
          throw new CSSLexicalException("Can't read escape");
485
      }
486
    else
487
      throw new CSSLexicalException("Escape must start with '\\'");
488
 
489
  }
490
 
491
  private void readName()
492
    throws IOException
493
  {
494
    // Read first name character.
495
    int ch = read();
496
    if (ch != -1 && (ch == '_' || ch == '-' || (ch >= 'a' && ch <= 'z')
497
           || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')))
498
      {
499
        parseBuffer[tokenEnd] = (char) ch;
500
        tokenEnd++;
501
      }
502
    else
503
      throw new CSSLexicalException("Invalid name");
504
 
505
    // Read any number (at least one) of [_a-zA-Z0-9-] chars.
506
    ch = read();
507
    while (ch != -1 && (ch == '_' || ch == '-' || (ch >= 'a' && ch <= 'z')
508
           || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')))
509
      {
510
        parseBuffer[tokenEnd] = (char) ch;
511
        tokenEnd++;
512
        ch = read();
513
      }
514
 
515
    // Push back last read character since it doesn't belong to the IDENT.
516
    lookahead[0] = ch;
517
  }
518
 
519
  /**
520
   * Reads in a string.
521
   *
522
   * @throws IOException
523
   */
524
  private void readString()
525
    throws IOException
526
  {
527
    int ch1 = read();
528
    if (ch1 != -1 && (ch1 == '\'' || ch1 == '\"'))
529
      {
530
        parseBuffer[tokenEnd] = (char) ch1;
531
        tokenEnd++;
532
 
533
        // Read any number of chars until we hit another chc1 char.
534
        // Reject newlines, except if prefixed with \.
535
        int ch = read();
536
        while (ch != -1 && ch != ch1)
537
          {
538
            // Every non-newline and non-\ char should be ok.
539
            if (ch != '\n' && ch != '\r' && ch != '\f' && ch != '\\')
540
              {
541
                parseBuffer[tokenEnd] = (char) ch;
542
                tokenEnd++;
543
              }
544
            // Ok when followed by newline or as part of escape.
545
            else if (ch == '\\')
546
              {
547
                int ch2 = read();
548
                if (ch2 == '\n' || ch2 == '\r')
549
                  {
550
                    parseBuffer[tokenEnd] = (char) ch;
551
                    parseBuffer[tokenEnd + 1] = (char) ch2;
552
                    tokenEnd += 2;
553
                  }
554
                else
555
                  {
556
                    // Try to parse an escape.
557
                    lookahead[0] = ch;
558
                    lookahead[1] = ch2;
559
                    readEscape();
560
                  }
561
              }
562
            else
563
              throw new CSSLexicalException("Invalid string");
564
 
565
            ch = read();
566
          }
567
        if (ch != -1)
568
          {
569
            // Push the final char on the buffer.
570
            parseBuffer[tokenEnd] = (char) ch;
571
            tokenEnd++;
572
          }
573
        else
574
          throw new CSSLexicalException("Unterminated string");
575
      }
576
    else
577
      throw new CSSLexicalException("Invalid string");
578
  }
579
 
580
  /**
581
   * Reads a chunk of whitespace.
582
   *
583
   * @throws IOException
584
   */
585
  private void readWhitespace()
586
    throws IOException
587
  {
588
    int ch = read();
589
    while (ch != -1 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'
590
           || ch == '\f'))
591
      {
592
        parseBuffer[tokenEnd] = (char) ch;
593
        tokenEnd++;
594
        ch = read();
595
      }
596
    // Push back last character read.
597
    lookahead[0] = ch;
598
 
599
  }
600
 
601
  private void readURI()
602
    throws IOException
603
  {
604
    // FIXME: Implement.
605
  }
606
 
607
  /**
608
   * Reads a comment block.
609
   *
610
   * @throws IOException
611
   */
612
  private void readComment()
613
    throws IOException
614
  {
615
    // First we need a / and a *
616
    int ch = read();
617
    if (ch != -1 && ch == '/')
618
      {
619
        parseBuffer[tokenEnd] = (char) ch;
620
        tokenEnd++;
621
        ch = read();
622
        if (ch != -1 && ch == '*')
623
          {
624
            parseBuffer[tokenEnd] = (char) ch;
625
            tokenEnd++;
626
            ch = read();
627
            parseBuffer[tokenEnd] = (char) ch;
628
            tokenEnd++;
629
            boolean finished = false;
630
            int lastChar = ch;
631
            ch = read();
632
            while (! finished && ch != -1)
633
              {
634
                if (lastChar == '*' && ch == '/')
635
                  finished = true;
636
                parseBuffer[tokenEnd] = (char) ch;
637
                tokenEnd++;
638
                lastChar = ch;
639
                ch = read();
640
              }
641
          }
642
      }
643
    if (ch == -1)
644
      throw new CSSLexicalException("Unterminated comment");
645
 
646
    // Push back last character read.
647
    lookahead[0] = ch;
648
  }
649
 
650
  /**
651
   * Reads a number.
652
   *
653
   * @throws IOException
654
   */
655
  private void readNum()
656
    throws IOException
657
  {
658
    boolean hadDot = false;
659
    // First char must be number or .
660
    int ch = read();
661
    if (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'))
662
      {
663
        if (ch == '.')
664
          hadDot = true;
665
        parseBuffer[tokenEnd] = (char) ch;
666
        tokenEnd++;
667
        // Now read in any number of digits afterwards, and maybe one dot,
668
        // if we hadn't one already.
669
        ch = read();
670
        while (ch != -1 && ((ch >= '0' && ch <= '9')
671
                            || (ch == '.' && ! hadDot)))
672
          {
673
            if (ch == '.')
674
              hadDot = true;
675
            parseBuffer[tokenEnd] = (char) ch;
676
            tokenEnd++;
677
            ch = read();
678
          }
679
      }
680
    else
681
      throw new CSSLexicalException("Invalid number");
682
 
683
    // Check if we haven't accidentally finished with a dot.
684
    if (parseBuffer[tokenEnd - 1] == '.')
685
      throw new CSSLexicalException("Invalid number");
686
 
687
    // Push back last character read.
688
    lookahead[0] = ch;
689
  }
690
 
691
  /**
692
   * For testing, we read in the default.css in javax/swing/text/html
693
   *
694
   * @param args
695
   */
696
  public static void main(String[] args)
697
  {
698
    try
699
      {
700
        String name = "/javax/swing/text/html/default.css";
701
        InputStream in = CSSScanner.class.getResourceAsStream(name);
702
        BufferedInputStream bin = new BufferedInputStream(in);
703
        InputStreamReader r = new InputStreamReader(bin);
704
        CSSScanner s = new CSSScanner(r);
705
        int token;
706
        do
707
          {
708
            token = s.nextToken();
709
            System.out.println("token: " + token + ": "
710
                               + s.currentTokenString());
711
          } while (token != -1);
712
      }
713
    catch (IOException ex)
714
      {
715
        ex.printStackTrace();
716
      }
717
  }
718
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.