OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [gnu/] [java/] [util/] [regex/] [RESyntax.java] - Blame information for rev 791

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 769 jeremybenn
/* gnu/regexp/RESyntax.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
37
 
38
 
39
package gnu.java.util.regex;
40
import java.io.Serializable;
41
import java.util.BitSet;
42
 
43
/**
44
 * An RESyntax specifies the way a regular expression will be compiled.
45
 * This class provides a number of predefined useful constants for
46
 * emulating popular regular expression syntaxes.  Additionally the
47
 * user may construct his or her own syntax, using any combination of the
48
 * syntax bit constants.  The syntax is an optional argument to any of the
49
 * matching methods on class RE.
50
 *
51
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
52
 */
53
 
54
public final class RESyntax implements Serializable
55
{
56
  static final String DEFAULT_LINE_SEPARATOR =
57
    System.getProperty ("line.separator");
58
 
59
  private BitSet bits;
60
 
61
  // true for the constant defined syntaxes
62
  private boolean isFinal = false;
63
 
64
  private String lineSeparator = DEFAULT_LINE_SEPARATOR;
65
 
66
  // Values for constants are bit indexes
67
 
68
  /**
69
   * Syntax bit. Backslash is an escape character in lists.
70
   */
71
  public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
72
 
73
  /**
74
   * Syntax bit. Use \? instead of ? and \+ instead of +.
75
   */
76
  public static final int RE_BK_PLUS_QM = 1;
77
 
78
  /**
79
   * Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
80
   */
81
  public static final int RE_CHAR_CLASSES = 2;
82
 
83
  /**
84
   * Syntax bit. ^ and $ are special everywhere.
85
   * <B>Not implemented.</B>
86
   */
87
  public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
88
 
89
  /**
90
   * Syntax bit. Repetition operators are only special in valid positions.
91
   * <B>Not implemented.</B>
92
   */
93
  public static final int RE_CONTEXT_INDEP_OPS = 4;
94
 
95
  /**
96
   * Syntax bit. Repetition and alternation operators are invalid
97
   * at start and end of pattern and other places.
98
   * <B>Not implemented</B>.
99
   */
100
  public static final int RE_CONTEXT_INVALID_OPS = 5;
101
 
102
  /**
103
   * Syntax bit. Match-any-character operator (.) matches a newline.
104
   */
105
  public static final int RE_DOT_NEWLINE = 6;
106
 
107
  /**
108
   * Syntax bit. Match-any-character operator (.) does not match a null.
109
   */
110
  public static final int RE_DOT_NOT_NULL = 7;
111
 
112
  /**
113
   * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
114
   */
115
  public static final int RE_INTERVALS = 8;
116
 
117
  /**
118
   * Syntax bit. No alternation (|), match one-or-more (+), or
119
   * match zero-or-one (?) operators.
120
   */
121
  public static final int RE_LIMITED_OPS = 9;
122
 
123
  /**
124
   * Syntax bit. Newline is an alternation operator.
125
   */
126
  public static final int RE_NEWLINE_ALT = 10;  // impl.
127
 
128
  /**
129
   * Syntax bit. Intervals use { } instead of \{ \}
130
   */
131
  public static final int RE_NO_BK_BRACES = 11;
132
 
133
  /**
134
   * Syntax bit. Grouping uses ( ) instead of \( \).
135
   */
136
  public static final int RE_NO_BK_PARENS = 12;
137
 
138
  /**
139
   * Syntax bit. Backreferences not allowed.
140
   */
141
  public static final int RE_NO_BK_REFS = 13;
142
 
143
  /**
144
   * Syntax bit. Alternation uses | instead of \|
145
   */
146
  public static final int RE_NO_BK_VBAR = 14;
147
 
148
  /**
149
   * Syntax bit. <B>Not implemented</B>.
150
   */
151
  public static final int RE_NO_EMPTY_RANGES = 15;
152
 
153
  /**
154
   * Syntax bit. An unmatched right parenthesis (')' or '\)', depending
155
   * on RE_NO_BK_PARENS) will throw an exception when compiling.
156
   */
157
  public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;
158
 
159
  /**
160
   * Syntax bit. <B>Not implemented.</B>
161
   */
162
  public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
163
 
164
  /**
165
   * Syntax bit.  Stingy matching is allowed (+?, *?, ??, {x,y}?).
166
   */
167
  public static final int RE_STINGY_OPS = 18;
168
 
169
  /**
170
   * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
171
   */
172
  public static final int RE_CHAR_CLASS_ESCAPES = 19;
173
 
174
  /**
175
   * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
176
   */
177
  public static final int RE_PURE_GROUPING = 20;
178
 
179
  /**
180
   * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
181
   * to the text following the current position without consuming that text.
182
   */
183
  public static final int RE_LOOKAHEAD = 21;
184
 
185
  /**
186
   * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
187
   */
188
  public static final int RE_STRING_ANCHORS = 22;
189
 
190
  /**
191
   * Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
192
   */
193
  public static final int RE_COMMENTS = 23;
194
 
195
  /**
196
   * Syntax bit. Allow character class escapes within lists, as in Perl5.
197
   */
198
  public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
199
 
200
  /**
201
   * Syntax bit.  Possessive matching is allowed (++, *+, ?+, {x,y}+).
202
   */
203
  public static final int RE_POSSESSIVE_OPS = 25;
204
 
205
  /**
206
   * Syntax bit.  Allow embedded flags, (?is-x), as in Perl5.
207
   */
208
  public static final int RE_EMBEDDED_FLAGS = 26;
209
 
210
  /**
211
   * Syntax bit.  Allow octal char (\0377), as in Perl5.
212
   */
213
  public static final int RE_OCTAL_CHAR = 27;
214
 
215
  /**
216
   * Syntax bit.  Allow hex char (\x1b), as in Perl5.
217
   */
218
  public static final int RE_HEX_CHAR = 28;
219
 
220
  /**
221
   * Syntax bit.  Allow Unicode char (\u1234), as in Java 1.4.
222
   */
223
  public static final int RE_UNICODE_CHAR = 29;
224
 
225
  /**
226
   * Syntax bit.  Allow named property (\p{P}, \P{p}), as in Perl5.
227
   */
228
  public static final int RE_NAMED_PROPERTY = 30;
229
 
230
  /**
231
   * Syntax bit.  Allow nested characterclass ([a-z&&[^p-r]]), as in Java 1.4.
232
   */
233
  public static final int RE_NESTED_CHARCLASS = 31;
234
 
235
  private static final int BIT_TOTAL = 32;
236
 
237
  /**
238
   * Predefined syntax.
239
   * Emulates regular expression support in the awk utility.
240
   */
241
  public static final RESyntax RE_SYNTAX_AWK;
242
 
243
  /**
244
   * Predefined syntax.
245
   * Emulates regular expression support in the ed utility.
246
   */
247
  public static final RESyntax RE_SYNTAX_ED;
248
 
249
  /**
250
   * Predefined syntax.
251
   * Emulates regular expression support in the egrep utility.
252
   */
253
  public static final RESyntax RE_SYNTAX_EGREP;
254
 
255
  /**
256
   * Predefined syntax.
257
   * Emulates regular expression support in the GNU Emacs editor.
258
   */
259
  public static final RESyntax RE_SYNTAX_EMACS;
260
 
261
  /**
262
   * Predefined syntax.
263
   * Emulates regular expression support in the grep utility.
264
   */
265
  public static final RESyntax RE_SYNTAX_GREP;
266
 
267
  /**
268
   * Predefined syntax.
269
   * Emulates regular expression support in the POSIX awk specification.
270
   */
271
  public static final RESyntax RE_SYNTAX_POSIX_AWK;
272
 
273
  /**
274
   * Predefined syntax.
275
   * Emulates POSIX basic regular expression support.
276
   */
277
  public static final RESyntax RE_SYNTAX_POSIX_BASIC;
278
 
279
  /**
280
   * Predefined syntax.
281
   * Emulates regular expression support in the POSIX egrep specification.
282
   */
283
  public static final RESyntax RE_SYNTAX_POSIX_EGREP;
284
 
285
  /**
286
   * Predefined syntax.
287
   * Emulates POSIX extended regular expression support.
288
   */
289
  public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;
290
 
291
  /**
292
   * Predefined syntax.
293
   * Emulates POSIX basic minimal regular expressions.
294
   */
295
  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;
296
 
297
  /**
298
   * Predefined syntax.
299
   * Emulates POSIX extended minimal regular expressions.
300
   */
301
  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;
302
 
303
  /**
304
   * Predefined syntax.
305
   * Emulates regular expression support in the sed utility.
306
   */
307
  public static final RESyntax RE_SYNTAX_SED;
308
 
309
  /**
310
   * Predefined syntax.
311
   * Emulates regular expression support in Larry Wall's perl, version 4,
312
   */
313
  public static final RESyntax RE_SYNTAX_PERL4;
314
 
315
  /**
316
   * Predefined syntax.
317
   * Emulates regular expression support in Larry Wall's perl, version 4,
318
   * using single line mode (/s modifier).
319
   */
320
  public static final RESyntax RE_SYNTAX_PERL4_S;       // single line mode (/s)
321
 
322
  /**
323
   * Predefined syntax.
324
   * Emulates regular expression support in Larry Wall's perl, version 5.
325
   */
326
  public static final RESyntax RE_SYNTAX_PERL5;
327
 
328
  /**
329
   * Predefined syntax.
330
   * Emulates regular expression support in Larry Wall's perl, version 5,
331
   * using single line mode (/s modifier).
332
   */
333
  public static final RESyntax RE_SYNTAX_PERL5_S;
334
 
335
    /**
336
     * Predefined syntax.
337
     * Emulates regular expression support in Java 1.4's java.util.regex
338
     * package.
339
     */
340
  public static final RESyntax RE_SYNTAX_JAVA_1_4;
341
 
342
  static
343
  {
344
    // Define syntaxes
345
 
346
    RE_SYNTAX_EMACS = new RESyntax ().makeFinal ();
347
 
348
    RESyntax RE_SYNTAX_POSIX_COMMON =
349
      new RESyntax ().set (RE_CHAR_CLASSES).set (RE_DOT_NEWLINE).
350
      set (RE_DOT_NOT_NULL).set (RE_INTERVALS).set (RE_NO_EMPTY_RANGES).
351
      makeFinal ();
352
 
353
      RE_SYNTAX_POSIX_BASIC =
354
      new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_BK_PLUS_QM).makeFinal ();
355
 
356
      RE_SYNTAX_POSIX_EXTENDED =
357
      new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
358
      set (RE_CONTEXT_INDEP_OPS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).
359
      set (RE_NO_BK_VBAR).set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
360
 
361
      RE_SYNTAX_AWK =
362
      new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).
363
      set (RE_DOT_NOT_NULL).set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).
364
      set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).
365
      set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
366
 
367
      RE_SYNTAX_POSIX_AWK =
368
      new RESyntax (RE_SYNTAX_POSIX_EXTENDED).
369
      set (RE_BACKSLASH_ESCAPE_IN_LISTS).makeFinal ();
370
 
371
      RE_SYNTAX_GREP =
372
      new RESyntax ().set (RE_BK_PLUS_QM).set (RE_CHAR_CLASSES).
373
      set (RE_HAT_LISTS_NOT_NEWLINE).set (RE_INTERVALS).set (RE_NEWLINE_ALT).
374
      makeFinal ();
375
 
376
      RE_SYNTAX_EGREP =
377
      new RESyntax ().set (RE_CHAR_CLASSES).set (RE_CONTEXT_INDEP_ANCHORS).
378
      set (RE_CONTEXT_INDEP_OPS).set (RE_HAT_LISTS_NOT_NEWLINE).
379
      set (RE_NEWLINE_ALT).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).
380
      makeFinal ();
381
 
382
      RE_SYNTAX_POSIX_EGREP =
383
      new RESyntax (RE_SYNTAX_EGREP).set (RE_INTERVALS).set (RE_NO_BK_BRACES).
384
      makeFinal ();
385
 
386
    /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
387
 
388
      RE_SYNTAX_ED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal ();
389
 
390
      RE_SYNTAX_SED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal ();
391
 
392
      RE_SYNTAX_POSIX_MINIMAL_BASIC =
393
      new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_LIMITED_OPS).makeFinal ();
394
 
395
    /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
396
       replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
397
 
398
      RE_SYNTAX_POSIX_MINIMAL_EXTENDED =
399
      new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
400
      set (RE_CONTEXT_INVALID_OPS).set (RE_NO_BK_BRACES).
401
      set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).set (RE_NO_BK_VBAR).
402
      set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
403
 
404
    /* There is no official Perl spec, but here's a "best guess" */
405
 
406
      RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS)    // except for '{', apparently
407
      .set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES)  // \d,\D,\w,\W,\s,\S
408
      .makeFinal ();
409
 
410
      RE_SYNTAX_PERL4_S =
411
      new RESyntax (RE_SYNTAX_PERL4).set (RE_DOT_NEWLINE).makeFinal ();
412
 
413
      RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING)   // (?:)
414
      .set (RE_STINGY_OPS)      // *?,??,+?,{}?
415
      .set (RE_LOOKAHEAD)       // (?=)(?!)
416
      .set (RE_STRING_ANCHORS)  // \A,\Z
417
      .set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within []
418
      .set (RE_COMMENTS)        // (?#)
419
      .set (RE_EMBEDDED_FLAGS)  // (?imsx-imsx)
420
      .set (RE_OCTAL_CHAR)      // \0377
421
      .set (RE_HEX_CHAR)        // \x1b
422
      .set (RE_NAMED_PROPERTY)  // \p{prop}, \P{prop}
423
      .makeFinal ();
424
 
425
      RE_SYNTAX_PERL5_S =
426
      new RESyntax (RE_SYNTAX_PERL5).set (RE_DOT_NEWLINE).makeFinal ();
427
 
428
      RE_SYNTAX_JAVA_1_4 = new RESyntax (RE_SYNTAX_PERL5)
429
      // XXX
430
      .set (RE_POSSESSIVE_OPS)  // *+,?+,++,{}+
431
      .set (RE_UNICODE_CHAR)    // \u1234
432
      .set (RE_NESTED_CHARCLASS)        // [a-z&&[^p-r]]
433
      .makeFinal ();
434
  }
435
 
436
  /**
437
   * Construct a new syntax object with all bits turned off.
438
   * This is equivalent to RE_SYNTAX_EMACS.
439
   */
440
  public RESyntax ()
441
  {
442
    bits = new BitSet (BIT_TOTAL);
443
  }
444
 
445
    /**
446
     * Called internally when constructing predefined syntaxes
447
     * so their interpretation cannot vary.  Conceivably useful
448
     * for your syntaxes as well.  Causes IllegalAccessError to
449
     * be thrown if any attempt to modify the syntax is made.
450
     *
451
     * @return this object for convenient chaining
452
     */
453
  public RESyntax makeFinal ()
454
  {
455
    isFinal = true;
456
    return this;
457
  }
458
 
459
  /**
460
   * Construct a new syntax object with all bits set the same
461
   * as the other syntax.
462
   */
463
  public RESyntax (RESyntax other)
464
  {
465
    bits = (BitSet) other.bits.clone ();
466
  }
467
 
468
  /**
469
   * Check if a given bit is set in this syntax.
470
   */
471
  public boolean get (int index)
472
  {
473
    return bits.get (index);
474
  }
475
 
476
  /**
477
   * Set a given bit in this syntax.
478
   *
479
   * @param index the constant (RESyntax.RE_xxx) bit to set.
480
   * @return a reference to this object for easy chaining.
481
   */
482
  public RESyntax set (int index)
483
  {
484
    if (isFinal)
485
      throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
486
    bits.set (index);
487
    return this;
488
  }
489
 
490
  /**
491
   * Clear a given bit in this syntax.
492
   *
493
   * @param index the constant (RESyntax.RE_xxx) bit to clear.
494
   * @return a reference to this object for easy chaining.
495
   */
496
  public RESyntax clear (int index)
497
  {
498
    if (isFinal)
499
      throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
500
    bits.clear (index);
501
    return this;
502
  }
503
 
504
    /**
505
     * Changes the line separator string for regular expressions
506
     * created using this RESyntax.  The default separator is the
507
     * value returned by the system property "line.separator", which
508
     * should be correct when reading platform-specific files from a
509
     * filesystem.  However, many programs may collect input from
510
     * sources where the line separator is differently specified (for
511
     * example, in the applet environment, the text box widget
512
     * interprets line breaks as single-character newlines,
513
     * regardless of the host platform.
514
     *
515
     * Note that setting the line separator to a character or
516
     * characters that have specific meaning within the current syntax
517
     * can cause unexpected chronosynclastic infundibula.
518
     *
519
     * @return this object for convenient chaining
520
     */
521
  public RESyntax setLineSeparator (String aSeparator)
522
  {
523
    if (isFinal)
524
      throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
525
    lineSeparator = aSeparator;
526
    return this;
527
  }
528
 
529
    /**
530
     * Returns the currently active line separator string.  The default
531
     * is the platform-dependent system property "line.separator".
532
     */
533
  public String getLineSeparator ()
534
  {
535
    return lineSeparator;
536
  }
537
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.