1 |
769 |
jeremybenn |
/* gnu/java/util/regex/RESyntax.java
|
2 |
|
|
Copyright (C) 2006 Free Software Foundation, Inc.
|
3 |
|
|
|
4 |
|
|
This file is part of GNU Classpath.
|
5 |
|
|
|
6 |
|
|
GNU Classpath is free software; you can redistribute it and/or modify
|
7 |
|
|
it under the terms of the GNU General Public License as published by
|
8 |
|
|
the Free Software Foundation; either version 2, or (at your option)
|
9 |
|
|
any later version.
|
10 |
|
|
|
11 |
|
|
GNU Classpath is distributed in the hope that it will be useful, but
|
12 |
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
General Public License for more details.
|
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU General Public License
|
17 |
|
|
along with GNU Classpath; see the file COPYING. If not, write to the
|
18 |
|
|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 |
|
|
02110-1301 USA.
|
20 |
|
|
|
21 |
|
|
Linking this library statically or dynamically with other modules is
|
22 |
|
|
making a combined work based on this library. Thus, the terms and
|
23 |
|
|
conditions of the GNU General Public License cover the whole
|
24 |
|
|
combination.
|
25 |
|
|
|
26 |
|
|
As a special exception, the copyright holders of this library give you
|
27 |
|
|
permission to link this library with independent modules to produce an
|
28 |
|
|
executable, regardless of the license terms of these independent
|
29 |
|
|
modules, and to copy and distribute the resulting executable under
|
30 |
|
|
terms of your choice, provided that you also meet, for each linked
|
31 |
|
|
independent module, the terms and conditions of the license of that
|
32 |
|
|
module. An independent module is a module which is not derived from
|
33 |
|
|
or based on this library. If you modify this library, you may extend
|
34 |
|
|
this exception to your version of the library, but you are not
|
35 |
|
|
obligated to do so. If you do not wish to do so, delete this
|
36 |
|
|
exception statement from your version. */
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
package gnu.java.util.regex;
|
40 |
|
|
import java.io.Serializable;
|
41 |
|
|
import java.util.BitSet;
|
42 |
|
|
|
43 |
|
|
/**
|
44 |
|
|
* An RESyntax specifies the way a regular expression will be compiled.
|
45 |
|
|
* This class provides a number of predefined useful constants for
|
46 |
|
|
* emulating popular regular expression syntaxes. Additionally the
|
47 |
|
|
* user may construct his or her own syntax, using any combination of the
|
48 |
|
|
* syntax bit constants. The syntax is an optional argument to any of the
|
49 |
|
|
* matching methods on class RE.
|
50 |
|
|
*
|
51 |
|
|
* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
|
52 |
|
|
*/
|
53 |
|
|
|
54 |
|
|
public final class RESyntax implements Serializable
|
55 |
|
|
{
|
56 |
|
|
static final String DEFAULT_LINE_SEPARATOR =
|
57 |
|
|
System.getProperty ("line.separator");
|
58 |
|
|
|
59 |
|
|
private BitSet bits;
|
60 |
|
|
|
61 |
|
|
// true for the constant defined syntaxes
|
62 |
|
|
private boolean isFinal = false;
|
63 |
|
|
|
64 |
|
|
private String lineSeparator = DEFAULT_LINE_SEPARATOR;
|
65 |
|
|
|
66 |
|
|
// Values for constants are bit indexes
|
67 |
|
|
|
68 |
|
|
/**
|
69 |
|
|
* Syntax bit. Backslash is an escape character in lists.
|
70 |
|
|
*/
|
71 |
|
|
public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
|
72 |
|
|
|
73 |
|
|
/**
|
74 |
|
|
* Syntax bit. Use \? instead of ? and \+ instead of +.
|
75 |
|
|
*/
|
76 |
|
|
public static final int RE_BK_PLUS_QM = 1;
|
77 |
|
|
|
78 |
|
|
/**
|
79 |
|
|
* Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
|
80 |
|
|
*/
|
81 |
|
|
public static final int RE_CHAR_CLASSES = 2;
|
82 |
|
|
|
83 |
|
|
/**
|
84 |
|
|
* Syntax bit. ^ and $ are special everywhere.
|
85 |
|
|
* <B>Not implemented.</B>
|
86 |
|
|
*/
|
87 |
|
|
public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
|
88 |
|
|
|
89 |
|
|
/**
|
90 |
|
|
* Syntax bit. Repetition operators are only special in valid positions.
|
91 |
|
|
* <B>Not implemented.</B>
|
92 |
|
|
*/
|
93 |
|
|
public static final int RE_CONTEXT_INDEP_OPS = 4;
|
94 |
|
|
|
95 |
|
|
/**
|
96 |
|
|
* Syntax bit. Repetition and alternation operators are invalid
|
97 |
|
|
* at start and end of pattern and other places.
|
98 |
|
|
* <B>Not implemented</B>.
|
99 |
|
|
*/
|
100 |
|
|
public static final int RE_CONTEXT_INVALID_OPS = 5;
|
101 |
|
|
|
102 |
|
|
/**
|
103 |
|
|
* Syntax bit. Match-any-character operator (.) matches a newline.
|
104 |
|
|
*/
|
105 |
|
|
public static final int RE_DOT_NEWLINE = 6;
|
106 |
|
|
|
107 |
|
|
/**
|
108 |
|
|
* Syntax bit. Match-any-character operator (.) does not match a null.
|
109 |
|
|
*/
|
110 |
|
|
public static final int RE_DOT_NOT_NULL = 7;
|
111 |
|
|
|
112 |
|
|
/**
|
113 |
|
|
* Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
|
114 |
|
|
*/
|
115 |
|
|
public static final int RE_INTERVALS = 8;
|
116 |
|
|
|
117 |
|
|
/**
|
118 |
|
|
* Syntax bit. No alternation (|), match one-or-more (+), or
|
119 |
|
|
* match zero-or-one (?) operators.
|
120 |
|
|
*/
|
121 |
|
|
public static final int RE_LIMITED_OPS = 9;
|
122 |
|
|
|
123 |
|
|
/**
|
124 |
|
|
* Syntax bit. Newline is an alternation operator.
|
125 |
|
|
*/
|
126 |
|
|
public static final int RE_NEWLINE_ALT = 10; // impl.
|
127 |
|
|
|
128 |
|
|
/**
|
129 |
|
|
* Syntax bit. Intervals use { } instead of \{ \}
|
130 |
|
|
*/
|
131 |
|
|
public static final int RE_NO_BK_BRACES = 11;
|
132 |
|
|
|
133 |
|
|
/**
|
134 |
|
|
* Syntax bit. Grouping uses ( ) instead of \( \).
|
135 |
|
|
*/
|
136 |
|
|
public static final int RE_NO_BK_PARENS = 12;
|
137 |
|
|
|
138 |
|
|
/**
|
139 |
|
|
* Syntax bit. Backreferences not allowed.
|
140 |
|
|
*/
|
141 |
|
|
public static final int RE_NO_BK_REFS = 13;
|
142 |
|
|
|
143 |
|
|
/**
|
144 |
|
|
* Syntax bit. Alternation uses | instead of \|
|
145 |
|
|
*/
|
146 |
|
|
public static final int RE_NO_BK_VBAR = 14;
|
147 |
|
|
|
148 |
|
|
/**
|
149 |
|
|
* Syntax bit. <B>Not implemented</B>.
|
150 |
|
|
*/
|
151 |
|
|
public static final int RE_NO_EMPTY_RANGES = 15;
|
152 |
|
|
|
153 |
|
|
/**
|
154 |
|
|
* Syntax bit. An unmatched right parenthesis (')' or '\)', depending
|
155 |
|
|
* on RE_NO_BK_PARENS) will throw an exception when compiling.
|
156 |
|
|
*/
|
157 |
|
|
public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;
|
158 |
|
|
|
159 |
|
|
/**
|
160 |
|
|
* Syntax bit. <B>Not implemented.</B>
|
161 |
|
|
*/
|
162 |
|
|
public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
|
163 |
|
|
|
164 |
|
|
/**
|
165 |
|
|
* Syntax bit. Stingy matching is allowed (+?, *?, ??, {x,y}?).
|
166 |
|
|
*/
|
167 |
|
|
public static final int RE_STINGY_OPS = 18;
|
168 |
|
|
|
169 |
|
|
/**
|
170 |
|
|
* Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
|
171 |
|
|
*/
|
172 |
|
|
public static final int RE_CHAR_CLASS_ESCAPES = 19;
|
173 |
|
|
|
174 |
|
|
/**
|
175 |
|
|
* Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
|
176 |
|
|
*/
|
177 |
|
|
public static final int RE_PURE_GROUPING = 20;
|
178 |
|
|
|
179 |
|
|
/**
|
180 |
|
|
* Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
|
181 |
|
|
* to the text following the current position without consuming that text.
|
182 |
|
|
*/
|
183 |
|
|
public static final int RE_LOOKAHEAD = 21;
|
184 |
|
|
|
185 |
|
|
/**
|
186 |
|
|
* Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
|
187 |
|
|
*/
|
188 |
|
|
public static final int RE_STRING_ANCHORS = 22;
|
189 |
|
|
|
190 |
|
|
/**
|
191 |
|
|
* Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
|
192 |
|
|
*/
|
193 |
|
|
public static final int RE_COMMENTS = 23;
|
194 |
|
|
|
195 |
|
|
/**
|
196 |
|
|
* Syntax bit. Allow character class escapes within lists, as in Perl5.
|
197 |
|
|
*/
|
198 |
|
|
public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
|
199 |
|
|
|
200 |
|
|
/**
|
201 |
|
|
* Syntax bit. Possessive matching is allowed (++, *+, ?+, {x,y}+).
|
202 |
|
|
*/
|
203 |
|
|
public static final int RE_POSSESSIVE_OPS = 25;
|
204 |
|
|
|
205 |
|
|
/**
|
206 |
|
|
* Syntax bit. Allow embedded flags, (?is-x), as in Perl5.
|
207 |
|
|
*/
|
208 |
|
|
public static final int RE_EMBEDDED_FLAGS = 26;
|
209 |
|
|
|
210 |
|
|
/**
|
211 |
|
|
* Syntax bit. Allow octal char (\0377), as in Perl5.
|
212 |
|
|
*/
|
213 |
|
|
public static final int RE_OCTAL_CHAR = 27;
|
214 |
|
|
|
215 |
|
|
/**
|
216 |
|
|
* Syntax bit. Allow hex char (\x1b), as in Perl5.
|
217 |
|
|
*/
|
218 |
|
|
public static final int RE_HEX_CHAR = 28;
|
219 |
|
|
|
220 |
|
|
/**
|
221 |
|
|
* Syntax bit. Allow Unicode char (\u1234), as in Java 1.4.
|
222 |
|
|
*/
|
223 |
|
|
public static final int RE_UNICODE_CHAR = 29;
|
224 |
|
|
|
225 |
|
|
/**
|
226 |
|
|
* Syntax bit. Allow named property (\p{P}, \P{p}), as in Perl5.
|
227 |
|
|
*/
|
228 |
|
|
public static final int RE_NAMED_PROPERTY = 30;
|
229 |
|
|
|
230 |
|
|
/**
|
231 |
|
|
* Syntax bit. Allow nested characterclass ([a-z&&[^p-r]]), as in Java 1.4.
|
232 |
|
|
*/
|
233 |
|
|
public static final int RE_NESTED_CHARCLASS = 31;
|
234 |
|
|
|
235 |
|
|
private static final int BIT_TOTAL = 32;
|
236 |
|
|
|
237 |
|
|
/**
|
238 |
|
|
* Predefined syntax.
|
239 |
|
|
* Emulates regular expression support in the awk utility.
|
240 |
|
|
*/
|
241 |
|
|
public static final RESyntax RE_SYNTAX_AWK;
|
242 |
|
|
|
243 |
|
|
/**
|
244 |
|
|
* Predefined syntax.
|
245 |
|
|
* Emulates regular expression support in the ed utility.
|
246 |
|
|
*/
|
247 |
|
|
public static final RESyntax RE_SYNTAX_ED;
|
248 |
|
|
|
249 |
|
|
/**
|
250 |
|
|
* Predefined syntax.
|
251 |
|
|
* Emulates regular expression support in the egrep utility.
|
252 |
|
|
*/
|
253 |
|
|
public static final RESyntax RE_SYNTAX_EGREP;
|
254 |
|
|
|
255 |
|
|
/**
|
256 |
|
|
* Predefined syntax.
|
257 |
|
|
* Emulates regular expression support in the GNU Emacs editor.
|
258 |
|
|
*/
|
259 |
|
|
public static final RESyntax RE_SYNTAX_EMACS;
|
260 |
|
|
|
261 |
|
|
/**
|
262 |
|
|
* Predefined syntax.
|
263 |
|
|
* Emulates regular expression support in the grep utility.
|
264 |
|
|
*/
|
265 |
|
|
public static final RESyntax RE_SYNTAX_GREP;
|
266 |
|
|
|
267 |
|
|
/**
|
268 |
|
|
* Predefined syntax.
|
269 |
|
|
* Emulates regular expression support in the POSIX awk specification.
|
270 |
|
|
*/
|
271 |
|
|
public static final RESyntax RE_SYNTAX_POSIX_AWK;
|
272 |
|
|
|
273 |
|
|
/**
|
274 |
|
|
* Predefined syntax.
|
275 |
|
|
* Emulates POSIX basic regular expression support.
|
276 |
|
|
*/
|
277 |
|
|
public static final RESyntax RE_SYNTAX_POSIX_BASIC;
|
278 |
|
|
|
279 |
|
|
/**
|
280 |
|
|
* Predefined syntax.
|
281 |
|
|
* Emulates regular expression support in the POSIX egrep specification.
|
282 |
|
|
*/
|
283 |
|
|
public static final RESyntax RE_SYNTAX_POSIX_EGREP;
|
284 |
|
|
|
285 |
|
|
/**
|
286 |
|
|
* Predefined syntax.
|
287 |
|
|
* Emulates POSIX extended regular expression support.
|
288 |
|
|
*/
|
289 |
|
|
public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;
|
290 |
|
|
|
291 |
|
|
/**
|
292 |
|
|
* Predefined syntax.
|
293 |
|
|
* Emulates POSIX basic minimal regular expressions.
|
294 |
|
|
*/
|
295 |
|
|
public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;
|
296 |
|
|
|
297 |
|
|
/**
|
298 |
|
|
* Predefined syntax.
|
299 |
|
|
* Emulates POSIX extended minimal regular expressions.
|
300 |
|
|
*/
|
301 |
|
|
public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;
|
302 |
|
|
|
303 |
|
|
/**
|
304 |
|
|
* Predefined syntax.
|
305 |
|
|
* Emulates regular expression support in the sed utility.
|
306 |
|
|
*/
|
307 |
|
|
public static final RESyntax RE_SYNTAX_SED;
|
308 |
|
|
|
309 |
|
|
/**
|
310 |
|
|
* Predefined syntax.
|
311 |
|
|
* Emulates regular expression support in Larry Wall's perl, version 4,
|
312 |
|
|
*/
|
313 |
|
|
public static final RESyntax RE_SYNTAX_PERL4;
|
314 |
|
|
|
315 |
|
|
/**
|
316 |
|
|
* Predefined syntax.
|
317 |
|
|
* Emulates regular expression support in Larry Wall's perl, version 4,
|
318 |
|
|
* using single line mode (/s modifier).
|
319 |
|
|
*/
|
320 |
|
|
public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
|
321 |
|
|
|
322 |
|
|
/**
|
323 |
|
|
* Predefined syntax.
|
324 |
|
|
* Emulates regular expression support in Larry Wall's perl, version 5.
|
325 |
|
|
*/
|
326 |
|
|
public static final RESyntax RE_SYNTAX_PERL5;
|
327 |
|
|
|
328 |
|
|
/**
|
329 |
|
|
* Predefined syntax.
|
330 |
|
|
* Emulates regular expression support in Larry Wall's perl, version 5,
|
331 |
|
|
* using single line mode (/s modifier).
|
332 |
|
|
*/
|
333 |
|
|
public static final RESyntax RE_SYNTAX_PERL5_S;
|
334 |
|
|
|
335 |
|
|
/**
|
336 |
|
|
* Predefined syntax.
|
337 |
|
|
* Emulates regular expression support in Java 1.4's java.util.regex
|
338 |
|
|
* package.
|
339 |
|
|
*/
|
340 |
|
|
public static final RESyntax RE_SYNTAX_JAVA_1_4;
|
341 |
|
|
|
342 |
|
|
static
|
343 |
|
|
{
|
344 |
|
|
// Define syntaxes
|
345 |
|
|
|
346 |
|
|
RE_SYNTAX_EMACS = new RESyntax ().makeFinal ();
|
347 |
|
|
|
348 |
|
|
RESyntax RE_SYNTAX_POSIX_COMMON =
|
349 |
|
|
new RESyntax ().set (RE_CHAR_CLASSES).set (RE_DOT_NEWLINE).
|
350 |
|
|
set (RE_DOT_NOT_NULL).set (RE_INTERVALS).set (RE_NO_EMPTY_RANGES).
|
351 |
|
|
makeFinal ();
|
352 |
|
|
|
353 |
|
|
RE_SYNTAX_POSIX_BASIC =
|
354 |
|
|
new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_BK_PLUS_QM).makeFinal ();
|
355 |
|
|
|
356 |
|
|
RE_SYNTAX_POSIX_EXTENDED =
|
357 |
|
|
new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
|
358 |
|
|
set (RE_CONTEXT_INDEP_OPS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).
|
359 |
|
|
set (RE_NO_BK_VBAR).set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
|
360 |
|
|
|
361 |
|
|
RE_SYNTAX_AWK =
|
362 |
|
|
new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).
|
363 |
|
|
set (RE_DOT_NOT_NULL).set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).
|
364 |
|
|
set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).
|
365 |
|
|
set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
|
366 |
|
|
|
367 |
|
|
RE_SYNTAX_POSIX_AWK =
|
368 |
|
|
new RESyntax (RE_SYNTAX_POSIX_EXTENDED).
|
369 |
|
|
set (RE_BACKSLASH_ESCAPE_IN_LISTS).makeFinal ();
|
370 |
|
|
|
371 |
|
|
RE_SYNTAX_GREP =
|
372 |
|
|
new RESyntax ().set (RE_BK_PLUS_QM).set (RE_CHAR_CLASSES).
|
373 |
|
|
set (RE_HAT_LISTS_NOT_NEWLINE).set (RE_INTERVALS).set (RE_NEWLINE_ALT).
|
374 |
|
|
makeFinal ();
|
375 |
|
|
|
376 |
|
|
RE_SYNTAX_EGREP =
|
377 |
|
|
new RESyntax ().set (RE_CHAR_CLASSES).set (RE_CONTEXT_INDEP_ANCHORS).
|
378 |
|
|
set (RE_CONTEXT_INDEP_OPS).set (RE_HAT_LISTS_NOT_NEWLINE).
|
379 |
|
|
set (RE_NEWLINE_ALT).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).
|
380 |
|
|
makeFinal ();
|
381 |
|
|
|
382 |
|
|
RE_SYNTAX_POSIX_EGREP =
|
383 |
|
|
new RESyntax (RE_SYNTAX_EGREP).set (RE_INTERVALS).set (RE_NO_BK_BRACES).
|
384 |
|
|
makeFinal ();
|
385 |
|
|
|
386 |
|
|
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
|
387 |
|
|
|
388 |
|
|
RE_SYNTAX_ED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal ();
|
389 |
|
|
|
390 |
|
|
RE_SYNTAX_SED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal ();
|
391 |
|
|
|
392 |
|
|
RE_SYNTAX_POSIX_MINIMAL_BASIC =
|
393 |
|
|
new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_LIMITED_OPS).makeFinal ();
|
394 |
|
|
|
395 |
|
|
/* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
|
396 |
|
|
replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
|
397 |
|
|
|
398 |
|
|
RE_SYNTAX_POSIX_MINIMAL_EXTENDED =
|
399 |
|
|
new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
|
400 |
|
|
set (RE_CONTEXT_INVALID_OPS).set (RE_NO_BK_BRACES).
|
401 |
|
|
set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).set (RE_NO_BK_VBAR).
|
402 |
|
|
set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
|
403 |
|
|
|
404 |
|
|
/* There is no official Perl spec, but here's a "best guess" */
|
405 |
|
|
|
406 |
|
|
RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS) // except for '{', apparently
|
407 |
|
|
.set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S
|
408 |
|
|
.makeFinal ();
|
409 |
|
|
|
410 |
|
|
RE_SYNTAX_PERL4_S =
|
411 |
|
|
new RESyntax (RE_SYNTAX_PERL4).set (RE_DOT_NEWLINE).makeFinal ();
|
412 |
|
|
|
413 |
|
|
RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING) // (?:)
|
414 |
|
|
.set (RE_STINGY_OPS) // *?,??,+?,{}?
|
415 |
|
|
.set (RE_LOOKAHEAD) // (?=)(?!)
|
416 |
|
|
.set (RE_STRING_ANCHORS) // \A,\Z
|
417 |
|
|
.set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within []
|
418 |
|
|
.set (RE_COMMENTS) // (?#)
|
419 |
|
|
.set (RE_EMBEDDED_FLAGS) // (?imsx-imsx)
|
420 |
|
|
.set (RE_OCTAL_CHAR) // \0377
|
421 |
|
|
.set (RE_HEX_CHAR) // \x1b
|
422 |
|
|
.set (RE_NAMED_PROPERTY) // \p{prop}, \P{prop}
|
423 |
|
|
.makeFinal ();
|
424 |
|
|
|
425 |
|
|
RE_SYNTAX_PERL5_S =
|
426 |
|
|
new RESyntax (RE_SYNTAX_PERL5).set (RE_DOT_NEWLINE).makeFinal ();
|
427 |
|
|
|
428 |
|
|
RE_SYNTAX_JAVA_1_4 = new RESyntax (RE_SYNTAX_PERL5)
|
429 |
|
|
// XXX
|
430 |
|
|
.set (RE_POSSESSIVE_OPS) // *+,?+,++,{}+
|
431 |
|
|
.set (RE_UNICODE_CHAR) // \u1234
|
432 |
|
|
.set (RE_NESTED_CHARCLASS) // [a-z&&[^p-r]]
|
433 |
|
|
.makeFinal ();
|
434 |
|
|
}
|
435 |
|
|
|
436 |
|
|
/**
|
437 |
|
|
* Construct a new syntax object with all bits turned off.
|
438 |
|
|
* This is equivalent to RE_SYNTAX_EMACS.
|
439 |
|
|
*/
|
440 |
|
|
public RESyntax ()
|
441 |
|
|
{
|
442 |
|
|
bits = new BitSet (BIT_TOTAL);
|
443 |
|
|
}
|
444 |
|
|
|
445 |
|
|
/**
|
446 |
|
|
* Called internally when constructing predefined syntaxes
|
447 |
|
|
* so their interpretation cannot vary. Conceivably useful
|
448 |
|
|
* for your syntaxes as well. Causes IllegalAccessError to
|
449 |
|
|
* be thrown if any attempt to modify the syntax is made.
|
450 |
|
|
*
|
451 |
|
|
* @return this object for convenient chaining
|
452 |
|
|
*/
|
453 |
|
|
public RESyntax makeFinal ()
|
454 |
|
|
{
|
455 |
|
|
isFinal = true;
|
456 |
|
|
return this;
|
457 |
|
|
}
|
458 |
|
|
|
459 |
|
|
/**
|
460 |
|
|
* Construct a new syntax object with all bits set the same
|
461 |
|
|
* as the other syntax.
|
462 |
|
|
*/
|
463 |
|
|
public RESyntax (RESyntax other)
|
464 |
|
|
{
|
465 |
|
|
bits = (BitSet) other.bits.clone ();
|
466 |
|
|
}
|
467 |
|
|
|
468 |
|
|
/**
|
469 |
|
|
* Check if a given bit is set in this syntax.
|
470 |
|
|
*/
|
471 |
|
|
public boolean get (int index)
|
472 |
|
|
{
|
473 |
|
|
return bits.get (index);
|
474 |
|
|
}
|
475 |
|
|
|
476 |
|
|
/**
|
477 |
|
|
* Set a given bit in this syntax.
|
478 |
|
|
*
|
479 |
|
|
* @param index the constant (RESyntax.RE_xxx) bit to set.
|
480 |
|
|
* @return a reference to this object for easy chaining.
|
481 |
|
|
*/
|
482 |
|
|
public RESyntax set (int index)
|
483 |
|
|
{
|
484 |
|
|
if (isFinal)
|
485 |
|
|
throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
|
486 |
|
|
bits.set (index);
|
487 |
|
|
return this;
|
488 |
|
|
}
|
489 |
|
|
|
490 |
|
|
/**
|
491 |
|
|
* Clear a given bit in this syntax.
|
492 |
|
|
*
|
493 |
|
|
* @param index the constant (RESyntax.RE_xxx) bit to clear.
|
494 |
|
|
* @return a reference to this object for easy chaining.
|
495 |
|
|
*/
|
496 |
|
|
public RESyntax clear (int index)
|
497 |
|
|
{
|
498 |
|
|
if (isFinal)
|
499 |
|
|
throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
|
500 |
|
|
bits.clear (index);
|
501 |
|
|
return this;
|
502 |
|
|
}
|
503 |
|
|
|
504 |
|
|
/**
|
505 |
|
|
* Changes the line separator string for regular expressions
|
506 |
|
|
* created using this RESyntax. The default separator is the
|
507 |
|
|
* value returned by the system property "line.separator", which
|
508 |
|
|
* should be correct when reading platform-specific files from a
|
509 |
|
|
* filesystem. However, many programs may collect input from
|
510 |
|
|
* sources where the line separator is differently specified (for
|
511 |
|
|
* example, in the applet environment, the text box widget
|
512 |
|
|
* interprets line breaks as single-character newlines,
|
513 |
|
|
* regardless of the host platform.
|
514 |
|
|
*
|
515 |
|
|
* Note that setting the line separator to a character or
|
516 |
|
|
* characters that have specific meaning within the current syntax
|
517 |
|
|
* can cause unexpected chronosynclastic infundibula.
|
518 |
|
|
*
|
519 |
|
|
* @return this object for convenient chaining
|
520 |
|
|
*/
|
521 |
|
|
public RESyntax setLineSeparator (String aSeparator)
|
522 |
|
|
{
|
523 |
|
|
if (isFinal)
|
524 |
|
|
throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
|
525 |
|
|
lineSeparator = aSeparator;
|
526 |
|
|
return this;
|
527 |
|
|
}
|
528 |
|
|
|
529 |
|
|
/**
|
530 |
|
|
* Returns the currently active line separator string. The default
|
531 |
|
|
* is the platform-dependent system property "line.separator".
|
532 |
|
|
*/
|
533 |
|
|
public String getLineSeparator ()
|
534 |
|
|
{
|
535 |
|
|
return lineSeparator;
|
536 |
|
|
}
|
537 |
|
|
}
|