OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [java/] [util/] [regex/] [Matcher.java] - Blame information for rev 771

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 771 jeremybenn
/* Matcher.java -- Instance of a regular expression applied to a char sequence.
2
   Copyright (C) 2002, 2004, 2006 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.util.regex;
40
 
41
import gnu.java.lang.CPStringBuilder;
42
 
43
import gnu.java.util.regex.CharIndexed;
44
import gnu.java.util.regex.RE;
45
import gnu.java.util.regex.REMatch;
46
 
47
/**
48
 * Instance of a regular expression applied to a char sequence.
49
 *
50
 * @since 1.4
51
 */
52
public final class Matcher implements MatchResult
53
{
54
  // The pattern this matcher applies; returned by pattern().
  private Pattern pattern;
55
  // The character sequence being matched; replaced by reset(CharSequence).
  private CharSequence input;
56
  // We use CharIndexed as an input object to the getMatch method in order
57
  // that /\G/ (the end of the previous match) may work.  The information
58
  // of the previous match is stored in the CharIndexed object.
59
  private CharIndexed inputCharIndexed;
60
  // Index handed to the regex engine as the starting point of the next
  // find(); updated to the end index of each successful match.
  private int position;
61
  // Index in the input from which appendReplacement() and appendTail()
  // copy text; advanced to the end of the match by appendReplacement().
  private int appendPosition;
62
  // Result of the most recent match attempt, or null when there is none
  // (assertMatchOp() throws IllegalStateException in that case).
  private REMatch match;
63
 
64
  /**
65
   * The start of the region of the input on which to match.
66
   */
67
  private int regionStart;
68
 
69
  /**
70
   * The end of the region of the input on which to match.
71
   */
72
  private int regionEnd;
73
 
74
  /**
75
   * True if the match process should look beyond the
76
   * region marked by regionStart to regionEnd when
77
   * performing lookAhead, lookBehind and boundary
78
   * matching.
79
   */
80
  private boolean transparentBounds;
81
 
82
  /**
83
   * The flags that affect the anchoring bounds.
84
   * If {@link #hasAnchoringBounds()} is {@code true},
85
   * the match process will honour the
86
   * anchoring bounds: ^, \A, \Z, \z and $.  If
87
   * {@link #hasAnchoringBounds()} is {@code false},
88
   * the anchors are ignored and appropriate flags,
89
   * stored in this variable, are used to provide this
90
   * behaviour.
91
   */
92
  private int anchoringBounds;
93
 
94
  /**
   * Creates a matcher that applies the given pattern to the given input.
   * The region initially spans the whole input, the bounds are opaque
   * and the anchoring bounds are enabled.
   *
   * @param pattern The pattern to match against
   * @param input The character sequence to be matched
   */
  Matcher(Pattern pattern, CharSequence input)
  {
    this.pattern = pattern;
    this.input = input;
    this.inputCharIndexed = RE.makeCharIndexed(input, 0);
    // Default region: the entire input.
    this.regionStart = 0;
    this.regionEnd = input.length();
    // Opaque bounds; a flag value of zero means the anchors are honoured.
    this.transparentBounds = false;
    this.anchoringBounds = 0;
  }
104
 
105
  /**
106
   * @param sb The target string buffer
107
   * @param replacement The replacement string
108
   *
109
   * @exception IllegalStateException If no match has yet been attempted,
110
   * or if the previous match operation failed
111
   * @exception IndexOutOfBoundsException If the replacement string refers
112
   * to a capturing group that does not exist in the pattern
113
   */
114
  public Matcher appendReplacement (StringBuffer sb, String replacement)
115
    throws IllegalStateException
116
  {
117
    assertMatchOp();
118
    sb.append(input.subSequence(appendPosition,
119
                                match.getStartIndex()).toString());
120
    sb.append(RE.getReplacement(replacement, match,
121
        RE.REG_REPLACE_USE_BACKSLASHESCAPE));
122
    appendPosition = match.getEndIndex();
123
    return this;
124
  }
125
 
126
  /**
127
   * @param sb The target string buffer
128
   */
129
  public StringBuffer appendTail (StringBuffer sb)
130
  {
131
    sb.append(input.subSequence(appendPosition, input.length()).toString());
132
    return sb;
133
  }
134
 
135
  /**
136
   * @exception IllegalStateException If no match has yet been attempted,
137
   * or if the previous match operation failed
138
   */
139
  public int end ()
140
    throws IllegalStateException
141
  {
142
    assertMatchOp();
143
    return match.getEndIndex();
144
  }
145
 
146
  /**
147
   * @param group The index of a capturing group in this matcher's pattern
148
   *
149
   * @exception IllegalStateException If no match has yet been attempted,
150
   * or if the previous match operation failed
151
   * @exception IndexOutOfBoundsException If the replacement string refers
152
   * to a capturing group that does not exist in the pattern
153
   */
154
  public int end (int group)
155
    throws IllegalStateException
156
  {
157
    assertMatchOp();
158
    return match.getEndIndex(group);
159
  }
160
 
161
  public boolean find ()
162
  {
163
    boolean first = (match == null);
164
    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
165
      match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds);
166
    else
167
      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
168
                                       position, anchoringBounds);
169
    if (match != null)
170
      {
171
        int endIndex = match.getEndIndex();
172
        // Are we stuck at the same position?
173
        if (!first && endIndex == position)
174
          {
175
            match = null;
176
            // Not at the end of the input yet?
177
            if (position < input.length() - 1)
178
              {
179
                position++;
180
                return find(position);
181
              }
182
            else
183
              return false;
184
          }
185
        position = endIndex;
186
        return true;
187
      }
188
    return false;
189
  }
190
 
191
  /**
192
   * @param start The index to start the new pattern matching
193
   *
194
   * @exception IndexOutOfBoundsException If the replacement string refers
195
   * to a capturing group that does not exist in the pattern
196
   */
197
  public boolean find (int start)
198
  {
199
    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
200
      match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds);
201
    else
202
      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
203
                                       start, anchoringBounds);
204
    if (match != null)
205
      {
206
        position = match.getEndIndex();
207
        return true;
208
      }
209
    return false;
210
  }
211
 
212
  /**
213
   * @exception IllegalStateException If no match has yet been attempted,
214
   * or if the previous match operation failed
215
   */
216
  public String group ()
217
  {
218
    assertMatchOp();
219
    return match.toString();
220
  }
221
 
222
  /**
223
   * @param group The index of a capturing group in this matcher's pattern
224
   *
225
   * @exception IllegalStateException If no match has yet been attempted,
226
   * or if the previous match operation failed
227
   * @exception IndexOutOfBoundsException If the replacement string refers
228
   * to a capturing group that does not exist in the pattern
229
   */
230
  public String group (int group)
231
    throws IllegalStateException
232
  {
233
    assertMatchOp();
234
    return match.toString(group);
235
  }
236
 
237
  /**
238
   * @param replacement The replacement string
239
   */
240
  public String replaceFirst (String replacement)
241
  {
242
    reset();
243
    // Semantics might not quite match
244
    return pattern.getRE().substitute(input, replacement, position,
245
        RE.REG_REPLACE_USE_BACKSLASHESCAPE);
246
  }
247
 
248
  /**
249
   * @param replacement The replacement string
250
   */
251
  public String replaceAll (String replacement)
252
  {
253
    reset();
254
    return pattern.getRE().substituteAll(input, replacement, position,
255
        RE.REG_REPLACE_USE_BACKSLASHESCAPE);
256
  }
257
 
258
  public int groupCount ()
259
  {
260
    return pattern.getRE().getNumSubs();
261
  }
262
 
263
  public boolean lookingAt ()
264
  {
265
    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
266
      match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
267
                                       anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
268
    else
269
      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
270
                                       anchoringBounds|RE.REG_FIX_STARTING_POSITION);
271
    if (match != null)
272
      {
273
        if (match.getStartIndex() == 0)
274
          {
275
            position = match.getEndIndex();
276
            return true;
277
          }
278
        match = null;
279
      }
280
    return false;
281
  }
282
 
283
  /**
284
   * Attempts to match the entire input sequence against the pattern.
285
   *
286
   * If the match succeeds then more information can be obtained via the
287
   * start, end, and group methods.
288
   *
289
   * @see #start()
290
   * @see #end()
291
   * @see #group()
292
   */
293
  public boolean matches ()
294
  {
295
    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
296
      match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
297
                                       anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
298
    else
299
      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
300
                                       anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION);
301
    if (match != null)
302
      {
303
        if (match.getStartIndex() == 0)
304
          {
305
            position = match.getEndIndex();
306
            if (position == input.length())
307
                return true;
308
          }
309
        match = null;
310
      }
311
    return false;
312
  }
313
 
314
  /**
315
   * Returns the Pattern that is interpreted by this Matcher
316
   */
317
  public Pattern pattern ()
318
  {
319
    return pattern;
320
  }
321
 
322
  /**
323
   * Resets the internal state of the matcher, including
324
   * resetting the region to its default state of encompassing
325
   * the whole input.  The state of {@link #hasTransparentBounds()}
326
   * and {@link #hasAnchoringBounds()} are unaffected.
327
   *
328
   * @return a reference to this matcher.
329
   * @see #regionStart()
330
   * @see #regionEnd()
331
   * @see #hasTransparentBounds()
332
   * @see #hasAnchoringBounds()
333
   */
334
  public Matcher reset ()
335
  {
336
    position = 0;
337
    match = null;
338
    regionStart = 0;
339
    regionEnd = input.length();
340
    appendPosition = 0;
341
    return this;
342
  }
343
 
344
  /**
345
   * Resets the internal state of the matcher, including
346
   * resetting the region to its default state of encompassing
347
   * the whole input.  The state of {@link #hasTransparentBounds()}
348
   * and {@link #hasAnchoringBounds()} are unaffected.
349
   *
350
   * @param input The new input character sequence.
351
   * @return a reference to this matcher.
352
   * @see #regionStart()
353
   * @see #regionEnd()
354
   * @see #hasTransparentBounds()
355
   * @see #hasAnchoringBounds()
356
   */
357
  public Matcher reset (CharSequence input)
358
  {
359
    this.input = input;
360
    this.inputCharIndexed = RE.makeCharIndexed(input, 0);
361
    return reset();
362
  }
363
 
364
  /**
365
   * @return the index of a capturing group in this matcher's pattern
366
   *
367
   * @exception IllegalStateException If no match has yet been attempted,
368
   * or if the previous match operation failed
369
   */
370
  public int start ()
371
    throws IllegalStateException
372
  {
373
    assertMatchOp();
374
    return match.getStartIndex();
375
  }
376
 
377
  /**
378
   * @param group The index of a capturing group in this matcher's pattern
379
   *
380
   * @exception IllegalStateException If no match has yet been attempted,
381
   * or if the previous match operation failed
382
   * @exception IndexOutOfBoundsException If the replacement string refers
383
   * to a capturing group that does not exist in the pattern
384
   */
385
  public int start (int group)
386
    throws IllegalStateException
387
  {
388
    assertMatchOp();
389
    return match.getStartIndex(group);
390
  }
391
 
392
  /**
393
   * @return True if and only if the matcher hit the end of input.
394
   * @since 1.5
395
   */
396
  public boolean hitEnd()
397
  {
398
    return inputCharIndexed.hitEnd();
399
  }
400
 
401
  /**
402
   * @return A string expression of this matcher.
403
   */
404
  public String toString()
405
  {
406
    CPStringBuilder sb = new CPStringBuilder();
407
    sb.append(this.getClass().getName())
408
      .append("[pattern=").append(pattern.pattern())
409
      .append(" region=").append(regionStart).append(",").append(regionEnd)
410
      .append(" anchoringBounds=").append(anchoringBounds == 0)
411
      .append(" transparentBounds=").append(transparentBounds)
412
      .append(" lastmatch=").append(match == null ? "" : match.toString())
413
      .append("]");
414
    return sb.toString();
415
  }
416
 
417
  private void assertMatchOp()
418
  {
419
    if (match == null) throw new IllegalStateException();
420
  }
421
 
422
  /**
423
   * <p>
424
   * Defines the region of the input on which to match.
425
   * By default, the {@link Matcher} attempts to match
426
   * the whole string (from 0 to the length of the input),
427
   * but a region between {@code start} (inclusive) and
428
   * {@code end} (exclusive) on which to match may instead
429
   * be defined using this method.
430
   * </p>
431
   * <p>
432
   * The behaviour of region matching is further affected
433
   * by the use of transparent or opaque bounds (see
434
   * {@link #useTransparentBounds(boolean)}) and whether or not
435
   * anchors ({@code ^} and {@code $}) are in use
436
   * (see {@link #useAnchoringBounds(boolean)}).  With transparent
437
   * bounds, the matcher is aware of input outside the bounds
438
   * set by this method, whereas, with opaque bounds (the default)
439
   * only the input within the bounds is used.  The use of
440
   * anchors are affected by this setting; with transparent
441
   * bounds, anchors will match the beginning of the real input,
442
   * while with opaque bounds they match the beginning of the
443
   * region.  {@link #useAnchoringBounds(boolean)} can be used
444
   * to turn on or off the matching of anchors.
445
   * </p>
446
   *
447
   * @param start the start of the region (inclusive).
448
   * @param end the end of the region (exclusive).
449
   * @return a reference to this matcher.
450
   * @throws IndexOutOfBoundsException if either {@code start} or
451
   *                                   {@code end} are less than zero,
452
   *                                   if either {@code start} or
453
   *                                   {@code end} are greater than the
454
   *                                   length of the input, or if
455
   *                                   {@code start} is greater than
456
   *                                   {@code end}.
457
   * @see #regionStart()
458
   * @see #regionEnd()
459
   * @see #hasTransparentBounds()
460
   * @see #useTransparentBounds(boolean)
461
   * @see #hasAnchoringBounds()
462
   * @see #useAnchoringBounds(boolean)
463
   * @since 1.5
464
   */
465
  public Matcher region(int start, int end)
466
  {
467
    int length = input.length();
468
    if (start < 0)
469
      throw new IndexOutOfBoundsException("The start position was less than zero.");
470
    if (start >= length)
471
      throw new IndexOutOfBoundsException("The start position is after the end of the input.");
472
    if (end < 0)
473
      throw new IndexOutOfBoundsException("The end position was less than zero.");
474
    if (end > length)
475
      throw new IndexOutOfBoundsException("The end position is after the end of the input.");
476
    if (start > end)
477
      throw new IndexOutOfBoundsException("The start position is after the end position.");
478
    reset();
479
    regionStart = start;
480
    regionEnd = end;
481
    return this;
482
  }
483
 
484
  /**
485
   * The start of the region on which to perform matches (inclusive).
486
   *
487
   * @return the start index of the region.
488
   * @see #region(int,int)
489
   * #see #regionEnd()
490
   * @since 1.5
491
   */
492
  public int regionStart()
493
  {
494
    return regionStart;
495
  }
496
 
497
  /**
498
   * The end of the region on which to perform matches (exclusive).
499
   *
500
   * @return the end index of the region.
501
   * @see #region(int,int)
502
   * @see #regionStart()
503
   * @since 1.5
504
   */
505
  public int regionEnd()
506
  {
507
    return regionEnd;
508
  }
509
 
510
  /**
511
   * Returns true if the bounds of the region marked by
512
   * {@link #regionStart()} and {@link #regionEnd()} are
513
   * transparent.  When these bounds are transparent, the
514
   * matching process can look beyond them to perform
515
   * lookahead, lookbehind and boundary matching operations.
516
   * By default, the bounds are opaque.
517
   *
518
   * @return true if the bounds of the matching region are
519
   *         transparent.
520
   * @see #useTransparentBounds(boolean)
521
   * @see #region(int,int)
522
   * @see #regionStart()
523
   * @see #regionEnd()
524
   * @since 1.5
525
   */
526
  public boolean hasTransparentBounds()
527
  {
528
    return transparentBounds;
529
  }
530
 
531
  /**
532
   * Sets the transparency of the bounds of the region
533
   * marked by {@link #regionStart()} and {@link #regionEnd()}.
534
   * A value of {@code true} makes the bounds transparent,
535
   * so the matcher can see beyond them to perform lookahead,
536
   * lookbehind and boundary matching operations.  A value
537
   * of {@code false} (the default) makes the bounds opaque,
538
   * restricting the match to the input region denoted
539
   * by {@link #regionStart()} and {@link #regionEnd()}.
540
   *
541
   * @param transparent true if the bounds should be transparent.
542
   * @return a reference to this matcher.
543
   * @see #hasTransparentBounds()
544
   * @see #region(int,int)
545
   * @see #regionStart()
546
   * @see #regionEnd()
547
   * @since 1.5
548
   */
549
  public Matcher useTransparentBounds(boolean transparent)
550
  {
551
    transparentBounds = transparent;
552
    return this;
553
  }
554
 
555
  /**
556
   * Returns true if the matcher will honour the use of
557
   * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z},
558
   * {@code \z} and {@code $}.  By default, the anchors
559
   * are used.  Note that the effect of the anchors is
560
   * also affected by {@link #hasTransparentBounds()}.
561
   *
562
   * @return true if the matcher will attempt to match
563
   *         the anchoring bounds.
564
   * @see #useAnchoringBounds(boolean)
565
   * @see #hasTransparentBounds()
566
   * @since 1.5
567
   */
568
  public boolean hasAnchoringBounds()
569
  {
570
    return anchoringBounds == 0;
571
  }
572
 
573
  /**
574
   * Enables or disables the use of the anchoring bounds:
575
   * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and
576
   * {@code $}. By default, their use is enabled.  When
577
   * disabled, the matcher will not attempt to match
578
   * the anchors.
579
   *
580
   * @param useAnchors true if anchoring bounds should be used.
581
   * @return a reference to this matcher.
582
   * @since 1.5
583
   * @see #hasAnchoringBounds()
584
   */
585
  public Matcher useAnchoringBounds(boolean useAnchors)
586
  {
587
    if (useAnchors)
588
      anchoringBounds = 0;
589
    else
590
      anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL;
591
    return this;
592
  }
593
 
594
  /**
595
   * Returns a read-only snapshot of the current state of
596
   * the {@link Matcher} as a {@link MatchResult}.  Any
597
   * subsequent changes to this instance are not reflected
598
   * in the returned {@link MatchResult}.
599
   *
600
   * @return a {@link MatchResult} instance representing the
601
   *         current state of the {@link Matcher}.
602
   */
603
  public MatchResult toMatchResult()
604
  {
605
    Matcher snapshot = new Matcher(pattern, input);
606
    if (match != null)
607
      snapshot.match = (REMatch) match.clone();
608
    return snapshot;
609
  }
610
 
611
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.