OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [gnu/] [java/] [util/] [regex/] [REMatch.java] - Blame information for rev 791

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 769 jeremybenn
/* gnu/regexp/REMatch.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
37
 
38
 
39
package gnu.java.util.regex;
40
 
41
import gnu.java.lang.CPStringBuilder;
42
 
43
import java.io.Serializable;
44
 
45
/**
46
 * An instance of this class represents a match
47
 * completed by a gnu.regexp matching function. It can be used
48
 * to obtain relevant information about the location of a match
49
 * or submatch.
50
 *
51
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
52
 */
53
public final class REMatch implements Serializable, Cloneable
54
{
55
  private String matchedText;
56
  private CharIndexed matchedCharIndexed;
57
 
58
  // These variables are package scope for fast access within the engine
59
  int eflags;                   // execution flags this match was made using
60
 
61
  // Offset in source text where match was tried.  This is zero-based;
62
  // the actual position in the source text is given by (offset + anchor).
63
  int offset;
64
 
65
  // Anchor position refers to the index into the source input
66
  // at which the matching operation began.
67
  // This is also useful for the ANCHORINDEX option.
68
  int anchor;
69
 
70
  // Package scope; used by RE.
71
  int index;                    // used while matching to mark current match position in input
72
  // start1[i] is set when the i-th subexp starts. And start1[i] is copied
73
  // to start[i] when the i-th subexp ends.  So start[i] keeps the previously
74
  // assigned value while the i-th subexp is being processed. This makes
75
  // backreference to the i-th subexp within the i-th subexp possible.
76
  int[] start;                  // start positions (relative to offset) for each (sub)exp.
77
  int[] start1;                 // start positions (relative to offset) for each (sub)exp.
78
  int[] end;                    // end positions for the same
79
  // start[i] == -1 or end[i] == -1 means that the start/end position is void.
80
  // start[i] == p or end[i] == p where p < 0 and p != -1 means that
81
  // the actual start/end position is (p+1). Start/end positions may
82
  // become negative when the subexpression is in a RETokenLookBehind.
83
  boolean empty;                // empty string matched. This flag is used only within
84
  // RETokenRepeated.
85
 
86
  BacktrackStack backtrackStack;
87
 
88
  public Object clone ()
89
  {
90
    try
91
    {
92
      REMatch copy = (REMatch) super.clone ();
93
 
94
        copy.start = (int[]) start.clone ();
95
        copy.start1 = (int[]) start1.clone ();
96
        copy.end = (int[]) end.clone ();
97
 
98
        return copy;
99
    }
100
    catch (CloneNotSupportedException e)
101
    {
102
      throw new Error ();       // doesn't happen
103
    }
104
  }
105
 
106
  void assignFrom (REMatch other)
107
  {
108
    start = other.start;
109
    start1 = other.start1;
110
    end = other.end;
111
    index = other.index;
112
    backtrackStack = other.backtrackStack;
113
  }
114
 
115
  REMatch (int subs, int anchor, int eflags)
116
  {
117
    start = new int[subs + 1];
118
    start1 = new int[subs + 1];
119
    end = new int[subs + 1];
120
    this.anchor = anchor;
121
    this.eflags = eflags;
122
    clear (anchor);
123
  }
124
 
125
  void finish (CharIndexed text)
126
  {
127
    start[0] = 0;
128
    CPStringBuilder sb = new CPStringBuilder ();
129
    int i;
130
    for (i = 0; i < end[0]; i++)
131
      sb.append (text.charAt (i));
132
    matchedText = sb.toString ();
133
    matchedCharIndexed = text;
134
    for (i = 0; i < start.length; i++)
135
      {
136
        // If any subexpressions didn't terminate, they don't count
137
        // TODO check if this code ever gets hit
138
        if ((start[i] == -1) ^ (end[i] == -1))
139
          {
140
            start[i] = -1;
141
            end[i] = -1;
142
          }
143
      }
144
    backtrackStack = null;
145
  }
146
 
147
    /** Clears the current match and moves the offset to the new index. */
148
  void clear (int index)
149
  {
150
    offset = index;
151
    this.index = 0;
152
    for (int i = 0; i < start.length; i++)
153
      {
154
        start[i] = start1[i] = end[i] = -1;
155
      }
156
    backtrackStack = null;
157
  }
158
 
159
    /**
160
     * Returns the string matching the pattern.  This makes it convenient
161
     * to write code like the following:
162
     * <P>
163
     * <code>
164
     * REMatch myMatch = myExpression.getMatch(myString);<br>
165
     * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
166
     * </code>
167
     */
168
  public String toString ()
169
  {
170
    return matchedText;
171
  }
172
 
173
    /**
174
     * Returns the index within the input text where the match in its entirety
175
     * began.
176
     */
177
  public int getStartIndex ()
178
  {
179
    return offset + start[0];
180
  }
181
 
182
    /**
183
     * Returns the index within the input string where the match in
184
     * its entirety ends.  The return value is the next position after
185
     * the end of the string; therefore, a match created by the
186
     * following call:
187
     *
188
     * <P>
189
     * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
190
     * <P>
191
     * can be viewed (given that myMatch is not null) by creating
192
     * <P>
193
     * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
194
     * myMatch.getEndIndex());</code>
195
     * <P>
196
     * But you can save yourself that work, since the <code>toString()</code>
197
     * method (above) does exactly that for you.
198
     */
199
  public int getEndIndex ()
200
  {
201
    return offset + end[0];
202
  }
203
 
204
    /**
205
     * Returns the string matching the given subexpression.  The subexpressions
206
     * are indexed starting with one, not zero.  That is, the subexpression
207
     * identified by the first set of parentheses in a regular expression
208
     * could be retrieved from an REMatch by calling match.toString(1).
209
     *
210
     * @param sub Index of the subexpression.
211
     */
212
  public String toString (int sub)
213
  {
214
    if ((sub >= start.length) || sub < 0)
215
      throw new IndexOutOfBoundsException ("No group " + sub);
216
    if (start[sub] == -1)
217
      return null;
218
    if (start[sub] >= 0 && end[sub] <= matchedText.length ())
219
      return (matchedText.substring (start[sub], end[sub]));
220
    else
221
      {
222
        // This case occurs with RETokenLookAhead or RETokenLookBehind.
223
        CPStringBuilder sb = new CPStringBuilder ();
224
        int s = start[sub];
225
        int e = end[sub];
226
        if (s < 0)
227
          s += 1;
228
        if (e < 0)
229
          e += 1;
230
        for (int i = start[0] + s; i < start[0] + e; i++)
231
          sb.append (matchedCharIndexed.charAt (i));
232
        return sb.toString ();
233
      }
234
  }
235
 
236
    /**
237
     * Returns the index within the input string used to generate this match
238
     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
239
     * the subexpression does not exist.  The initial position is zero.
240
     *
241
     * @param sub Subexpression index
242
     * @deprecated Use getStartIndex(int) instead.
243
     */
244
  public int getSubStartIndex (int sub)
245
  {
246
    if (sub >= start.length)
247
      return -1;
248
    int x = start[sub];
249
    return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
250
  }
251
 
252
    /**
253
     * Returns the index within the input string used to generate this match
254
     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
255
     * the subexpression does not exist.  The initial position is zero.
256
     *
257
     * @param sub Subexpression index
258
     * @since gnu.regexp 1.1.0
259
     */
260
  public int getStartIndex (int sub)
261
  {
262
    if (sub >= start.length)
263
      return -1;
264
    int x = start[sub];
265
    return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
266
  }
267
 
268
    /**
269
     * Returns the index within the input string used to generate this match
270
     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
271
     * the subexpression does not exist.  The initial position is zero.
272
     *
273
     * @param sub Subexpression index
274
     * @deprecated Use getEndIndex(int) instead
275
     */
276
  public int getSubEndIndex (int sub)
277
  {
278
    if (sub >= start.length)
279
      return -1;
280
    int x = end[sub];
281
    return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
282
  }
283
 
284
    /**
285
     * Returns the index within the input string used to generate this match
286
     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
287
     * the subexpression does not exist.  The initial position is zero.
288
     *
289
     * @param sub Subexpression index
290
     */
291
  public int getEndIndex (int sub)
292
  {
293
    if (sub >= start.length)
294
      return -1;
295
    int x = end[sub];
296
    return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
297
  }
298
 
299
    /**
300
     * Substitute the results of this match to create a new string.
301
     * This is patterned after PERL, so the tokens to watch out for are
302
     * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
303
     * the full substring matched; <code>$<i>n</i></code> matches
304
     * subexpression number <i>n</i>.
305
     * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
306
     * if such subexpressions exist.
307
     *
308
     * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
309
     */
310
  public String substituteInto (String input)
311
  {
312
    // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
313
    CPStringBuilder output = new CPStringBuilder ();
314
    int pos;
315
    for (pos = 0; pos < input.length () - 1; pos++)
316
      {
317
        if ((input.charAt (pos) == '$')
318
            && (Character.isDigit (input.charAt (pos + 1))))
319
          {
320
            int val = Character.digit (input.charAt (++pos), 10);
321
            int pos1 = pos + 1;
322
            while (pos1 < input.length () &&
323
                   Character.isDigit (input.charAt (pos1)))
324
              {
325
                int val1 =
326
                  val * 10 + Character.digit (input.charAt (pos1), 10);
327
                if (val1 >= start.length)
328
                  break;
329
                pos1++;
330
                val = val1;
331
              }
332
            pos = pos1 - 1;
333
 
334
            if (val < start.length)
335
              {
336
                output.append (toString (val));
337
              }
338
          }
339
        else
340
          output.append (input.charAt (pos));
341
      }
342
    if (pos < input.length ())
343
      output.append (input.charAt (pos));
344
    return output.toString ();
345
  }
346
 
347
/*  The following are used for debugging purpose
348
    public static String d(REMatch m) {
349
        if (m == null) return "null";
350
        else return "[" + m.index + "]";
351
    }
352
 
353
    public String substringUptoIndex(CharIndexed input) {
354
        StringBuffer sb = new StringBuffer();
355
        for (int i = 0; i < index; i++) {
356
            sb.append(input.charAt(i));
357
        }
358
        return sb.toString();
359
    }
360
*/
361
 
362
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.