OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libjava/] [classpath/] [gnu/] [regexp/] [RETokenRepeated.java] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
/* gnu/regexp/RETokenRepeated.java
2
   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package gnu.regexp;
40
 
41
import java.util.Vector;
42
 
43
final class RETokenRepeated extends REToken {
44
    private REToken token;
45
    private int min,max;
46
    private boolean stingy;
47
    private boolean possessive;
48
    private boolean alwaysEmpty; // Special case of {0}
49
 
50
    RETokenRepeated(int subIndex, REToken token, int min, int max) {
51
        super(subIndex);
52
        this.token = token;
53
        this.min = min;
54
        this.max = max;
55
        alwaysEmpty = (min == 0 && max == 0);
56
    }
57
 
58
    /** Sets the minimal matching mode to true. */
59
    void makeStingy() {
60
        stingy = true;
61
    }
62
 
63
    /** Queries if this token has minimal matching enabled. */
64
    boolean isStingy() {
65
        return stingy;
66
    }
67
 
68
    /** Sets possessive matching mode to true. */
69
    void makePossessive() {
70
        possessive = true;
71
    }
72
 
73
    /** Queries if this token has possessive matching enabled. */
74
    boolean isPossessive() {
75
        return possessive;
76
    }
77
 
78
    /**
79
     * The minimum length of a repeated token is the minimum length
80
     * of the token multiplied by the minimum number of times it must
81
     * match.
82
     */
83
    int getMinimumLength() {
84
        return (min * token.getMinimumLength());
85
    }
86
 
87
    int getMaximumLength() {
88
        if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE;
89
        int tmax = token.getMaximumLength();
90
        if (tmax == Integer.MAX_VALUE) return tmax;
91
        return (max * tmax);
92
    }
93
 
94
    boolean stopMatchingIfSatisfied = true;
95
 
96
    private static REMatch findDoables(REToken tk,
97
                        CharIndexed input, REMatch mymatch) {
98
 
99
            REMatch.REMatchList doables = new REMatch.REMatchList();
100
 
101
            // try next repeat at all possible positions
102
            for (REMatch current = mymatch;
103
                 current != null; current = current.next) {
104
                REMatch recurrent = (REMatch) current.clone();
105
                int origin = recurrent.index;
106
                tk = (REToken) tk.clone();
107
                tk.next = tk.uncle = null;
108
                if (tk.match(input, recurrent)) {
109
                    if (recurrent.index == origin) recurrent.empty = true;
110
                    // add all items in current to doables array
111
                    doables.addTail(recurrent);
112
                }
113
            }
114
            return doables.head;
115
    }
116
 
117
    // We do need to save every possible point, but the number of clone()
118
    // invocations here is really a killer for performance on non-stingy
119
    // repeat operators.  I'm open to suggestions...
120
 
121
    // Hypothetical question: can you have a RE that matches 1 times,
122
    // 3 times, 5 times, but not 2 times or 4 times?  Does having
123
    // the subexpression back-reference operator allow that?
124
 
125
    boolean match(CharIndexed input, REMatch mymatch) {
126
        // Possible positions for the next repeat to match at
127
        REMatch newMatch = mymatch;
128
 
129
        // {0} needs some special treatment.
130
        if (alwaysEmpty) {
131
            REMatch result = matchRest(input, newMatch);
132
            if (result != null) {
133
                mymatch.assignFrom(result);
134
                return true;
135
            }
136
            else {
137
                return false;
138
            }
139
        }
140
 
141
        // number of times we've matched so far
142
        int numRepeats = 0;
143
 
144
        REMatch doables;
145
        int lastIndex = mymatch.index;
146
        boolean emptyMatchFound = false;
147
 
148
        while (numRepeats < min) {
149
            doables = findDoables(token, input, newMatch);
150
 
151
            // if none of the possibilities worked out, 
152
            // it means that minimum number of repeats could not be found.
153
            if (doables == null) return false;
154
 
155
            // reassign where the next repeat can match
156
            newMatch = doables;
157
 
158
            // increment how many repeats we've successfully found
159
            ++numRepeats;
160
 
161
            if (newMatch.empty) {
162
                numRepeats = min;
163
                emptyMatchFound = true;
164
                break;
165
            }
166
            lastIndex = newMatch.index;
167
        }
168
 
169
        Vector positions = new Vector();
170
 
171
        while (numRepeats <= max) {
172
            // We want to check something like  
173
            //    if (stingy)
174
            // and neglect the further matching.  But experience tells
175
            // such neglection may cause incomplete matching.
176
            // For example, if we neglect the seemingly unnecessay
177
            // matching, /^(b+?|a){1,2}?c/ cannot match "bbc".
178
            // On the other hand, if we do not stop the unnecessary
179
            // matching, /(([a-c])b*?\2)*/ matches "ababbbcbc"
180
            // entirely when we wan to find only "ababb".
181
            // In order to make regression tests pass, we do as we did.
182
            if (stopMatchingIfSatisfied && stingy) {
183
                REMatch results = matchRest(input, newMatch);
184
                if (results != null) {
185
                    mymatch.assignFrom(results);
186
                    return true;
187
                }
188
            }
189
            positions.add(newMatch);
190
            if (emptyMatchFound) break;
191
 
192
            doables = findDoables(token, input, newMatch);
193
            if (doables == null) break;
194
 
195
            // doables.index == lastIndex occurs either
196
            //   (1) when an empty string was the longest
197
            //       that matched this token.
198
            // or
199
            //   (2) when the same string matches this token many times.
200
            //       For example, "acbab" itself matches "a.*b" and
201
            //       its substrings "acb" and "ab" also match.
202
            //       In this case, we do not have to go further until
203
            //       numRepeats == max because the more numRepeats grows,
204
            //       the shorter the substring matching this token becomes.
205
            //       So the previous succesful match must have bee the best
206
            //       match.  But this is not necessarily the case if stingy.
207
            if (doables.index == lastIndex) {
208
                if (doables.empty) {
209
                    emptyMatchFound = true;
210
                }
211
                else {
212
                    if (!stingy) break;
213
                }
214
            }
215
            numRepeats++;
216
            newMatch = doables;
217
            lastIndex = newMatch.index;
218
        }
219
 
220
        // We're greedy, but ease off until a true match is found.
221
        // At this point we've either got too many or just the right amount.
222
        // See if this numRepeats works with the rest of the regexp.
223
 
224
        REMatch.REMatchList allResults = new REMatch.REMatchList();
225
 
226
        int posCount = positions.size();
227
        int posIndex = (stingy ? 0 : posCount - 1);
228
 
229
        while (posCount-- > 0) {
230
            REMatch m = (REMatch) positions.elementAt(posIndex);
231
            if (stingy) posIndex++; else posIndex--;
232
 
233
            REMatch results = matchRest(input, m);
234
            if (results != null) {
235
                // Order these from longest to shortest
236
                // Start by assuming longest (more repeats)
237
                // If stingy the order is shortest to longest.
238
                allResults.addTail(results);
239
            }
240
            else {
241
                if (possessive) break;
242
            }
243
        }
244
 
245
        if (allResults.head != null) {
246
            mymatch.assignFrom(allResults.head); // does this get all?
247
            return true;
248
        }
249
        // If we fall out, no matches.
250
        return false;
251
    }
252
 
253
    private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
254
        REMatch current, single;
255
        REMatch.REMatchList doneIndex = new REMatch.REMatchList();
256
        // Test all possible matches for this number of repeats
257
        for (current = newMatch; current != null; current = current.next) {
258
            // clone() separates a single match from the chain
259
            single = (REMatch) current.clone();
260
            if (next(input, single)) {
261
                // chain results to doneIndex
262
                doneIndex.addTail(single);
263
            }
264
        }
265
        return doneIndex.head;
266
    }
267
 
268
    void dump(StringBuffer os) {
269
        os.append("(?:");
270
        token.dumpAll(os);
271
        os.append(')');
272
        if ((max == Integer.MAX_VALUE) && (min <= 1))
273
            os.append( (min == 0) ? '*' : '+' );
274
        else if ((min == 0) && (max == 1))
275
            os.append('?');
276
        else {
277
            os.append('{').append(min);
278
            if (max > min) {
279
                os.append(',');
280
                if (max != Integer.MAX_VALUE) os.append(max);
281
            }
282
            os.append('}');
283
        }
284
        if (stingy) os.append('?');
285
    }
286
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.