OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libjava/] [classpath/] [gnu/] [regexp/] [RETokenRepeated.java] - Rev 14

Compare with Previous | Blame | View Log

/* gnu/regexp/RETokenRepeated.java
   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
 
This file is part of GNU Classpath.
 
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
 
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
 
Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
 
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
 
 
package gnu.regexp;
 
import java.util.Vector;
 
final class RETokenRepeated extends REToken {
    private REToken token;
    private int min,max;
    private boolean stingy;
    private boolean possessive;
    private boolean alwaysEmpty; // Special case of {0}
 
    RETokenRepeated(int subIndex, REToken token, int min, int max) {
	super(subIndex);
	this.token = token;
	this.min = min;
	this.max = max;
	alwaysEmpty = (min == 0 && max == 0);
    }
 
    /** Sets the minimal matching mode to true. */
    void makeStingy() {
	stingy = true;
    }
 
    /** Queries if this token has minimal matching enabled. */
    boolean isStingy() {
	return stingy;
    }
 
    /** Sets possessive matching mode to true. */
    void makePossessive() {
        possessive = true;
    }
 
    /** Queries if this token has possessive matching enabled. */
    boolean isPossessive() {
        return possessive;
    }
 
    /**
     * The minimum length of a repeated token is the minimum length
     * of the token multiplied by the minimum number of times it must
     * match.
     */
    int getMinimumLength() {
	return (min * token.getMinimumLength());
    }
 
    int getMaximumLength() {
        if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE;
	int tmax = token.getMaximumLength();
	if (tmax == Integer.MAX_VALUE) return tmax;
	return (max * tmax);
    }
 
    boolean stopMatchingIfSatisfied = true;
 
    private static REMatch findDoables(REToken tk,
			CharIndexed input, REMatch mymatch) {
 
	    REMatch.REMatchList doables = new REMatch.REMatchList();
 
	    // try next repeat at all possible positions
	    for (REMatch current = mymatch;
		 current != null; current = current.next) {
		REMatch recurrent = (REMatch) current.clone();
		int origin = recurrent.index;
		tk = (REToken) tk.clone();
		tk.next = tk.uncle = null;
		if (tk.match(input, recurrent)) {
		    if (recurrent.index == origin) recurrent.empty = true;
		    // add all items in current to doables array
		    doables.addTail(recurrent);
		}
	    }
	    return doables.head;
    }
 
    // We do need to save every possible point, but the number of clone()
    // invocations here is really a killer for performance on non-stingy
    // repeat operators.  I'm open to suggestions...
 
    // Hypothetical question: can you have a RE that matches 1 times,
    // 3 times, 5 times, but not 2 times or 4 times?  Does having
    // the subexpression back-reference operator allow that?
 
    boolean match(CharIndexed input, REMatch mymatch) {
	// Possible positions for the next repeat to match at
	REMatch newMatch = mymatch;
 
	// {0} needs some special treatment.
	if (alwaysEmpty) {
	    REMatch result = matchRest(input, newMatch);
	    if (result != null) {
	        mymatch.assignFrom(result);
	        return true;
	    }
	    else {
	        return false;
	    }
	}
 
	// number of times we've matched so far
	int numRepeats = 0; 
 
	REMatch doables;
	int lastIndex = mymatch.index;
	boolean emptyMatchFound = false;
 
	while (numRepeats < min) {
	    doables = findDoables(token, input, newMatch);
 
	    // if none of the possibilities worked out, 
	    // it means that minimum number of repeats could not be found.
	    if (doables == null) return false;
 
	    // reassign where the next repeat can match
	    newMatch = doables;
 
	    // increment how many repeats we've successfully found
	    ++numRepeats;
 
	    if (newMatch.empty) {
		numRepeats = min;
		emptyMatchFound = true;
		break;
	    }
	    lastIndex = newMatch.index;
	}
 
	Vector positions = new Vector();
 
	while (numRepeats <= max) {
	    // We want to check something like  
	    //    if (stingy)
	    // and neglect the further matching.  But experience tells
	    // such neglection may cause incomplete matching.
	    // For example, if we neglect the seemingly unnecessay
	    // matching, /^(b+?|a){1,2}?c/ cannot match "bbc".
	    // On the other hand, if we do not stop the unnecessary
	    // matching, /(([a-c])b*?\2)*/ matches "ababbbcbc"
	    // entirely when we wan to find only "ababb".
	    // In order to make regression tests pass, we do as we did.
	    if (stopMatchingIfSatisfied && stingy) {
		REMatch results = matchRest(input, newMatch);
		if (results != null) {
		    mymatch.assignFrom(results);
		    return true;
		}
	    }
	    positions.add(newMatch);
	    if (emptyMatchFound) break;
 
	    doables = findDoables(token, input, newMatch);
	    if (doables == null) break;
 
	    // doables.index == lastIndex occurs either
	    //   (1) when an empty string was the longest
	    //       that matched this token.
	    // or
	    //   (2) when the same string matches this token many times.
	    //       For example, "acbab" itself matches "a.*b" and
	    //       its substrings "acb" and "ab" also match.
	    //       In this case, we do not have to go further until
	    //       numRepeats == max because the more numRepeats grows,
	    //       the shorter the substring matching this token becomes.
	    //       So the previous succesful match must have bee the best
	    //       match.  But this is not necessarily the case if stingy.
	    if (doables.index == lastIndex) {
	        if (doables.empty) {
		    emptyMatchFound = true;
                }
	        else {
		    if (!stingy) break;
		}
	    }
	    numRepeats++;
	    newMatch = doables;
	    lastIndex = newMatch.index;
	}
 
	// We're greedy, but ease off until a true match is found.
	// At this point we've either got too many or just the right amount.
	// See if this numRepeats works with the rest of the regexp.
 
	REMatch.REMatchList allResults = new REMatch.REMatchList();
 
	int posCount = positions.size();
	int posIndex = (stingy ? 0 : posCount - 1);
 
	while (posCount-- > 0) {
	    REMatch m = (REMatch) positions.elementAt(posIndex);
            if (stingy) posIndex++; else posIndex--;
 
	    REMatch results = matchRest(input, m);
            if (results != null) {
	    	// Order these from longest to shortest
		// Start by assuming longest (more repeats)
		// If stingy the order is shortest to longest.
		allResults.addTail(results);
	    }
	    else {
		if (possessive) break;
	    }
	}
 
	if (allResults.head != null) {
	    mymatch.assignFrom(allResults.head); // does this get all?
	    return true;
	}
	// If we fall out, no matches.
	return false;
    }
 
    private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
	REMatch current, single;
	REMatch.REMatchList doneIndex = new REMatch.REMatchList();
	// Test all possible matches for this number of repeats
	for (current = newMatch; current != null; current = current.next) {
	    // clone() separates a single match from the chain
	    single = (REMatch) current.clone();
	    if (next(input, single)) {
		// chain results to doneIndex
		doneIndex.addTail(single);
	    }
	}
	return doneIndex.head;
    }
 
    void dump(StringBuffer os) {
	os.append("(?:");
	token.dumpAll(os);
	os.append(')');
	if ((max == Integer.MAX_VALUE) && (min <= 1))
	    os.append( (min == 0) ? '*' : '+' );
	else if ((min == 0) && (max == 1))
	    os.append('?');
	else {
	    os.append('{').append(min);
	    if (max > min) {
		os.append(',');
		if (max != Integer.MAX_VALUE) os.append(max);
	    }
	    os.append('}');
	}
	if (stingy) os.append('?');
    }
}
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.