| 1 | 
         747 | 
         jeremybenn | 
         // Copyright 2011 The Go Authors.  All rights reserved.
  | 
      
      
         | 2 | 
          | 
          | 
         // Use of this source code is governed by a BSD-style
  | 
      
      
         | 3 | 
          | 
          | 
         // license that can be found in the LICENSE file.
  | 
      
      
         | 4 | 
          | 
          | 
          
  | 
      
      
         | 5 | 
          | 
          | 
         package syntax
  | 
      
      
         | 6 | 
          | 
          | 
          
  | 
      
      
         | 7 | 
          | 
          | 
         // Simplify returns a regexp equivalent to re but without counted repetitions
  | 
      
      
         | 8 | 
          | 
          | 
         // and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
  | 
      
      
         | 9 | 
          | 
          | 
         // The resulting regexp will execute correctly but its string representation
  | 
      
      
         | 10 | 
          | 
          | 
         // will not produce the same parse tree, because capturing parentheses
  | 
      
      
         | 11 | 
          | 
          | 
         // may have been duplicated or removed.  For example, the simplified form
  | 
      
      
         | 12 | 
          | 
          | 
         // for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
  | 
      
      
         | 13 | 
          | 
          | 
         // The returned regexp may share structure with or be the original.
  | 
      
      
         | 14 | 
          | 
          | 
         func (re *Regexp) Simplify() *Regexp {
  | 
      
      
         | 15 | 
          | 
          | 
                 if re == nil {
  | 
      
      
         | 16 | 
          | 
          | 
                         return nil
  | 
      
      
         | 17 | 
          | 
          | 
                 }
  | 
      
      
         | 18 | 
          | 
          | 
                 switch re.Op {
  | 
      
      
         | 19 | 
          | 
          | 
                 case OpCapture, OpConcat, OpAlternate:
  | 
      
      
         | 20 | 
          | 
          | 
                         // Simplify children, building new Regexp if children change.
  | 
      
      
         | 21 | 
          | 
          | 
                         nre := re
  | 
      
      
         | 22 | 
          | 
          | 
                         for i, sub := range re.Sub {
  | 
      
      
         | 23 | 
          | 
          | 
                                 nsub := sub.Simplify()
  | 
      
      
         | 24 | 
          | 
          | 
                                 if nre == re && nsub != sub {
  | 
      
      
         | 25 | 
          | 
          | 
                                         // Start a copy.
  | 
      
      
         | 26 | 
          | 
          | 
                                         nre = new(Regexp)
  | 
      
      
         | 27 | 
          | 
          | 
                                         *nre = *re
  | 
      
      
         | 28 | 
          | 
          | 
                                         nre.Rune = nil
  | 
      
      
         | 29 | 
          | 
          | 
                                         nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...)
  | 
      
      
         | 30 | 
          | 
          | 
                                 }
  | 
      
      
         | 31 | 
          | 
          | 
                                 if nre != re {
  | 
      
      
         | 32 | 
          | 
          | 
                                         nre.Sub = append(nre.Sub, nsub)
  | 
      
      
         | 33 | 
          | 
          | 
                                 }
  | 
      
      
         | 34 | 
          | 
          | 
                         }
  | 
      
      
         | 35 | 
          | 
          | 
                         return nre
  | 
      
      
         | 36 | 
          | 
          | 
          
  | 
      
      
         | 37 | 
          | 
          | 
                 case OpStar, OpPlus, OpQuest:
  | 
      
      
         | 38 | 
          | 
          | 
                         sub := re.Sub[0].Simplify()
  | 
      
      
         | 39 | 
          | 
          | 
                         return simplify1(re.Op, re.Flags, sub, re)
  | 
      
      
         | 40 | 
          | 
          | 
          
  | 
      
      
         | 41 | 
          | 
          | 
                 case OpRepeat:
  | 
      
      
         | 42 | 
          | 
          | 
                         // Special special case: x{0} matches the empty string
  | 
      
      
         | 43 | 
          | 
          | 
                         // and doesn't even need to consider x.
  | 
      
      
         | 44 | 
          | 
          | 
                         if re.Min == 0 && re.Max == 0 {
  | 
      
      
         | 45 | 
          | 
          | 
                                 return &Regexp{Op: OpEmptyMatch}
  | 
      
      
         | 46 | 
          | 
          | 
                         }
  | 
      
      
         | 47 | 
          | 
          | 
          
  | 
      
      
         | 48 | 
          | 
          | 
                         // The fun begins.
  | 
      
      
         | 49 | 
          | 
          | 
                         sub := re.Sub[0].Simplify()
  | 
      
      
         | 50 | 
          | 
          | 
          
  | 
      
      
         | 51 | 
          | 
          | 
                         // x{n,} means at least n matches of x.
  | 
      
      
         | 52 | 
          | 
          | 
                         if re.Max == -1 {
  | 
      
      
         | 53 | 
          | 
          | 
                                 // Special case: x{0,} is x*.
  | 
      
      
         | 54 | 
          | 
          | 
                                 if re.Min == 0 {
  | 
      
      
         | 55 | 
          | 
          | 
                                         return simplify1(OpStar, re.Flags, sub, nil)
  | 
      
      
         | 56 | 
          | 
          | 
                                 }
  | 
      
      
         | 57 | 
          | 
          | 
          
  | 
      
      
         | 58 | 
          | 
          | 
                                 // Special case: x{1,} is x+.
  | 
      
      
         | 59 | 
          | 
          | 
                                 if re.Min == 1 {
  | 
      
      
         | 60 | 
          | 
          | 
                                         return simplify1(OpPlus, re.Flags, sub, nil)
  | 
      
      
         | 61 | 
          | 
          | 
                                 }
  | 
      
      
         | 62 | 
          | 
          | 
          
  | 
      
      
         | 63 | 
          | 
          | 
                                 // General case: x{4,} is xxxx+.
  | 
      
      
         | 64 | 
          | 
          | 
                                 nre := &Regexp{Op: OpConcat}
  | 
      
      
         | 65 | 
          | 
          | 
                                 nre.Sub = nre.Sub0[:0]
  | 
      
      
         | 66 | 
          | 
          | 
                                 for i := 0; i < re.Min-1; i++ {
  | 
      
      
         | 67 | 
          | 
          | 
                                         nre.Sub = append(nre.Sub, sub)
  | 
      
      
         | 68 | 
          | 
          | 
                                 }
  | 
      
      
         | 69 | 
          | 
          | 
                                 nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil))
  | 
      
      
         | 70 | 
          | 
          | 
                                 return nre
  | 
      
      
         | 71 | 
          | 
          | 
                         }
  | 
      
      
         | 72 | 
          | 
          | 
          
  | 
      
      
         | 73 | 
          | 
          | 
                         // Special case x{0} handled above.
  | 
      
      
         | 74 | 
          | 
          | 
          
  | 
      
      
         | 75 | 
          | 
          | 
                         // Special case: x{1} is just x.
  | 
      
      
         | 76 | 
          | 
          | 
                         if re.Min == 1 && re.Max == 1 {
  | 
      
      
         | 77 | 
          | 
          | 
                                 return sub
  | 
      
      
         | 78 | 
          | 
          | 
                         }
  | 
      
      
         | 79 | 
          | 
          | 
          
  | 
      
      
         | 80 | 
          | 
          | 
                         // General case: x{n,m} means n copies of x and m copies of x?
  | 
      
      
         | 81 | 
          | 
          | 
                         // The machine will do less work if we nest the final m copies,
  | 
      
      
         | 82 | 
          | 
          | 
                         // so that x{2,5} = xx(x(x(x)?)?)?
  | 
      
      
         | 83 | 
          | 
          | 
          
  | 
      
      
         | 84 | 
          | 
          | 
                         // Build leading prefix: xx.
  | 
      
      
         | 85 | 
          | 
          | 
                         var prefix *Regexp
  | 
      
      
         | 86 | 
          | 
          | 
                         if re.Min > 0 {
  | 
      
      
         | 87 | 
          | 
          | 
                                 prefix = &Regexp{Op: OpConcat}
  | 
      
      
         | 88 | 
          | 
          | 
                                 prefix.Sub = prefix.Sub0[:0]
  | 
      
      
         | 89 | 
          | 
          | 
                                 for i := 0; i < re.Min; i++ {
  | 
      
      
         | 90 | 
          | 
          | 
                                         prefix.Sub = append(prefix.Sub, sub)
  | 
      
      
         | 91 | 
          | 
          | 
                                 }
  | 
      
      
         | 92 | 
          | 
          | 
                         }
  | 
      
      
         | 93 | 
          | 
          | 
          
  | 
      
      
         | 94 | 
          | 
          | 
                         // Build and attach suffix: (x(x(x)?)?)?
  | 
      
      
         | 95 | 
          | 
          | 
                         if re.Max > re.Min {
  | 
      
      
         | 96 | 
          | 
          | 
                                 suffix := simplify1(OpQuest, re.Flags, sub, nil)
  | 
      
      
         | 97 | 
          | 
          | 
                                 for i := re.Min + 1; i < re.Max; i++ {
  | 
      
      
         | 98 | 
          | 
          | 
                                         nre2 := &Regexp{Op: OpConcat}
  | 
      
      
         | 99 | 
          | 
          | 
                                         nre2.Sub = append(nre2.Sub0[:0], sub, suffix)
  | 
      
      
         | 100 | 
          | 
          | 
                                         suffix = simplify1(OpQuest, re.Flags, nre2, nil)
  | 
      
      
         | 101 | 
          | 
          | 
                                 }
  | 
      
      
         | 102 | 
          | 
          | 
                                 if prefix == nil {
  | 
      
      
         | 103 | 
          | 
          | 
                                         return suffix
  | 
      
      
         | 104 | 
          | 
          | 
                                 }
  | 
      
      
         | 105 | 
          | 
          | 
                                 prefix.Sub = append(prefix.Sub, suffix)
  | 
      
      
         | 106 | 
          | 
          | 
                         }
  | 
      
      
         | 107 | 
          | 
          | 
                         if prefix != nil {
  | 
      
      
         | 108 | 
          | 
          | 
                                 return prefix
  | 
      
      
         | 109 | 
          | 
          | 
                         }
  | 
      
      
         | 110 | 
          | 
          | 
          
  | 
      
      
         | 111 | 
          | 
          | 
                         // Some degenerate case like min > max or min < max < 0.
  | 
      
      
         | 112 | 
          | 
          | 
                         // Handle as impossible match.
  | 
      
      
         | 113 | 
          | 
          | 
                         return &Regexp{Op: OpNoMatch}
  | 
      
      
         | 114 | 
          | 
          | 
                 }
  | 
      
      
         | 115 | 
          | 
          | 
          
  | 
      
      
         | 116 | 
          | 
          | 
                 return re
  | 
      
      
         | 117 | 
          | 
          | 
         }
  | 
      
      
         | 118 | 
          | 
          | 
          
  | 
      
      
         | 119 | 
          | 
          | 
         // simplify1 implements Simplify for the unary OpStar,
  | 
      
      
         | 120 | 
          | 
          | 
         // OpPlus, and OpQuest operators.  It returns the simple regexp
  | 
      
      
         | 121 | 
          | 
          | 
         // equivalent to
  | 
      
      
         | 122 | 
          | 
          | 
         //
  | 
      
      
         | 123 | 
          | 
          | 
         //      Regexp{Op: op, Flags: flags, Sub: {sub}}
  | 
      
      
         | 124 | 
          | 
          | 
         //
  | 
      
      
         | 125 | 
          | 
          | 
         // under the assumption that sub is already simple, and
  | 
      
      
         | 126 | 
          | 
          | 
         // without first allocating that structure.  If the regexp
  | 
      
      
         | 127 | 
          | 
          | 
         // to be returned turns out to be equivalent to re, simplify1
  | 
      
      
         | 128 | 
          | 
          | 
         // returns re instead.
  | 
      
      
         | 129 | 
          | 
          | 
         //
  | 
      
      
         | 130 | 
          | 
          | 
         // simplify1 is factored out of Simplify because the implementation
  | 
      
      
         | 131 | 
          | 
          | 
         // for other operators generates these unary expressions.
  | 
      
      
         | 132 | 
          | 
          | 
         // Letting them call simplify1 makes sure the expressions they
  | 
      
      
         | 133 | 
          | 
          | 
         // generate are simple.
  | 
      
      
         | 134 | 
          | 
          | 
         func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp {
  | 
      
      
         | 135 | 
          | 
          | 
                 // Special case: repeat the empty string as much as
  | 
      
      
         | 136 | 
          | 
          | 
                 // you want, but it's still the empty string.
  | 
      
      
         | 137 | 
          | 
          | 
                 if sub.Op == OpEmptyMatch {
  | 
      
      
         | 138 | 
          | 
          | 
                         return sub
  | 
      
      
         | 139 | 
          | 
          | 
                 }
  | 
      
      
         | 140 | 
          | 
          | 
                 // The operators are idempotent if the flags match.
  | 
      
      
         | 141 | 
          | 
          | 
                 if op == sub.Op && flags&NonGreedy == sub.Flags&NonGreedy {
  | 
      
      
         | 142 | 
          | 
          | 
                         return sub
  | 
      
      
         | 143 | 
          | 
          | 
                 }
  | 
      
      
         | 144 | 
          | 
          | 
                 if re != nil && re.Op == op && re.Flags&NonGreedy == flags&NonGreedy && sub == re.Sub[0] {
  | 
      
      
         | 145 | 
          | 
          | 
                         return re
  | 
      
      
         | 146 | 
          | 
          | 
                 }
  | 
      
      
         | 147 | 
          | 
          | 
          
  | 
      
      
         | 148 | 
          | 
          | 
                 re = &Regexp{Op: op, Flags: flags}
  | 
      
      
         | 149 | 
          | 
          | 
                 re.Sub = append(re.Sub0[:0], sub)
  | 
      
      
         | 150 | 
          | 
          | 
                 return re
  | 
      
      
         | 151 | 
          | 
          | 
         }
  |