OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [utf8string/] [string.go] - Blame information for rev 747

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2009 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
// Package utf8string provides an efficient way to index strings by rune rather than by byte.
6
package utf8string
7
 
8
import (
9
        "errors"
10
        "unicode/utf8"
11
)
12
 
13
// String wraps a regular string with a small structure that provides more
14
// efficient indexing by code point index, as opposed to byte index.
15
// Scanning incrementally forwards or backwards is O(1) per index operation
16
// (although not as fast a range clause going forwards).  Random access is
17
// O(N) in the length of the string, but the overhead is less than always
18
// scanning from the beginning.
19
// If the string is ASCII, random access is O(1).
20
// Unlike the built-in string type, String has internal mutable state and
21
// is not thread-safe.
22
type String struct {
23
        str      string
24
        numRunes int
25
        // If width > 0, the rune at runePos starts at bytePos and has the specified width.
26
        width    int
27
        bytePos  int
28
        runePos  int
29
        nonASCII int // byte index of the first non-ASCII rune.
30
}
31
 
32
// NewString returns a new UTF-8 string with the provided contents.
33
func NewString(contents string) *String {
34
        return new(String).Init(contents)
35
}
36
 
37
// Init initializes an existing String to hold the provided contents.
38
// It returns a pointer to the initialized String.
39
func (s *String) Init(contents string) *String {
40
        s.str = contents
41
        s.bytePos = 0
42
        s.runePos = 0
43
        for i := 0; i < len(contents); i++ {
44
                if contents[i] >= utf8.RuneSelf {
45
                        // Not ASCII.
46
                        s.numRunes = utf8.RuneCountInString(contents)
47
                        _, s.width = utf8.DecodeRuneInString(contents)
48
                        s.nonASCII = i
49
                        return s
50
                }
51
        }
52
        // ASCII is simple.  Also, the empty string is ASCII.
53
        s.numRunes = len(contents)
54
        s.width = 0
55
        s.nonASCII = len(contents)
56
        return s
57
}
58
 
59
// String returns the contents of the String.  This method also means the
60
// String is directly printable by fmt.Print.
61
func (s *String) String() string {
62
        return s.str
63
}
64
 
65
// RuneCount returns the number of runes (Unicode code points) in the String.
66
func (s *String) RuneCount() int {
67
        return s.numRunes
68
}
69
 
70
// IsASCII returns a boolean indicating whether the String contains only ASCII bytes.
71
func (s *String) IsASCII() bool {
72
        return s.width == 0
73
}
74
 
75
// Slice returns the string sliced at rune positions [i:j].
76
func (s *String) Slice(i, j int) string {
77
        // ASCII is easy.  Let the compiler catch the indexing error if there is one.
78
        if j < s.nonASCII {
79
                return s.str[i:j]
80
        }
81
        if i < 0 || j > s.numRunes || i > j {
82
                panic(sliceOutOfRange)
83
        }
84
        if i == j {
85
                return ""
86
        }
87
        // For non-ASCII, after At(i), bytePos is always the position of the indexed character.
88
        var low, high int
89
        switch {
90
        case i < s.nonASCII:
91
                low = i
92
        case i == s.numRunes:
93
                low = len(s.str)
94
        default:
95
                s.At(i)
96
                low = s.bytePos
97
        }
98
        switch {
99
        case j == s.numRunes:
100
                high = len(s.str)
101
        default:
102
                s.At(j)
103
                high = s.bytePos
104
        }
105
        return s.str[low:high]
106
}
107
 
108
// At returns the rune with index i in the String.  The sequence of runes is the same
109
// as iterating over the contents with a "for range" clause.
110
func (s *String) At(i int) rune {
111
        // ASCII is easy.  Let the compiler catch the indexing error if there is one.
112
        if i < s.nonASCII {
113
                return rune(s.str[i])
114
        }
115
 
116
        // Now we do need to know the index is valid.
117
        if i < 0 || i >= s.numRunes {
118
                panic(outOfRange)
119
        }
120
 
121
        var r rune
122
 
123
        // Five easy common cases: within 1 spot of bytePos/runePos, or the beginning, or the end.
124
        // With these cases, all scans from beginning or end work in O(1) time per rune.
125
        switch {
126
 
127
        case i == s.runePos-1: // backing up one rune
128
                r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
129
                s.runePos = i
130
                s.bytePos -= s.width
131
                return r
132
        case i == s.runePos+1: // moving ahead one rune
133
                s.runePos = i
134
                s.bytePos += s.width
135
                fallthrough
136
        case i == s.runePos:
137
                r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
138
                return r
139
        case i == 0: // start of string
140
                r, s.width = utf8.DecodeRuneInString(s.str)
141
                s.runePos = 0
142
                s.bytePos = 0
143
                return r
144
 
145
        case i == s.numRunes-1: // last rune in string
146
                r, s.width = utf8.DecodeLastRuneInString(s.str)
147
                s.runePos = i
148
                s.bytePos = len(s.str) - s.width
149
                return r
150
        }
151
 
152
        // We need to do a linear scan.  There are three places to start from:
153
        // 1) The beginning
154
        // 2) bytePos/runePos.
155
        // 3) The end
156
        // Choose the closest in rune count, scanning backwards if necessary.
157
        forward := true
158
        if i < s.runePos {
159
                // Between beginning and pos.  Which is closer?
160
                // Since both i and runePos are guaranteed >= nonASCII, that's the
161
                // lowest location we need to start from.
162
                if i < (s.runePos-s.nonASCII)/2 {
163
                        // Scan forward from beginning
164
                        s.bytePos, s.runePos = s.nonASCII, s.nonASCII
165
                } else {
166
                        // Scan backwards from where we are
167
                        forward = false
168
                }
169
        } else {
170
                // Between pos and end.  Which is closer?
171
                if i-s.runePos < (s.numRunes-s.runePos)/2 {
172
                        // Scan forward from pos
173
                } else {
174
                        // Scan backwards from end
175
                        s.bytePos, s.runePos = len(s.str), s.numRunes
176
                        forward = false
177
                }
178
        }
179
        if forward {
180
                // TODO: Is it much faster to use a range loop for this scan?
181
                for {
182
                        r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
183
                        if s.runePos == i {
184
                                break
185
                        }
186
                        s.runePos++
187
                        s.bytePos += s.width
188
                }
189
        } else {
190
                for {
191
                        r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
192
                        s.runePos--
193
                        s.bytePos -= s.width
194
                        if s.runePos == i {
195
                                break
196
                        }
197
                }
198
        }
199
        return r
200
}
201
 
202
var outOfRange = errors.New("utf8.String: index out of range")
203
var sliceOutOfRange = errors.New("utf8.String: slice index out of range")

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.