OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [encoding/] [csv/] [reader.go] - Blame information for rev 801

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
// Package csv reads and writes comma-separated values (CSV) files.
6
//
7
// A csv file contains zero or more records of one or more fields per record.
8
// Each record is separated by the newline character. The final record may
9
// optionally be followed by a newline character.
10
//
11
//      field1,field2,field3
12
//
13
// White space is considered part of a field.
14
//
15
// Carriage returns before newline characters are silently removed.
16
//
17
// Blank lines are ignored.  A line with only whitespace characters (excluding
18
// the ending newline character) is not considered a blank line.
19
//
20
// Fields which start and stop with the quote character " are called
21
// quoted-fields.  The beginning and ending quote are not part of the
22
// field.
23
//
24
// The source:
25
//
26
//      normal string,"quoted-field"
27
//
28
// results in the fields
29
//
30
//      {`normal string`, `quoted-field`}
31
//
32
// Within a quoted-field a quote character followed by a second quote
33
// character is considered a single quote.
34
//
35
//      "the ""word"" is true","a ""quoted-field"""
36
//
37
// results in
38
//
39
//      {`the "word" is true`, `a "quoted-field"`}
40
//
41
// Newlines and commas may be included in a quoted-field
42
//
43
//      "Multi-line
44
//      field","comma is ,"
45
//
46
// results in
47
//
48
//      {`Multi-line
49
//      field`, `comma is ,`}
50
package csv
51
 
52
import (
53
        "bufio"
54
        "bytes"
55
        "errors"
56
        "fmt"
57
        "io"
58
        "unicode"
59
)
60
 
61
// A ParseError is returned for parsing errors.
// The first line is 1.  The first column is 0.
//
// The underlying cause is stored in Err and is also exposed through
// Unwrap, so callers can match it against the package's sentinel errors
// with errors.Is instead of type-asserting and comparing Err by hand.
type ParseError struct {
	Line   int   // Line where the error occurred
	Column int   // Column (rune index) where the error occurred
	Err    error // The actual error
}

// Error formats the error as "line L, column C: cause".
func (e *ParseError) Error() string {
	return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
}

// Unwrap returns the underlying error held in Err.
func (e *ParseError) Unwrap() error {
	return e.Err
}
72
 
73
// These are the errors that can be returned in ParseError.Err.
var (
	// ErrTrailingComma reports a delimiter with nothing after it on the
	// line while Reader.TrailingComma is false.
	ErrTrailingComma = errors.New("extra delimiter at end of line")

	// ErrBareQuote reports a quote character inside an unquoted field
	// while Reader.LazyQuotes is false.
	ErrBareQuote = errors.New("bare \" in non-quoted-field")

	// ErrQuote reports an unterminated quoted field, or a non-doubled
	// quote inside a quoted field while Reader.LazyQuotes is false.
	ErrQuote = errors.New("extraneous \" in field")

	// ErrFieldCount reports a record whose number of fields differs from
	// Reader.FieldsPerRecord.
	ErrFieldCount = errors.New("wrong number of fields in line")
)
80
 
81
// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
type Reader struct {
	// Comma is the field delimiter.  NewReader sets it to ','.
	Comma rune

	// Comment, if not 0, is the comment character.  Lines beginning
	// with the Comment character are ignored.
	Comment rune

	// FieldsPerRecord is the number of expected fields per record.
	// If positive, Read requires each record to have exactly that many
	// fields.  If 0, Read sets it to the number of fields in the first
	// record, so that future records must have the same field count.
	// If negative, no check is made.
	FieldsPerRecord int

	// LazyQuotes, if true, allows a quote to appear in an unquoted
	// field and a non-doubled quote to appear in a quoted field.
	LazyQuotes bool

	// TrailingComma, if true, allows the last field of a record to be
	// an unquoted empty field.
	TrailingComma bool

	// TrimLeadingSpace, if true, causes leading white space in a field
	// to be ignored.
	TrimLeadingSpace bool

	line   int           // number of the line being parsed; the first line is 1
	column int           // rune index of the last rune read on the current line
	r      *bufio.Reader // buffered view of the input
	field  bytes.Buffer  // scratch buffer holding the field being parsed
}
115
 
116
// NewReader returns a new Reader that reads from r.
117
func NewReader(r io.Reader) *Reader {
118
        return &Reader{
119
                Comma: ',',
120
                r:     bufio.NewReader(r),
121
        }
122
}
123
 
124
// error creates a new ParseError based on err.
125
func (r *Reader) error(err error) error {
126
        return &ParseError{
127
                Line:   r.line,
128
                Column: r.column,
129
                Err:    err,
130
        }
131
}
132
 
133
// Read reads one record from r.  The record is a slice of strings with each
134
// string representing one field.
135
func (r *Reader) Read() (record []string, err error) {
136
        for {
137
                record, err = r.parseRecord()
138
                if record != nil {
139
                        break
140
                }
141
                if err != nil {
142
                        return nil, err
143
                }
144
        }
145
 
146
        if r.FieldsPerRecord > 0 {
147
                if len(record) != r.FieldsPerRecord {
148
                        r.column = 0 // report at start of record
149
                        return record, r.error(ErrFieldCount)
150
                }
151
        } else if r.FieldsPerRecord == 0 {
152
                r.FieldsPerRecord = len(record)
153
        }
154
        return record, nil
155
}
156
 
157
// ReadAll reads all the remaining records from r.
158
// Each record is a slice of fields.
159
// A successful call returns err == nil, not err == EOF. Because ReadAll is
160
// defined to read until EOF, it does not treat end of file as an error to be
161
// reported.
162
func (r *Reader) ReadAll() (records [][]string, err error) {
163
        for {
164
                record, err := r.Read()
165
                if err == io.EOF {
166
                        return records, nil
167
                }
168
                if err != nil {
169
                        return nil, err
170
                }
171
                records = append(records, record)
172
        }
173
        panic("unreachable")
174
}
175
 
176
// readRune reads one rune from r, folding \r\n to \n and keeping track
177
// of how far into the line we have read.  r.column will point to the start
178
// of this rune, not the end of this rune.
179
func (r *Reader) readRune() (rune, error) {
180
        r1, _, err := r.r.ReadRune()
181
 
182
        // Handle \r\n here.  We make the simplifying assumption that
183
        // anytime \r is followed by \n that it can be folded to \n.
184
        // We will not detect files which contain both \r\n and bare \n.
185
        if r1 == '\r' {
186
                r1, _, err = r.r.ReadRune()
187
                if err == nil {
188
                        if r1 != '\n' {
189
                                r.r.UnreadRune()
190
                                r1 = '\r'
191
                        }
192
                }
193
        }
194
        r.column++
195
        return r1, err
196
}
197
 
198
// unreadRune puts the last rune read from r back.
199
func (r *Reader) unreadRune() {
200
        r.r.UnreadRune()
201
        r.column--
202
}
203
 
204
// skip reads runes up to and including the rune delim or until error.
205
func (r *Reader) skip(delim rune) error {
206
        for {
207
                r1, err := r.readRune()
208
                if err != nil {
209
                        return err
210
                }
211
                if r1 == delim {
212
                        return nil
213
                }
214
        }
215
        panic("unreachable")
216
}
217
 
218
// parseRecord reads and parses a single csv record from r.
219
func (r *Reader) parseRecord() (fields []string, err error) {
220
        // Each record starts on a new line.  We increment our line
221
        // number (lines start at 1, not 0) and set column to -1
222
        // so as we increment in readRune it points to the character we read.
223
        r.line++
224
        r.column = -1
225
 
226
        // Peek at the first rune.  If it is an error we are done.
227
        // If we are support comments and it is the comment character
228
        // then skip to the end of line.
229
 
230
        r1, _, err := r.r.ReadRune()
231
        if err != nil {
232
                return nil, err
233
        }
234
 
235
        if r.Comment != 0 && r1 == r.Comment {
236
                return nil, r.skip('\n')
237
        }
238
        r.r.UnreadRune()
239
 
240
        // At this point we have at least one field.
241
        for {
242
                haveField, delim, err := r.parseField()
243
                if haveField {
244
                        fields = append(fields, r.field.String())
245
                }
246
                if delim == '\n' || err == io.EOF {
247
                        return fields, err
248
                } else if err != nil {
249
                        return nil, err
250
                }
251
        }
252
        panic("unreachable")
253
}
254
 
255
// parseField parses the next field in the record.  The read field is
256
// located in r.field.  Delim is the first character not part of the field
257
// (r.Comma or '\n').
258
func (r *Reader) parseField() (haveField bool, delim rune, err error) {
259
        r.field.Reset()
260
 
261
        r1, err := r.readRune()
262
        if err != nil {
263
                // If we have EOF and are not at the start of a line
264
                // then we return the empty field.  We have already
265
                // checked for trailing commas if needed.
266
                if err == io.EOF && r.column != 0 {
267
                        return true, 0, err
268
                }
269
                return false, 0, err
270
        }
271
 
272
        if r.TrimLeadingSpace {
273
                for r1 != '\n' && unicode.IsSpace(r1) {
274
                        r1, err = r.readRune()
275
                        if err != nil {
276
                                return false, 0, err
277
                        }
278
                }
279
        }
280
 
281
        switch r1 {
282
        case r.Comma:
283
                // will check below
284
 
285
        case '\n':
286
                // We are a trailing empty field or a blank line
287
                if r.column == 0 {
288
                        return false, r1, nil
289
                }
290
                return true, r1, nil
291
 
292
        case '"':
293
                // quoted field
294
        Quoted:
295
                for {
296
                        r1, err = r.readRune()
297
                        if err != nil {
298
                                if err == io.EOF {
299
                                        if r.LazyQuotes {
300
                                                return true, 0, err
301
                                        }
302
                                        return false, 0, r.error(ErrQuote)
303
                                }
304
                                return false, 0, err
305
                        }
306
                        switch r1 {
307
                        case '"':
308
                                r1, err = r.readRune()
309
                                if err != nil || r1 == r.Comma {
310
                                        break Quoted
311
                                }
312
                                if r1 == '\n' {
313
                                        return true, r1, nil
314
                                }
315
                                if r1 != '"' {
316
                                        if !r.LazyQuotes {
317
                                                r.column--
318
                                                return false, 0, r.error(ErrQuote)
319
                                        }
320
                                        // accept the bare quote
321
                                        r.field.WriteRune('"')
322
                                }
323
                        case '\n':
324
                                r.line++
325
                                r.column = -1
326
                        }
327
                        r.field.WriteRune(r1)
328
                }
329
 
330
        default:
331
                // unquoted field
332
                for {
333
                        r.field.WriteRune(r1)
334
                        r1, err = r.readRune()
335
                        if err != nil || r1 == r.Comma {
336
                                break
337
                        }
338
                        if r1 == '\n' {
339
                                return true, r1, nil
340
                        }
341
                        if !r.LazyQuotes && r1 == '"' {
342
                                return false, 0, r.error(ErrBareQuote)
343
                        }
344
                }
345
        }
346
 
347
        if err != nil {
348
                if err == io.EOF {
349
                        return true, 0, err
350
                }
351
                return false, 0, err
352
        }
353
 
354
        if !r.TrailingComma {
355
                // We don't allow trailing commas.  See if we
356
                // are at the end of the line (being mindful
357
                // of trimming spaces).
358
                c := r.column
359
                r1, err = r.readRune()
360
                if r.TrimLeadingSpace {
361
                        for r1 != '\n' && unicode.IsSpace(r1) {
362
                                r1, err = r.readRune()
363
                                if err != nil {
364
                                        break
365
                                }
366
                        }
367
                }
368
                if err == io.EOF || r1 == '\n' {
369
                        r.column = c // report the comma
370
                        return false, 0, r.error(ErrTrailingComma)
371
                }
372
                r.unreadRune()
373
        }
374
        return true, r1, nil
375
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.