URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [encoding/] [csv/] [reader.go] - Rev 747
Compare with Previous | Blame | View Log
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package csv reads and writes comma-separated values (CSV) files.
//
// A csv file contains zero or more records of one or more fields per record.
// Each record is separated by the newline character. The final record may
// optionally be followed by a newline character.
//
//	field1,field2,field3
//
// White space is considered part of a field.
//
// Carriage returns before newline characters are silently removed.
//
// Blank lines are ignored. A line with only whitespace characters (excluding
// the ending newline character) is not considered a blank line.
//
// Fields which start and stop with the quote character " are called
// quoted-fields. The beginning and ending quote are not part of the
// field.
//
// The source:
//
//	normal string,"quoted-field"
//
// results in the fields
//
//	{`normal string`, `quoted-field`}
//
// Within a quoted-field a quote character followed by a second quote
// character is considered a single quote.
//
//	"the ""word"" is true","a ""quoted-field"""
//
// results in
//
//	{`the "word" is true`, `a "quoted-field"`}
//
// Newlines and commas may be included in a quoted-field
//
//	"Multi-line
//	field","comma is ,"
//
// results in
//
//	{`Multi-line
//	field`, `comma is ,`}
package csv

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"unicode"
)

// A ParseError is returned for parsing errors.
// The first line is 1.
// The first column is 0.
//
// Line and Column are copied from the Reader's position when the error is
// created. Err is the underlying error, normally one of the Err* values
// declared below.
type ParseError struct {
	Line   int   // Line where the error occurred
	Column int   // Column (rune index) where the error occurred
	Err    error // The actual error
}

// Error formats the error as "line L, column C: message".
func (e *ParseError) Error() string {
	return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
}

// Unwrap returns the underlying error, so that errors.Is and errors.As can
// match the Err* values through a *ParseError.
func (e *ParseError) Unwrap() error {
	return e.Err
}

// These are the errors that can be returned in ParseError.Err
var (
	ErrTrailingComma = errors.New("extra delimiter at end of line")
	ErrBareQuote     = errors.New("bare \" in non-quoted-field")
	ErrQuote         = errors.New("extraneous \" in field")
	ErrFieldCount    = errors.New("wrong number of fields in line")
)

// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
// Comma is the field delimiter. It defaults to ','.
//
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character are ignored.
//
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
// the number of fields in the first record, so that future records must
// have the same field count. If FieldsPerRecord is negative, no check is
// made.
//
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
//
// If TrailingComma is true, the last field may be an unquoted empty field.
//
// If TrimLeadingSpace is true, leading white space in a field is ignored.
type Reader struct {
	Comma            rune // Field delimiter (set to ',' by NewReader)
	Comment          rune // Comment character for start of line
	FieldsPerRecord  int  // Number of expected fields per record
	LazyQuotes       bool // Allow lazy quotes
	TrailingComma    bool // Allow trailing comma
	TrimLeadingSpace bool // Trim leading space

	line   int           // current line number; the first line is 1
	column int           // rune index within the line of the last rune read
	r      *bufio.Reader // buffered input
	field  bytes.Buffer  // text of the field being parsed

	// One rune of pushback, private to readRune and unreadRune. It cannot
	// be delegated to bufio.Reader.UnreadRune: readRune may already have
	// spent bufio's single unread slot while peeking past a '\r'.
	lastRune rune // last rune successfully returned by readRune
	pushback bool // lastRune must be returned again by the next readRune
}

// NewReader returns a new Reader that reads from r.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		Comma: ',',
		r:     bufio.NewReader(r),
	}
}

// error creates a new ParseError based on err, stamped with the current
// line and column.
func (r *Reader) error(err error) error {
	return &ParseError{
		Line:   r.line,
		Column: r.column,
		Err:    err,
	}
}

// Read reads one record from r. The record is a slice of strings with each
// string representing one field.
//
// Comment lines and blank lines are skipped. If the record has the wrong
// number of fields, Read returns the record together with a *ParseError
// wrapping ErrFieldCount. At end of input Read returns nil, io.EOF.
func (r *Reader) Read() (record []string, err error) {
	// parseRecord returns a nil record with a nil error for lines that
	// carry no data, so keep going until there is a record or an error.
	for {
		record, err = r.parseRecord()
		if record != nil {
			break
		}
		if err != nil {
			return nil, err
		}
	}

	if r.FieldsPerRecord > 0 {
		if len(record) != r.FieldsPerRecord {
			r.column = 0 // report at start of record
			return record, r.error(ErrFieldCount)
		}
	} else if r.FieldsPerRecord == 0 {
		r.FieldsPerRecord = len(record)
	}
	return record, nil
}

// ReadAll reads all the remaining records from r.
// Each record is a slice of fields.
// A successful call returns err == nil, not err == EOF. Because ReadAll is
// defined to read until EOF, it does not treat end of file as an error to be
// reported.
func (r *Reader) ReadAll() (records [][]string, err error) {
	for {
		record, rerr := r.Read()
		if rerr == io.EOF {
			return records, nil
		}
		if rerr != nil {
			return nil, rerr
		}
		records = append(records, record)
	}
}

// readRune reads one rune from r, folding \r\n to \n and keeping track
// of how far into the line we have read. r.column will point to the start
// of this rune, not the end of this rune.
//
// A rune pushed back by unreadRune is returned before any new input is read.
func (r *Reader) readRune() (rune, error) {
	if r.pushback {
		r.pushback = false
		r.column++
		return r.lastRune, nil
	}

	r1, _, err := r.r.ReadRune()

	// Handle \r\n here. We make the simplifying assumption that
	// anytime \r is followed by \n that it can be folded to \n.
	// We will not detect files which contain both \r\n and bare \n.
	// A \r that is the very last rune of the input is reported as the
	// read error that follows it.
	if r1 == '\r' {
		r1, _, err = r.r.ReadRune()
		if err == nil && r1 != '\n' {
			// Cannot fail: it directly follows a successful ReadRune.
			_ = r.r.UnreadRune()
			r1 = '\r'
		}
	}
	r.column++
	if err == nil {
		r.lastRune = r1
	}
	return r1, err
}

// unreadRune puts the last rune read from r back. It must only be called
// directly after a readRune call that returned a nil error.
func (r *Reader) unreadRune() {
	r.pushback = true
	r.column--
}

// skip reads runes up to and including the rune delim or until error.
func (r *Reader) skip(delim rune) error {
	for {
		r1, err := r.readRune()
		if err != nil {
			return err
		}
		if r1 == delim {
			return nil
		}
	}
}

// parseRecord reads and parses a single csv record from r.
//
// It returns nil fields with a nil error for a comment line or a blank line.
// When the input ends right after the last field it returns the fields
// together with io.EOF.
func (r *Reader) parseRecord() (fields []string, err error) {
	// Each record starts on a new line. We increment our line
	// number (lines start at 1, not 0) and set column to -1
	// so as we increment in readRune it points to the character we read.
	r.line++
	r.column = -1

	// Peek at the first rune. If it is an error we are done.
	// If comments are enabled and it is the comment character
	// then skip to the end of line.
	r1, err := r.readRune()
	if err != nil {
		return nil, err
	}
	if r.Comment != 0 && r1 == r.Comment {
		return nil, r.skip('\n')
	}
	r.unreadRune()

	// At this point we have at least one field.
	for {
		haveField, delim, err := r.parseField()
		if haveField {
			fields = append(fields, r.field.String())
		}
		if delim == '\n' || err == io.EOF {
			return fields, err
		}
		if err != nil {
			return nil, err
		}
	}
}

// parseField parses the next field in the record. The read field is
// located in r.field. Delim is the first character not part of the field
// (r.Comma or '\n'); it is 0 when the field was ended by an error or by
// end of input.
func (r *Reader) parseField() (haveField bool, delim rune, err error) {
	r.field.Reset()

	r1, err := r.readRune()
	for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
		r1, err = r.readRune()
	}
	if err != nil {
		// If we have EOF and are not at the start of a line
		// then we return the empty field. We have already
		// checked for trailing commas if needed. This also covers
		// input that ends inside trimmed leading white space.
		if err == io.EOF && r.column != 0 {
			return true, 0, err
		}
		return false, 0, err
	}

	switch r1 {
	case r.Comma:
		// will check below

	case '\n':
		// We are a trailing empty field or a blank line
		if r.column == 0 {
			return false, r1, nil
		}
		return true, r1, nil

	case '"':
		// quoted field
	Quoted:
		for {
			r1, err = r.readRune()
			if err != nil {
				if err == io.EOF {
					if r.LazyQuotes {
						return true, 0, err
					}
					return false, 0, r.error(ErrQuote)
				}
				return false, 0, err
			}
			switch r1 {
			case '"':
				r1, err = r.readRune()
				if err != nil || r1 == r.Comma {
					break Quoted
				}
				if r1 == '\n' {
					return true, r1, nil
				}
				if r1 != '"' {
					if !r.LazyQuotes {
						r.column--
						return false, 0, r.error(ErrQuote)
					}
					// accept the bare quote
					r.field.WriteRune('"')
				}
			case '\n':
				// A newline inside quotes is data, but it still
				// starts a new physical line for error positions.
				r.line++
				r.column = -1
			}
			r.field.WriteRune(r1)
		}

	default:
		// unquoted field
		for {
			r.field.WriteRune(r1)
			r1, err = r.readRune()
			if err != nil || r1 == r.Comma {
				break
			}
			if r1 == '\n' {
				return true, r1, nil
			}
			if !r.LazyQuotes && r1 == '"' {
				return false, 0, r.error(ErrBareQuote)
			}
		}
	}

	if err != nil {
		if err == io.EOF {
			return true, 0, err
		}
		return false, 0, err
	}

	// The field was ended by r.Comma.
	if !r.TrailingComma {
		// We don't allow trailing commas. See if we
		// are at the end of the line (being mindful
		// of trimming spaces).
		c := r.column
		var next rune
		next, err = r.readRune()
		for err == nil && r.TrimLeadingSpace && next != '\n' && unicode.IsSpace(next) {
			next, err = r.readRune()
		}
		switch {
		case err == io.EOF || (err == nil && next == '\n'):
			r.column = c // report the comma
			return false, 0, r.error(ErrTrailingComma)
		case err != nil:
			// A genuine read error must not be swallowed.
			return false, 0, err
		}
		r.unreadRune()
	}
	return true, r.Comma, nil
}
