Skip to content

Commit e5f6290

Browse files
committed
csv now supports input limit and variable number of fields
1 parent 7c2655e commit e5f6290

3 files changed

Lines changed: 24 additions & 13 deletions

File tree

gss/DeserializeBytes.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
package gss
99

1010
import (
11+
"bytes"
1112
"fmt"
1213
"reflect"
1314
"strings"
@@ -54,15 +55,15 @@ func DeserializeBytes(input *DeserializeInput) (interface{}, error) {
5455

5556
switch input.Format {
5657
case "csv", "tsv":
57-
return DeserializeCSV(string(input.Bytes), input.Format, input.Header, input.Comment, input.LazyQuotes, input.SkipLines, input.Limit, input.Type)
58+
return DeserializeCSV(bytes.NewReader(input.Bytes), input.Format, input.Header, input.Comment, input.LazyQuotes, input.SkipLines, input.Limit, input.Type)
5859
case "properties":
5960
return DeserializeProperties(string(input.Bytes), input.Comment, input.Type)
6061
case "bson":
6162
return deserializeBSON(input.Bytes, input.Type)
6263
case "json":
6364
return DeserializeJSON(input.Bytes, input.Type)
6465
case "jsonl":
65-
return DeserializeJSONL(string(input.Bytes), input.Comment, input.SkipLines, input.Limit, input.Type, input.Async)
66+
return DeserializeJSONL(bytes.NewReader(input.Bytes), input.Comment, input.SkipLines, input.Limit, input.Type, input.Async)
6667
case "hcl":
6768
ptr := reflect.New(input.Type)
6869
ptr.Elem().Set(reflect.MakeMap(input.Type))

gss/DeserializeCSV.go

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12,16 +12,15 @@ import (
1212
"github.com/pkg/errors"
1313
"io"
1414
"reflect"
15-
"strings"
1615
)
1716

1817
// DeserializeCSV deserializes a CSV or TSV string into a Go instance.
1918
// - https://golang.org/pkg/encoding/csv/
20-
func DeserializeCSV(input string, format string, input_header []string, input_comment string, input_lazy_quotes bool, inputSkipLines int, input_limit int, output_type reflect.Type) (interface{}, error) {
19+
func DeserializeCSV(input io.Reader, format string, input_header []string, input_comment string, inputLazyQuotes bool, inputSkipLines int, inputLimit int, output_type reflect.Type) (interface{}, error) {
2120

2221
if output_type.Kind() == reflect.Map {
23-
if input_limit != 1 {
24-
return nil, errors.Wrap(&ErrInvalidLimit{Value: input_limit}, "DeserializeCSV expects input limit of 1 when output type is of kind map.")
22+
if inputLimit != 1 {
23+
return nil, errors.Wrap(&ErrInvalidLimit{Value: inputLimit}, "DeserializeCSV expects input limit of 1 when output type is of kind map.")
2524
}
2625
if len(input_header) == 0 {
2726
return nil, errors.New("deserializeCSV when returning a map type expects a input header")
@@ -30,11 +29,12 @@ func DeserializeCSV(input string, format string, input_header []string, input_co
3029
return nil, &ErrInvalidKind{Value: output_type.Kind(), Valid: []reflect.Kind{reflect.Array, reflect.Slice, reflect.Map}}
3130
}
3231

33-
reader := csv.NewReader(strings.NewReader(input))
32+
reader := csv.NewReader(input)
3433
if format == "tsv" {
3534
reader.Comma = '\t'
3635
}
37-
reader.LazyQuotes = input_lazy_quotes
36+
reader.LazyQuotes = inputLazyQuotes
37+
reader.FieldsPerRecord = -1 // records may have a variable number of fields
3838

3939
if len(input_comment) > 1 {
4040
return nil, errors.New("go's encoding/csv package only supports single character comment characters")
@@ -45,7 +45,7 @@ func DeserializeCSV(input string, format string, input_header []string, input_co
4545
if output_type.Kind() == reflect.Map {
4646
inRow, err := reader.Read()
4747
if err != nil {
48-
return nil, errors.Wrap(err, "Error reading row from input with format csv")
48+
return nil, errors.Wrap(err, "error reading row from input with format csv")
4949
}
5050
if len(inRow) == 0 {
5151
return nil, &ErrEmptyRow{}
@@ -79,14 +79,23 @@ func DeserializeCSV(input string, format string, input_header []string, input_co
7979
if err == io.EOF {
8080
break
8181
} else {
82-
return nil, errors.Wrap(err, "Error reading row from input with format csv")
82+
return nil, errors.Wrap(err, "error reading row from input with format csv")
8383
}
8484
}
8585
m := reflect.MakeMap(output_type.Elem())
8686
for i, h := range input_header {
87-
m.SetMapIndex(reflect.ValueOf(h), reflect.ValueOf(inRow[i]))
87+
if i < len(inRow) {
88+
m.SetMapIndex(reflect.ValueOf(h), reflect.ValueOf(inRow[i]))
89+
}
90+
//else {
91+
// m.SetMapIndex(reflect.ValueOf(h), "")
92+
//}
8893
}
8994
output = reflect.Append(output, m)
95+
96+
if inputLimit > 0 && output.Len() >= inputLimit {
97+
break
98+
}
9099
}
91100

92101
return output.Interface(), nil

gss/DeserializeJSONL.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ import (
1212
"context"
1313
"encoding/json"
1414
"fmt"
15+
"io"
1516
"reflect"
1617
"strings"
1718
"sync"
@@ -24,14 +25,14 @@ import (
2425

2526
// DeserializeJSONL deserializes the input JSON lines bytes into a Go object.
2627
// - https://golang.org/pkg/encoding/json/
27-
func DeserializeJSONL(input string, inputComment string, inputSkipLines int, inputLimit int, outputType reflect.Type, async bool) (interface{}, error) {
28+
func DeserializeJSONL(input io.Reader, inputComment string, inputSkipLines int, inputLimit int, outputType reflect.Type, async bool) (interface{}, error) {
2829

2930
output := reflect.MakeSlice(outputType, 0, 0)
3031
if inputLimit == 0 {
3132
return output.Interface(), nil
3233
}
3334

34-
scanner := bufio.NewScanner(strings.NewReader(input))
35+
scanner := bufio.NewScanner(input)
3536
scanner.Split(bufio.ScanLines)
3637
for i := 0; i < inputSkipLines; i++ {
3738
if !scanner.Scan() {

0 commit comments

Comments
 (0)