Skip to content

Commit 33fad82

Browse files
authored
Minor fixes and new flags (#14)
various fixes and new flags
1 parent 8db94b3 commit 33fad82

File tree

13 files changed

+211
-111
lines changed

13 files changed

+211
-111
lines changed

cmd/gss/main.go

+42-12
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939
"github.com/spatialcurrent/go-simple-serializer/pkg/gss"
4040
"github.com/spatialcurrent/go-simple-serializer/pkg/iterator"
4141
"github.com/spatialcurrent/go-simple-serializer/pkg/jsonl"
42+
"github.com/spatialcurrent/go-simple-serializer/pkg/properties"
4243
"github.com/spatialcurrent/go-simple-serializer/pkg/serializer"
4344
"github.com/spatialcurrent/go-simple-serializer/pkg/sv"
4445
"github.com/spatialcurrent/go-stringify/pkg/stringify"
@@ -88,8 +89,7 @@ func canStream(inputFormat string, outputFormat string, outputSorted bool) bool
8889
}
8990

9091
// initFlags registers the command's flags on the given flag set.
// formats is passed through unchanged to the cli package initializer.
func initFlags(flag *pflag.FlagSet, formats []string) {
91-
cli.InitInputFlags(flag, formats)
92-
cli.InitOutputFlags(flag, formats)
92+
cli.InitCliFlags(flag, formats)
9393
}
9494

9595
func checkConfig(v *viper.Viper, formats []string) error {
@@ -131,6 +131,8 @@ func main() {
131131

132132
inputHeader := stringify.StringSliceToInterfaceSlice(v.GetStringSlice(cli.FlagInputHeader))
133133

134+
inputLineSeparator := v.GetString(cli.FlagInputLineSeparator)
135+
134136
outputFormat := v.GetString(cli.FlagOutputFormat)
135137

136138
outputHeader := stringify.StringSliceToInterfaceSlice(v.GetStringSlice(cli.FlagOutputHeader))
@@ -158,20 +160,46 @@ func main() {
158160

159161
outputLimit := v.GetInt(cli.FlagOutputLimit)
160162

163+
verbose := v.GetBool(cli.FlagVerbose)
164+
165+
if verbose {
166+
err := properties.Write(&properties.WriteInput{
167+
Writer: os.Stdout,
168+
LineSeparator: "\n",
169+
KeyValueSeparator: "=",
170+
Object: v.AllSettings(),
171+
KeySerializer: stringify.NewDefaultStringer(),
172+
ValueSerializer: stringify.NewDefaultStringer(),
173+
Sorted: true,
174+
Reversed: false,
175+
EscapePrefix: "\\",
176+
EscapeSpace: false,
177+
EscapeEqual: true,
178+
EscapeColon: false,
179+
EscapeNewLine: true,
180+
})
181+
if err != nil {
182+
return errors.Wrap(err, "error writing viper settings")
183+
}
184+
}
185+
161186
if canStream(inputFormat, outputFormat, outputSorted) {
162187

163188
p := pipe.NewBuilder()
164189

165190
it, errorIterator := iterator.NewIterator(&iterator.NewIteratorInput{
166-
Reader: os.Stdin,
167-
Type: reflect.TypeOf([]map[string]interface{}{}),
168-
Format: inputFormat,
169-
Header: inputHeader,
170-
SkipLines: v.GetInt(cli.FlagInputSkipLines),
171-
SkipBlanks: true,
172-
SkipComments: true,
173-
Comment: v.GetString(cli.FlagInputComment),
174-
Trim: v.GetBool(cli.FlagInputTrim),
191+
Reader: os.Stdin,
192+
Type: reflect.TypeOf([]map[string]interface{}{}),
193+
Format: inputFormat,
194+
Header: inputHeader,
195+
ScannerBufferSize: v.GetInt(cli.FlagInputScannerBufferSize),
196+
SkipLines: v.GetInt(cli.FlagInputSkipLines),
197+
SkipBlanks: true,
198+
SkipComments: true,
199+
Comment: v.GetString(cli.FlagInputComment),
200+
Trim: v.GetBool(cli.FlagInputTrim),
201+
LineSeparator: inputLineSeparator,
202+
DropCR: v.GetBool(cli.FlagInputDropCR),
175203
})
176204
if it == nil {
177205
return errors.New(fmt.Sprintf("error building input iterator with format %q", inputFormat))
@@ -223,9 +251,11 @@ func main() {
223251
InputHeader: inputHeader,
224252
InputComment: v.GetString(cli.FlagInputComment),
225253
InputLazyQuotes: v.GetBool(cli.FlagInputLazyQuotes),
254+
InputScannerBufferSize: v.GetInt(cli.FlagInputScannerBufferSize),
226255
InputSkipLines: v.GetInt(cli.FlagInputSkipLines),
227256
InputLimit: v.GetInt(cli.FlagInputLimit),
228-
InputLineSeparator: v.GetString(cli.FlagInputLineSeparator),
257+
InputLineSeparator: inputLineSeparator,
258+
InputDropCR: v.GetBool(cli.FlagInputDropCR),
229259
InputEscapePrefix: v.GetString(cli.FlagInputEscapePrefix),
230260
InputUnescapeSpace: v.GetBool(cli.FlagInputUnescapeSpace),
231261
InputUnescapeNewLine: v.GetBool(cli.FlagInputUnescapeNewLine),

pkg/cli/cli.go

+15
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,25 @@ package cli
1010

1111
import (
1212
"github.com/pkg/errors"
13+
"github.com/spf13/pflag"
1314
)
1415

1516
var (
1617
// ErrMissingKeyValueSeparator is the sentinel error for a missing key-value separator.
ErrMissingKeyValueSeparator = errors.New("missing key-value separator")
1718
// ErrMissingLineSeparator is the sentinel error for a missing line separator.
ErrMissingLineSeparator = errors.New("missing line separator")
1819
// ErrMissingEscapePrefix is the sentinel error for a missing escape prefix.
ErrMissingEscapePrefix = errors.New("missing escape prefix")
1920
)
21+
22+
const (
23+
// FlagVerbose is the name of the boolean flag that enables verbose output.
FlagVerbose string = "verbose"
24+
)
25+
26+
// InitCliFlags registers all command line flags on the given flag set:
// the input flags, the output flags, and the verbose flag (shorthand "v", default false).
// formats is passed through to InitInputFlags and InitOutputFlags.
27+
func InitCliFlags(flag *pflag.FlagSet, formats []string) {
28+
29+
InitInputFlags(flag, formats)
30+
31+
InitOutputFlags(flag, formats)
32+
33+
flag.BoolP(FlagVerbose, "v", false, "verbose output")
34+
}

pkg/cli/input.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ const (
2626
FlagInputLazyQuotes string = "input-lazy-quotes"
2727
FlagInputTrim string = "input-trim"
2828
FlagInputReaderBufferSize string = "input-reader-buffer-size"
29+
FlagInputScannerBufferSize string = "input-scanner-buffer-size"
2930
FlagInputSkipLines string = "input-skip-lines"
3031
FlagInputLineSeparator string = "input-line-separator"
3132
FlagInputKeyValueSeparator string = "input-key-value-separator"
@@ -67,11 +68,13 @@ func InitInputFlags(flag *pflag.FlagSet, formats []string) {
6768
flag.StringSlice(FlagInputHeader, DefaultInputHeader, "The input header if the stdin input has no header.")
6869
flag.StringP(FlagInputComment, "c", "", "The input comment character, e.g., #. Commented lines are not sent to output.")
6970
flag.Bool(FlagInputLazyQuotes, false, "allows lazy quotes for CSV and TSV")
71+
flag.Int(FlagInputReaderBufferSize, 4096, "the buffer size of the file reader")
72+
flag.Int(FlagInputScannerBufferSize, 0, "the initial buffer size for the scanner")
7073
flag.Int(FlagInputSkipLines, DefaultSkipLines, "The number of lines to skip before processing")
7174
flag.IntP(FlagInputLimit, "l", DefaultInputLimit, "The input limit")
7275
flag.BoolP(FlagInputTrim, "t", false, "trim input lines")
7376
flag.String(FlagInputLineSeparator, "\n", "override line separator. Used with properties and JSONL formats.")
74-
flag.String(FlagInputKeyValueSeparator, "", "override key-value separator. not used.")
77+
flag.String(FlagInputKeyValueSeparator, "=", "override key-value separator. not used.")
7578
flag.Bool(FlagInputDropCR, false, "drop carriage return characters that immediately precede new line characters")
7679
flag.String(FlagInputEscapePrefix, "", "override escape prefix. Used with properties format.")
7780
flag.Bool(FlagInputUnescapeColon, false, "Unescape colon characters in input. Used with properties format.")

pkg/gss/Convert.go

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ type ConvertInput struct {
2020
InputHeader []interface{}
2121
InputComment string
2222
InputLazyQuotes bool
23+
InputScannerBufferSize int
2324
InputSkipLines int
2425
InputLimit int
2526
InputLineSeparator string
@@ -52,6 +53,7 @@ func NewConvertInput(bytes []byte, inputFormat string, outputFormat string) *Con
5253
InputHeader: NoHeader,
5354
InputComment: NoComment,
5455
InputLazyQuotes: false,
56+
InputScannerBufferSize: 0,
5557
InputSkipLines: NoSkip,
5658
InputLimit: NoLimit,
5759
InputLineSeparator: "\n",
@@ -82,6 +84,7 @@ func Convert(input *ConvertInput) ([]byte, error) {
8284
Limit(input.InputLimit).
8385
Header(input.InputHeader).
8486
Comment(input.InputComment).
87+
ScannerBufferSize(input.InputScannerBufferSize).
8588
LazyQuotes(input.InputLazyQuotes).
8689
SkipLines(input.InputSkipLines).
8790
LineSeparator(input.InputLineSeparator).

pkg/gss/DeserializeBytes.go

+33-31
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,25 @@ import (
2323

2424
// DeserializeBytesInput provides the input for the DeserializeBytes function.
// For the "csv", "tsv", "jsonl", and "tags" formats, DeserializeBytes forwards
// these fields to iterator.NewIterator.
2525
type DeserializeBytesInput struct {
26-
Bytes []byte
27-
Format string
28-
Header []interface{}
29-
Comment string
30-
LazyQuotes bool
31-
SkipLines int
32-
SkipBlanks bool
33-
SkipComments bool
34-
Trim bool
35-
Limit int
36-
LineSeparator string
37-
DropCR bool
38-
Type reflect.Type
39-
EscapePrefix string
40-
UnescapeSpace bool
41-
UnescapeNewLine bool
42-
UnescapeColon bool
43-
UnescapeEqual bool
26+
// Bytes is the serialized input; it is wrapped in a bytes.Reader for the iterator.
Bytes []byte
27+
// Format selects the deserialization branch in DeserializeBytes.
Format string
28+
Header []interface{}
29+
Comment string
30+
LazyQuotes bool
31+
// ScannerBufferSize is forwarded to the iterator as its ScannerBufferSize.
ScannerBufferSize int
32+
SkipLines int
33+
SkipBlanks bool
34+
SkipComments bool
35+
Trim bool
36+
Limit int
37+
// LineSeparator is forwarded to the iterator as a string.
LineSeparator string
38+
DropCR bool
39+
Type reflect.Type
40+
EscapePrefix string
41+
UnescapeSpace bool
42+
UnescapeNewLine bool
43+
UnescapeColon bool
44+
UnescapeEqual bool
4445
}
4546

4647
// DeserializeBytes reads in an object as string bytes and returns the representative Go instance.
@@ -49,19 +50,20 @@ func DeserializeBytes(input *DeserializeBytesInput) (interface{}, error) {
4950
switch input.Format {
5051
case "csv", "tsv", "jsonl", "tags":
5152
it, errorIterator := iterator.NewIterator(&iterator.NewIteratorInput{
52-
Reader: bytes.NewReader(input.Bytes),
53-
Type: input.Type,
54-
Format: input.Format,
55-
Header: input.Header,
56-
Comment: input.Comment,
57-
SkipLines: input.SkipLines,
58-
SkipBlanks: input.SkipBlanks,
59-
SkipComments: input.SkipComments,
60-
LazyQuotes: input.LazyQuotes,
61-
Trim: input.Trim,
62-
Limit: input.Limit,
63-
LineSeparator: []byte(input.LineSeparator)[0],
64-
DropCR: input.DropCR,
53+
Reader: bytes.NewReader(input.Bytes),
54+
Type: input.Type,
55+
Format: input.Format,
56+
Header: input.Header,
57+
Comment: input.Comment,
58+
ScannerBufferSize: input.ScannerBufferSize,
59+
SkipLines: input.SkipLines,
60+
SkipBlanks: input.SkipBlanks,
61+
SkipComments: input.SkipComments,
62+
LazyQuotes: input.LazyQuotes,
63+
Trim: input.Trim,
64+
Limit: input.Limit,
65+
LineSeparator: input.LineSeparator,
66+
DropCR: input.DropCR,
6567
})
6668
if errorIterator != nil {
6769
return nil, errors.Wrap(errorIterator, "error creating iterator")

pkg/gss/DeserializeReader.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ func DeserializeReader(input *DeserializeReaderInput) (interface{}, error) {
5858
LazyQuotes: input.LazyQuotes,
5959
Trim: input.Trim,
6060
Limit: input.Limit,
61-
LineSeparator: []byte(input.LineSeparator)[0],
61+
LineSeparator: input.LineSeparator,
6262
DropCR: input.DropCR,
6363
})
6464
if errorIterator != nil {

pkg/iterator/Iterator.go

+24-11
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,17 @@ import (
1616
"io"
1717
"reflect"
1818

19+
"github.com/pkg/errors"
20+
1921
"github.com/spatialcurrent/go-simple-serializer/pkg/jsonl"
2022
"github.com/spatialcurrent/go-simple-serializer/pkg/sv"
2123
"github.com/spatialcurrent/go-simple-serializer/pkg/tags"
2224
)
2325

26+
var (
27+
// ErrMissingLineSeparator is returned by NewIterator when the input's LineSeparator is empty.
ErrMissingLineSeparator = errors.New("missing line separator")
28+
)
29+
2430
// Iterator is a simple interface that supports iterating over an input object source.
2531
type Iterator interface {
2632
Next() (interface{}, error) // Returns the next object or error if any. When input is exhausted, returns (nil, io.EOF).
@@ -31,6 +37,7 @@ type NewIteratorInput struct {
3137
Reader io.Reader // the underlying reader
3238
Format string // the format
3339
Header []interface{} // for csv and tsv, the header. If not given, then reads first line of stream as header.
40+
ScannerBufferSize int // the initial buffer size for the scanner
3441
SkipLines int // Skip a given number of lines at the beginning of the stream.
3542
SkipBlanks bool // Skip blank lines. If false, Next() returns a blank line as (nil, nil). If true, Next() simply skips forward until it finds a non-blank line.
3643
SkipComments bool // Skip commented lines. If false, Next() returns a commented line as (nil, nil). If true, Next() simply skips forward until it finds a non-commented line.
@@ -39,7 +46,7 @@ type NewIteratorInput struct {
3946
LazyQuotes bool // for csv and tsv, parse with lazy quotes
4047
Limit int // Limit the number of objects to read and return from the underlying stream.
4148
KeyValueSeparator string // For tags, the key-value separator.
42-
LineSeparator byte // For JSON Lines, the new line byte.
49+
LineSeparator string // For JSON Lines, the line separator. Must not be empty; only its first byte is used.
4350
DropCR bool // For JSON Lines, drop carriage returns at the end of lines.
4451
Type reflect.Type //
4552
}
@@ -51,18 +58,24 @@ type NewIteratorInput struct {
5158
// - tags - Tags (key-value pairs)
5259
// - tsv - Tab-Separated Values
5360
func NewIterator(input *NewIteratorInput) (Iterator, error) {
61+
62+
if len(input.LineSeparator) == 0 {
63+
return nil, ErrMissingLineSeparator
64+
}
65+
5466
switch input.Format {
5567
case "jsonl":
5668
it := jsonl.NewIterator(&jsonl.NewIteratorInput{
57-
Reader: input.Reader,
58-
SkipLines: input.SkipLines,
59-
SkipBlanks: input.SkipBlanks,
60-
SkipComments: input.SkipComments,
61-
Comment: input.Comment,
62-
Trim: input.Trim,
63-
Limit: input.Limit,
64-
LineSeparator: input.LineSeparator,
65-
DropCR: input.DropCR,
69+
Reader: input.Reader,
70+
ScannerBufferSize: input.ScannerBufferSize,
71+
SkipLines: input.SkipLines,
72+
SkipBlanks: input.SkipBlanks,
73+
SkipComments: input.SkipComments,
74+
Comment: input.Comment,
75+
Trim: input.Trim,
76+
Limit: input.Limit,
77+
LineSeparator: []byte(input.LineSeparator)[0],
78+
DropCR: input.DropCR,
6679
})
6780
return it, nil
6881
case "csv", "tags", "tsv":
@@ -94,7 +107,7 @@ func NewIterator(input *NewIteratorInput) (Iterator, error) {
94107
SkipComments: input.SkipComments,
95108
Comment: input.Comment,
96109
KeyValueSeparator: input.KeyValueSeparator,
97-
LineSeparator: input.LineSeparator,
110+
LineSeparator: []byte(input.LineSeparator)[0],
98111
DropCR: input.DropCR,
99112
Limit: input.Limit,
100113
})

pkg/json/Unmarshal.go

+10-6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package json
99

1010
import (
11+
"bytes"
1112
stdjson "encoding/json" // import the standard json library as stdjson
1213
"fmt"
1314
"unicode/utf8"
@@ -35,12 +36,15 @@ func Unmarshal(b []byte) (interface{}, error) {
3536
return nil, ErrEmptyInput
3637
}
3738

38-
switch string(b) {
39-
case "true":
39+
if bytes.HasPrefix(b, BytesTrue) {
4040
return true, nil
41-
case "false":
41+
}
42+
43+
if bytes.HasPrefix(b, BytesFalse) {
4244
return false, nil
43-
case "null":
45+
}
46+
47+
if bytes.HasPrefix(b, BytesNull) {
4448
return nil, nil
4549
}
4650

@@ -54,14 +58,14 @@ func Unmarshal(b []byte) (interface{}, error) {
5458
obj := make([]interface{}, 0)
5559
err := stdjson.Unmarshal(b, &obj)
5660
if err != nil {
57-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q into slice", string(b)))
61+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q into %T", string(b), obj))
5862
}
5963
return obj, nil
6064
case '{':
6165
obj := map[string]interface{}{}
6266
err := stdjson.Unmarshal(b, &obj)
6367
if err != nil {
64-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q into map", string(b)))
68+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q into %T", string(b), obj))
6569
}
6670
return obj, nil
6771
case '"':

pkg/json/json.go

+4
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,8 @@ import (
1616
var (
1717
// ErrEmptyInput is the sentinel error for empty input; Unmarshal returns it.
ErrEmptyInput = errors.New("empty input")
1818
// ErrInvalidRune is the sentinel error for an invalid rune.
ErrInvalidRune = errors.New("invalid rune")
19+
20+
// BytesTrue is the byte form of the JSON literal true.
// NOTE(review): Unmarshal tests BytesTrue, BytesFalse, and BytesNull with
// bytes.HasPrefix, so input that merely starts with a literal (e.g. "truex")
// is also accepted; confirm that prefix matching is intended.
BytesTrue = []byte("true")
21+
// BytesFalse is the byte form of the JSON literal false.
BytesFalse = []byte("false")
22+
// BytesNull is the byte form of the JSON literal null.
BytesNull = []byte("null")
1923
)

0 commit comments

Comments
 (0)