Skip to content

Commit 5dba856

Browse files
authored
support multi-document YAML (#17)
1 parent 4d49859 commit 5dba856

File tree

5 files changed

+130
-15
lines changed

5 files changed

+130
-15
lines changed

pkg/splitter/ScanDocuments.go

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// =================================================================
2+
//
3+
// Copyright (C) 2019 Spatial Current, Inc. - All Rights Reserved
4+
// Released as open source under the MIT License. See LICENSE file.
5+
//
6+
// =================================================================
7+
8+
package splitter
9+
10+
import (
11+
"bufio"
12+
"bytes"
13+
)
14+
15+
// ScanDocuments returns a function that splits a stream of bytes on
16+
// the given separator byte slice and whether to drop line-ending carriage returns.
17+
// Returns a new bufio.SplitFunc compatible with bufio.Scanner
18+
//
19+
// Examples:
20+
// - ScanDocuments([]byte("---\n")[0], true) - split on YAML boundary marker and drop carriage returns at the end of a line.
21+
func ScanDocuments(separator []byte, dropCR bool) bufio.SplitFunc {
22+
return bufio.SplitFunc(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
23+
if atEOF && len(data) == 0 {
24+
return 0, nil, nil
25+
}
26+
if i := bytes.Index(data, separator); i >= 0 {
27+
// We have a full separator-terminated line.
28+
if dropCR {
29+
return i + len(separator), DropCarriageReturn(data[0:i]), nil
30+
}
31+
return i + len(separator), data[0:i], nil
32+
}
33+
// If we're at EOF, we have a final, non-terminated line. Return it.
34+
if atEOF {
35+
if dropCR {
36+
return len(data), DropCarriageReturn(data), nil
37+
}
38+
return len(data), data, nil
39+
}
40+
// Request more data.
41+
return 0, nil, nil
42+
})
43+
}

pkg/yaml/Unmarshal.go

+36-7
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,17 @@
88
package yaml
99

1010
import (
11+
"bufio"
12+
"bytes"
1113
"fmt"
1214
"strconv"
1315
"strings"
1416
"unicode/utf8" // utf8 is used to decode the first rune in the string
1517

1618
"github.com/pkg/errors"
1719
goyaml "gopkg.in/yaml.v2" // import the YAML library from https://github.com/go-yaml/yaml
20+
21+
"github.com/spatialcurrent/go-simple-serializer/pkg/splitter"
1822
)
1923

2024
// Unmarshal parses a slice of bytes into an object using a few simple type inference rules.
@@ -37,15 +41,40 @@ func Unmarshal(b []byte) (interface{}, error) {
3741
return nil, ErrEmptyInput
3842
}
3943

40-
switch string(b) {
41-
case "true":
44+
if bytes.Equal(b, True) {
4245
return true, nil
43-
case "false":
46+
}
47+
if bytes.Equal(b, False) {
4448
return false, nil
45-
case "null":
49+
}
50+
if bytes.Equal(b, Null) {
4651
return nil, nil
4752
}
4853

54+
if bytes.HasPrefix(b, BoundaryMarker) {
55+
s := bufio.NewScanner(bytes.NewReader(b))
56+
s.Split(splitter.ScanDocuments(BoundaryMarker, true))
57+
obj := make([]interface{}, 0)
58+
i := 0
59+
for s.Scan() {
60+
if d := s.Bytes(); len(d) > 0 {
61+
if (len(d) == 1 && d[0] == '\n') || (len(d) == 2 && d[0] == '\n' && d[1] == '\r') {
62+
continue
63+
}
64+
element, err := Unmarshal(d)
65+
if err != nil {
66+
return obj, errors.Wrapf(err, "error scanning document %d", i)
67+
}
68+
obj = append(obj, element)
69+
i++
70+
}
71+
}
72+
if err := s.Err(); err != nil {
73+
return obj, errors.Wrap(err, fmt.Sprintf("error scanning YAML %q", string(b)))
74+
}
75+
return obj, nil
76+
}
77+
4978
first, _ := utf8.DecodeRune(b)
5079
if first == utf8.RuneError {
5180
return nil, ErrInvalidRune
@@ -56,21 +85,21 @@ func Unmarshal(b []byte) (interface{}, error) {
5685
obj := make([]interface{}, 0)
5786
err := goyaml.Unmarshal(b, &obj)
5887
if err != nil {
59-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q", string(b)))
88+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling YAML %q", string(b)))
6089
}
6190
return obj, nil
6291
case '{':
6392
obj := map[string]interface{}{}
6493
err := goyaml.Unmarshal(b, &obj)
6594
if err != nil {
66-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q", string(b)))
95+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling YAML %q", string(b)))
6796
}
6897
return obj, nil
6998
case '"':
7099
obj := ""
71100
err := goyaml.Unmarshal(b, &obj)
72101
if err != nil {
73-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q", string(b)))
102+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling YAML %q", string(b)))
74103
}
75104
return obj, nil
76105
}

pkg/yaml/UnmarshalType.go

+37-7
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
package yaml
99

1010
import (
11+
"bufio"
12+
"bytes"
1113
"fmt"
1214
"reflect"
1315
"strconv"
@@ -16,6 +18,8 @@ import (
1618

1719
"github.com/pkg/errors"
1820
goyaml "gopkg.in/yaml.v2" // import the YAML library from https://github.com/go-yaml/yaml
21+
22+
"github.com/spatialcurrent/go-simple-serializer/pkg/splitter"
1923
)
2024

2125
// UnmarshalType parses a slice of bytes into an object of a given type.
@@ -27,21 +31,46 @@ func UnmarshalType(b []byte, outputType reflect.Type) (interface{}, error) {
2731
return nil, ErrEmptyInput
2832
}
2933

30-
switch string(b) {
31-
case "true":
34+
if bytes.Equal(b, True) {
3235
if outputType.Kind() != reflect.Bool {
3336
return nil, &ErrInvalidKind{Value: outputType, Expected: []reflect.Kind{reflect.Bool}}
3437
}
3538
return true, nil
36-
case "false":
39+
}
40+
if bytes.Equal(b, False) {
3741
if outputType.Kind() != reflect.Bool {
3842
return nil, &ErrInvalidKind{Value: outputType, Expected: []reflect.Kind{reflect.Bool}}
3943
}
4044
return false, nil
41-
case "null":
45+
}
46+
if bytes.Equal(b, Null) {
4247
return nil, nil
4348
}
4449

50+
if bytes.HasPrefix(b, BoundaryMarker) {
51+
if outputType.Kind() != reflect.Slice {
52+
return nil, &ErrInvalidKind{Value: outputType, Expected: []reflect.Kind{reflect.Slice}}
53+
}
54+
s := bufio.NewScanner(bytes.NewReader(b))
55+
s.Split(splitter.ScanDocuments(BoundaryMarker, true))
56+
out := reflect.MakeSlice(outputType, 0, 0)
57+
i := 0
58+
for s.Scan() {
59+
if d := s.Bytes(); len(d) > 0 {
60+
obj, err := UnmarshalType(d, outputType.Elem())
61+
if err != nil {
62+
return out.Interface(), errors.Wrapf(err, "error scanning document %d", i)
63+
}
64+
out = reflect.Append(out, reflect.ValueOf(obj))
65+
i++
66+
}
67+
}
68+
if err := s.Err(); err != nil {
69+
return out.Interface(), errors.Wrap(err, fmt.Sprintf("error scanning YAML %q", string(b)))
70+
}
71+
return out.Interface(), nil
72+
}
73+
4574
first, _ := utf8.DecodeRune(b)
4675
if first == utf8.RuneError {
4776
return nil, ErrInvalidRune
@@ -56,7 +85,7 @@ func UnmarshalType(b []byte, outputType reflect.Type) (interface{}, error) {
5685
ptr.Elem().Set(reflect.MakeSlice(outputType, 0, 0))
5786
err := goyaml.Unmarshal(b, ptr.Interface())
5887
if err != nil {
59-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q", string(b)))
88+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling YAML %q", string(b)))
6089
}
6190
return ptr.Elem().Interface(), nil
6291
case '{':
@@ -67,7 +96,7 @@ func UnmarshalType(b []byte, outputType reflect.Type) (interface{}, error) {
6796
ptr.Elem().Set(reflect.MakeMap(outputType))
6897
err := goyaml.Unmarshal(b, ptr.Interface())
6998
if err != nil {
70-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q", string(b)))
99+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling YAML %q", string(b)))
71100
}
72101
return ptr.Elem().Interface(), nil
73102
case '"':
@@ -77,7 +106,7 @@ func UnmarshalType(b []byte, outputType reflect.Type) (interface{}, error) {
77106
obj := ""
78107
err := goyaml.Unmarshal(b, &obj)
79108
if err != nil {
80-
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling JSON %q", string(b)))
109+
return nil, errors.Wrap(err, fmt.Sprintf("error unmarshaling YAML %q", string(b)))
81110
}
82111
return obj, nil
83112
}
@@ -109,5 +138,6 @@ func UnmarshalType(b []byte, outputType reflect.Type) (interface{}, error) {
109138
}
110139
return f, nil
111140
}
141+
112142
return string(b), nil
113143
}

pkg/yaml/yaml.go

+7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ import (
1515
"github.com/pkg/errors"
1616
)
1717

18+
// Byte-slice literals that Unmarshal and UnmarshalType compare the raw input
// against (using bytes.Equal / bytes.HasPrefix) before any further parsing.
var (
19+
// True is the input that is decoded as the boolean value true.
True = []byte("true")
20+
// False is the input that is decoded as the boolean value false.
False = []byte("false")
21+
// Null is the input that is decoded as a nil value.
Null = []byte("null")
22+
// BoundaryMarker is the document separator.  Input that starts with this
// marker is treated as a multi-document stream and is split on it.
BoundaryMarker = []byte("---\n")
23+
)
24+
1825
var (
1926
ErrEmptyInput = errors.New("empty input")
2027
ErrInvalidRune = errors.New("invalid rune")

scripts/test-cli.sh

+7-1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ testHCLJSON() {
7979
assertEquals "unexpected output" "${expected}" "${output}"
8080
}
8181

82+
# testYAMLJSON pipes a multi-document YAML stream through `gss -i yaml -o json`
# and asserts that the output is a single JSON array with one element per
# document.  The last document ("foo") is followed by the boundary marker on
# the same line rather than on a line of its own.
testYAMLJSON() {
83+
local expected='[{"a":"x"},{"b":"y"},"foo"]'
84+
local output=$(echo -e '---\na: x\n---\nb: "y"\n---\nfoo---\n' | gss -i yaml -o json)
85+
assertEquals "unexpected output" "${expected}" "${output}"
86+
}
87+
8288
oneTimeSetUp() {
8389
echo "Setting up"
8490
echo "Using temporary directory at ${SHUNIT_TMPDIR}"
@@ -89,4 +95,4 @@ oneTimeTearDown() {
8995
}
9096

9197
# Load shUnit2.
92-
. "${DIR}/shunit2"
98+
. "${DIR}/shunit2"

0 commit comments

Comments
 (0)