Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 145 additions & 8 deletions pkg/transformers/sort_within_records.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ package transformers
import (
"fmt"
"os"
"regexp"
"strings"

"github.com/johnkerl/miller/v6/pkg/cli"
"github.com/johnkerl/miller/v6/pkg/lib"
"github.com/johnkerl/miller/v6/pkg/mlrval"
"github.com/johnkerl/miller/v6/pkg/types"
)

Expand All @@ -25,8 +28,11 @@ func transformerSortWithinRecordsUsage(
fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSortWithinRecords)
fmt.Fprintln(o, "Outputs records sorted lexically ascending by keys.")
fmt.Fprintf(o, "Options:\n")
fmt.Fprintf(o, "-r Recursively sort subobjects/submaps, e.g. for JSON input.\n")
fmt.Fprintf(o, "-h|--help Show this message.\n")
fmt.Fprintf(o, "-f {names} Sort only these keys; others preserve record order.\n")
fmt.Fprintf(o, "-r {names} Like -f but use regular expressions to match field names.\n");
fmt.Fprintf(o, " Example: -f '^[xy]' -r sorts keys starting with x or y.\n")
fmt.Fprintf(o, " Without -f, -r recursively sorts subobjects/submaps (e.g. for JSON input).\n")
fmt.Fprintf(o, "-h|--help Show this message.\n")
}

func transformerSortWithinRecordsParseCLI(
Expand All @@ -39,8 +45,11 @@ func transformerSortWithinRecordsParseCLI(

// Skip the verb name from the current spot in the mlr command line
argi := *pargi
verb := args[argi]
argi++
doRecurse := false
var fieldNames []string = nil
doRegexes := false

for argi < argc /* variable increment: 1 or 2 depending on flag */ {
opt := args[argi]
Expand All @@ -57,23 +66,33 @@ func transformerSortWithinRecordsParseCLI(
os.Exit(0)

} else if opt == "-r" {
doRecurse = true
if fieldNames != nil {
doRegexes = true
} else {
doRecurse = true
}

} else if opt == "-f" {
fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)

} else {
transformerSortWithinRecordsUsage(os.Stderr)
os.Exit(1)
}
}

// TODO: allow sort by key or value?
// TODO: allow sort ascending/descending?
// -r with -f means regex; -r without -f means recurse
if fieldNames != nil && doRecurse {
doRegexes = true
doRecurse = false
}

*pargi = argi
if !doConstruct { // All transformers must do this for main command-line parsing
return nil
}

transformer, err := NewTransformerSortWithinRecords(doRecurse)
transformer, err := NewTransformerSortWithinRecords(doRecurse, fieldNames, doRegexes)
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
Expand All @@ -84,15 +103,52 @@ func transformerSortWithinRecordsParseCLI(

// ----------------------------------------------------------------
// TransformerSortWithinRecords holds the configuration for the
// sort-within-records verb together with the per-record handler that the
// constructor selects from that configuration.
type TransformerSortWithinRecords struct {
// doRecurse is the constructor's doRecurse argument; the constructor uses it
// to pick a recursive handler over a non-recursive one.
doRecurse bool
// fieldNames is the constructor's fieldNames argument (the -f values). When
// nil, the whole-record handlers are used instead of the selective ones.
fieldNames []string
// fieldSet is built from fieldNames when doRegexes is false; keyMatches
// looks keys up in it.
fieldSet map[string]bool
// regex is compiled from fieldNames[0] when doRegexes is true; keyMatches
// tests keys against it.
regex *regexp.Regexp
// doRegexes selects regex matching (true) or set lookup (false) in keyMatches.
doRegexes bool
// recordTransformerFunc is the handler chosen by the constructor; Transform
// forwards every call to it.
recordTransformerFunc RecordTransformerFunc
}

func NewTransformerSortWithinRecords(
doRecurse bool,
fieldNames []string,
doRegexes bool,
) (*TransformerSortWithinRecords, error) {

tr := &TransformerSortWithinRecords{}
if doRecurse {
tr := &TransformerSortWithinRecords{
doRecurse: doRecurse,
fieldNames: fieldNames,
doRegexes: doRegexes,
}

if fieldNames != nil {
if doRegexes {
// Handles "a.*b"i Miller case-insensitive-regex specification
regexString := fieldNames[0]
regex, err := lib.CompileMillerRegex(regexString)
if err != nil {
fmt.Fprintf(
os.Stderr,
"%s %s: cannot compile regex [%s]\n",
"mlr", verbNameSortWithinRecords, regexString,
)
os.Exit(1)
}
tr.regex = regex
} else {
tr.fieldSet = lib.StringListToSet(fieldNames)
}
}

if fieldNames != nil {
if doRecurse {
tr.recordTransformerFunc = tr.transformSelectiveRecursively
} else {
tr.recordTransformerFunc = tr.transformSelective
}
} else if doRecurse {
tr.recordTransformerFunc = tr.transformRecursively
} else {
tr.recordTransformerFunc = tr.transformNonrecursively
Expand All @@ -113,6 +169,87 @@ func (tr *TransformerSortWithinRecords) Transform(
tr.recordTransformerFunc(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
}

// ----------------------------------------------------------------
// keyMatches reports whether the given field name is selected for sorting.
// In regex mode the name is tested against tr.regex; otherwise it is looked
// up in tr.fieldSet, where an absent name yields false.
func (tr *TransformerSortWithinRecords) keyMatches(key string) bool {
	if !tr.doRegexes {
		selected := tr.fieldSet[key]
		return selected
	}
	return tr.regex.MatchString(key)
}

// ----------------------------------------------------------------
// transformSelective rewrites one record so that the fields accepted by
// keyMatches come first, ordered by lib.SortStrings, followed by all other
// fields in their original relative order. End-of-stream contexts are
// forwarded untouched. The record-and-context is always appended to
// outputRecordsAndContexts; the two channel parameters are not used here.
func (tr *TransformerSortWithinRecords) transformSelective(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext)
		return
	}

	record := inrecAndContext.Record

	// One pass over the record: split it into selected key names and the
	// remaining entries, the latter kept in encounter order.
	var selected []string
	var others []*mlrval.MlrmapEntry
	for entry := record.Head; entry != nil; entry = entry.Next {
		if !tr.keyMatches(entry.Key) {
			others = append(others, entry)
			continue
		}
		selected = append(selected, entry.Key)
	}
	lib.SortStrings(selected)

	// Rebuild: sorted selected fields first, then everything else.
	rebuilt := mlrval.NewMlrmapAsRecord()
	for i := range selected {
		name := selected[i]
		rebuilt.PutReference(name, record.Get(name))
	}
	for i := range others {
		rebuilt.PutReference(others[i].Key, others[i].Value)
	}

	// Overwrite the record in place so the existing pointer held by
	// inrecAndContext sees the new field order.
	*record = *rebuilt

	*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext)
}

// ----------------------------------------------------------------
// transformSelectiveRecursively reorders one record exactly as
// transformSelective does (fields accepted by keyMatches first, ordered by
// lib.SortStrings, then the remaining fields in original order) and, in
// addition, calls SortByKeyRecursively on every map-valued field, whether or
// not its key was selected. End-of-stream contexts are forwarded untouched.
// The two channel parameters are not used here.
func (tr *TransformerSortWithinRecords) transformSelectiveRecursively(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *[]*types.RecordAndContext, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	if inrecAndContext.EndOfStream {
		*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext)
		return
	}

	// sortIfMap sorts a field's nested map in place. Nil values, and values
	// for which GetMap returns nil, are left alone.
	sortIfMap := func(value *mlrval.Mlrval) {
		if value == nil {
			return
		}
		if submap := value.GetMap(); submap != nil {
			submap.SortByKeyRecursively()
		}
	}

	record := inrecAndContext.Record

	// One pass over the record: split it into selected key names and the
	// remaining entries, the latter kept in encounter order.
	var selected []string
	var others []*mlrval.MlrmapEntry
	for entry := record.Head; entry != nil; entry = entry.Next {
		if !tr.keyMatches(entry.Key) {
			others = append(others, entry)
			continue
		}
		selected = append(selected, entry.Key)
	}
	lib.SortStrings(selected)

	// Rebuild: sorted selected fields first, then everything else, sorting
	// each nested map just before its field is re-inserted.
	rebuilt := mlrval.NewMlrmapAsRecord()
	for i := range selected {
		name := selected[i]
		value := record.Get(name)
		sortIfMap(value)
		rebuilt.PutReference(name, value)
	}
	for i := range others {
		sortIfMap(others[i].Value)
		rebuilt.PutReference(others[i].Key, others[i].Value)
	}

	// Overwrite the record in place so the existing pointer held by
	// inrecAndContext sees the new field order.
	*record = *rebuilt

	*outputRecordsAndContexts = append(*outputRecordsAndContexts, inrecAndContext)
}

// ----------------------------------------------------------------
func (tr *TransformerSortWithinRecords) transformNonrecursively(
inrecAndContext *types.RecordAndContext,
Expand Down
1 change: 1 addition & 0 deletions test/cases/verb-sort-within-records/0004-regex/cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlr --from test/input/sort-within-records.dkvp sort-within-records -f '^[ai]' -r
Empty file.
10 changes: 10 additions & 0 deletions test/cases/verb-sort-within-records/0004-regex/expout
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
a=pan,i=1,b=pan,x=0.34679014,y=0.72680286
a=eks,i=2,x=0.75867996,y=0.52215111,b=pan
a=wye,i=3,y=0.33831853,b=wye,x=0.20460331
a=eks,i=4,x=0.38139939,b=wye,y=0.13418874
a=wye,i=5,y=0.86362447,x=0.57328892,b=pan
a=zee,i=6,y=0.49322129,b=pan,x=0.52712616
a=eks,i=7,b=zee,x=0.61178406,y=0.18788492
a=zee,i=8,x=0.59855401,b=wye,y=0.97618139
a=hat,i=9,b=wye,x=0.03144188,y=0.74955076
a=pan,i=10,y=0.95261836,x=0.50262601,b=wye
1 change: 1 addition & 0 deletions test/cases/verb-sort-within-records/0005-f-explicit/cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlr --from test/input/sort-within-records.dkvp sort-within-records -f a,i
Empty file.
10 changes: 10 additions & 0 deletions test/cases/verb-sort-within-records/0005-f-explicit/expout
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
a=pan,i=1,b=pan,x=0.34679014,y=0.72680286
a=eks,i=2,x=0.75867996,y=0.52215111,b=pan
a=wye,i=3,y=0.33831853,b=wye,x=0.20460331
a=eks,i=4,x=0.38139939,b=wye,y=0.13418874
a=wye,i=5,y=0.86362447,x=0.57328892,b=pan
a=zee,i=6,y=0.49322129,b=pan,x=0.52712616
a=eks,i=7,b=zee,x=0.61178406,y=0.18788492
a=zee,i=8,x=0.59855401,b=wye,y=0.97618139
a=hat,i=9,b=wye,x=0.03144188,y=0.74955076
a=pan,i=10,y=0.95261836,x=0.50262601,b=wye
Loading