Skip to content

Commit 9a833b5

Browse files
authored
[QT-437] Optimize matrix operations (#77)
This work was spawned by reports of people encountering context deadline errors when trying to list scenarios. We previously had a hard timeout of 5 seconds for listing which we have changed to use scenario level `--timeout` flag going forward. This solved the immediate problem but begged the question of why scenario listing was taking nearly that long at all. When I was investigating this I determined there were several reasons that listing was slow: * We were always decoding and evaluating all scenarios when listing, even though we only needed the reference information. If you're using `enos scenario list` to validate that the configuration for all scenarios is correct, that makes sense, but most people are probably using it to see which scenarios are available and don't necessarily want to validate all of them every time the command is invoked. * When filtering we would always fully decode all scenarios before filtering to keep the scenarios that are desired. * Every time we were comparing a matrix vector we were making a copy of it. The problem compounded exponentially as we added additional vectors and elements to the matrix. Nearly 3/4's of the CPU time we spent when listing scenarios was actually being used here for allocations, garbage collection, and sorting after copy. To improve our situation we do the following: * Add support to the decoder for shallow decoding of scenarios to the reference level. * Add filtering support to the decoder. Now we can optionally shallow decode, then filter, before fully decoding a scenario. * Rewrite our matrix vector implementation. Instead of an array alias the matrix Vector type is a struct which we use for new efficiency gains. We now always pass references to Vectors instead of passing them by value and creating copies. We also lazily keep track of a sorted copy of vectors to allow faster repeat comparisons. This has drastically reduced our allocations and garbage collection. When we combine all of these changes we improved the listing time by at least an order of magnitude. A drawback to this approach is that listing no longer validates that the full configuration in a flight plan, the scenarios and all of their variants. To handle that we introduce a new `validate` sub-command that fully decodes all matched scenarios to ensure that the flight plan is valid. We also introduce the `--profile` hidden flag that will turn on CPU and memory profiling and output the pprof files into the current directory. These profiles were useful in determining the bottleneck of the prior implementation so we'll leave them there for possible future use. * Add support for reference level scenario decoding * Add support for filtering during decoding * Rewrite matrix vector implementation to reduce allocations and GC * Use references to vectors instead of passing by value * Lazily create ordered copies of vectors when comparing * Add `--profile` hidden flag to enable CPU and memory profiling * Add `validate` sub-command for validating configuration of a flight plan. * Bump version Signed-off-by: Ryan Cragun <[email protected]>
1 parent a4ec981 commit 9a833b5

33 files changed

+1878
-1236
lines changed

acceptance/scenario_check_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func TestAcc_Cmd_Scenario_Check_WithWarnings(t *testing.T) {
127127
path, err := filepath.Abs(filepath.Join("./scenarios", "scenario_generate_has_warnings"))
128128
require.NoError(t, err)
129129

130-
cmd := fmt.Sprintf("scenario validate --chdir %s --out %s --format json", path, outDir)
130+
cmd := fmt.Sprintf("scenario check --chdir %s --out %s --format json", path, outDir)
131131
if failOnWarnings {
132132
cmd = fmt.Sprintf("%s --fail-on-warnings", cmd)
133133
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package acceptance
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"path/filepath"
7+
"testing"
8+
9+
"github.com/stretchr/testify/require"
10+
"google.golang.org/protobuf/encoding/protojson"
11+
12+
"github.com/hashicorp/enos/proto/hashicorp/enos/v1/pb"
13+
)
14+
15+
func TestAcc_Cmd_Scenario_Validate(t *testing.T) {
16+
enos := newAcceptanceRunner(t)
17+
18+
for _, test := range []struct {
19+
dir string
20+
out *pb.ValidateScenariosConfigurationResponse
21+
fail bool
22+
}{
23+
{
24+
dir: "scenario_list_pass_0",
25+
out: &pb.ValidateScenariosConfigurationResponse{},
26+
},
27+
{
28+
dir: "scenario_list_fail_malformed",
29+
fail: true,
30+
},
31+
} {
32+
t.Run(test.dir, func(t *testing.T) {
33+
path, err := filepath.Abs(filepath.Join("./scenarios", test.dir))
34+
require.NoError(t, err)
35+
cmd := fmt.Sprintf("scenario validate --chdir %s --format json", path)
36+
fmt.Println(path)
37+
out, err := enos.run(context.Background(), cmd)
38+
if test.fail {
39+
require.Error(t, err)
40+
return
41+
}
42+
43+
require.NoError(t, err)
44+
got := &pb.ValidateScenariosConfigurationResponse{}
45+
require.NoError(t, protojson.Unmarshal(out, got))
46+
})
47+
}
48+
}

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ require (
1717
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7
1818
github.com/olekukonko/tablewriter v0.0.5
1919
github.com/spf13/cobra v1.4.0
20-
github.com/stretchr/testify v1.7.0
20+
github.com/stretchr/testify v1.8.0
2121
github.com/zclconf/go-cty v1.11.0
2222
golang.org/x/term v0.0.0-20220411215600-e5f449aeb171
2323
golang.org/x/text v0.3.8
@@ -65,5 +65,5 @@ require (
6565
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
6666
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
6767
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
68-
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
68+
gopkg.in/yaml.v3 v3.0.1 // indirect
6969
)

go.sum

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,14 @@ github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB
166166
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
167167
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
168168
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
169+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
169170
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
170171
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
171172
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
172173
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
173-
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
174-
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
174+
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
175+
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
176+
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
175177
github.com/vmihailenco/msgpack v3.3.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
176178
github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4=
177179
github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
@@ -258,7 +260,8 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
258260
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
259261
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
260262
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
261-
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
262263
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
264+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
265+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
263266
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
264267
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

internal/command/enos/cmd/root.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"io"
78
"os"
9+
"path/filepath"
10+
"runtime"
11+
"runtime/pprof"
812
"strings"
913
"time"
1014

@@ -37,6 +41,8 @@ type rootStateS struct {
3741
enosServer *server.ServiceV1
3842
enosConnection *client.Connection
3943
operatorConfig *pb.Operator_Config
44+
profile bool
45+
cpuProfileOut io.ReadWriteCloser
4046
}
4147

4248
var rootState = &rootStateS{
@@ -60,6 +66,9 @@ func Execute() {
6066
rootCmd.PersistentFlags().StringVar(&rootState.stdoutPath, "stdout", "", "Path to write output. (default $STDOUT)")
6167
rootCmd.PersistentFlags().StringVar(&rootState.stderrPath, "stderr", "", "Path to write error output. (default $STDERR)")
6268
rootCmd.PersistentFlags().Int32Var(&rootState.operatorConfig.WorkerCount, "worker-count", 4, "Number of scenario operation workers")
69+
rootCmd.PersistentFlags().BoolVar(&rootState.profile, "profile", false, "Enable Go profiling")
70+
_ = rootCmd.PersistentFlags().MarkHidden("profile")
71+
6372
if err := rootCmd.Execute(); err != nil {
6473
var exitErr *status.ErrExit
6574
if errors.As(err, &exitErr) {
@@ -130,9 +139,54 @@ func setupCLIUI() error {
130139
return err
131140
}
132141

142+
func startCPUProfiling() error {
143+
wd, err := os.Getwd()
144+
if err != nil {
145+
return err
146+
}
147+
148+
rootState.cpuProfileOut, err = os.Create(filepath.Join(wd, "cpu.pprof"))
149+
if err != nil {
150+
return err
151+
}
152+
153+
if err := pprof.StartCPUProfile(rootState.cpuProfileOut); err != nil {
154+
return err
155+
}
156+
157+
return nil
158+
}
159+
160+
func runMemoryProfiling() error {
161+
wd, err := os.Getwd()
162+
if err != nil {
163+
return err
164+
}
165+
166+
m, err := os.Create(filepath.Join(wd, "memory.pprof"))
167+
if err != nil {
168+
return err
169+
}
170+
defer m.Close()
171+
172+
runtime.GC()
173+
174+
if err := pprof.WriteHeapProfile(m); err != nil {
175+
return err
176+
}
177+
178+
return nil
179+
}
180+
133181
func rootCmdPreRun(cmd *cobra.Command, args []string) error {
134182
cmd.SilenceErrors = true // we handle this ourselves
135183

184+
if rootState.profile {
185+
if err := startCPUProfiling(); err != nil {
186+
return err
187+
}
188+
}
189+
136190
// Setup our UI configuration first
137191
err := setupCLIUI()
138192
if err != nil {
@@ -155,12 +209,27 @@ func rootCmdPreRun(cmd *cobra.Command, args []string) error {
155209
}
156210

157211
func rootCmdPostRun(cmd *cobra.Command, args []string) {
212+
if rootState.profile {
213+
if rootState.cpuProfileOut != nil {
214+
defer rootState.cpuProfileOut.Close()
215+
}
216+
defer pprof.StopCPUProfile()
217+
}
218+
158219
if rootState.enosServer != nil {
159220
err := rootState.enosServer.Stop()
160221
if err != nil {
161222
_ = ui.ShowError(err)
162223
}
163224
}
164225

226+
// Run memory profiling after we've shut everything down everything but
227+
// our UI
228+
if rootState.profile {
229+
if err := runMemoryProfiling(); err != nil {
230+
_ = ui.ShowError(err)
231+
}
232+
}
233+
165234
ui.Close()
166235
}

internal/command/enos/cmd/scenario.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ func newScenarioCmd() *cobra.Command {
7575
scenarioCmd.AddCommand(newScenarioRunCmd())
7676
scenarioCmd.AddCommand(newScenarioExecCmd())
7777
scenarioCmd.AddCommand(newScenarioOutputCmd())
78+
scenarioCmd.AddCommand(newScenarioValidateConfigCmd())
7879

7980
return scenarioCmd
8081
}
@@ -104,12 +105,7 @@ func scenarioCmdPreRun(cmd *cobra.Command, args []string) error {
104105
// scenarioCmdPostRun is the scenario sub-command post-run. We'll use it to shut
105106
// down the server.
106107
func scenarioCmdPostRun(cmd *cobra.Command, args []string) {
107-
if rootState.enosServer != nil {
108-
err := rootState.enosServer.Stop()
109-
if err != nil {
110-
_ = ui.ShowError(err)
111-
}
112-
}
108+
rootCmdPostRun(cmd, args)
113109
}
114110

115111
// setupDefaultScenarioCfg sets up default scenario configuration

internal/command/enos/cmd/scenario_check.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
func newScenarioCheckCmd() *cobra.Command {
1414
cmd := &cobra.Command{
1515
Use: "check [FILTER]",
16-
Aliases: []string{"validate"}, // old name of the check command
1716
Short: "Check that scenarios are valid",
1817
Long: fmt.Sprintf("Check that scenarios are valid by generating the Scenario's Terraform Root Module, initializing it, validating it, and planning. %s", scenarioFilterDesc),
1918
RunE: runScenarioCheckCmd,

internal/command/enos/cmd/scenario_list.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
package cmd
22

33
import (
4-
"context"
5-
"time"
6-
74
"github.com/spf13/cobra"
85

96
"github.com/hashicorp/enos/internal/diagnostics"
@@ -24,7 +21,7 @@ func newScenarioListCmd() *cobra.Command {
2421

2522
// runScenarioListCmd runs a scenario list
2623
func runScenarioListCmd(cmd *cobra.Command, args []string) error {
27-
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
24+
ctx, cancel := scenarioTimeoutContext()
2825
defer cancel()
2926

3027
sf, err := flightplan.ParseScenarioFilter(args)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package cmd
2+
3+
import (
4+
"github.com/spf13/cobra"
5+
6+
"github.com/hashicorp/enos/internal/diagnostics"
7+
"github.com/hashicorp/enos/internal/flightplan"
8+
"github.com/hashicorp/enos/proto/hashicorp/enos/v1/pb"
9+
)
10+
11+
func newScenarioValidateConfigCmd() *cobra.Command {
12+
return &cobra.Command{
13+
Use: "validate [FILTER]",
14+
Short: "Validate configuration",
15+
Long: "Validate all scenario and variant configurations",
16+
RunE: runScenarioValidateCfgCmd,
17+
ValidArgsFunction: scenarioNameCompletion,
18+
}
19+
}
20+
21+
// runScenarioValidateCfgCmd is the function that validates all flight plan configuration
22+
func runScenarioValidateCfgCmd(cmd *cobra.Command, args []string) error {
23+
ctx, cancel := scenarioTimeoutContext()
24+
defer cancel()
25+
26+
sf, err := flightplan.ParseScenarioFilter(args)
27+
if err != nil {
28+
return ui.ShowScenariosValidateConfig(&pb.ValidateScenariosConfigurationResponse{
29+
Diagnostics: diagnostics.FromErr(err),
30+
})
31+
}
32+
33+
res, err := rootState.enosConnection.Client.ValidateScenariosConfiguration(
34+
ctx, &pb.ValidateScenariosConfigurationRequest{
35+
Workspace: &pb.Workspace{
36+
Flightplan: scenarioState.protoFp,
37+
},
38+
Filter: sf.Proto(),
39+
},
40+
)
41+
if err != nil {
42+
return err
43+
}
44+
45+
return ui.ShowScenariosValidateConfig(res)
46+
}

internal/command/enos/main.go

Lines changed: 0 additions & 7 deletions
This file was deleted.

0 commit comments

Comments
 (0)