Skip to content

Commit 999cdff

Browse files
committed
lux: fail fast on non-retryable serve errors instead of looping (spec 18)
`latere lux serve` reconnect-looped forever on errors that retrying cannot fix: not signed in, missing the llm.serve scope, or the tunnel feature disabled on the server (404). Classify these as fatal and return immediately with one clear, actionable message:

- bearer/auth failure -> surface the sign-in instruction.
- dial 404 -> the tunnel feature is not enabled on the server.
- dial 401/403 -> your login lacks llm.serve; run `latere auth login`.

Transient/network errors still reconnect with backoff. A test covers the fast-fail path.
1 parent af4bce8 commit 999cdff

2 files changed

Lines changed: 63 additions & 2 deletions

File tree

internal/tunnel/extra_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package tunnel
33
import (
44
"bufio"
55
"context"
6+
"errors"
67
"io"
78
"net"
89
"net/http"
@@ -12,6 +13,37 @@ import (
1213
"time"
1314
)
1415

16+
// TestRunFailsFastOnAuthError: a non-retryable bearer error (e.g. not signed
17+
// in) must return immediately, not enter the reconnect-with-backoff loop.
18+
func TestRunFailsFastOnAuthError(t *testing.T) {
19+
local := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
20+
if r.URL.Path == "/api/tags" {
21+
_, _ = w.Write([]byte(`{"models":[{"name":"m1"}]}`))
22+
}
23+
}))
24+
defer local.Close()
25+
26+
authErr := errors.New("not signed in for Lux; run `latere auth login`")
27+
done := make(chan error, 1)
28+
go func() {
29+
done <- Run(context.Background(), Options{
30+
LuxURL: "http://127.0.0.1:1",
31+
Bearer: func(context.Context) (string, error) { return "", authErr },
32+
Runtime: RuntimeOllama,
33+
UpstreamURL: local.URL,
34+
Out: io.Discard,
35+
})
36+
}()
37+
select {
38+
case err := <-done:
39+
if !errors.Is(err, authErr) {
40+
t.Errorf("Run err = %v, want the auth error surfaced", err)
41+
}
42+
case <-time.After(3 * time.Second):
43+
t.Fatal("Run did not fail fast on a non-retryable auth error (it looped)")
44+
}
45+
}
46+
1547
func TestDefaultURL(t *testing.T) {
1648
cases := map[string]string{
1749
RuntimeOllama: "http://localhost:11434",

internal/tunnel/serve.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bufio"
55
"context"
66
"encoding/json"
7+
"errors"
78
"fmt"
89
"io"
910
"net"
@@ -15,6 +16,19 @@ import (
1516
"github.com/hashicorp/yamux"
1617
)
1718

19+
// fatalErr marks an error that reconnecting cannot fix (not signed in,
// missing capability, or the tunnel feature disabled on the server). Run
// surfaces it once and exits, instead of looping with backoff forever.
type fatalErr struct{ err error }

// Error returns the wrapped error's message unchanged, so marking an error
// as fatal does not alter what the user sees.
func (e fatalErr) Error() string { return e.err.Error() }

// Unwrap exposes the wrapped error so errors.Is and errors.As can match the
// underlying cause through the fatal marker.
func (e fatalErr) Unwrap() error { return e.err }

// fatal marks err as non-retryable. A nil err stays nil: wrapping it would
// yield a non-nil error whose Error method dereferences a nil inner error.
func fatal(err error) error {
	if err == nil {
		return nil
	}
	return fatalErr{err}
}

// isFatal reports whether err, or any error in its Unwrap chain, was marked
// with fatal.
func isFatal(err error) bool {
	var f fatalErr
	return errors.As(err, &f)
}
31+
1832
// Descriptor is the handshake contract advertised to luxd (spec 18 Layer
1933
// 2). It must match latere-ai/lux internal/tunnel.Descriptor on the wire.
2034
type Descriptor struct {
@@ -69,6 +83,12 @@ func Run(ctx context.Context, opts Options) error {
6983
if ctx.Err() != nil {
7084
return ctx.Err()
7185
}
86+
// A non-retryable error (not signed in, missing llm.serve, feature
87+
// disabled) returns immediately so the user sees one clear message
88+
// instead of an endless reconnect loop.
89+
if isFatal(err) {
90+
return err
91+
}
7292
if err != nil {
7393
fmt.Fprintf(opts.Out, "tunnel: disconnected (%v); reconnecting in %s\n", err, backoff)
7494
}
@@ -96,15 +116,24 @@ func runSession(ctx context.Context, opts Options) error {
96116

97117
bearer, err := opts.Bearer(ctx)
98118
if err != nil {
99-
return err
119+
// No usable identity (e.g. not signed in). Retrying will not help.
120+
return fatal(err)
100121
}
101122

102123
wsURL := toWS(opts.LuxURL) + "/lux/v1/tunnel"
103-
c, _, err := websocket.Dial(ctx, wsURL, &websocket.DialOptions{
124+
c, resp, err := websocket.Dial(ctx, wsURL, &websocket.DialOptions{
104125
Subprotocols: []string{"lux.tunnel.v1"},
105126
HTTPHeader: http.Header{"Authorization": {"Bearer " + bearer}},
106127
})
107128
if err != nil {
129+
if resp != nil {
130+
switch resp.StatusCode {
131+
case http.StatusNotFound:
132+
return fatal(fmt.Errorf("the local-model tunnel is not enabled on %s yet. Ask your operator to turn it on (LUX_TUNNEL_ENABLED).", opts.LuxURL))
133+
case http.StatusUnauthorized, http.StatusForbidden:
134+
return fatal(fmt.Errorf("your login may not serve models here (it needs the llm.serve scope). Run `latere auth login` to refresh your scopes, then try again."))
135+
}
136+
}
108137
return fmt.Errorf("dial %s: %w", wsURL, err)
109138
}
110139
defer c.CloseNow()

0 commit comments

Comments
 (0)