Skip to content

Commit 10fe7ea

Browse files
fix(security): harden TCP workspace bridge with strict mTLS
Port the Memoh-SaaS bridge mTLS fix to the current Memoh OSS runtime.
- add strict mTLS for the bridge TCP server/client; UDS and disabled mode remain unchanged
- wire bridge_tls config/env and startup fail-fast for instance_id, server_dir, and bridge_dir
- inject bridge-side mTLS material into workspace specs for the Docker/Kata TCP bridge
- document the open-source material layout and keep server_dir separate from bridge_dir
The SaaS K8s ensurePod and service-account-token parts are intentionally not included because the current Memoh main no longer has the K8s backend. (cherry picked and adapted from commit 7082324785d130f978ee7f5dcf523deeb3f9cc20)
1 parent 5bbca4d commit 10fe7ea

18 files changed

Lines changed: 1742 additions & 15 deletions

File tree

cmd/agent/app.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ func provideBridgeProvider(manage *workspace.Manager) bridge.Provider {
240240
return manage
241241
}
242242

243-
func provideWorkspaceManager(lc fx.Lifecycle, log *slog.Logger, service ctr.Service, networkController netctl.Controller, cfg config.Config, conn *pgxpool.Pool, queries dbstore.Queries) *workspace.Manager {
243+
func provideWorkspaceManager(lc fx.Lifecycle, log *slog.Logger, service ctr.Service, networkController netctl.Controller, cfg config.Config, conn *pgxpool.Pool, queries dbstore.Queries) (*workspace.Manager, error) {
244244
localSvc := workspace.NewLocalService(log, cfg.Local, cfg.Workspace.DataRoot)
245245
lc.Append(fx.Hook{
246246
OnStop: func(context.Context) error {
@@ -249,7 +249,15 @@ func provideWorkspaceManager(lc fx.Lifecycle, log *slog.Logger, service ctr.Serv
249249
},
250250
})
251251
runtimeSvc := workspace.NewRuntimeRouter(service, localSvc)
252-
return workspace.NewManager(log, runtimeSvc, networkController, cfg.Workspace, cfg.Containerd.Namespace, conn, queries)
252+
mgr := workspace.NewManager(log, runtimeSvc, networkController, cfg.Workspace, cfg.Containerd.Namespace, conn, queries)
253+
tlsOpts, err := workspace.BridgeTLSRuntimeOptionsFromConfig(cfg)
254+
if err != nil {
255+
return nil, err
256+
}
257+
if tlsOpts != nil {
258+
mgr.SetBridgeTLS(tlsOpts)
259+
}
260+
return mgr, nil
253261
}
254262

255263
func provideMemoryLLM(modelsService *models.Service, settingsService *settings.Service, queries dbstore.Queries, log *slog.Logger) memprovider.LLM {

cmd/bridge/main.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ func main() {
142142
return
143143
}
144144

145-
srv := grpc.NewServer(
146-
grpc.MaxRecvMsgSize(16*1024*1024),
147-
grpc.MaxSendMsgSize(16*1024*1024),
145+
serverOpts := []grpc.ServerOption{
146+
grpc.MaxRecvMsgSize(16 * 1024 * 1024),
147+
grpc.MaxSendMsgSize(16 * 1024 * 1024),
148148
grpc.KeepaliveParams(keepalive.ServerParameters{
149149
MaxConnectionIdle: 5 * time.Minute,
150150
MaxConnectionAge: 30 * time.Minute,
@@ -156,7 +156,21 @@ func main() {
156156
MinTime: 10 * time.Second,
157157
PermitWithoutStream: true,
158158
}),
159-
)
159+
}
160+
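// Strict mTLS applies only to the TCP channel; UDS relies on the local trust model of filesystem socket permissions.
161+
// Under strict mode, missing or corrupt material makes startup fail outright, with no fallback to plaintext (design §10).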
162+
if network == "tcp" {
163+
creds, err := bridgeServerCredentials()
164+
if err != nil {
165+
logger.Error("bridge TLS configuration invalid", slog.Any("error", err))
166+
return
167+
}
168+
if creds != nil {
169+
serverOpts = append(serverOpts, grpc.Creds(creds))
170+
logger.Info("bridge TCP gRPC requires mTLS", slog.String("mode", bridgeTLSModeStrict))
171+
}
172+
}
173+
srv := grpc.NewServer(serverOpts...)
160174
pb.RegisterContainerServiceServer(srv, bridgesvc.New(bridgesvc.Options{
161175
DefaultWorkDir: bridgesvc.DefaultWorkDir,
162176
DataMount: bridgesvc.DefaultWorkDir,

cmd/bridge/tls.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package main
2+
3+
import (
4+
"crypto/tls"
5+
"crypto/x509"
6+
"errors"
7+
"fmt"
8+
"os"
9+
"slices"
10+
"strings"
11+
12+
"google.golang.org/grpc/credentials"
13+
)
14+
15+
// Strict mTLS for the bridge TCP channel (design memoh-saas-bridge-mtls-design.md §8.1).
16+
// Material is mounted from the memoh-bridge-mtls Secret; env carries only paths and the mode, and the private key never goes into env.
17+
const (
18+
bridgeTLSModeEnv = "BRIDGE_TLS_MODE" // env var selecting the mode: empty/disabled or strict
19+
bridgeTLSCertFileEnv = "BRIDGE_TLS_CERT_FILE" // env var holding the path of the server certificate file
20+
bridgeTLSKeyFileEnv = "BRIDGE_TLS_KEY_FILE" // env var holding the path of the server private key file
21+
bridgeTLSClientCAFileEnv = "BRIDGE_TLS_CLIENT_CA_FILE" // env var holding the path of the PEM bundle used to verify client certificates
22+
bridgeTLSExpectedClientURIEnv = "BRIDGE_TLS_EXPECTED_CLIENT_URI" // env var holding the URI SAN the client certificate must carry
23+
24+
bridgeTLSModeDisabled = "disabled" // mode value: no TLS credentials are built
25+
bridgeTLSModeStrict = "strict" // mode value: mTLS is mandatory
26+
)
27+
28+
// bridgeServerCredentials builds gRPC server credentials according to BRIDGE_TLS_MODE.
29+
// disabled/empty → (nil, nil), keeping the existing behavior; strict → mTLS is mandatory and missing material is an error,
30+
// never a silent fallback to plaintext (design §10). Called only for the TCP listener; UDS relies on filesystem permissions.
31+
func bridgeServerCredentials() (credentials.TransportCredentials, error) {
32+
mode := strings.ToLower(strings.TrimSpace(os.Getenv(bridgeTLSModeEnv)))
33+
switch mode {
34+
case "", bridgeTLSModeDisabled:
35+
return nil, nil
36+
case bridgeTLSModeStrict: // strict: leave the switch and build the mTLS config below
37+
default:
38+
return nil, fmt.Errorf("unknown %s %q (want %s|%s)", bridgeTLSModeEnv, mode, bridgeTLSModeDisabled, bridgeTLSModeStrict)
39+
}
40+
41+
certFile := strings.TrimSpace(os.Getenv(bridgeTLSCertFileEnv))
42+
keyFile := strings.TrimSpace(os.Getenv(bridgeTLSKeyFileEnv))
43+
caFile := strings.TrimSpace(os.Getenv(bridgeTLSClientCAFileEnv))
44+
expectedURI := strings.TrimSpace(os.Getenv(bridgeTLSExpectedClientURIEnv))
45+
if certFile == "" || keyFile == "" || caFile == "" || expectedURI == "" { // all four settings are mandatory in strict mode
46+
return nil, fmt.Errorf("strict bridge TLS requires %s, %s, %s and %s", bridgeTLSCertFileEnv, bridgeTLSKeyFileEnv, bridgeTLSClientCAFileEnv, bridgeTLSExpectedClientURIEnv)
47+
}
48+
49+
cert, err := tls.LoadX509KeyPair(certFile, keyFile)
50+
if err != nil {
51+
return nil, fmt.Errorf("load bridge server keypair: %w", err)
52+
}
53+
caPEM, err := os.ReadFile(caFile) //nolint:gosec // G304: path comes from operator-controlled env, not end-user input
54+
if err != nil {
55+
return nil, fmt.Errorf("read server client CA: %w", err)
56+
}
57+
pool := x509.NewCertPool()
58+
if !pool.AppendCertsFromPEM(caPEM) { // reports false when no certificate could be parsed from the PEM data
59+
return nil, fmt.Errorf("no certificates parsed from %s", caFile)
60+
}
61+
62+
cfg := &tls.Config{
63+
MinVersion: tls.VersionTLS12,
64+
Certificates: []tls.Certificate{cert},
65+
ClientCAs: pool,
66+
ClientAuth: tls.RequireAndVerifyClientCert,
67+
// RequireAndVerifyClientCert has already verified the chain (including the ClientAuth EKU); here the caller is
68+
// pinned to this instance's Memoh Server SPIFFE identity. A compromised bot holds only the
69+
// shared bridge server cert (ServerAuth, bridge URI) and cannot pass this check.
70+
VerifyConnection: func(cs tls.ConnectionState) error {
71+
return verifyMemohServerClientIdentity(cs, expectedURI)
72+
},
73+
}
74+
return credentials.NewTLS(cfg), nil
75+
}
76+
77+
// verifyMemohServerClientIdentity checks the client's leaf certificate in cs.
// The leaf must list the ClientAuth EKU and carry a URI SAN whose string form
// equals expectedURI exactly. It returns nil on a match and an error otherwise,
// including when no peer certificate is present.
func verifyMemohServerClientIdentity(cs tls.ConnectionState, expectedURI string) error {
78+
if len(cs.PeerCertificates) == 0 {
79+
return errors.New("client certificate required")
80+
}
81+
leaf := cs.PeerCertificates[0] // the first peer certificate is treated as the client's leaf
82+
if !slices.Contains(leaf.ExtKeyUsage, x509.ExtKeyUsageClientAuth) { // ClientAuth must be listed explicitly on the leaf
83+
return errors.New("client certificate lacks ClientAuth EKU")
84+
}
85+
for _, uri := range leaf.URIs {
86+
if uri.String() == expectedURI { // exact string comparison, no normalization
87+
return nil
88+
}
89+
}
90+
return fmt.Errorf("client certificate URI SAN mismatch (want %s)", expectedURI)
91+
}

0 commit comments

Comments
 (0)