Skip to content

Commit 26eb180

Browse files
committed
feat(validators): add cargo registry validator
Closes #1055. Verification mirrors the PyPI validator: substring-match `mcp-name: <serverName>` against the package's rendered README. The publisher adds a single line to their README before publishing. Two-call retrieval pattern: 1. `GET /api/v1/crates/{name}/{version}/readme` returns 200 with a JSON pointer `{"url": "https://static.crates.io/readmes/.../...html"}` — crates.io hands us the URL rather than emitting a 302. 2. Follow the pointer to the rendered HTML. The two-call pattern stays on the documented public crates.io API surface. The CDN URL layout is observed-stable, but treating it as the entry point would mean depending on an undocumented path. With two calls, crates.io controls where the README lives. Missing crates and missing versions surface as 403 from the CDN (S3's default for missing keys), not 404. The validator treats any non-200 as "not found" and surfaces the actual status code in the error message. Tests are integration-only (matching the npm/pypi pattern). 16 sub-cases across input validation, registry-baseURL rejection (four variants), ownership against real crates (serde, tokio, rand), and server-name format variations. The positive-path case is gated on `rust-faf-mcp` v0.2.3+ being published with `mcp-name: io.github.Wolfe-Jam/rust-faf-mcp` in its README — the commented-out test in `cargo_test.go` will be uncommented to become the live anchor once that publish happens. Refs #1055
1 parent 145c2ec commit 26eb180

3 files changed

Lines changed: 332 additions & 0 deletions

File tree

internal/validators/package.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ func ValidatePackage(ctx context.Context, pkg model.Package, serverName string)
2323
return registries.ValidateOCI(ctx, pkg, serverName)
2424
case model.RegistryTypeMCPB:
2525
return registries.ValidateMCPB(ctx, pkg, serverName)
26+
case model.RegistryTypeCargo:
27+
return registries.ValidateCargo(ctx, pkg, serverName)
2628
default:
2729
return fmt.Errorf("unsupported registry type: %s", pkg.RegistryType)
2830
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package registries
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"errors"
7+
"fmt"
8+
"io"
9+
"net/http"
10+
"net/url"
11+
"strings"
12+
"time"
13+
14+
"github.com/modelcontextprotocol/registry/pkg/model"
15+
)
16+
17+
// Sentinel errors returned by ValidateCargo when a required package field is empty.
var (
	// ErrMissingIdentifierForCargo is returned when the package has no identifier (crate name).
	ErrMissingIdentifierForCargo = errors.New("package identifier is required for Cargo packages")

	// ErrMissingVersionForCargo is returned when the package has no version.
	ErrMissingVersionForCargo = errors.New("package version is required for Cargo packages")
)
21+
22+
// CargoReadmeMetaResponse models the JSON body returned by the crates.io readme
// metadata endpoint, /api/v1/crates/{name}/{version}/readme.
//
// That endpoint answers 200 OK with a small JSON document instead of issuing a
// 302 redirect. The document's `url` field points at the rendered README on the
// static CDN, so callers need a second request to obtain the README itself.
type CargoReadmeMetaResponse struct {
	// URL is the location of the rendered README, taken from the `url` field.
	URL string `json:"url"`
}
31+
32+
// ValidateCargo validates that a Cargo (crates.io) package contains the correct MCP server name.
33+
//
34+
// Verification mechanism: the `mcp-name: <server-name>` token is searched for in the package's
35+
// rendered README. This mirrors the PyPI validator's README-token approach (see ValidatePyPI),
36+
// requiring no Cargo.toml parsing on the registry side. Crate authors add a single line
37+
// `mcp-name: io.github.OWNER/REPO` to their README before publishing.
38+
//
39+
// Two-call retrieval pattern:
40+
// 1. GET https://crates.io/api/v1/crates/{name}/{version}/readme
41+
// → 200 OK with JSON: {"url": "https://static.crates.io/readmes/.../...html"}
42+
// 2. GET <url from step 1>
43+
// → 200 OK with rendered README HTML, or 403 if the crate/version is missing
44+
//
45+
// The two-call pattern stays on the documented crates.io API surface rather than relying
46+
// on the CDN URL layout being stable.
47+
func ValidateCargo(ctx context.Context, pkg model.Package, serverName string) error {
48+
// Set default registry base URL if empty
49+
if pkg.RegistryBaseURL == "" {
50+
pkg.RegistryBaseURL = model.RegistryURLCrates
51+
}
52+
53+
if pkg.Identifier == "" {
54+
return ErrMissingIdentifierForCargo
55+
}
56+
57+
if pkg.Version == "" {
58+
return ErrMissingVersionForCargo
59+
}
60+
61+
// Validate that MCPB-specific fields are not present
62+
if pkg.FileSHA256 != "" {
63+
return fmt.Errorf("Cargo packages must not have 'fileSha256' field - this is only for MCPB packages")
64+
}
65+
66+
// Validate that the registry base URL matches crates.io exactly
67+
if pkg.RegistryBaseURL != model.RegistryURLCrates {
68+
return fmt.Errorf("registry type and base URL do not match: '%s' is not valid for registry type '%s'. Expected: %s",
69+
pkg.RegistryBaseURL, model.RegistryTypeCargo, model.RegistryURLCrates)
70+
}
71+
72+
client := &http.Client{Timeout: 10 * time.Second}
73+
// crates.io's crawler policy expects a non-generic User-Agent identifying the source.
74+
userAgent := "MCP-Registry-Validator/1.0 (https://registry.modelcontextprotocol.io)"
75+
76+
// Step 1: fetch the README pointer from the documented API endpoint.
77+
metaURL := fmt.Sprintf("%s/api/v1/crates/%s/%s/readme",
78+
pkg.RegistryBaseURL,
79+
url.PathEscape(pkg.Identifier),
80+
url.PathEscape(pkg.Version))
81+
82+
metaReq, err := http.NewRequestWithContext(ctx, http.MethodGet, metaURL, nil)
83+
if err != nil {
84+
return fmt.Errorf("failed to create crates.io metadata request: %w", err)
85+
}
86+
metaReq.Header.Set("User-Agent", userAgent)
87+
metaReq.Header.Set("Accept", "application/json")
88+
89+
metaResp, err := client.Do(metaReq)
90+
if err != nil {
91+
return fmt.Errorf("failed to fetch package metadata from crates.io: %w", err)
92+
}
93+
defer metaResp.Body.Close()
94+
95+
if metaResp.StatusCode != http.StatusOK {
96+
return fmt.Errorf("Cargo package '%s' metadata fetch failed (status: %d)", pkg.Identifier, metaResp.StatusCode)
97+
}
98+
99+
var meta CargoReadmeMetaResponse
100+
if err := json.NewDecoder(metaResp.Body).Decode(&meta); err != nil {
101+
return fmt.Errorf("failed to parse crates.io readme metadata: %w", err)
102+
}
103+
if meta.URL == "" {
104+
return fmt.Errorf("Cargo package '%s' metadata response missing 'url' field", pkg.Identifier)
105+
}
106+
107+
// Step 2: fetch the rendered README from the URL the API gave us.
108+
readmeReq, err := http.NewRequestWithContext(ctx, http.MethodGet, meta.URL, nil)
109+
if err != nil {
110+
return fmt.Errorf("failed to create crates.io readme request: %w", err)
111+
}
112+
readmeReq.Header.Set("User-Agent", userAgent)
113+
readmeReq.Header.Set("Accept", "text/html")
114+
115+
readmeResp, err := client.Do(readmeReq)
116+
if err != nil {
117+
return fmt.Errorf("failed to fetch rendered README from crates.io: %w", err)
118+
}
119+
defer readmeResp.Body.Close()
120+
121+
// Missing crates and missing versions surface as 403 (S3 default for missing keys),
122+
// not 404. Treat any non-200 as "not found" — matches the shape of the npm/PyPI
123+
// validators and surfaces the actual status code for debugging.
124+
if readmeResp.StatusCode != http.StatusOK {
125+
return fmt.Errorf("Cargo package '%s' version '%s' not found on crates.io (status: %d)", pkg.Identifier, pkg.Version, readmeResp.StatusCode)
126+
}
127+
128+
body, err := io.ReadAll(readmeResp.Body)
129+
if err != nil {
130+
return fmt.Errorf("failed to read rendered README: %w", err)
131+
}
132+
133+
// Search for the mcp-name: <server-name> token. The token contains no characters
134+
// that get HTML-escaped during README rendering (no <, >, &, ", '), so a direct
135+
// substring match against the rendered HTML is reliable.
136+
mcpNamePattern := "mcp-name: " + serverName
137+
if strings.Contains(string(body), mcpNamePattern) {
138+
return nil
139+
}
140+
141+
return fmt.Errorf("Cargo package '%s' ownership validation failed. The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName)
142+
}
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
package registries_test
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/modelcontextprotocol/registry/internal/validators/registries"
8+
"github.com/modelcontextprotocol/registry/pkg/model"
9+
"github.com/stretchr/testify/assert"
10+
)
11+
12+
func TestValidateCargo_RealPackages(t *testing.T) {
13+
ctx := context.Background()
14+
15+
tests := []struct {
16+
name string
17+
packageName string
18+
version string
19+
serverName string
20+
expectError bool
21+
errorMessage string
22+
}{
23+
{
24+
name: "empty package identifier should fail",
25+
packageName: "",
26+
version: "0.1.0",
27+
serverName: "io.github.example/test",
28+
expectError: true,
29+
errorMessage: "package identifier is required for Cargo packages",
30+
},
31+
{
32+
name: "empty package version should fail",
33+
packageName: "rust-faf-mcp",
34+
version: "",
35+
serverName: "io.github.example/test",
36+
expectError: true,
37+
errorMessage: "package version is required for Cargo packages",
38+
},
39+
{
40+
name: "non-existent crate should fail",
41+
packageName: generateRandomPackageName(),
42+
version: "0.1.0",
43+
serverName: "io.github.example/test",
44+
expectError: true,
45+
errorMessage: "not found",
46+
},
47+
{
48+
name: "non-existent version of real crate should fail",
49+
packageName: "serde",
50+
version: "99.99.99-not-real",
51+
serverName: "io.github.example/test",
52+
expectError: true,
53+
errorMessage: "not found",
54+
},
55+
{
56+
name: "real crate without mcp-name token should fail",
57+
packageName: "serde", // most-downloaded crate; no MCP server claim
58+
version: "1.0.219",
59+
serverName: "io.github.example/test",
60+
expectError: true,
61+
errorMessage: "ownership validation failed",
62+
},
63+
{
64+
name: "real crate with mismatched mcp-name should fail",
65+
packageName: "tokio",
66+
version: "1.40.0",
67+
serverName: "io.github.example/completely-different-name",
68+
expectError: true,
69+
errorMessage: "ownership validation failed",
70+
},
71+
{
72+
name: "additional real crate without mcp-name (rand)",
73+
packageName: "rand",
74+
version: "0.9.0",
75+
serverName: "io.github.example/test",
76+
expectError: true,
77+
errorMessage: "ownership validation failed",
78+
},
79+
// TODO: enable once rust-faf-mcp v0.2.3+ ships with the line
80+
// mcp-name: io.github.Wolfe-Jam/rust-faf-mcp
81+
// in its README. Until then, the positive-path test has no real-world
82+
// target — cargo isn't yet a supported registry type.
83+
// {
84+
// name: "real crate with mcp-name in README should pass",
85+
// packageName: "rust-faf-mcp",
86+
// version: "0.2.3",
87+
// serverName: "io.github.Wolfe-Jam/rust-faf-mcp",
88+
// expectError: false,
89+
// },
90+
}
91+
92+
for _, tt := range tests {
93+
t.Run(tt.name, func(t *testing.T) {
94+
pkg := model.Package{
95+
RegistryType: model.RegistryTypeCargo,
96+
Identifier: tt.packageName,
97+
Version: tt.version,
98+
}
99+
100+
err := registries.ValidateCargo(ctx, pkg, tt.serverName)
101+
102+
if tt.expectError {
103+
assert.Error(t, err)
104+
assert.Contains(t, err.Error(), tt.errorMessage)
105+
} else {
106+
assert.NoError(t, err)
107+
}
108+
})
109+
}
110+
}
111+
112+
func TestValidateCargo_RegistryBaseURLMismatch(t *testing.T) {
113+
ctx := context.Background()
114+
115+
tests := []struct {
116+
name string
117+
baseURL string
118+
}{
119+
{name: "different host", baseURL: "https://example.com"},
120+
{name: "trailing slash", baseURL: "https://crates.io/"},
121+
{name: "http (not https)", baseURL: "http://crates.io"},
122+
{name: "subdomain", baseURL: "https://www.crates.io"},
123+
}
124+
125+
for _, tt := range tests {
126+
t.Run(tt.name, func(t *testing.T) {
127+
pkg := model.Package{
128+
RegistryType: model.RegistryTypeCargo,
129+
RegistryBaseURL: tt.baseURL,
130+
Identifier: "rust-faf-mcp",
131+
Version: "0.2.2",
132+
}
133+
134+
err := registries.ValidateCargo(ctx, pkg, "io.github.Wolfe-Jam/rust-faf-mcp")
135+
assert.Error(t, err)
136+
assert.Contains(t, err.Error(), "registry type and base URL do not match")
137+
})
138+
}
139+
}
140+
141+
func TestValidateCargo_RejectsMCPBOnlyFields(t *testing.T) {
142+
ctx := context.Background()
143+
144+
pkg := model.Package{
145+
RegistryType: model.RegistryTypeCargo,
146+
Identifier: "rust-faf-mcp",
147+
Version: "0.2.2",
148+
FileSHA256: "0000000000000000000000000000000000000000000000000000000000000000",
149+
}
150+
151+
err := registries.ValidateCargo(ctx, pkg, "io.github.Wolfe-Jam/rust-faf-mcp")
152+
assert.Error(t, err)
153+
assert.Contains(t, err.Error(), "Cargo packages must not have 'fileSha256' field")
154+
}
155+
156+
// Server names follow io.github.OWNER/REPO and may contain dots, slashes,
157+
// hyphens, underscores, and digits. None of these get HTML-escaped during
158+
// README rendering, so substring match against the rendered HTML is reliable.
159+
// These tests exercise format variations against a real crate that doesn't
160+
// declare any mcp-name (serde) — every case fails ownership, but we verify
161+
// the failure error preserves the exact server name unchanged.
162+
func TestValidateCargo_ServerNameFormats(t *testing.T) {
163+
ctx := context.Background()
164+
165+
tests := []struct {
166+
name string
167+
serverName string
168+
}{
169+
{name: "canonical io.github format", serverName: "io.github.Wolfe-Jam/rust-faf-mcp"},
170+
{name: "multiple hyphens", serverName: "io.github.example/multi-hyphen-test-name"},
171+
{name: "underscore", serverName: "io.github.example/snake_case_name"},
172+
{name: "numeric suffix", serverName: "io.github.example/server-v2"},
173+
}
174+
175+
for _, tt := range tests {
176+
t.Run(tt.name, func(t *testing.T) {
177+
pkg := model.Package{
178+
RegistryType: model.RegistryTypeCargo,
179+
Identifier: "serde",
180+
Version: "1.0.219",
181+
}
182+
183+
err := registries.ValidateCargo(ctx, pkg, tt.serverName)
184+
assert.Error(t, err)
185+
assert.Contains(t, err.Error(), tt.serverName)
186+
})
187+
}
188+
}

0 commit comments

Comments
 (0)