Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions decentralized-api/internal/devshard/shared_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,13 @@ func EvaluateValidationResponse(
originalResponsePayload []byte,
thresholds *ValidationThresholdResolver,
) (*devshardpkg.ValidateResult, error) {
// A 4xx from the validator's own re-execution means the executor-supplied
// prompt/enforced_tokens could not be processed, so the inference is
// unverifiable and must not be auto-approved (previously failed open to Valid:true).
if resp.StatusCode == http.StatusBadRequest || resp.StatusCode == http.StatusUnprocessableEntity {
return &devshardpkg.ValidateResult{Valid: true}, nil
logging.Warn(logPrefix+" validation failed: validator re-execution rejected request",
chaintypes.Validation, "inferenceId", inferenceID, "status", resp.StatusCode)
return &devshardpkg.ValidateResult{Valid: false}, nil
}

respBytes, err := ReadHTTPBody(resp)
Expand Down Expand Up @@ -283,11 +288,21 @@ func EvaluateValidationResponse(
InferenceId: inferenceID,
ResponseBytes: respBytes,
}
result := validationpkg.CompareLogits(
originalResponse.ExtractLogits(),
validationResponse.ExtractLogits(),
base,
)
// CompareLogits short-circuits to perfect similarity (1.0) when the ORIGINAL
// logits are empty, so an executor that stored a response with no logprobs
// would always pass. Reject only the asymmetric case (exactly one side empty):
// the executor's output cannot be verified against the validator's
// re-execution. Both-empty is left to CompareLogits so legitimate
// reasoning-burn empties (e.g. Kimi-K2.6, finish_reason=length) still match.
originalLogits := originalResponse.ExtractLogits()
validationLogits := validationResponse.ExtractLogits()
if (len(originalLogits) == 0) != (len(validationLogits) == 0) {
logging.Warn(logPrefix+" validation failed: logit presence mismatch between original and validation response",
chaintypes.Validation, "inferenceId", inferenceID,
"originalLogits", len(originalLogits), "validationLogits", len(validationLogits))
return &devshardpkg.ValidateResult{Valid: false}, nil
}
result := validationpkg.CompareLogits(originalLogits, validationLogits, base)
valid, err := EvaluateValidationResult(ctx, result, req, thresholds)
if err != nil {
return nil, err
Expand Down
71 changes: 71 additions & 0 deletions decentralized-api/internal/devshard/validation_failopen_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package devshard

import (
"context"
"io"
"net/http"
"strings"
"testing"

"devshard/bridge"
devshardpkg "devshard"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// Regression tests for the two devshard validation fail-opens in
// EvaluateValidationResponse. Each asserts the SECURE (post-fix) behavior and
// fails against the unpatched code:
//   - empty logprobs in the executor's stored response must be rejected when
//     the validator's re-execution does return logprobs
//     (unpatched: CompareLogits([],x) == 1.0 similarity -> Valid:true)
//   - a 4xx from the validator's own re-execution must be rejected
//     (unpatched: returned Valid:true before any content check)
//
// A legitimate matching response must still validate (no false-reject
// regression), including the case where both sides have empty logprobs.

// newResolver returns the threshold resolver used by the tests in this file,
// built from req with a fixed decimal threshold of 0.90.
func newResolver(req devshardpkg.ValidateRequest) *ValidationThresholdResolver {
	threshold := &bridge.Decimal{Value: 90, Exponent: -2} // 0.90
	return cachedThresholdResolver(req, threshold)
}

// okResp builds a 200 OK *http.Response whose body streams the given string
// and whose header map is empty but non-nil.
func okResp(body string) *http.Response {
	resp := new(http.Response)
	resp.StatusCode = http.StatusOK
	resp.Header = http.Header{}
	resp.Body = io.NopCloser(strings.NewReader(body))
	return resp
}

// logprobBody is a chat.completion JSON fixture for model "model-a" whose single
// choice carries two logprob entries ("hello" at -0.10, "world" at -0.05). The
// tests in this file use it both as the validator's HTTP response body and as
// the executor's stored payload.
const logprobBody = `{"id":"v","object":"chat.completion","model":"model-a","choices":[{"index":0,"message":{"role":"assistant","content":"hello world"},"logprobs":{"content":[{"token":"hello","logprob":-0.10,"top_logprobs":[{"token":"hello","logprob":-0.10}]},{"token":"world","logprob":-0.05,"top_logprobs":[{"token":"world","logprob":-0.05}]}]},"finish_reason":"stop"}],"usage":{"prompt_tokens":50,"completion_tokens":200,"total_tokens":250}}`

// emptyLogprobBody is the same kind of chat.completion fixture with the same
// message content, but its logprobs.content array is empty.
const emptyLogprobBody = `{"id":"o","object":"chat.completion","model":"model-a","choices":[{"index":0,"message":{"role":"assistant","content":"hello world"},"logprobs":{"content":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":50,"completion_tokens":200,"total_tokens":250}}`

// TestEvaluateValidationResponse_EmptyOriginalLogits_IsInvalid passes a stored
// payload with an empty logprobs list alongside an HTTP response that does carry
// logprobs, and expects EvaluateValidationResponse to report Valid == false
// without returning an error.
func TestEvaluateValidationResponse_EmptyOriginalLogits_IsInvalid(t *testing.T) {
	request := devshardpkg.ValidateRequest{
		EscrowID: "e1",
		EpochID:  7,
		Model:    "model-a",
	}
	var (
		httpResp      = okResp(logprobBody)
		storedPayload = []byte(emptyLogprobBody)
		thresholds    = newResolver(request)
	)

	got, err := EvaluateValidationResponse(context.Background(), httpResp, request, "id1", "[t]", storedPayload, thresholds)

	require.NoError(t, err)
	assert.False(t, got.Valid, "executor response with no logprobs must be rejected, not auto-passed")
}

// TestEvaluateValidationResponse_4xxReExec_IsInvalid checks that a 400 or 422
// HTTP response passed to EvaluateValidationResponse yields Valid == false with
// no error, rather than being auto-approved.
//
// Each status code runs as its own subtest so that a failure on one status is
// reported under its name and does not prevent the other status from running.
func TestEvaluateValidationResponse_4xxReExec_IsInvalid(t *testing.T) {
	req := devshardpkg.ValidateRequest{EscrowID: "e2", EpochID: 7, Model: "model-a"}
	statuses := []int{http.StatusBadRequest, http.StatusUnprocessableEntity}

	for _, status := range statuses {
		// Subtests run synchronously (no t.Parallel), so capturing the loop
		// variable is safe on every Go version.
		t.Run(http.StatusText(status), func(t *testing.T) {
			resp := &http.Response{
				StatusCode: status,
				Body:       io.NopCloser(strings.NewReader(`{"error":"rejected"}`)),
				Header:     make(http.Header),
			}

			res, err := EvaluateValidationResponse(context.Background(), resp, req, "id2", "[t]", []byte(logprobBody), newResolver(req))

			require.NoError(t, err)
			// Guard the dereference below: a nil result should fail the test
			// with a clear message instead of panicking.
			require.NotNil(t, res)
			assert.False(t, res.Valid, "validator re-exec %d must not auto-approve", status)
		})
	}
}

// TestEvaluateValidationResponse_MatchingLogits_StillValid is the no-regression
// case: the same logprob-bearing fixture is supplied as both the HTTP response
// and the stored payload, and the result must be Valid == true with no error.
func TestEvaluateValidationResponse_MatchingLogits_StillValid(t *testing.T) {
	request := devshardpkg.ValidateRequest{
		EscrowID: "e3",
		EpochID:  7,
		Model:    "model-a",
	}
	var (
		httpResp      = okResp(logprobBody)
		storedPayload = []byte(logprobBody)
		thresholds    = newResolver(request)
	)

	got, err := EvaluateValidationResponse(context.Background(), httpResp, request, "id3", "[t]", storedPayload, thresholds)

	require.NoError(t, err)
	assert.True(t, got.Valid, "legitimate matching response must remain valid after the fix")
}

// TestEvaluateValidationResponse_BothEmpty_StaysValid is the no-regression case
// for legitimate reasoning-burn empties (Kimi-K2.6, see PR #1233): when the
// validator's own re-execution ALSO yields no logprobs, both-empty is treated
// as a genuine match and must not be rejected. The attacker cannot force this
// case because they don't control the validator's independent re-execution.
func TestEvaluateValidationResponse_BothEmpty_StaysValid(t *testing.T) {
	request := devshardpkg.ValidateRequest{
		EscrowID: "e4",
		EpochID:  7,
		Model:    "model-a",
	}
	var (
		httpResp      = okResp(emptyLogprobBody)
		storedPayload = []byte(emptyLogprobBody)
		thresholds    = newResolver(request)
	)

	got, err := EvaluateValidationResponse(context.Background(), httpResp, request, "id4", "[t]", storedPayload, thresholds)

	require.NoError(t, err)
	assert.True(t, got.Valid, "legitimate both-empty (reasoning-burn) must remain valid")
}
Loading