Skip to content

Commit 9fa70e3

Browse files
authored
fix: location claim processing (#286)
When querying IPNI for location claims, the indexing service now iterates through all provider results instead of failing on the first error. This allows queries to succeed as long as at least one valid provider result exists, even if others have incomplete or invalid addresses. See this INPI record as an example: https://cid.contact/cid/bagbaieraqcpyzx3sytzsitbdxqpahht3hw4ros5ex2jlhxe4muwcgc7d4xua ProviderResults[1] is invalid, has only 1 address, and it is missing the location URL. ProviderResults[0] is correct, but the indexer never finds this record. This fixes the issue where IPNI records with missing blob retrieval endpoints (e.g., only claim URL, no blob URL) would cause the entire query to fail with a 500 error, even when other valid provider results were available. The implementation follows the same pattern used for index claim processing, where all results are tried, and only the last error is returned if all attempts fail.
1 parent c4f166f commit 9fa70e3

File tree

3 files changed

+207
-15
lines changed

3 files changed

+207
-15
lines changed

pkg/aws/dynamoallocationstable_test.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,14 +122,6 @@ func createAllocationsTable(t *testing.T, c *dynamodb.Client, tableName string)
122122
AttributeName: aws.String("multihash"),
123123
AttributeType: types.ScalarAttributeTypeS,
124124
},
125-
{
126-
AttributeName: aws.String("size"),
127-
AttributeType: types.ScalarAttributeTypeN,
128-
},
129-
{
130-
AttributeName: aws.String("invocation"),
131-
AttributeType: types.ScalarAttributeTypeS,
132-
},
133125
{
134126
AttributeName: aws.String("insertedAt"),
135127
AttributeType: types.ScalarAttributeTypeS,

pkg/service/service.go

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ func (is *IndexingService) jobHandler(mhCtx context.Context, j job, spawn func(j
135135
}
136136

137137
s.AddEvent(fmt.Sprintf("processing %d results", len(results)))
138+
139+
var indexFetchSucceeded bool
140+
var lastIndexFetchErr error
141+
138142
for _, result := range results {
139143
// unmarshall metadata for this provider
140144
md := metadata.MetadataContext.New()
@@ -211,9 +215,10 @@ func (is *IndexingService) jobHandler(mhCtx context.Context, j job, spawn func(j
211215
case *metadata.LocationCommitmentMetadata:
212216
s.AddEvent("processing location claim")
213217

214-
// for a location claim, we just store it, unless its for an index CID, in which case get the full idnex
218+
// for a location claim, we just store it, unless its for an index CID, in which case get the full index
215219
if j.indexForMh != nil {
216-
// fetch (from URL or cache) the full index
220+
// Try to fetch the index from this provider result
221+
// If it fails, we'll continue to the next result instead of failing the entire query
217222
shard := typedProtocol.Shard
218223
if shard == nil {
219224
c := cid.NewCidV1(cid.Raw, j.mh)
@@ -222,14 +227,18 @@ func (is *IndexingService) jobHandler(mhCtx context.Context, j job, spawn func(j
222227
url, err := fetchRetrievalURL(*result.Provider, *shard)
223228
if err != nil {
224229
telemetry.Error(s, err, "fetching index retrieval URL")
225-
return fmt.Errorf("fetching retrieval URL for index %q: %w", shard, err)
230+
log.Warnw("failed to fetch retrieval URL, will try next provider result if available", "shard", shard, "provider", result.Provider.ID, "err", err)
231+
lastIndexFetchErr = fmt.Errorf("fetching retrieval URL for index %q from provider %s: %w", shard, result.Provider.ID, err)
232+
continue // Try next provider result
226233
}
227234

228235
s.AddEvent("fetching index")
229236
var auth *types.RetrievalAuth
230237
match, err := assert.Location.Match(validator.NewSource(claim.Capabilities()[0], claim))
231238
if err != nil {
232-
return fmt.Errorf("failed to match claim to location commitment: %w", err)
239+
log.Warnw("failed to match claim to location commitment, will try next provider result if available", "err", err)
240+
lastIndexFetchErr = fmt.Errorf("failed to match claim to location commitment: %w", err)
241+
continue
233242
}
234243
lcCaveats := match.Value().Nb()
235244
space := lcCaveats.Space
@@ -261,10 +270,13 @@ func (is *IndexingService) jobHandler(mhCtx context.Context, j job, spawn func(j
261270
index, err := is.blobIndexLookup.Find(mhCtx, result.ContextID, *j.indexProviderRecord, req)
262271
if err != nil {
263272
telemetry.Error(s, err, "fetching index blob")
264-
return err
273+
log.Warnw("failed to fetch index blob, will try next provider result if available", "provider", result.Provider.ID, "err", err)
274+
lastIndexFetchErr = fmt.Errorf("fetching index blob from provider %s: %w", result.Provider.ID, err)
275+
continue // Try next provider result
265276
}
266277

267-
// Add the index to the query results, if we don't already have it
278+
// Success! Add the index to the query results, if we don't already have it
279+
indexFetchSucceeded = true
268280
state.CmpSwap(
269281
func(qs queryState) bool {
270282
return !qs.qr.Indexes.Has(result.ContextID)
@@ -289,6 +301,11 @@ func (is *IndexingService) jobHandler(mhCtx context.Context, j job, spawn func(j
289301
}
290302
}
291303
}
304+
305+
// If we attempted to fetch an index but all attempts failed, return the last error
306+
if lastIndexFetchErr != nil && !indexFetchSucceeded {
307+
return fmt.Errorf("failed to fetch index from all provider results: %w", lastIndexFetchErr)
308+
}
292309
return nil
293310
}
294311

pkg/service/service_test.go

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,9 +422,192 @@ func TestQuery(t *testing.T) {
422422

423423
require.Error(t, err)
424424
})
425+
426+
t.Run("succeeds when at least one provider result is valid despite others having incomplete addresses", func(t *testing.T) {
427+
mockBlobIndexLookup := blobindexlookup.NewMockBlobIndexLookup(t)
428+
mockClaimsService := contentclaims.NewMockContentClaimsService(t)
429+
mockProviderIndex := providerindex.NewMockProviderIndex(t)
430+
431+
// First provider has incomplete addresses (only claim endpoint, missing blob endpoint)
432+
// This simulates the real-world issue from cid.contact
433+
badProviderAddr := &peer.AddrInfo{
434+
ID: testutil.RandomPeer(t),
435+
Addrs: []ma.Multiaddr{
436+
testutil.Must(ma.NewMultiaddr("/dns/indexer.storacha.network/https/http-path/claim%2F%7Bclaim%7D"))(t),
437+
},
438+
}
439+
440+
// Second provider has valid addresses but will return wrong claim type
441+
wrongClaimProviderAddr := &peer.AddrInfo{
442+
ID: testutil.RandomPeer(t),
443+
Addrs: []ma.Multiaddr{
444+
testutil.Must(ma.NewMultiaddr("/dns/wrong.storacha.network/tls/http/http-path/%2Fclaims%2F%7Bclaim%7D"))(t),
445+
testutil.Must(ma.NewMultiaddr("/dns/wrong.storacha.network/tls/http/http-path/%2Fblobs%2F%7Bblob%7D"))(t),
446+
},
447+
}
448+
449+
// Third provider has valid addresses (both claim and blob endpoints)
450+
goodProviderAddr := &peer.AddrInfo{
451+
ID: testutil.RandomPeer(t),
452+
Addrs: []ma.Multiaddr{
453+
testutil.Must(ma.NewMultiaddr("/dns/storacha.network/tls/http/http-path/%2Fclaims%2F%7Bclaim%7D"))(t),
454+
testutil.Must(ma.NewMultiaddr("/dns/storacha.network/tls/http/http-path/%2Fblobs%2F%7Bblob%7D"))(t),
455+
},
456+
}
457+
458+
contentLink := testutil.RandomCID(t)
459+
contentHash := contentLink.(cidlink.Link).Hash()
460+
space := testutil.RandomDID(t)
461+
462+
// content will have a location claim and an index claim
463+
locationDelegationCid, locationDelegation, locationProviderResult := buildTestLocationClaim(t, contentLink.(cidlink.Link), goodProviderAddr, space, rand.Uint64N(5000))
464+
indexDelegationCid, indexDelegation, indexResult, indexCid, index := buildTestIndexClaim(t, contentLink.(cidlink.Link), goodProviderAddr)
465+
466+
contentResults := []model.ProviderResult{locationProviderResult, indexResult}
467+
468+
// expect a call to find records for content
469+
mockProviderIndex.EXPECT().Find(extmocks.AnyContext, providerindex.QueryKey{
470+
Hash: contentHash,
471+
TargetClaims: []multicodec.Code{metadata.EqualsClaimID, metadata.IndexClaimID, metadata.LocationCommitmentID},
472+
}).Return(contentResults, nil)
473+
474+
// the results for content should make the IndexingService ask for both claims
475+
locationClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://storacha.network/claims/%s", locationDelegationCid.String())))(t)
476+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, locationDelegationCid, locationClaimUrl).Return(locationDelegation, nil)
477+
indexClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://storacha.network/claims/%s", indexDelegationCid.String())))(t)
478+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, indexDelegationCid, indexClaimUrl).Return(indexDelegation, nil)
479+
480+
// then attempt to find records for the index referenced in the index claim
481+
// This returns THREE provider results:
482+
// 1. First one with bad addresses and nil shard (fails at fetchRetrievalURL)
483+
// 2. Second one with valid addresses but returns wrong claim type (fails at assert.Location.Match)
484+
// 3. Third one with good addresses and valid claim (succeeds)
485+
indexSize := rand.Uint64N(5000)
486+
// First provider has nil shard to test the shard == nil code path, fails at fetchRetrievalURL
487+
badIndexLocationDelegationCid, badIndexLocationDelegation, badIndexLocationProviderResult := buildTestLocationClaimWithShard(t, indexCid, badProviderAddr, space, indexSize, nil)
488+
// Second provider has valid addresses but we'll return wrong claim type (index delegation instead of location)
489+
wrongClaimLocationDelegationCid, _, wrongClaimProviderResult := buildTestLocationClaim(t, indexCid, wrongClaimProviderAddr, space, indexSize)
490+
// Third provider is valid
491+
goodIndexLocationDelegationCid, goodIndexLocationDelegation, goodIndexLocationProviderResult := buildTestLocationClaim(t, indexCid, goodProviderAddr, space, indexSize)
492+
493+
mockProviderIndex.EXPECT().Find(extmocks.AnyContext, providerindex.QueryKey{
494+
Hash: indexCid.Hash(),
495+
TargetClaims: []multicodec.Code{metadata.LocationCommitmentID},
496+
}).Return([]model.ProviderResult{badIndexLocationProviderResult, wrongClaimProviderResult, goodIndexLocationProviderResult}, nil)
497+
498+
// fetch the first index's location claim (bad provider - fails at fetchRetrievalURL)
499+
// Note: http-path encoding produces paths without leading slash
500+
badIndexLocationClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://indexer.storacha.network/claim/%s", badIndexLocationDelegationCid.String())))(t)
501+
badIndexLocationClaimUrl.Path = fmt.Sprintf("claim/%s", badIndexLocationDelegationCid.String()) // Remove leading slash
502+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, badIndexLocationDelegationCid, badIndexLocationClaimUrl).Return(badIndexLocationDelegation, nil)
503+
504+
// fetch the second index's location claim (wrong claim provider - returns index delegation instead of location)
505+
// This will cause assert.Location.Match to fail, covering lines 239-240
506+
wrongClaimLocationClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://wrong.storacha.network/claims/%s", wrongClaimLocationDelegationCid.String())))(t)
507+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, wrongClaimLocationDelegationCid, wrongClaimLocationClaimUrl).Return(indexDelegation, nil) // Return index delegation instead of location
508+
509+
// fetch the third index's location claim (good provider)
510+
goodIndexLocationClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://storacha.network/claims/%s", goodIndexLocationDelegationCid.String())))(t)
511+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, goodIndexLocationDelegationCid, goodIndexLocationClaimUrl).Return(goodIndexLocationDelegation, nil)
512+
513+
// The first provider result will fail to build a retrieval URL (no blob endpoint)
514+
// The second provider result will fail at assert.Location.Match (wrong claim type)
515+
// The third provider result should succeed
516+
goodIndexBlobUrl := testutil.Must(url.Parse(fmt.Sprintf("https://storacha.network/blobs/%s", digestutil.Format(indexCid.Hash()))))(t)
517+
retrievalReq := types.NewRetrievalRequest(goodIndexBlobUrl, &metadata.Range{Length: &indexSize}, nil)
518+
mockBlobIndexLookup.EXPECT().Find(extmocks.AnyContext, types.EncodedContextID(goodIndexLocationProviderResult.ContextID), indexResult, retrievalReq).Return(index, nil)
519+
520+
service := NewIndexingService(testutil.Service, mockBlobIndexLookup, mockClaimsService, peer.AddrInfo{ID: testutil.RandomPeer(t)}, mockProviderIndex)
521+
522+
result, err := service.Query(t.Context(), types.Query{Hashes: []mh.Multihash{contentHash}})
523+
524+
// Should succeed despite the first two provider results failing
525+
require.NoError(t, err)
526+
require.NotNil(t, result)
527+
require.Len(t, result.Indexes(), 1, "should have successfully fetched the index from the third provider result")
528+
})
529+
530+
t.Run("returns error when all provider results are invalid (have incomplete addresses)", func(t *testing.T) {
531+
mockBlobIndexLookup := blobindexlookup.NewMockBlobIndexLookup(t)
532+
mockClaimsService := contentclaims.NewMockContentClaimsService(t)
533+
mockProviderIndex := providerindex.NewMockProviderIndex(t)
534+
535+
// Both providers have incomplete addresses (only claim endpoint, missing blob endpoint)
536+
badProviderAddr1 := &peer.AddrInfo{
537+
ID: testutil.RandomPeer(t),
538+
Addrs: []ma.Multiaddr{
539+
testutil.Must(ma.NewMultiaddr("/dns/indexer.storacha.network/https/http-path/claim%2F%7Bclaim%7D"))(t),
540+
},
541+
}
542+
543+
badProviderAddr2 := &peer.AddrInfo{
544+
ID: testutil.RandomPeer(t),
545+
Addrs: []ma.Multiaddr{
546+
testutil.Must(ma.NewMultiaddr("/dns/other.storacha.network/https/http-path/claim%2F%7Bclaim%7D"))(t),
547+
},
548+
}
549+
550+
contentLink := testutil.RandomCID(t)
551+
contentHash := contentLink.(cidlink.Link).Hash()
552+
space := testutil.RandomDID(t)
553+
554+
// content will have a location claim and an index claim
555+
locationDelegationCid, locationDelegation, locationProviderResult := buildTestLocationClaim(t, contentLink.(cidlink.Link), badProviderAddr1, space, rand.Uint64N(5000))
556+
indexDelegationCid, indexDelegation, indexResult, indexCid, _ := buildTestIndexClaim(t, contentLink.(cidlink.Link), badProviderAddr1)
557+
558+
contentResults := []model.ProviderResult{locationProviderResult, indexResult}
559+
560+
// expect a call to find records for content
561+
mockProviderIndex.EXPECT().Find(extmocks.AnyContext, providerindex.QueryKey{
562+
Hash: contentHash,
563+
TargetClaims: []multicodec.Code{metadata.EqualsClaimID, metadata.IndexClaimID, metadata.LocationCommitmentID},
564+
}).Return(contentResults, nil)
565+
566+
// the results for content should make the IndexingService ask for both claims
567+
locationClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://indexer.storacha.network/claim/%s", locationDelegationCid.String())))(t)
568+
locationClaimUrl.Path = fmt.Sprintf("claim/%s", locationDelegationCid.String())
569+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, locationDelegationCid, locationClaimUrl).Return(locationDelegation, nil)
570+
indexClaimUrl := testutil.Must(url.Parse(fmt.Sprintf("https://indexer.storacha.network/claim/%s", indexDelegationCid.String())))(t)
571+
indexClaimUrl.Path = fmt.Sprintf("claim/%s", indexDelegationCid.String())
572+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, indexDelegationCid, indexClaimUrl).Return(indexDelegation, nil)
573+
574+
// then attempt to find records for the index - both have bad addresses
575+
indexSize := rand.Uint64N(5000)
576+
badIndexLocationDelegationCid1, badIndexLocationDelegation1, badIndexLocationProviderResult1 := buildTestLocationClaim(t, indexCid, badProviderAddr1, space, indexSize)
577+
badIndexLocationDelegationCid2, badIndexLocationDelegation2, badIndexLocationProviderResult2 := buildTestLocationClaim(t, indexCid, badProviderAddr2, space, indexSize)
578+
579+
mockProviderIndex.EXPECT().Find(extmocks.AnyContext, providerindex.QueryKey{
580+
Hash: indexCid.Hash(),
581+
TargetClaims: []multicodec.Code{metadata.LocationCommitmentID},
582+
}).Return([]model.ProviderResult{badIndexLocationProviderResult1, badIndexLocationProviderResult2}, nil)
583+
584+
// fetch both location claims (both bad providers)
585+
badIndexLocationClaimUrl1 := testutil.Must(url.Parse(fmt.Sprintf("https://indexer.storacha.network/claim/%s", badIndexLocationDelegationCid1.String())))(t)
586+
badIndexLocationClaimUrl1.Path = fmt.Sprintf("claim/%s", badIndexLocationDelegationCid1.String())
587+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, badIndexLocationDelegationCid1, badIndexLocationClaimUrl1).Return(badIndexLocationDelegation1, nil)
588+
589+
badIndexLocationClaimUrl2 := testutil.Must(url.Parse(fmt.Sprintf("https://other.storacha.network/claim/%s", badIndexLocationDelegationCid2.String())))(t)
590+
badIndexLocationClaimUrl2.Path = fmt.Sprintf("claim/%s", badIndexLocationDelegationCid2.String())
591+
mockClaimsService.EXPECT().Find(extmocks.AnyContext, badIndexLocationDelegationCid2, badIndexLocationClaimUrl2).Return(badIndexLocationDelegation2, nil)
592+
593+
// Both provider results will fail to build a retrieval URL (no blob endpoint)
594+
// No mockBlobIndexLookup expectations because we never get that far
595+
596+
service := NewIndexingService(testutil.Service, mockBlobIndexLookup, mockClaimsService, peer.AddrInfo{ID: testutil.RandomPeer(t)}, mockProviderIndex)
597+
598+
_, err := service.Query(t.Context(), types.Query{Hashes: []mh.Multihash{contentHash}})
599+
600+
// Should fail because all provider results have incomplete addresses
601+
require.Error(t, err)
602+
require.Contains(t, err.Error(), "failed to fetch index from all provider results")
603+
})
425604
}
426605

427606
func buildTestLocationClaim(t *testing.T, contentLink cidlink.Link, providerAddr *peer.AddrInfo, space did.DID, size uint64) (cidlink.Link, delegation.Delegation, model.ProviderResult) {
607+
return buildTestLocationClaimWithShard(t, contentLink, providerAddr, space, size, &contentLink.Cid)
608+
}
609+
610+
func buildTestLocationClaimWithShard(t *testing.T, contentLink cidlink.Link, providerAddr *peer.AddrInfo, space did.DID, size uint64, shard *cid.Cid) (cidlink.Link, delegation.Delegation, model.ProviderResult) {
428611
locationClaim := cassert.Location.New(testutil.Alice.DID().String(), cassert.LocationCaveats{
429612
Content: ctypes.FromHash(contentLink.Hash()),
430613
Location: []url.URL{*testutil.Must(url.Parse("https://storacha.network"))(t)},
@@ -441,7 +624,7 @@ func buildTestLocationClaim(t *testing.T, contentLink cidlink.Link, providerAddr
441624
}.Sum(testutil.Must(io.ReadAll(delegation.Archive(locationDelegation)))(t)))(t)
442625

443626
locationMetadata := metadata.LocationCommitmentMetadata{
444-
Shard: &contentLink.Cid,
627+
Shard: shard,
445628
Claim: locationDelegationCid,
446629
Range: &metadata.Range{Length: &size},
447630
Expiration: time.Now().Add(time.Hour).Unix(),

0 commit comments

Comments
 (0)