@@ -34,6 +34,7 @@ import {
3434import { zerionFungiblesSchema } from './validators/fungible'
3535
3636import type { CoingeckoAssetDetails } from '@/lib/coingecko/types'
37+ import type { CoinGeckoMarketCap } from '@/lib/market-service/coingecko/coingecko-types'
3738import type { PartialFields } from '@/lib/types'
3839
3940// NOTE: this must call the zerion api directly rather than our proxy because of rate limiting requirements
@@ -48,11 +49,25 @@ axiosRetry(axiosInstance, { retries: 5, retryDelay: axiosRetry.exponentialDelay
// The Zerion API key is mandatory: abort at module load when it is absent.
const { ZERION_API_KEY } = process.env
if (!ZERION_API_KEY) {
  throw new Error('Missing Zerion API key - see readme for instructions')
}

// Opt-in flag: true only when the REGEN_ALL env var is exactly the string 'true'.
const REGEN_ALL = process.env.REGEN_ALL === 'true'
53+
// Hand-maintained mapping from an assetId to the assetIds manually declared as related to it.
const manualRelatedAssetIndex: Record<AssetId, AssetId[]> = {
  [ethAssetId]: [
    optimismAssetId,
    arbitrumAssetId,
    arbitrumNovaAssetId,
    baseAssetId,
  ],
  [foxAssetId]: [
    foxOnArbitrumOneAssetId,
  ],
}
5558
/**
 * CoinGecko bridged-token category → canonical Ethereum assetId.
 *
 * Each key is a CoinGecko category slug; each value is the Ethereum mainnet ERC-20 that
 * bridged tokens of that category are grouped under.
 *
 * Note: the `bridged-usdt` category also contains USDT0 variants, so those end up grouped
 * together with ETH USDT as the primary.
 */
const BRIDGED_CATEGORY_MAPPINGS: Record<string, AssetId> = {
  // ETH USDC
  'bridged-usdc': 'eip155:1/erc20:0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48',
  // ETH USDT (includes USDT0)
  'bridged-usdt': 'eip155:1/erc20:0xdac17f958d2ee523a2206206994597c13d831ec7',
  // ETH WETH
  'bridged-weth': 'eip155:1/erc20:0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2',
  // ETH WBTC
  'bridged-wbtc': 'eip155:1/erc20:0x2260fac5e5542a773aa44fbcfedf7c193bc2c599',
  // ETH DAI
  'bridged-dai': 'eip155:1/erc20:0x6b175474e89094c44da98b954eedeac495271d0f',
  // ETH wstETH
  'bridged-wsteth': 'eip155:1/erc20:0x7f39c581f595b53c5cb19bd0b3f8da6c935e2ca0',
}
70+
5671export const getManualRelatedAssetIds = (
5772 assetId : AssetId ,
5873) : { relatedAssetIds : AssetId [ ] ; relatedAssetKey : AssetId } | undefined => {
@@ -82,6 +97,30 @@ export const getManualRelatedAssetIds = (
// Type guard: true for any value that is neither `undefined` nor `null`.
const isSome = <T>(option: T | null | undefined): option is T =>
  !(isUndefined(option) || isNull(option))
8499
/**
 * Pre-fetches, for every category in `BRIDGED_CATEGORY_MAPPINGS`, the CoinGecko coin IDs
 * listed under that category.
 *
 * Requests are issued sequentially, one category (and one page) at a time. Each category is
 * paged through `/coins/markets` until a page comes back with fewer than `perPage` entries,
 * so a category holding more coins than fit on one page is not silently truncated.
 *
 * @returns mapping of category → array of coin IDs in that category
 * @throws rejects if a request ultimately fails (i.e. after the retries configured on
 *   `axiosInstance`)
 */
const fetchBridgedCategoryMappings = async (): Promise<Record<string, string[]>> => {
  const perPage = 250
  // Safety valve: never loop forever if the API keeps answering with full pages
  const maxPages = 20
  const categoryToCoinIds: Record<string, string[]> = {}

  for (const category of Object.keys(BRIDGED_CATEGORY_MAPPINGS)) {
    const coinIds: string[] = []

    for (let page = 1; page <= maxPages; page++) {
      const { data } = await axiosInstance.get<CoinGeckoMarketCap[]>(
        `${coingeckoBaseUrl}/coins/markets`,
        {
          params: {
            category,
            vs_currency: 'usd',
            per_page: perPage,
            page,
          },
        },
      )

      coinIds.push(...data.map(coin => coin.id))

      // A short (or empty) page means there is nothing further to fetch for this category
      if (data.length < perPage) break

      if (page === maxPages) {
        console.warn(
          `Category ${category} still returned a full page at page ${maxPages}, coin ID list may be truncated`,
        )
      }
    }

    categoryToCoinIds[category] = coinIds
  }

  return categoryToCoinIds
}
123+
85124const chunkArray = < T > ( array : T [ ] , chunkSize : number ) => {
86125 const result = [ ]
87126 for ( let i = 0 ; i < array . length ; i += chunkSize ) {
@@ -91,8 +130,6 @@ const chunkArray = <T>(array: T[], chunkSize: number) => {
91130 return result
92131}
93132
94- const PLASMA_USDT0_ASSET_ID = 'eip155:9745/erc20:0xb8ce59fc3717ada4c02eadf9682a9e934f625ebb'
95-
96133const getZerionRelatedAssetIds = async (
97134 assetId : AssetId ,
98135 assetData : Record < AssetId , PartialFields < Asset , 'relatedAssetKey' > > ,
@@ -132,14 +169,13 @@ const getZerionRelatedAssetIds = async (
132169
133170 const implementations = firstEntry . attributes . implementations
134171
135- // Use all assetIds actually present in the dataset, excluding Plasma USDT0 (corrupt CoinGecko data)
172+ // Use all assetIds actually present in the dataset
136173 const allRelatedAssetIds = implementations
137174 ?. map ( zerionImplementationToMaybeAssetId )
138175 . filter ( isSome )
139176 . filter ( relatedAssetId => {
140177 return assetData [ relatedAssetId ] !== undefined
141178 } )
142- . filter ( relatedAssetId => relatedAssetId !== PLASMA_USDT0_ASSET_ID )
143179
144180 if ( ! allRelatedAssetIds || allRelatedAssetIds . length <= 1 ) {
145181 return
@@ -154,6 +190,7 @@ const getZerionRelatedAssetIds = async (
154190const getCoingeckoRelatedAssetIds = async (
155191 assetId : AssetId ,
156192 assetData : Record < AssetId , PartialFields < Asset , 'relatedAssetKey' > > ,
193+ categoryToCoinIds : Record < string , string [ ] > ,
157194) : Promise < { relatedAssetIds : AssetId [ ] ; relatedAssetKey : AssetId } | undefined > => {
158195 if ( ! isToken ( assetId ) ) return
159196 // Yes, this means effectively the same but double wrap never hurts
@@ -164,15 +201,57 @@ const getCoingeckoRelatedAssetIds = async (
164201 const { data } = await axios . get < CoingeckoAssetDetails > ( `${ coingeckoBaseUrl } /coins/${ coinUri } ` )
165202
166203 const platforms = data . platforms
204+ const coinId = data . id
167205
168- // Use all assetIds actually present in the dataset, excluding Plasma USDT0 (corrupt CoinGecko data)
169- const allRelatedAssetIds = Object . entries ( platforms )
206+ // Use all assetIds actually present in the dataset
207+ let allRelatedAssetIds = Object . entries ( platforms )
170208 ?. map ( coingeckoPlatformDetailsToMaybeAssetId )
171209 . filter ( isSome )
172210 . filter ( relatedAssetId => assetData [ relatedAssetId ] !== undefined )
173- . filter ( relatedAssetId => relatedAssetId !== PLASMA_USDT0_ASSET_ID )
211+
212+ // Determine canonical asset in THREE ways:
213+ let bridgedCanonical : AssetId | undefined
214+
215+ // 1. Check if THIS asset is an Ethereum canonical (e.g., processing ETH USDT itself)
216+ const ethereumCanonicals = Object . values ( BRIDGED_CATEGORY_MAPPINGS )
217+ if ( ethereumCanonicals . includes ( assetId ) ) {
218+ bridgedCanonical = assetId
219+ }
220+
221+ // 2. Check if this coin is in a bridged category (catches bridged variants with unique coin IDs)
222+ if ( ! bridgedCanonical ) {
223+ for ( const [ category , coinIds ] of Object . entries ( categoryToCoinIds ) ) {
224+ if ( coinIds . includes ( coinId ) ) {
225+ bridgedCanonical = BRIDGED_CATEGORY_MAPPINGS [ category ]
226+ break
227+ }
228+ }
229+ }
230+
231+ // 3. Check if platforms list contains an Ethereum canonical (catches shared coin IDs like USDC/USDT)
232+ // CoinGecko uses the same coin ID for native USDC/USDT across multiple chains
233+ if ( ! bridgedCanonical ) {
234+ for ( const canonical of ethereumCanonicals ) {
235+ if ( allRelatedAssetIds . includes ( canonical ) ) {
236+ bridgedCanonical = canonical
237+ break
238+ }
239+ }
240+ }
241+
242+ // Add canonical FIRST to ensure it becomes the primary (relatedAssetKey)
243+ // This fixes the first-come-first-served issue where non-canonical assets became primaries
244+ if ( bridgedCanonical && assetData [ bridgedCanonical ] ) {
245+ allRelatedAssetIds . unshift ( bridgedCanonical )
246+ // Remove duplicates while preserving order
247+ allRelatedAssetIds = Array . from ( new Set ( allRelatedAssetIds ) )
248+ }
174249
175250 if ( allRelatedAssetIds . length <= 1 ) {
251+ // Still return canonical even if no other assets yet (fixes Zerion override for WBTC/WETH/WSTETH)
252+ if ( bridgedCanonical ) {
253+ return { relatedAssetIds : [ ] , relatedAssetKey : bridgedCanonical }
254+ }
176255 return
177256 }
178257
@@ -190,38 +269,37 @@ const processRelatedAssetIds = async (
190269 assetId : AssetId ,
191270 assetData : Record < AssetId , PartialFields < Asset , 'relatedAssetKey' > > ,
192271 relatedAssetIndex : Record < AssetId , AssetId [ ] > ,
272+ categoryToCoinIds : Record < string , string [ ] > ,
193273 throttle : ( ) => Promise < void > ,
194274) : Promise < void > => {
195- // Skip related asset generation for Plasma usdt0 - Coingecko has corrupt data claiming
196- // it shares the same Arbitrum/Polygon contracts as real USDT, which corrupts groupings
197- if ( assetId === PLASMA_USDT0_ASSET_ID ) {
198- assetData [ assetId ] . relatedAssetKey = null
199- await throttle ( )
200- return
201- }
202-
203275 const existingRelatedAssetKey = assetData [ assetId ] . relatedAssetKey
204276
205- if ( existingRelatedAssetKey ) {
277+ if ( ! REGEN_ALL && existingRelatedAssetKey ) {
206278 return
207279 }
208280
209281 console . log ( `Processing related assetIds for ${ assetId } ` )
210282
211283 // Check if this asset is already in the relatedAssetIndex
212- for ( const [ key , relatedAssets ] of Object . entries ( relatedAssetIndex ) ) {
213- if ( relatedAssets . includes ( assetId ) ) {
214- if ( existingRelatedAssetKey !== key ) {
215- console . log (
216- `Updating relatedAssetKey for ${ assetId } from ${ existingRelatedAssetKey } to ${ key } ` ,
217- )
218- assetData [ assetId ] . relatedAssetKey = key
284+ if ( ! REGEN_ALL ) {
285+ for ( const [ key , relatedAssets ] of Object . entries ( relatedAssetIndex ) ) {
286+ if ( relatedAssets . includes ( assetId ) ) {
287+ if ( existingRelatedAssetKey !== key ) {
288+ console . log (
289+ `Updating relatedAssetKey for ${ assetId } from ${ existingRelatedAssetKey } to ${ key } ` ,
290+ )
291+ assetData [ assetId ] . relatedAssetKey = key
292+ }
293+ return // Early return - asset already processed and grouped
219294 }
220- return // Early return - asset already processed and grouped
221295 }
222296 }
223297
224- const coingeckoRelatedAssetsResult = await getCoingeckoRelatedAssetIds ( assetId , assetData )
298+ const coingeckoRelatedAssetsResult = await getCoingeckoRelatedAssetIds (
299+ assetId ,
300+ assetData ,
301+ categoryToCoinIds ,
302+ )
225303 . then ( result => {
226304 happyCount ++
227305 return result
@@ -256,10 +334,19 @@ const processRelatedAssetIds = async (
256334 relatedAssetIds : [ ] ,
257335 }
258336
337+ // Prioritize CoinGecko if it detected an Ethereum canonical (via our three-way check)
338+ // This prevents Zerion from overriding our canonical detection
339+ const ethereumCanonicals = Object . values ( BRIDGED_CATEGORY_MAPPINGS )
340+ const coingeckoDetectedCanonical =
341+ coingeckoRelatedAssetsResult ?. relatedAssetKey &&
342+ ethereumCanonicals . includes ( coingeckoRelatedAssetsResult . relatedAssetKey )
343+
259344 let relatedAssetKey =
260345 manualRelatedAssetsResult ?. relatedAssetKey ||
261- zerionRelatedAssetsResult ?. relatedAssetKey ||
262- coingeckoRelatedAssetsResult ?. relatedAssetKey ||
346+ ( coingeckoDetectedCanonical
347+ ? coingeckoRelatedAssetsResult ?. relatedAssetKey
348+ : zerionRelatedAssetsResult ?. relatedAssetKey ||
349+ coingeckoRelatedAssetsResult ?. relatedAssetKey ) ||
263350 assetId
264351
265352 // If the relatedAssetKey itself points to another key, follow the chain to find the actual key
@@ -269,13 +356,6 @@ const processRelatedAssetIds = async (
269356 relatedAssetKey = relatedAssetKeyData
270357 }
271358
272- // If the relatedAssetKey is Plasma USDT0, reject this entire grouping
273- if ( relatedAssetKey === PLASMA_USDT0_ASSET_ID ) {
274- assetData [ assetId ] . relatedAssetKey = null
275- await throttle ( )
276- return
277- }
278-
279359 const zerionRelatedAssetIds = zerionRelatedAssetsResult ?. relatedAssetIds ?? [ ]
280360 const coingeckoRelatedAssetIds = coingeckoRelatedAssetsResult ?. relatedAssetIds ?? [ ]
281361
@@ -286,10 +366,30 @@ const processRelatedAssetIds = async (
286366 ...coingeckoRelatedAssetIds ,
287367 assetId ,
288368 ] ) ,
289- ) . filter ( id => id !== PLASMA_USDT0_ASSET_ID ) // Filter out Plasma USDT0 from final merged array
369+ )
370+
371+ // First-come-first-served conflict detection
372+ // Filters out assets already claimed by a different group to prevent cross-contamination
373+ const cleanedRelatedAssetIds = mergedRelatedAssetIds . filter ( candidateAssetId => {
374+ const existingKey = assetData [ candidateAssetId ] ?. relatedAssetKey
375+
376+ // Asset has no group yet, or is already in the current group - OK to include
377+ if ( ! existingKey || existingKey === relatedAssetKey ) {
378+ return true
379+ }
380+
381+ // Asset already belongs to a different group - reject to prevent stealing
382+ console . warn (
383+ `[Related Asset Conflict] Asset ${ candidateAssetId } already belongs to group ${ existingKey } , ` +
384+ `refusing to add to ${ relatedAssetKey } . ` +
385+ `This asset was claimed by a higher market cap token that processed first. ` +
386+ `Upstream data provider (CoinGecko/Zerion) may have data quality issues.` ,
387+ )
388+ return false
389+ } )
290390
291391 // Has zerion-provided related assets, or manually added ones
292- const hasRelatedAssets = mergedRelatedAssetIds . length > 1
392+ const hasRelatedAssets = cleanedRelatedAssetIds . length > 1
293393
294394 if ( hasRelatedAssets ) {
295395 // Check if this exact group already exists in the index (can happen with parallel processing)
@@ -300,7 +400,7 @@ const processRelatedAssetIds = async (
300400 // Merge with existing group instead of replacing it
301401 const currentGroup = relatedAssetIndex [ relatedAssetKey ] || [ ]
302402 relatedAssetIndex [ relatedAssetKey ] = Array . from (
303- new Set ( [ ...currentGroup , ...mergedRelatedAssetIds ] ) ,
403+ new Set ( [ ...currentGroup , ...cleanedRelatedAssetIds ] ) ,
304404 )
305405 }
306406
@@ -330,24 +430,20 @@ export const generateRelatedAssetIndex = async () => {
330430 )
331431
332432 const { assetData : generatedAssetData , sortedAssetIds } = decodeAssetData ( encodedAssetData )
333- const relatedAssetIndex = decodeRelatedAssetIndex ( encodedRelatedAssetIndex , sortedAssetIds )
433+ const relatedAssetIndex = REGEN_ALL
434+ ? { }
435+ : decodeRelatedAssetIndex ( encodedRelatedAssetIndex , sortedAssetIds )
334436
335- // Remove stale related asset data from the assetData where:
336- // a) the primary related asset no longer exists in the dataset
337- // b) the related asset key is Plasma usdt0 (corrupt Coingecko data)
437+ // Remove stale related asset data from the assetData where the primary related asset no longer exists
338438 Object . values ( generatedAssetData ) . forEach ( asset => {
339439 const relatedAssetKey = asset . relatedAssetKey
340440
341441 if ( ! relatedAssetKey ) return
342442
343443 const primaryRelatedAsset = generatedAssetData [ relatedAssetKey ]
344444
345- // Clear Plasma usdt0 related asset key - Coingecko has corrupt data for this token
346- const isPlasmaUsdt0 =
347- relatedAssetKey === 'eip155:9745/erc20:0xb8ce59fc3717ada4c02eadf9682a9e934f625ebb'
348-
349445 // remove relatedAssetKey from the existing data to ensure the related assets get updated
350- if ( primaryRelatedAsset === undefined || isPlasmaUsdt0 ) {
446+ if ( primaryRelatedAsset === undefined ) {
351447 delete relatedAssetIndex [ relatedAssetKey ]
352448 delete asset . relatedAssetKey
353449 }
@@ -361,6 +457,8 @@ export const generateRelatedAssetIndex = async () => {
361457 )
362458 } )
363459
460+ const categoryToCoinIds = await fetchBridgedCategoryMappings ( )
461+
364462 const { throttle, clear : clearThrottleInterval } = createThrottle ( {
365463 capacity : 50 , // Reduced initial capacity to allow for a burst but not too high
366464 costPerReq : 1 , // Keeping the cost per request as 1 for simplicity
@@ -373,7 +471,13 @@ export const generateRelatedAssetIndex = async () => {
373471 console . log ( `Processing chunk: ${ i } of ${ chunks . length } ` )
374472 await Promise . all (
375473 batch . map ( async assetId => {
376- await processRelatedAssetIds ( assetId , generatedAssetData , relatedAssetIndex , throttle )
474+ await processRelatedAssetIds (
475+ assetId ,
476+ generatedAssetData ,
477+ relatedAssetIndex ,
478+ categoryToCoinIds ,
479+ throttle ,
480+ )
377481 return
378482 } ) ,
379483 )
0 commit comments