Skip to content

Commit 993172e

Browse files
committed
steps 2 añadir debugs
1 parent 6c5f7db commit 993172e

File tree

2 files changed

+19
-61
lines changed

2 files changed

+19
-61
lines changed

curation-pipeline/src/components/steps/Step2GenomeTF.jsx

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,32 +58,35 @@ async function fetchNuccoreTaxInfo(acc) {
5858
}
5959

6060
async function fetchTaxonomyLineageEx(taxid) {
61-
const url = `${ENTREZ_BASE}/esummary.fcgi?db=taxonomy&id=${encodeURIComponent(
61+
// efetch es el endpoint que devuelve LineageEx de forma consistente
62+
const url = `${ENTREZ_BASE}/efetch.fcgi?db=taxonomy&id=${encodeURIComponent(
6263
taxid
6364
)}&retmode=json`;
65+
6466
const j = await fetchJson(url, { isNcbi: true });
6567

66-
const node = j?.result?.[String(taxid)];
67-
if (!node) return null;
68+
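// ASSUMPTION (unverified): efetch is expected to return { result: [ { TaxId, ScientificName, Rank, LineageEx: [...] } ] }; NCBI documents only XML output for efetch with db=taxonomy, so confirm the real response format before relying on this shape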
69+
const rec = Array.isArray(j?.result) ? j.result[0] : null;
70+
if (!rec) return null;
6871

69-
const lineageEx = Array.isArray(node.lineageex) ? node.lineageex : [];
72+
const lineageEx = Array.isArray(rec.LineageEx) ? rec.LineageEx : [];
7073

74+
// LineageEx son los ancestros; añadimos el nodo final (leaf)
7175
const path = lineageEx.map((x) => ({
72-
taxid: x?.taxid ? String(x.taxid) : "",
73-
name: x?.scientificname || "",
74-
rank: x?.rank || "no rank",
76+
taxid: x?.TaxId ? String(x.TaxId) : "",
77+
name: x?.ScientificName || "",
78+
rank: x?.Rank || "no rank",
7579
}));
7680

77-
// Afegim el node final (taxid consultat) si no hi és
7881
const leaf = {
79-
taxid: String(taxid),
80-
name: node.scientificname || "",
81-
rank: node.rank || "no rank",
82+
taxid: rec.TaxId ? String(rec.TaxId) : String(taxid),
83+
name: rec.ScientificName || "",
84+
rank: rec.Rank || "no rank",
8285
};
8386

8487
if (!path.length || path[path.length - 1].taxid !== leaf.taxid) path.push(leaf);
8588

86-
// Neteja de duplicats i buits
89+
// limpiar duplicados/basura
8790
const seen = new Set();
8891
const cleaned = [];
8992
for (const n of path) {
@@ -92,7 +95,7 @@ async function fetchTaxonomyLineageEx(taxid) {
9295
cleaned.push(n);
9396
}
9497

95-
// El format que guardem ja porta el parent_taxonomy_id per facilitar inserts
98+
// devolver chain con parent_taxonomy_id encadenado
9699
return cleaned.map((n, i) => ({
97100
taxonomy_id: n.taxid,
98101
name: n.name,

curation-pipeline/src/components/steps/Step7CurationInfo.jsx

Lines changed: 3 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -360,63 +360,19 @@ WHERE NOT EXISTS (
360360
}
361361

362362
// Taxonomia + link a genome
363-
// Inserta la cadena completa (ancestros + hoja)
364-
// Pero la BD "indexa" hasta species: si el leaf es strain/subspecies, enlazamos el genoma a la species.
365-
366-
// DEBUG
367-
console.log("STEP7 taxonomyData keys", Object.keys(taxonomyData || {}));
368-
console.log("STEP7 byAcc keys", Object.keys(taxByAcc));
369-
370363
const taxByAcc = taxonomyData?.byAccession || {};
371364

372-
const rankScore = (r) => {
373-
const x = String(r || "no rank").toLowerCase();
374-
const order = {
375-
"no rank": 0,
376-
superkingdom: 1,
377-
kingdom: 2,
378-
phylum: 3,
379-
class: 4,
380-
order: 5,
381-
family: 6,
382-
genus: 7,
383-
species: 8,
384-
subspecies: 9,
385-
strain: 10,
386-
};
387-
return order[x] ?? 0;
388-
};
389-
390365
for (const acc of accessions) {
391-
392-
console.log("STEP7 for acc", acc, taxByAcc?.[acc]);
393-
394366
const tInfo = taxByAcc?.[acc];
395-
const chainRaw = Array.isArray(tInfo?.chain) ? tInfo.chain : [];
396-
if (!chainRaw.length) continue;
397-
398-
// Aseguramos orden de general -> específico.
399-
// Si viene al revés, invertimos (p.ej. empieza en strain y termina en phylum).
400-
const chainOrdered = [...chainRaw];
401-
if (chainOrdered.length >= 2) {
402-
const first = rankScore(chainOrdered[0]?.rank);
403-
const last = rankScore(chainOrdered[chainOrdered.length - 1]?.rank);
404-
if (first > last) chainOrdered.reverse();
405-
}
406-
407-
// Si NCBI devuelve strain/subspecies, recortamos la cadena para enlazar el genoma a species.
408-
let chain = chainOrdered;
409-
const idxSpecies = chainOrdered.findIndex((n) => String(n?.rank || "").toLowerCase() === "species");
410-
if (idxSpecies >= 0) chain = chainOrdered.slice(0, idxSpecies + 1);
367+
const chain = Array.isArray(tInfo?.chain) ? tInfo.chain : [];
368+
if (!chain.length) continue;
411369

412-
// Insert/Update de cada nodo y su parent_id (FK a core_taxonomy.id)
413370
for (let i = 0; i < chain.length; i++) {
414371
const node = chain[i];
415372
const taxid = String(node.taxonomy_id || "").trim();
416-
if (!taxid) continue;
417-
418373
const name = String(node.name || "").trim();
419374
const rank = String(node.rank || "no rank").trim();
375+
if (!taxid) continue;
420376

421377
const parentTaxid = i > 0 ? String(chain[i - 1].taxonomy_id || "").trim() : "";
422378
const parentIdExpr = parentTaxid
@@ -445,7 +401,6 @@ WHERE taxonomy_id='${esc(taxid)}';
445401
`.trim());
446402
}
447403

448-
// Enlazamos el genoma al leaf elegido (species si existe; si no, el último del chain)
449404
const leafTaxid = String(chain[chain.length - 1]?.taxonomy_id || "").trim();
450405
if (leafTaxid) {
451406
sql.push(`

0 commit comments

Comments
 (0)