-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinterpretation-policy.json
More file actions
202 lines (202 loc) · 6.85 KB
/
Copy pathinterpretation-policy.json
File metadata and controls
202 lines (202 loc) · 6.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
{
"schemaVersion": "1.1",
"targetStages": [
"discover_interpretation",
"biological_interpretation",
"validate_interpretation"
],
"claimBoundary": {
"associativeOnly": [
"Report pathway/gene-set enrichment as statistical associations only",
"Treat disease-gene overlays as suggestive, never diagnostic"
],
"requiresEvidence": [
"Any claim of pathway involvement must cite the enrichment method, database version, and FDR-adjusted p-value",
"Disease-relevance language must cite the disease-gene panel source"
]
},
"verifiableEntities": {
"enabled": true,
"entityNamePatterns": [
"[A-Z][A-Z0-9]{1,}",
"GO:\\d{7}",
"R-HSA-\\d+",
"R-MMU-\\d+",
"R-RNO-\\d+",
"R-DRE-\\d+",
"(?:hsa|mmu|rno|dre|cel|sce|dme|ath)\\d{5}",
"K\\d{5}",
"WP\\d+",
"MP:\\d{7}",
"DOID:\\d+",
"EFO_\\d+",
"HP:\\d{7}",
"REACTOME:R-[A-Z]{3}-\\d+",
"FB(?:gn|tr|pp)\\d{7}",
"ENS[A-Z]{0,4}[GTP]\\d{6,}",
"WBGene\\d{8}",
"ZDB-GENE-\\d+-\\d+",
"AT[1-5MC]G\\d{5}",
"Y[A-P][LR]\\d{3}[WC]"
],
"entityNameExcludePatterns": [
"^USA$", "^WHO$", "^CDC$", "^BBC$", "^EU$", "^US$", "^UK$",
"^PCR$", "^QPCR$", "^DNA$", "^RNA$", "^ATP$", "^ADP$", "^AMP$",
"^NGS$", "^ELISA$", "^SDS$", "^PAGE$", "^BSA$", "^HRP$",
"^FDA$", "^EMA$", "^ISO$", "^GMP$",
"^MS$", "^MS2$", "^MS3$",
"^IVD$", "^IVT$",
"^FDR$", "^FC$", "^DE$", "^DEG$", "^DEGS$", "^LFC$",
"^GSEA$", "^GO$", "^KEGG$", "^MSIGDB$", "^ORA$", "^FGSEA$",
"^TPM$", "^CPM$", "^RPKM$", "^FPKM$", "^TMM$", "^VST$", "^RLE$",
"^UMI$", "^PCA$", "^UMAP$", "^TSNE$", "^QC$",
"^SME$", "^FASTQ$", "^BH$", "^SWFC$", "^ECAA$", "^SAP$",
"^ITT$", "^MMRM$", "^GEO$", "^SRA$", "^ENA$", "^BAM$", "^SAM$",
"^VCF$", "^GFF$", "^GTF$", "^BED$", "^CSV$", "^TSV$", "^JSON$",
"^ES$", "^SE$", "^CI$", "^SD$", "^IQR$", "^AUC$", "^ROC$"
],
"directionVocab": {
"up": [
"upregulated",
"up-regulated",
"increased",
"elevated",
"enriched",
"induced",
"higher",
"activated"
],
"down": [
"downregulated",
"down-regulated",
"decreased",
"reduced",
"depleted",
"suppressed",
"lower",
"repressed",
"inhibited"
]
},
"effectSizeColumns": [
"log2FC",
"logFC",
"logFoldChange",
"log2FoldChange",
"log2fc",
"effect_size",
"estimate",
"nes",
"NES",
"es"
],
"entityColumns": [
"gene",
"gene_name",
"symbol",
"feature",
"entity",
"name",
"term",
"gene_set_id",
"pathway",
"pathway_id",
"id",
"cell",
"cell_id",
"barcode",
"sample",
"sample_id",
"taxon",
"taxon_id",
"taxon_name",
"otu",
"otu_id",
"asv",
"region",
"peak",
"peak_id",
"cpg",
"cpg_id",
"site",
"probe",
"probe_id",
"variant",
"snp",
"rsid",
"locus",
"accession",
"protein",
"protein_id",
"uniprot",
"transcript",
"transcript_id"
],
"pvalueColumns": [
"pvalue",
"p_value",
"pval",
"padj",
"adj.P.Val",
"FDR",
"q_value",
"adj_p_value",
"adj_pvalue",
"fdr_q_value",
"qvalue",
"adjusted_pvalue",
"p_adj"
],
"tolerance": {
"log2FcAbsoluteDelta": 0.05,
"pvalueRelativeDelta": 0.1
},
"literatureGrounding": {
"minPapers": 2,
"minSources": 1
}
},
"validationContract": {
"requiredOutputs": [],
"metrics": []
},
"evidenceRules": [
{
"id": "adjusted_p_value_required",
"description": "Any significance claim about a gene/feature must cite an FDR-adjusted p-value column (padj, FDR, q_value, adj.P.Val). Raw p-values without multiple-testing correction are not sufficient to support a significance claim.",
"appliesTo": ["differentially_expressed", "upregulated", "downregulated", "enriched", "depleted"],
"severity": "blocking"
},
{
"id": "effect_size_sign_consistency",
"description": "When a narrative declares an entity 'upregulated' / 'increased' / 'higher', the cited effect-size column (log2FC, logFC, effect_size) must be positive on the row matching the entity. The mirror rule applies to down-direction language ('downregulated' / 'decreased' / 'reduced'): the cited value must be negative. Sign mismatches surface as ClaimVerificationReport mismatches.",
"appliesTo": ["upregulated", "downregulated", "increased", "decreased", "elevated", "reduced"],
"severity": "blocking"
},
{
"id": "direction_vocab_match",
"description": "Direction language in narrative prose must use one of the configured directionVocab terms (up / down sets). Free-form synonyms outside the vocab are flagged so the subject-matter expert (SME) can either extend the vocab or rephrase the claim.",
"appliesTo": ["all_directional_claims"],
"severity": "warning"
},
{
"id": "pathway_claim_requires_method_and_db_version",
"description": "Pathway / gene-set enrichment claims must cite the enrichment method (e.g. fgsea, gprofiler, gseapy) and the database version (e.g. Reactome 2025-09, GO 2025-Q3). The claimBoundary.requiresEvidence rule drives this — a narrative that does not cite both the method and the database version is downgraded to an associative-only restatement.",
"appliesTo": ["pathway_enriched", "pathway_depleted", "gene_set_enriched"],
"severity": "blocking"
},
{
"id": "literature_concordance_verified",
"description": "Any claim that an analytical finding is concordant with, consistent with, or replicates prior literature must cite at least literatureGrounding.minPapers distinct PMIDs that the claims_evidence_matrix.csv records as verified supporting rows, drawn from at least literatureGrounding.minSources distinct source kinds. A cited PMID absent from the matrix, or a matrix row flagged opposite_direction, surfaces as a ClaimVerificationReport mismatch.",
"appliesTo": ["concordant_with_literature", "consistent_with_prior", "replicates_prior", "corroborates_prior"],
"severity": "blocking"
},
{
"id": "literature_support_requires_pmid",
"description": "A literature-grounded support claim must anchor to one or more PMID citations; concordance / prior-work language without a PMID citation cannot be cross-checked against the claims_evidence_matrix.csv prior-work rows and is downgraded to an unverifiable assertion.",
"appliesTo": ["concordant_with_literature", "consistent_with_prior", "replicates_prior", "corroborates_prior"],
"severity": "blocking"
}
],
"note": "schemaVersion 1.1 adds verifiableEntities: a policy block consumed by the claim verifier to check that narrative reports cite values matching the committed result tables."
}