|
| 1 | +{ |
| 2 | + "schema_version": "1.0.0", |
| 3 | + "epic_issue": 257, |
| 4 | + "contract_issue": 258, |
| 5 | + "probe_issue": 259, |
| 6 | + "sample_window": { |
| 7 | + "min_examples": 10000, |
| 8 | + "max_examples": 50000 |
| 9 | + }, |
| 10 | + "source_probe": { |
| 11 | + "dataset": "songlab/TraitGym", |
| 12 | + "config": "complex_traits", |
| 13 | + "split": "test", |
| 14 | + "holdout_chromosomes": [ |
| 15 | + "3", |
| 16 | + "11", |
| 17 | + "14", |
| 18 | + "16", |
| 19 | + "17", |
| 20 | + "20" |
| 21 | + ], |
| 22 | + "bootstrap_samples": 200, |
| 23 | + "bootstrap_seed": 257, |
| 24 | + "score_columns": [ |
| 25 | + { |
| 26 | + "id": "maf_source_only", |
| 27 | + "column": "maf", |
| 28 | + "direction": "positive", |
| 29 | + "description": "Minor-allele-frequency metadata score." |
| 30 | + }, |
| 31 | + { |
| 32 | + "id": "ld_score_source_only", |
| 33 | + "column": "ld_score", |
| 34 | + "direction": "positive", |
| 35 | + "description": "LD-score metadata score." |
| 36 | + }, |
| 37 | + { |
| 38 | + "id": "tss_distance_source_only", |
| 39 | + "column": "tss_dist", |
| 40 | + "direction": "negative_abs", |
| 41 | + "description": "Distance-to-TSS metadata score, closer variants ranked higher." |
| 42 | + } |
| 43 | + ] |
| 44 | + }, |
| 45 | + "candidate_sources": [ |
| 46 | + { |
| 47 | + "id": "traitgym_complex_traits", |
| 48 | + "name": "TraitGym complex_traits", |
| 49 | + "url": "https://github.com/songlab-cal/TraitGym", |
| 50 | + "dataset_ref": "songlab/TraitGym:complex_traits:test", |
| 51 | + "public_labels": true, |
| 52 | + "observed_examples": 11400, |
| 53 | + "within_probe_scale": true, |
| 54 | + "teacher_delta_cache": "missing", |
| 55 | + "admissible_for_fx_probe": false, |
| 56 | + "blocking_reasons": [ |
| 57 | + "Variant labels are public and load through Hugging Face datasets, but the source does not provide a GenoLeWM-FX ref/alt teacher-delta cache.", |
| 58 | + "The loaded columns expose variant metadata and binary labels, not signed teacher residuals or teacher feature tensors." |
| 59 | + ] |
| 60 | + }, |
| 61 | + { |
| 62 | + "id": "traitgym_mendelian_traits", |
| 63 | + "name": "TraitGym mendelian_traits", |
| 64 | + "url": "https://github.com/songlab-cal/TraitGym", |
| 65 | + "dataset_ref": "songlab/TraitGym:mendelian_traits:test", |
| 66 | + "public_labels": true, |
| 67 | + "observed_examples": 3380, |
| 68 | + "within_probe_scale": false, |
| 69 | + "teacher_delta_cache": "missing", |
| 70 | + "admissible_for_fx_probe": false, |
| 71 | + "blocking_reasons": [ |
| 72 | + "The public split is below the 10k cheap-probe floor.", |
| 73 | + "The source does not provide ref/alt teacher deltas or teacher feature tensors." |
| 74 | + ] |
| 75 | + }, |
| 76 | + { |
| 77 | + "id": "mave_mpra_qtl_candidate_slices", |
| 78 | + "name": "MAVE, MPRA, and QTL candidate slices", |
| 79 | + "url": "https://www.mavedb.org/", |
| 80 | + "dataset_ref": "not_locked", |
| 81 | + "public_labels": "mixed", |
| 82 | + "observed_examples": null, |
| 83 | + "within_probe_scale": false, |
| 84 | + "teacher_delta_cache": "missing", |
| 85 | + "admissible_for_fx_probe": false, |
| 86 | + "blocking_reasons": [ |
| 87 | + "No exact assay, genome build, windowing rule, split manifest, or teacher-cache manifest is locked in this repo.", |
| 88 | + "Using these sources now would require new curation before the probe can be reproduced." |
| 89 | + ] |
| 90 | + } |
| 91 | + ], |
| 92 | + "candidate_teachers": [ |
| 93 | + { |
| 94 | + "id": "alphagenome_api", |
| 95 | + "name": "AlphaGenome API", |
| 96 | + "url": "https://github.com/google-deepmind/alphagenome", |
| 97 | + "role_allowed_by_contract": "sampled calibration/API baseline", |
| 98 | + "bulk_training_admissible": false, |
| 99 | + "local_runtime_available": false, |
| 100 | + "blocking_reasons": [ |
| 101 | + "The contract permits AlphaGenome only as a bounded sampled calibration/API baseline unless terms and rates support larger use.", |
| 102 | + "The local run has no AlphaGenome API key or package, and the public API is not a checksum-addressed 10k-50k teacher cache." |
| 103 | + ] |
| 104 | + }, |
| 105 | + { |
| 106 | + "id": "alphagenome_weights", |
| 107 | + "name": "AlphaGenome released weights", |
| 108 | + "url": "https://github.com/google-deepmind/alphagenome_research", |
| 109 | + "role_allowed_by_contract": "future heavyweight local teacher if terms, hardware, and artifacts are locked", |
| 110 | + "bulk_training_admissible": false, |
| 111 | + "local_runtime_available": false, |
| 112 | + "blocking_reasons": [ |
| 113 | + "Weights require accepting model terms and the upstream docs recommend H100-class hardware for model inference.", |
| 114 | + "No accepted-terms receipt, model revision, hardware receipt, or ref/alt teacher-cache artifact is available in this repo." |
| 115 | + ] |
| 116 | + }, |
| 117 | + { |
| 118 | + "id": "borzoi", |
| 119 | + "name": "Borzoi", |
| 120 | + "url": "https://github.com/calico/borzoi", |
| 121 | + "role_allowed_by_contract": "candidate local teacher", |
| 122 | + "bulk_training_admissible": false, |
| 123 | + "local_runtime_available": false, |
| 124 | + "blocking_reasons": [ |
| 125 | + "The model is public, but the local run does not have the TensorFlow teacher stack or downloaded model weights.", |
| 126 | + "The upstream large training data is requester-pays; no compact public ref/alt cache is locked for GenoLeWM-FX." |
| 127 | + ] |
| 128 | + }, |
| 129 | + { |
| 130 | + "id": "enformer", |
| 131 | + "name": "Enformer", |
| 132 | + "url": "https://www.nature.com/articles/s41592-021-01252-x", |
| 133 | + "role_allowed_by_contract": "candidate local teacher", |
| 134 | + "bulk_training_admissible": false, |
| 135 | + "local_runtime_available": false, |
| 136 | + "blocking_reasons": [ |
| 137 | + "No TensorFlow teacher stack, model revision, input window manifest, or ref/alt output cache is present in this repo.", |
| 138 | + "Using Enformer now would make the probe depend on ad hoc setup rather than a public GenoLeWM-FX cache manifest." |
| 139 | + ] |
| 140 | + }, |
| 141 | + { |
| 142 | + "id": "chrombpnet", |
| 143 | + "name": "ChromBPNet", |
| 144 | + "url": "https://github.com/kundajelab/chrombpnet", |
| 145 | + "role_allowed_by_contract": "candidate task-specific teacher", |
| 146 | + "bulk_training_admissible": false, |
| 147 | + "local_runtime_available": false, |
| 148 | + "blocking_reasons": [ |
| 149 | + "The repository provides model/training tooling, but no selected cell-type model, peaks, genome, or ref/alt cache is locked here.", |
| 150 | + "No GPU/Docker teacher runtime receipt or checksum-addressed GenoLeWM-FX cache is present." |
| 151 | + ] |
| 152 | + } |
| 153 | + ], |
| 154 | + "required_baselines": [ |
| 155 | + "zero-delta/no-edit", |
| 156 | + "source-only/label-prior", |
| 157 | + "Carbon likelihood/log-odds where applicable", |
| 158 | + "direct teacher ref-alt score", |
| 159 | + "linear/logistic probe on teacher features", |
| 160 | + "available public model predictions when feasible" |
| 161 | + ], |
| 162 | + "recommended_issue_actions": [ |
| 163 | + { |
| 164 | + "issue": 258, |
| 165 | + "action": "close-completed", |
| 166 | + "reason": "The source-controlled contract is locked in docs/research/fx-experiment-contract.md." |
| 167 | + }, |
| 168 | + { |
| 169 | + "issue": 259, |
| 170 | + "action": "close-kill", |
| 171 | + "reason": "The feasibility gate found no reproducible 10k-50k public teacher-delta slice under the contract." |
| 172 | + }, |
| 173 | + { |
| 174 | + "issue": 260, |
| 175 | + "action": "close-not-planned", |
| 176 | + "reason": "Teacher adapters and caches should not be built until a source/teacher pair passes the feasibility gate." |
| 177 | + }, |
| 178 | + { |
| 179 | + "issue": 261, |
| 180 | + "action": "close-not-planned", |
| 181 | + "reason": "The residual model path should not be implemented without a selected target and teacher cache." |
| 182 | + }, |
| 183 | + { |
| 184 | + "issue": 262, |
| 185 | + "action": "close-not-planned", |
| 186 | + "reason": "No staged Hugging Face jobs should launch without the #259 go decision." |
| 187 | + }, |
| 188 | + { |
| 189 | + "issue": 263, |
| 190 | + "action": "close-not-planned", |
| 191 | + "reason": "The locked benchmark gate is unreachable because the experiment stops at feasibility." |
| 192 | + }, |
| 193 | + { |
| 194 | + "issue": 264, |
| 195 | + "action": "close-completed", |
| 196 | + "reason": "The public output is the kill report, not a model artifact release." |
| 197 | + }, |
| 198 | + { |
| 199 | + "issue": 265, |
| 200 | + "action": "close-completed", |
| 201 | + "reason": "The final paper/demo decision is no FX paper update and no demo because evidence is insufficient." |
| 202 | + } |
| 203 | + ] |
| 204 | +} |
0 commit comments