From 44c1d1ab125a19cb93e02bb3c73d89f965b617d7 Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Wed, 18 Mar 2026 13:08:06 +0000 Subject: [PATCH 01/24] Add results from the openmm840 qa --- .../computational_results.json | 4934 +++++++++++++++++ .../submission.yaml | 32 + 2 files changed, 4966 insertions(+) create mode 100644 openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json create mode 100644 openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json new file mode 100644 index 0000000..fa5fc58 --- /dev/null +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json @@ -0,0 +1,4934 @@ +{ + "DG": [ + { + "ligand": "ejm_50", + "DG": { + "magnitude": 0.29126535165543466, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.09917235228482976, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_42", + "DG": { + "magnitude": 0.19152705913028711, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.09494870673093252, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_48", + "DG": { + "magnitude": 0.6607629762131684, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.11518369119192944, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + 
"pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_55", + "DG": { + "magnitude": -0.5926799441635051, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.15362099228783124, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_54", + "DG": { + "magnitude": 0.17393058577592413, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.163023542671173, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "jmc_23", + "DG": { + "magnitude": -1.2246516284257254, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.0876729477724904, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "jmc_28", + "DG": { + "magnitude": -0.9062639493216924, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.10269441383959206, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_31", + "DG": { + "magnitude": -0.17096192538386168, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.054938658795406575, 
+ "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "jmc_27", + "DG": { + "magnitude": -1.4147688972765025, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.08856330806651277, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "jmc_30", + "DG": { + "magnitude": -1.8757626097098932, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.07812592097861754, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_46", + "DG": { + "magnitude": -0.9398774595643715, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.07668191741075524, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_45", + "DG": { + "magnitude": 0.32064376361660196, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.10615715902554235, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_44", + "DG": { + "magnitude": 2.6820095113046794, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + 
}, + "DG_uncertainty": { + "magnitude": 0.15648448886217892, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_47", + "DG": { + "magnitude": 0.2943067305517464, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.0849718727731203, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_49", + "DG": { + "magnitude": 0.7265934300979362, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.1834104598705046, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "ejm_43", + "DG": { + "magnitude": 1.7839270054997698, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.13110488350666116, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "tyk2", + "source": "MLE" + }, + { + "ligand": "2x", + "DG": { + "magnitude": 1.402473784656189, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.16362990296451582, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2v", + "DG": { + "magnitude": 3.0252709313609847, + "unit": "kilocalories_per_mole", + ":is_custom:": 
true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.1392041952315424, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3fly", + "DG": { + "magnitude": 0.9357154959044522, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.1517623776273203, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2z", + "DG": { + "magnitude": 1.3925012717986371, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.35808037761446715, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3fmk", + "DG": { + "magnitude": -1.4174232029590677, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.3841937885880197, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2o", + "DG": { + "magnitude": -1.6874291873757477, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.2748582320961915, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3fln", + "DG": { + "magnitude": 0.25457879378399895, + "unit": 
"kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.08616835674948678, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2n", + "DG": { + "magnitude": -1.312922416351007, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.1407417414748107, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2e", + "DG": { + "magnitude": 1.3736709114203682, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.10601823494148799, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2y", + "DG": { + "magnitude": 0.9756250807961369, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.5829998457516582, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2s", + "DG": { + "magnitude": -1.0384922890247537, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.35262381430424494, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2r", + "DG": { + "magnitude": 
-1.0151434977895408, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.3019397617598854, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2aa", + "DG": { + "magnitude": -0.5233617987215631, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.45857184881898433, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3flq", + "DG": { + "magnitude": -3.115753401372757, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.47470413795406696, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3flw", + "DG": { + "magnitude": -0.9887012836898492, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.4047369355253599, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2k", + "DG": { + "magnitude": 1.4945763706711874, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.09767713562392834, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + 
"ligand": "2m", + "DG": { + "magnitude": -1.2086131941648164, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.10904083414852378, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2u", + "DG": { + "magnitude": -1.7993394986310383, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.3985649080205063, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2j", + "DG": { + "magnitude": 0.907512517557072, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.12017116206119788, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2t", + "DG": { + "magnitude": -1.6156093607059905, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.17889749921124085, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2ff", + "DG": { + "magnitude": 0.20303907336629878, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.12815562906133746, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": 
"p38", + "source": "MLE" + }, + { + "ligand": "2p", + "DG": { + "magnitude": -0.5655419132651249, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.2905923411992188, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2ee", + "DG": { + "magnitude": -0.13605074481316404, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.21521706815916217, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2gg", + "DG": { + "magnitude": 1.2935202311363638, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.15776085370467832, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2l", + "DG": { + "magnitude": -1.0740023190194963, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.11037515728210154, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2q", + "DG": { + "magnitude": -1.7503584028809382, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.216980682113927, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + 
"system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2g", + "DG": { + "magnitude": 0.010660091115846093, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.08774664983749976, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2c", + "DG": { + "magnitude": -0.9139549473034174, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.17933551114643104, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2i", + "DG": { + "magnitude": 0.7837093233242358, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.11492535392427111, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2h", + "DG": { + "magnitude": 1.140782212442983, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.09695123630030483, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3flz", + "DG": { + "magnitude": 0.7458905687132686, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.09919668442106934, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + 
"pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2bb", + "DG": { + "magnitude": 1.7304086098693254, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.6095674355362066, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "3fmh", + "DG": { + "magnitude": 1.1059067484454168, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.9826981846915885, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "2f", + "DG": { + "magnitude": 1.3868554417054604, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.10167116076869546, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "jacs_set", + "system_name": "p38", + "source": "MLE" + }, + { + "ligand": "19charg", + "DG": { + "magnitude": -0.501459078762063, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.08802770108498328, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "charge_annihilation_set", + "system_name": "irak4_s3", + "source": "MLE" + }, + { + "ligand": "27", + "DG": { + "magnitude": 0.4304453957801377, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.07124165229524881, + 
"unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "charge_annihilation_set", + "system_name": "irak4_s3", + "source": "MLE" + }, + { + "ligand": "28charg", + "DG": { + "magnitude": 0.07101368298192536, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.13277043703505112, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "charge_annihilation_set", + "system_name": "irak4_s3", + "source": "MLE" + }, + { + "ligand": "21", + "DG": { + "magnitude": -1.731545406606206, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.05894673532740687, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "charge_annihilation_set", + "system_name": "egfr", + "source": "MLE" + }, + { + "ligand": "27chargR", + "DG": { + "magnitude": 0.9201371595566695, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.10613089729022374, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "charge_annihilation_set", + "system_name": "egfr", + "source": "MLE" + }, + { + "ligand": "30charg", + "DG": { + "magnitude": 0.811408247049536, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DG_uncertainty": { + "magnitude": 0.07816534186689623, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "system_group": "charge_annihilation_set", + "system_name": "egfr", + "source": "MLE" + } + ], + "DDG": [ + { + "ligand_a": "ejm_50", + "ligand_b": "ejm_42", + "system_group": 
"jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.31190011815882457, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.18487315903557983, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -19.16080704943104, + -19.111385364555613, + -18.821940214304785 + ], + "DGs_solvent": [ + -18.571648686711896, + -18.830385089422162, + -18.756398497680898 + ], + "Complex_smallest_mbar_overlaps": [ + 0.13376294007626177, + 0.13055139667336355, + 0.12983153960084914 + ], + "Complex_smallest_replica_mixing": [ + 0.08936825885978428, + 0.09551208285385501, + 0.09167750325097529 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1379351626786042, + 0.13809103838868403, + 0.13816936991084877 + ], + "Solvent_smallest_replica_mixing": [ + 0.108948432760364, + 0.10522066738428418, + 0.1102123356926188 + ] + }, + { + "ligand_a": "jmc_23", + "ligand_b": "jmc_28", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.36623109022296063, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.11640533073579613, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 22.70408815873226, + 22.458056284998342, + 22.487929608010205 + ], + "DGs_solvent": [ + 22.197952240200127, + 22.13034422061398, + 22.223084320257822 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1416995738770979, + 0.14116436430160711, + 0.1419894952857548 + ], + "Complex_smallest_replica_mixing": [ + 0.12060702875399361, + 0.11719670200235571, + 0.11852704257767549 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.14291634228515276, + 0.14314490886512032, + 0.1438190363250322 + ], + "Solvent_smallest_replica_mixing": [ + 0.12048503611971104, + 
0.12386248736097068, + 0.12462082912032356 + ] + }, + { + "ligand_a": "ejm_31", + "ligand_b": "ejm_46", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.7488952687425261, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.09016750292603079, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -29.01021257823298, + -29.190678420782003, + -29.134048123354756 + ], + "DGs_solvent": [ + -28.388934469690422, + -28.29342124586621, + -28.40589760058554 + ], + "Complex_smallest_mbar_overlaps": [ + 0.12391854414577823, + 0.12728048114417317, + 0.12432086578445879 + ], + "Complex_smallest_replica_mixing": [ + 0.09936406995230525, + 0.08785192909280501, + 0.09504550050556117 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.12081198774344082, + 0.11714612296936465, + 0.11981483589375609 + ], + "Solvent_smallest_replica_mixing": [ + 0.10313447927199192, + 0.10338725985844287, + 0.10273972602739725 + ] + }, + { + "ligand_a": "jmc_27", + "ligand_b": "ejm_46", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.4164254733740478, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.09876860452401351, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -8.474240892289592, + -8.544392711253266, + -8.70493330056103 + ], + "DGs_solvent": [ + -8.961963044813643, + -9.009584892266643, + -9.001295387145747 + ], + "Complex_smallest_mbar_overlaps": [ + 0.12075339102487868, + 0.12075705754361211, + 0.12094073669100758 + ], + "Complex_smallest_replica_mixing": [ + 0.11051502145922747, + 0.09428715874620829, + 0.0968149646107179 + ], + "Solvent_smallest_mbar_overlaps": [ + 
0.10664733156507009, + 0.10695795462700747, + 0.10723994372650122 + ], + "Solvent_smallest_replica_mixing": [ + 0.09453993933265925, + 0.08442871587462084, + 0.09479271991911022 + ] + }, + { + "ligand_a": "ejm_50", + "ligand_b": "ejm_48", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.5341463866436627, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.3094542970554522, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 50.71509551483679, + 51.40136661180214, + 51.0720664413858 + ], + "DGs_solvent": [ + 50.44195116282237, + 50.71417320866378, + 50.429965036607584 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08065189023653123, + 0.091595613911839, + 0.08620352393865843 + ], + "Complex_smallest_replica_mixing": [ + 0.013504611330698288, + 0.024412855377008654, + 0.013816475495307613 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.11718424478793976, + 0.1053715857586385, + 0.11340153536625901 + ], + "Solvent_smallest_replica_mixing": [ + 0.03973168214654283, + 0.031682641107561235, + 0.034055727554179564 + ] + }, + { + "ligand_a": "jmc_30", + "ligand_b": "ejm_46", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.9371399293961673, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.018851480589629665, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 25.578451505274504, + 25.608444739174015, + 25.617997436382662 + ], + "DGs_solvent": [ + 24.652597871398413, + 24.671528498114434, + 24.669347523129833 + ], + "Complex_smallest_mbar_overlaps": [ + 0.13723072503954822, + 0.13700768022413606, + 0.1394285680288189 + ], + "Complex_smallest_replica_mixing": 
[ + 0.10300316122233931, + 0.09955995599559957, + 0.10945399393326592 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.13412870322248974, + 0.13238089160125308, + 0.1316668208399034 + ], + "Solvent_smallest_replica_mixing": [ + 0.09831460674157304, + 0.10161779575328615, + 0.1051567239635996 + ] + }, + { + "ligand_a": "ejm_50", + "ligand_b": "ejm_55", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.916084341967732, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.1345450147693899, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -44.17911361527046, + -44.43549613530486, + -44.43978996564394 + ], + "DGs_solvent": [ + -43.371586840561996, + -43.509910749053915, + -43.42464910070016 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1778754405760441, + 0.18017099898235453, + 0.18082746173296835 + ], + "Complex_smallest_replica_mixing": [ + 0.10995955510616785, + 0.11692015209125475, + 0.12517146776406035 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.18600447312112164, + 0.18568913856836897, + 0.18224489428141152 + ], + "Solvent_smallest_replica_mixing": [ + 0.12452471482889733, + 0.12360970677451971, + 0.11945500633713561 + ] + }, + { + "ligand_a": "jmc_28", + "ligand_b": "jmc_30", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.9313926243440456, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.10388639788020433, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -13.938623728206014, + -13.825428209858975, + -13.68632523402718 + ], + "DGs_solvent": [ + -12.869496729262291, + -12.887935568457241, + -12.898767001340497 + ], + 
"Complex_smallest_mbar_overlaps": [ + 0.12596583866728828, + 0.12512502020949173, + 0.1255163773029776 + ], + "Complex_smallest_replica_mixing": [ + 0.12338044758539458, + 0.11348122866894197, + 0.12851929092805006 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.12365957892116246, + 0.1236959353984504, + 0.12419263649950771 + ], + "Solvent_smallest_replica_mixing": [ + 0.12431243124312431, + 0.12260967379077616, + 0.12664307381193124 + ] + }, + { + "ligand_a": "ejm_31", + "ligand_b": "ejm_42", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.6911130202134572, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.1585637071928594, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -12.52053992688133, + -12.693240399437823, + -12.314087801885053 + ], + "DGs_solvent": [ + -13.15519160398313, + -13.2351138993596, + -13.210901685501845 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10828688807734116, + 0.10627674092590095, + 0.10335060684463246 + ], + "Complex_smallest_replica_mixing": [ + 0.09505363528009535, + 0.09234828496042216, + 0.09007707129094412 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10237895001773795, + 0.10314379891210053, + 0.10306871655924223 + ], + "Solvent_smallest_replica_mixing": [ + 0.10616784630940344, + 0.09479271991911022, + 0.0968149646107179 + ] + }, + { + "ligand_a": "jmc_23", + "ligand_b": "ejm_31", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 1.102046405017669, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.14013398224300408, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 0.6728841405617866, + 0.8746447362763453, 
+ 0.6488853414210868 + ], + "DGs_solvent": [ + -0.5004980363015142, + -0.26868370266352204, + -0.34054325782875244 + ], + "Complex_smallest_mbar_overlaps": [ + 0.131387850766123, + 0.13001794428128802, + 0.13286795091162407 + ], + "Complex_smallest_replica_mixing": [ + 0.07512315270935961, + 0.07508731082654249, + 0.09143686502177069 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1295014640191342, + 0.1295744456404279, + 0.13028602635435663 + ], + "Solvent_smallest_replica_mixing": [ + 0.08410732714138287, + 0.08561643835616438, + 0.0801314459049545 + ] + }, + { + "ligand_a": "ejm_47", + "ligand_b": "ejm_50", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.11154255326255225, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.1561341109264418, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -45.59830621538094, + -45.69408131533385, + -45.334355266644614 + ], + "DGs_solvent": [ + -45.383858207141586, + -45.468729400107065, + -45.4395275303231 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11692312607980797, + 0.11548443168722737, + 0.11921158583831405 + ], + "Complex_smallest_replica_mixing": [ + 0.0456656346749226, + 0.04118873826903024, + 0.038751345532831 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.12390168973071564, + 0.12350746672722915, + 0.1206565041080654 + ], + "Solvent_smallest_replica_mixing": [ + 0.06066734074823053, + 0.05388151174668029, + 0.05864509605662285 + ] + }, + { + "ligand_a": "ejm_55", + "ligand_b": "ejm_54", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.2767266696811319, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5252882481323151, + "unit": "kilocalories_per_mole", + 
":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 25.634803878041474, + 25.602083303194657, + 25.685287522883364 + ], + "DGs_solvent": [ + 24.644875425023372, + 25.87938446521765, + 25.567734804835073 + ], + "Complex_smallest_mbar_overlaps": [ + 0.16929610099601902, + 0.16899159333662458, + 0.17010329138196761 + ], + "Complex_smallest_replica_mixing": [ + 0.11410018552875696, + 0.1319514661274014, + 0.12873862158647595 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.17258574651118336, + 0.17090107035868987, + 0.1695965093480625 + ], + "Solvent_smallest_replica_mixing": [ + 0.130927291886196, + 0.13751263902932254, + 0.14079878665318504 + ] + }, + { + "ligand_a": "ejm_49", + "ligand_b": "ejm_31", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.7586372523142302, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.27682570627770536, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 20.09825569520222, + 20.008504205745187, + 19.472740654113803 + ], + "DGs_solvent": [ + 20.63331175099524, + 20.630773866318638, + 20.59132669469003 + ], + "Complex_smallest_mbar_overlaps": [ + 0.07332183437429869, + 0.07492693094392974, + 0.06482688501987194 + ], + "Complex_smallest_replica_mixing": [ + 0.015175718849840255, + 0.017743979721166033, + 0.017543859649122806 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.11562292849318584, + 0.1143871186444747, + 0.11726054856438224 + ], + "Solvent_smallest_replica_mixing": [ + 0.047269969666329625, + 0.0389988358556461, + 0.04024767801857585 + ] + }, + { + "ligand_a": "jmc_23", + "ligand_b": "jmc_27", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.1973578482554217, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": 
"openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.03475793921871755, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 10.190419798951348, + 10.124141066265146, + 10.188030155270068 + ], + "DGs_solvent": [ + 10.38470669078386, + 10.344771473550882, + 10.365186400918082 + ], + "Complex_smallest_mbar_overlaps": [ + 0.22307188221442528, + 0.22190341106421838, + 0.22378469859882863 + ], + "Complex_smallest_replica_mixing": [ + 0.19969666329625885, + 0.19152196118488254, + 0.19901112484548825 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.2219462934548013, + 0.2230370973198237, + 0.22118305193484997 + ], + "Solvent_smallest_replica_mixing": [ + 0.19514661274014156, + 0.1903437815975733, + 0.1994438827098079 + ] + }, + { + "ligand_a": "ejm_31", + "ligand_b": "ejm_48", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.8066356116094369, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.12079847964548013, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -11.370590164887243, + -11.15471756527737, + -11.088011824247115 + ], + "DGs_solvent": [ + -12.00140140500631, + -12.014746507838368, + -12.017078476395364 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10986296371232865, + 0.10772783412679628, + 0.10965586483680564 + ], + "Complex_smallest_replica_mixing": [ + 0.04764776839565742, + 0.033646322378716745, + 0.035636561479869426 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10850724789011988, + 0.11020963737500898, + 0.10518795030466864 + ], + "Solvent_smallest_replica_mixing": [ + 0.06370070778564206, + 0.05680359435173299, + 0.06622851365015167 + ] + }, + { + "ligand_a": "ejm_44", + "ligand_b": "ejm_43", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + 
"DDG": { + "magnitude": -0.9901327297773079, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.1345112185742903, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 8.773485983373927, + 8.868105232682508, + 8.989095388945623 + ], + "DGs_solvent": [ + 9.728499391897751, + 9.96896393341908, + 9.903621469017146 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11753921768625564, + 0.115147192474471, + 0.11477594804400199 + ], + "Complex_smallest_replica_mixing": [ + 0.08063328424153167, + 0.09805924412665985, + 0.09098018769551616 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10415303449480709, + 0.10637207323089107, + 0.1054800287810906 + ], + "Solvent_smallest_replica_mixing": [ + 0.09984833164812942, + 0.1051567239635996, + 0.09630940343781598 + ] + }, + { + "ligand_a": "ejm_43", + "ligand_b": "ejm_42", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -1.6725148955864118, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.12548819230305017, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 9.65018756969153, + 9.704017724888304, + 9.777430707810066 + ], + "DGs_solvent": [ + 11.335158497570813, + 11.273518263236435, + 11.540503928341884 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10151518300576419, + 0.10000814732350109, + 0.09919590183439851 + ], + "Complex_smallest_replica_mixing": [ + 0.08436532507739938, + 0.08818393480791618, + 0.08472367049009384 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10532685314445385, + 0.10566620001644182, + 0.10493907898120303 + ], + "Solvent_smallest_replica_mixing": [ + 0.10288169868554095, + 0.1102123356926188, + 0.11017410228509249 + ] + }, + { + "ligand_a": 
"ejm_47", + "ligand_b": "ejm_31", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.42914217336133476, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.09009349080843024, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 16.66455004001092, + 16.63353442917762, + 16.83326455741535 + ], + "DGs_solvent": [ + 17.111252284275793, + 17.158210675997353, + 17.14931258641475 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11024742270989725, + 0.10112960263552395, + 0.10720304409173459 + ], + "Complex_smallest_replica_mixing": [ + 0.045536791314837156, + 0.0629346314325452, + 0.051414581066376495 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1089060268362668, + 0.10895123768920253, + 0.10938170960496879 + ], + "Solvent_smallest_replica_mixing": [ + 0.07763023493360573, + 0.07912032355915066, + 0.08316481294236602 + ] + }, + { + "ligand_a": "ejm_42", + "ligand_b": "ejm_54", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.023743982859214086, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.15259445469110478, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -8.360484288773174, + -8.454031987573893, + -8.185657727668064 + ], + "DGs_solvent": [ + -8.442909272751802, + -8.418426148937474, + -8.210070530903497 + ], + "Complex_smallest_mbar_overlaps": [ + 0.17560885920989688, + 0.17323639559229229, + 0.17380179833081966 + ], + "Complex_smallest_replica_mixing": [ + 0.10218978102189781, + 0.10824345146379044, + 0.11437718277066357 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1871862158170948, + 0.1865805956386249, + 0.18718278636927746 + ], + 
"Solvent_smallest_replica_mixing": [ + 0.1356691253951528, + 0.13832077502691065, + 0.13043478260869565 + ] + }, + { + "ligand_a": "ejm_31", + "ligand_b": "ejm_45", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 0.49160568900046187, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.0971072231071624, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 11.678225942983286, + 11.677525451818473, + 11.637780321936342 + ], + "DGs_solvent": [ + 11.038201975170825, + 11.240504738871525, + 11.240007935694361 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11381186226025179, + 0.10908855008528516, + 0.11603533611279043 + ], + "Complex_smallest_replica_mixing": [ + 0.04724964739069112, + 0.0364293659621802, + 0.042269187986651836 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.13962795637960182, + 0.13767816442093675, + 0.13972528037172557 + ], + "Solvent_smallest_replica_mixing": [ + 0.0788675429726997, + 0.06825075834175935, + 0.07381193124368049 + ] + }, + { + "ligand_a": "ejm_31", + "ligand_b": "ejm_44", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": 2.3959460144061424, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.2997203710866624, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -35.776093436207546, + -35.434665582008414, + -36.13418486552902 + ], + "DGs_solvent": [ + -38.18637415571591, + -38.06213775203912, + -38.28427001920838 + ], + "Complex_smallest_mbar_overlaps": [ + 0.06532762740879944, + 0.06611673723358033, + 0.07269076770057471 + ], + "Complex_smallest_replica_mixing": [ + 0.007982583454281567, + 0.014189886480908152, + 
0.018452982810920122 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.09391077249872196, + 0.0912788342687875, + 0.09003931739462981 + ], + "Solvent_smallest_replica_mixing": [ + 0.03943377148634985, + 0.041708796764408494, + 0.037538304392236974 + ] + }, + { + "ligand_a": "ejm_49", + "ligand_b": "ejm_50", + "system_group": "jacs_set", + "system_name": "tyk2", + "repeats": 3, + "DDG": { + "magnitude": -0.5524232311454753, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.25415370955825656, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -42.19690714784933, + -42.17718911197918, + -42.68689141840163 + ], + "DGs_solvent": [ + -41.91462882376703, + -41.6823354632367, + -41.806753697789986 + ], + "Complex_smallest_mbar_overlaps": [ + 0.05507848847353458, + 0.05023465533176064, + 0.04805787954625746 + ], + "Complex_smallest_replica_mixing": [ + 0.006777893639207508, + 0.007405515832482125, + 0.006954102920723227 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.0793072362419808, + 0.08145985410718955, + 0.08210049410365801 + ], + "Solvent_smallest_replica_mixing": [ + 0.016177957532861477, + 0.02275025278058645, + 0.019716885743174924 + ] + }, + { + "ligand_a": "2x", + "ligand_b": "2v", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.5610649694077994, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.15476100551094377, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -6.34494429736851, + -6.60766300709865, + -6.286112159480605 + ], + "DGs_solvent": [ + -7.88804477688721, + -7.9841319622821025, + -8.049737633001852 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1316054919239175, + 
0.12843221035579228, + 0.13342400217845768 + ], + "Complex_smallest_replica_mixing": [ + 0.0551061678463094, + 0.05643513789581205, + 0.056875631951466124 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.12824740624039507, + 0.1273139877010416, + 0.12547744556705193 + ], + "Solvent_smallest_replica_mixing": [ + 0.0551061678463094, + 0.054600606673407485, + 0.04802831142568251 + ] + }, + { + "ligand_a": "3fmk", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 2.3531386988635425, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.36381157681129445, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -21.930982740496482, + -21.308893015412934, + -21.09798957133443 + ], + "DGs_solvent": [ + -23.860931464166917, + -23.67811136215507, + -23.85823859751249 + ], + "Complex_smallest_mbar_overlaps": [ + 0.13691182703746504, + 0.15380374872015187, + 0.14257429741826305 + ], + "Complex_smallest_replica_mixing": [ + 0.06485468245425188, + 0.07355915065722952, + 0.07482305358948432 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1258931861705904, + 0.12744256288065467, + 0.1198099867984415 + ], + "Solvent_smallest_replica_mixing": [ + 0.04701718907987867, + 0.06193124368048534, + 0.040697674418604654 + ] + }, + { + "ligand_a": "2o", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 2.3920696315652066, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5005215279211973, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -81.88638205994715, + -81.46763597765408, + -82.0253008735319 + ], + "DGs_solvent": [ + -83.92147634908416, + 
-83.82778469677255, + -84.80626675997203 + ], + "Complex_smallest_mbar_overlaps": [ + 0.0705065095217306, + 0.06306680859533224, + 0.052956948459663186 + ], + "Complex_smallest_replica_mixing": [ + 0.012639029322548028, + 0.012125902992776058, + 0.008682328907048007 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.04550025354232517, + 0.04777658198040124, + 0.04821946920795428 + ], + "Solvent_smallest_replica_mixing": [ + 0.00884732052578362, + 0.008088978766430738, + 0.007077856420626896 + ] + }, + { + "ligand_a": "2y", + "ligand_b": "2v", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 2.0496458505648647, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5835605799207267, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 53.8909711552022, + 53.549739007072745, + 54.87149162165456 + ], + "DGs_solvent": [ + 51.84758921220694, + 52.068872345343095, + 52.246802674684886 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11959394856636371, + 0.11858459466759336, + 0.11585783085992414 + ], + "Complex_smallest_replica_mixing": [ + 0.03611971104231166, + 0.03538928210313448, + 0.03766430738119313 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1229722270415005, + 0.12707488500063863, + 0.12234808924055462 + ], + "Solvent_smallest_replica_mixing": [ + 0.045881126173096975, + 0.050050556117290194, + 0.03766430738119313 + ] + }, + { + "ligand_a": "2s", + "ligand_b": "2r", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.2898092332319564, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5347619226916517, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 
35.695153893081745, + 36.19157695645586, + 36.78508846738147 + ], + "DGs_solvent": [ + 35.79705688680006, + 35.660485069673626, + 36.34484966074954 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08876184109653515, + 0.1139108526784562, + 0.10997099117235347 + ], + "Complex_smallest_replica_mixing": [ + 0.01870576339737108, + 0.03387259858442872, + 0.024014155712841254 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1333006533934039, + 0.12008372800931835, + 0.14256896484602713 + ], + "Solvent_smallest_replica_mixing": [ + 0.04929221435793731, + 0.03575076608784474, + 0.04802831142568251 + ] + }, + { + "ligand_a": "2aa", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.8585962397199935, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.8033756463612287, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 50.12916015324384, + 49.59127503158945, + 48.57048936590872 + ], + "DGs_solvent": [ + 50.05723640253965, + 49.856040256697135, + 50.953436610665214 + ], + "Complex_smallest_mbar_overlaps": [ + 0.09997391564086458, + 0.12997867078283395, + 0.08373472862433128 + ], + "Complex_smallest_replica_mixing": [ + 0.020980788675429726, + 0.04044489383215369, + 0.020728008088978768 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.08880821030479624, + 0.12038653305140658, + 0.08782577228758608 + ], + "Solvent_smallest_replica_mixing": [ + 0.016683518705763397, + 0.05055611729019211, + 0.016683518705763397 + ] + }, + { + "ligand_a": "3flw", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.5529606809341203, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.45752269525274764, + "unit": 
"kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -30.377087708939655, + -29.436863252608635, + -30.22507350753656 + ], + "DGs_solvent": [ + -31.297764997800158, + -31.62722668783109, + -31.77291482625596 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11012561275594637, + 0.14355062938919508, + 0.12334069966280345 + ], + "Complex_smallest_replica_mixing": [ + 0.040091930541368746, + 0.05813953488372093, + 0.05131445904954499 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.11730947093440383, + 0.10438599980853329, + 0.11445685486524394 + ], + "Solvent_smallest_replica_mixing": [ + 0.04698672114402452, + 0.03640040444893832, + 0.035136501516683516 + ] + }, + { + "ligand_a": "2k", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -1.2155262405028182, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.05571817961839361, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -33.835159947371984, + -33.794365433499316, + -33.91319778043779 + ], + "DGs_solvent": [ + -32.59875433289383, + -32.66209447066243, + -32.63529563624438 + ], + "Complex_smallest_mbar_overlaps": [ + 0.11918982323298731, + 0.12380305733105947, + 0.12043181959557284 + ], + "Complex_smallest_replica_mixing": [ + 0.08245521601685986, + 0.07686414708886619, + 0.08670374115267948 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.11654397510808602, + 0.11618587369413037, + 0.11646164615589488 + ], + "Solvent_smallest_replica_mixing": [ + 0.08784473953013279, + 0.07684529828109202, + 0.09045881126173097 + ] + }, + { + "ligand_a": "3fln", + "ligand_b": "2e", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.11292214185462, + "unit": "kilocalories_per_mole", + ":is_custom:": true, 
+ "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.08559294699837033, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 2.2458826877997855, + 2.3089112575336435, + 2.433991614598979 + ], + "DGs_solvent": [ + 1.1702830267024562, + 1.254293474181807, + 1.2254426334842845 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08833007640108602, + 0.0885744750192168, + 0.08878137885684231 + ], + "Complex_smallest_replica_mixing": [ + 0.08719704952581665, + 0.09175935288169869, + 0.08796764408493428 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1065055284277504, + 0.10760624748550335, + 0.10706690168326397 + ], + "Solvent_smallest_replica_mixing": [ + 0.10111223458038422, + 0.11071789686552073, + 0.10566228513650151 + ] + }, + { + "ligand_a": "2u", + "ligand_b": "2k", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 3.1146459148287207, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.502192790993695, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -40.13239071990043, + -40.62478604191973, + -41.1894139642321 + ], + "DGs_solvent": [ + -43.44518020617255, + -43.772551120221635, + -44.072797144144225 + ], + "Complex_smallest_mbar_overlaps": [ + 0.005719198797247576, + 0.008464710237366582, + 0.009128714267458364 + ], + "Complex_smallest_replica_mixing": [ + 0.0, + 0.0, + 0.0 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.006283259434764158, + 0.007285671013638708, + 0.005224925304881056 + ], + "Solvent_smallest_replica_mixing": [ + 0.0, + 0.0005055611729019212, + 0.0005107252298263534 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2k", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.9434972459600459, + "unit": 
"kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.21794995130944414, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 82.874077621677, + 82.85525510600634, + 82.46503371149598 + ], + "DGs_solvent": [ + 81.8187872305234, + 81.90375524752986, + 81.64133222324598 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1589788594726974, + 0.16134309035122865, + 0.15303654433640412 + ], + "Complex_smallest_replica_mixing": [ + 0.09049544994944388, + 0.07103134479271991, + 0.0659757330637007 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.16331142623057543, + 0.1642617899067399, + 0.16070212413935084 + ], + "Solvent_smallest_replica_mixing": [ + 0.067926455566905, + 0.0659541188738269, + 0.08063700707785643 + ] + }, + { + "ligand_a": "2q", + "ligand_b": "2k", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 3.302033353626129, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.22889522664628187, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 59.14486730704456, + 59.17798593820995, + 59.474593547094514 + ], + "DGs_solvent": [ + 56.19951432823593, + 55.78309152230953, + 55.9087408809252 + ], + "Complex_smallest_mbar_overlaps": [ + 0.0182879716410348, + 0.020619218335067437, + 0.022748249415143775 + ], + "Complex_smallest_replica_mixing": [ + 0.0, + 0.0025278058645096056, + 0.0015166835187057635 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.0162873079171063, + 0.011066463807387825, + 0.014405531808085056 + ], + "Solvent_smallest_replica_mixing": [ + 0.001053740779768177, + 0.0005055611729019212, + 0.0020222446916076846 + ] + }, + { + "ligand_a": "2g", + "ligand_b": "2c", + "system_group": "jacs_set", + "system_name": 
"p38", + "repeats": 3, + "DDG": { + "magnitude": -0.991933070834403, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.19493385259693377, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 0.8347104293437511, + 0.5678376657292645, + 0.39402904797113963 + ], + "DGs_solvent": [ + 1.596487881781548, + 1.675709346506592, + 1.5001791272592246 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08261222546481212, + 0.0743940971583477, + 0.08001043318424017 + ], + "Complex_smallest_replica_mixing": [ + 0.021233569261880688, + 0.019716885743174924, + 0.01744186046511628 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.061116778749775384, + 0.06070230669329578, + 0.05852224491627549 + ], + "Solvent_smallest_replica_mixing": [ + 0.014661274014155713, + 0.01251276813074566, + 0.014408493427704751 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2t", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -2.5816132208357345, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.2289196627592395, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 19.162270427421415, + 18.884163365384122, + 19.424438796606967 + ], + "DGs_solvent": [ + 21.77989369161034, + 21.65210598728651, + 21.783712573022864 + ], + "Complex_smallest_mbar_overlaps": [ + 0.17537179017179927, + 0.1768463158962125, + 0.17899106432661108 + ], + "Complex_smallest_replica_mixing": [ + 0.14838220424671386, + 0.14585439838220424, + 0.14632277834525026 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1795891531240771, + 0.18178630088805764, + 0.18267107070929928 + ], + "Solvent_smallest_replica_mixing": [ + 0.12462082912032356, + 0.12386248736097068, + 
0.12967644084934277 + ] + }, + { + "ligand_a": "2bb", + "ligand_b": "2v", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.294862321491678, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.6117244609725263, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -145.68783975893388, + -146.95050158113366, + -145.84054069126964 + ], + "DGs_solvent": [ + -147.22836016180128, + -147.78668536371592, + -147.3484234702951 + ], + "Complex_smallest_mbar_overlaps": [ + 0.03647662207854935, + 0.033876682183452377, + 0.03696355433096745 + ], + "Complex_smallest_replica_mixing": [ + 0.00281214848143982, + 0.005308392315470172, + 0.003033367037411527 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.03756058615424141, + 0.0404006621468552, + 0.04667265002229325 + ], + "Solvent_smallest_replica_mixing": [ + 0.004044489383215369, + 0.0020222446916076846, + 0.00884732052578362 + ] + }, + { + "ligand_a": "2l", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.3494651289803485, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.07853412354051971, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -22.34257710039029, + -22.51253067319933, + -22.431607936745575 + ], + "DGs_solvent": [ + -23.830305309565308, + -23.7510059753539, + -23.753799812357048 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1072562952018195, + 0.1060597313394167, + 0.10677830305215166 + ], + "Complex_smallest_replica_mixing": [ + 0.10214504596527069, + 0.10743174924165824, + 0.09934277047522751 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10360869650912934, + 0.10358942345571326, 
+ 0.10367377465378276 + ], + "Solvent_smallest_replica_mixing": [ + 0.09525025536261492, + 0.09403437815975733, + 0.10342185903983657 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2ff", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.89255913242296, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.123001801406735, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 4.16678946485973, + 3.88543458031491, + 4.007052439077462 + ], + "DGs_solvent": [ + 4.942542112076134, + 4.943007721092815, + 4.851404048352033 + ], + "Complex_smallest_mbar_overlaps": [ + 0.14691349010242769, + 0.15980248573305475, + 0.15487107204964642 + ], + "Complex_smallest_replica_mixing": [ + 0.0429726996966633, + 0.0669050051072523, + 0.07379979570990806 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.17276710725184072, + 0.17179888338803603, + 0.17359289141258769 + ], + "Solvent_smallest_replica_mixing": [ + 0.07962588473205258, + 0.0801314459049545, + 0.08796764408493428 + ] + }, + { + "ligand_a": "2v", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -2.082288017056463, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.08227902264320527, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 44.27073908032549, + 44.19680159346747, + 44.09362697932192 + ], + "DGs_solvent": [ + 46.26446627075027, + 46.31893676935105, + 46.22462866418295 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10717043052220175, + 0.11202763536309211, + 0.11038523771920304 + ], + "Complex_smallest_replica_mixing": [ + 0.08417593528816987, + 0.08215369059656219, + 0.0884732052578362 + ], 
+ "Solvent_smallest_mbar_overlaps": [ + 0.10136315945686432, + 0.10167389766919438, + 0.10319513039958948 + ], + "Solvent_smallest_replica_mixing": [ + 0.08063700707785643, + 0.08063700707785643, + 0.07836198179979778 + ] + }, + { + "ligand_a": "2v", + "ligand_b": "2z", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -1.4087645570897696, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.36145131399991265, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -9.892874241458605, + -9.979276301080583, + -9.458539529173535 + ], + "DGs_solvent": [ + -8.055561687373942, + -8.312720462090859, + -8.736114250978613 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08279378037904597, + 0.08272448192906283, + 0.08427059688456004 + ], + "Complex_smallest_replica_mixing": [ + 0.03317811408614668, + 0.037278415015641296, + 0.033367037411526794 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10042413125744344, + 0.09674523521933583, + 0.09331560533491669 + ], + "Solvent_smallest_replica_mixing": [ + 0.027805864509605663, + 0.024519716885743174, + 0.03070175438596491 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2v", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.8120211456761055, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.49570894943721877, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -16.225430552872243, + -15.171875383610205, + -16.216428791044336 + ], + "DGs_solvent": [ + -17.73097394082993, + -17.664457321835656, + -17.654366901889517 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08820837926380458, + 0.09370609170991916, + 0.0832907512519269 + ], 
+ "Complex_smallest_replica_mixing": [ + 0.02578361981799798, + 0.02561837455830389, + 0.025025278058645097 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.08998308836114768, + 0.09130979136294703, + 0.09466409876563482 + ], + "Solvent_smallest_replica_mixing": [ + 0.026036400404448937, + 0.016511867905056758, + 0.025530839231547017 + ] + }, + { + "ligand_a": "2g", + "ligand_b": "2i", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.7656255644889072, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.08969102932576853, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -47.482797498508496, + -47.678458021282104, + -47.5502812168207 + ], + "DGs_solvent": [ + -48.326858196837584, + -48.29468632935921, + -48.3868689038812 + ], + "Complex_smallest_mbar_overlaps": [ + 0.16282462615228827, + 0.16244206433972475, + 0.16355033844778555 + ], + "Complex_smallest_replica_mixing": [ + 0.1435793731041456, + 0.15419615773508594, + 0.15874620829120323 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.17764719690889713, + 0.17902068001928023, + 0.1799486922495435 + ], + "Solvent_smallest_replica_mixing": [ + 0.17315470171890798, + 0.17696629213483145, + 0.16615067079463364 + ] + }, + { + "ligand_a": "2g", + "ligand_b": "2h", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.1319388232797394, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.04436919887828085, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -34.148206488067615, + -34.13969691597984, + -34.0822355885741 + ], + "DGs_solvent": [ + -35.24757366817628, + -35.21893890812331, + -35.29944288616117 + ], + 
"Complex_smallest_mbar_overlaps": [ + 0.19842712300230597, + 0.20051192289840367, + 0.20032215152508231 + ], + "Complex_smallest_replica_mixing": [ + 0.0922649140546006, + 0.09529828109201213, + 0.089737108190091 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.20104792877645017, + 0.19929432478488607, + 0.1949387655899394 + ], + "Solvent_smallest_replica_mixing": [ + 0.09175935288169869, + 0.10540950455005056, + 0.10869565217391304 + ] + }, + { + "ligand_a": "3fmh", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.17019125254089218, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 1.000789886472866, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -23.78037157820324, + -22.75466082653179, + -25.070688580680823 + ], + "DGs_solvent": [ + -23.2750956292334, + -23.764557670375837, + -24.055493928183935 + ], + "Complex_smallest_mbar_overlaps": [ + 0.12261051647875143, + 0.13823343532843832, + 0.11204538070718037 + ], + "Complex_smallest_replica_mixing": [ + 0.04505005561735261, + 0.053842264914054604, + 0.038097886540600666 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.12232294802863337, + 0.12119490016137932, + 0.12083691690922534 + ], + "Solvent_smallest_replica_mixing": [ + 0.047522750252780584, + 0.03993933265925177, + 0.05232558139534884 + ] + }, + { + "ligand_a": "2q", + "ligand_b": "2ee", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.4510333064765462, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.3870642543085633, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 29.780384886255423, + 30.200221877736652, + 
29.257748982184292 + ], + "DGs_solvent": [ + 28.286620040468055, + 28.257725395308263, + 28.34091039097041 + ], + "Complex_smallest_mbar_overlaps": [ + 0.04026946298556667, + 0.05743519638362556, + 0.04535769295200803 + ], + "Complex_smallest_replica_mixing": [ + 0.004044489383215369, + 0.01314459049544995, + 0.007077856420626896 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.042539488458461484, + 0.03550988882260297, + 0.039589007964083794 + ], + "Solvent_smallest_replica_mixing": [ + 0.00455005055611729, + 0.0032861476238624874, + 0.004044489383215369 + ] + }, + { + "ligand_a": "2x", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.44577667307589763, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.09022465065997028, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 35.287625756730314, + 35.3798139301352, + 35.16314425781312 + ], + "DGs_solvent": [ + 35.7073051575959, + 35.7448325820688, + 35.71577622424162 + ], + "Complex_smallest_mbar_overlaps": [ + 0.16062136915337166, + 0.16200490981529853, + 0.16051179314319758 + ], + "Complex_smallest_replica_mixing": [ + 0.10869565217391304, + 0.12714863498483317, + 0.12306501547987617 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.16459651249907142, + 0.16383747508686122, + 0.1635362202960148 + ], + "Solvent_smallest_replica_mixing": [ + 0.13157894736842105, + 0.1256319514661274, + 0.12765419615773507 + ] + }, + { + "ligand_a": "2o", + "ligand_b": "2n", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.2136687059875304, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.2992134951871598, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + 
"pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -3.0712804433461542, + -2.4828457332240768, + -2.432919573718693 + ], + "DGs_solvent": [ + -2.908805535900766, + -2.945905648932593, + -2.7733406834181547 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1240539560796338, + 0.1256639479017017, + 0.1388815803372485 + ], + "Complex_smallest_replica_mixing": [ + 0.03286147623862487, + 0.040697674418604654, + 0.04954499494438827 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.14025550612058116, + 0.13964002697143446, + 0.14173690146363263 + ], + "Solvent_smallest_replica_mixing": [ + 0.05434782608695652, + 0.052072800808897875, + 0.05308392315470172 + ] + }, + { + "ligand_a": "3fln", + "ligand_b": "2v", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 2.8204976323403628, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.20007425050096114, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -63.99099035424003, + -64.38939858917938, + -64.3349996948822 + ], + "DGs_solvent": [ + -66.95076853440165, + -67.04528015779668, + -67.18083284312439 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10673300420643717, + 0.09652528229647978, + 0.09686749406915138 + ], + "Complex_smallest_replica_mixing": [ + 0.034378159757330634, + 0.029877425944841676, + 0.0358948432760364 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.09947888708733973, + 0.10043296025938654, + 0.10514031365726278 + ], + "Solvent_smallest_replica_mixing": [ + 0.03210313447927199, + 0.03447395301327886, + 0.029575328614762385 + ] + }, + { + "ligand_a": "2f", + "ligand_b": "2e", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.008244785337959026, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + 
"DDG_uncertainty": { + "magnitude": 0.07658584575194212, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 2.4503060111676715, + 2.2794497796290383, + 2.2980189602593333 + ], + "DGs_solvent": [ + 2.354264008947156, + 2.3479937955360275, + 2.350251302586736 + ], + "Complex_smallest_mbar_overlaps": [ + 0.133101856074992, + 0.12826582371072895, + 0.13152501814760761 + ], + "Complex_smallest_replica_mixing": [ + 0.13321536905965622, + 0.11931243680485339, + 0.12799791449426487 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1223557696273997, + 0.12238991777700665, + 0.12241700966578777 + ], + "Solvent_smallest_replica_mixing": [ + 0.12740141557128412, + 0.11956521739130435, + 0.12259858442871588 + ] + }, + { + "ligand_a": "3flq", + "ligand_b": "2v", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 7.7681178560909245, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5506812494406506, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -196.14687024484002, + -197.4562761072638, + -196.75246009505236 + ], + "DGs_solvent": [ + -204.7355378260991, + -204.48528266397082, + -204.43913952535888 + ], + "Complex_smallest_mbar_overlaps": [ + 0.03274338907505308, + 0.025550503910294, + 0.025130126140115817 + ], + "Complex_smallest_replica_mixing": [ + 0.0025278058645096056, + 0.0025278058645096056, + 0.0005107252298263534 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.037813153026277004, + 0.041118665756337616, + 0.04138514579797249 + ], + "Solvent_smallest_replica_mixing": [ + 0.007077856420626896, + 0.005055611729019211, + 0.006066734074823054 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2p", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 
-1.1246833925592803, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5742164408289936, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 30.227211494724973, + 31.438234260291203, + 30.215609990329888 + ], + "DGs_solvent": [ + 31.729220468173626, + 31.738024636258114, + 31.787860818592172 + ], + "Complex_smallest_mbar_overlaps": [ + 0.05835661806578293, + 0.05495498511171304, + 0.05467981641159252 + ], + "Complex_smallest_replica_mixing": [ + 0.012133468149646108, + 0.005857294994675187, + 0.008088978766430738 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.04037229328045264, + 0.04285584027388233, + 0.04107730048055153 + ], + "Solvent_smallest_replica_mixing": [ + 0.007330637007077857, + 0.007481940144478844, + 0.009521218715995648 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2ee", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.8852394242798169, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.33493540787329246, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 46.97802669247554, + 47.17143696491097, + 46.44390773174861 + ], + "DGs_solvent": [ + 47.677456745694755, + 47.93535046189015, + 47.63628245438965 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08246326270238208, + 0.09171727738860609, + 0.09321368800539305 + ], + "Complex_smallest_replica_mixing": [ + 0.0260577915376677, + 0.024483133841131665, + 0.028563656147986942 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10076242244654485, + 0.09972621305624987, + 0.0986291962527788 + ], + "Solvent_smallest_replica_mixing": [ + 0.03134479271991911, + 0.0275530839231547, + 0.028058645096056625 + ] + }, + { + "ligand_a": "2p", + 
"ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.9352906416645936, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.33015977633312926, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 18.26315749225566, + 18.984647719763544, + 18.702625220779108 + ], + "DGs_solvent": [ + 17.510571738248274, + 17.814281450825575, + 17.81970531873069 + ], + "Complex_smallest_mbar_overlaps": [ + 0.05355479591822359, + 0.06023923090052676, + 0.06018001260231177 + ], + "Complex_smallest_replica_mixing": [ + 0.010111223458038422, + 0.011627906976744186, + 0.013902932254802831 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.04763974925036743, + 0.04637671643260123, + 0.04515891955973867 + ], + "Solvent_smallest_replica_mixing": [ + 0.010616784630940344, + 0.010469867211440245, + 0.007077856420626896 + ] + }, + { + "ligand_a": "2f", + "ligand_b": "3flz", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.6418389038140561, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.03221511038750755, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 0.8130363935499286, + 0.8735733403390727, + 0.8726810478075623 + ], + "DGs_solvent": [ + 1.4733083525484048, + 1.507190167607758, + 1.5043089729825692 + ], + "Complex_smallest_mbar_overlaps": [ + 0.09000095877570179, + 0.08911297467221231, + 0.08863154710267018 + ], + "Complex_smallest_replica_mixing": [ + 0.08211678832116788, + 0.09020902090209021, + 0.08872598584428716 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10739071802518425, + 0.10724155390590813, + 0.10764446083805117 + ], + 
"Solvent_smallest_replica_mixing": [ + 0.10288169868554095, + 0.10869565217391304, + 0.11046511627906977 + ] + }, + { + "ligand_a": "2gg", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -1.0070882773203014, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.14900468612140091, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -10.526953345474702, + -10.513873733358677, + -10.643582886444491 + ], + "DGs_solvent": [ + -9.74048603178427, + -9.508529296726904, + -9.41412980480579 + ], + "Complex_smallest_mbar_overlaps": [ + 0.09595243932461166, + 0.09701626387708251, + 0.09867696775959206 + ], + "Complex_smallest_replica_mixing": [ + 0.026036400404448937, + 0.02247191011235955, + 0.023735810113519093 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10570031962448566, + 0.11241271186815495, + 0.11288272828255076 + ], + "Solvent_smallest_replica_mixing": [ + 0.027300303336703743, + 0.03260869565217391, + 0.037917087967644085 + ] + }, + { + "ligand_a": "2t", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.816272439307344, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.21978345066677363, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 29.33350441670494, + 29.790235752316335, + 29.79821047801117 + ], + "DGs_solvent": [ + 27.777605675652058, + 27.854591732262122, + 27.840935921196234 + ], + "Complex_smallest_mbar_overlaps": [ + 0.14061143282678665, + 0.14019122790894534, + 0.13849995595636574 + ], + "Complex_smallest_replica_mixing": [ + 0.12308478038815117, + 0.1327098078867543, + 0.13776541961577352 + ], + 
"Solvent_smallest_mbar_overlaps": [ + 0.1342895251691245, + 0.1344712138313741, + 0.13145391909171364 + ], + "Solvent_smallest_replica_mixing": [ + 0.1006066734074823, + 0.11056105610561057, + 0.10692618806875633 + ] + }, + { + "ligand_a": "2h", + "ligand_b": "2i", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.33865029226697274, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.14129121831848232, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -10.246794192223922, + -10.321577057085724, + -10.054435155295705 + ], + "DGs_solvent": [ + -9.923539508798676, + -9.935021496168842, + -9.748294522836911 + ], + "Complex_smallest_mbar_overlaps": [ + 0.21584241114429792, + 0.21422123124675468, + 0.2086071130859728 + ], + "Complex_smallest_replica_mixing": [ + 0.17824310520939735, + 0.1878159757330637, + 0.1710929519918284 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.22584110258979875, + 0.22425358708368043, + 0.2256395620832567 + ], + "Solvent_smallest_replica_mixing": [ + 0.20045500505561173, + 0.1936299292214358, + 0.19590495449949444 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2gg", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.5596926107771054, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.3479402676176178, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 59.148358749576836, + 59.97967934078981, + 59.475059945589706 + ], + "DGs_solvent": [ + 58.95599059928364, + 59.060959398808386, + 58.907070205532996 + ], + "Complex_smallest_mbar_overlaps": [ + 0.09728504695255757, + 0.09441146045145489, + 0.09650982635728102 + ], + 
"Complex_smallest_replica_mixing": [ + 0.022244691607684528, + 0.02578361981799798, + 0.01693629929221436 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.09807516664142407, + 0.09750653064513587, + 0.09412756812443633 + ], + "Solvent_smallest_replica_mixing": [ + 0.027805864509605663, + 0.028311425682507583, + 0.02578361981799798 + ] + }, + { + "ligand_a": "2n", + "ligand_b": "2r", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.2103128576225206, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.30638245254173946, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -3.8880071139613444, + -4.57631487453112, + -3.992123130685379 + ], + "DGs_solvent": [ + -4.297796174964481, + -4.390191389337642, + -4.39939612774328 + ], + "Complex_smallest_mbar_overlaps": [ + 0.14360693554321288, + 0.13576031324145255, + 0.13381343218307346 + ], + "Complex_smallest_replica_mixing": [ + 0.06340378197997776, + 0.05675954592363261, + 0.04575328614762386 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.13531974184300477, + 0.13459936802590508, + 0.1333065640933696 + ], + "Solvent_smallest_replica_mixing": [ + 0.04800817160367722, + 0.04592363261093911, + 0.06268958543983821 + ] + }, + { + "ligand_a": "2n", + "ligand_b": "2s", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.42395259273920516, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.4005874404193791, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -31.66364623758417, + -31.77106318509678, + -32.12542239332285 + ], + "DGs_solvent": [ + -32.687521176737924, + -31.835333911940342, + -32.30913450554315 + ], + 
"Complex_smallest_mbar_overlaps": [ + 0.14362609505657245, + 0.11451890326317474, + 0.14126108878227422 + ], + "Complex_smallest_replica_mixing": [ + 0.05333670374115268, + 0.036905965621840245, + 0.05055611729019211 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.151422252389513, + 0.15452128444338523, + 0.15339472193056625 + ], + "Solvent_smallest_replica_mixing": [ + 0.059403437815975735, + 0.06648129423660262, + 0.05669050051072523 + ] + }, + { + "ligand_a": "2j", + "ligand_b": "2l", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -1.8599701392773937, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.18946081404759263, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 71.61191436949022, + 71.24114239618838, + 71.62318360277035 + ], + "DGs_solvent": [ + 73.41984401289169, + 73.2621440150205, + 73.37416275836897 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1678081508088681, + 0.17260007817146267, + 0.17183616752640993 + ], + "Complex_smallest_replica_mixing": [ + 0.15925176946410516, + 0.15557275541795665, + 0.15293225480283115 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.183883877172252, + 0.18151891677531815, + 0.18330966459566742 + ], + "Solvent_smallest_replica_mixing": [ + 0.12082912032355915, + 0.11558307533539731, + 0.1243680485338726 + ] + }, + { + "ligand_a": "2k", + "ligand_b": "2m", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -2.7031895648360003, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.04995896547285099, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 7.844849563289203, + 7.724585646096473, + 7.7676290383623225 + ], 
+ "DGs_solvent": [ + 10.486534308906672, + 10.484128143411292, + 10.475970489938035 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10532518577015855, + 0.10103227418093964, + 0.10214082620402833 + ], + "Complex_smallest_replica_mixing": [ + 0.03232533889468196, + 0.03083923154701719, + 0.03571428571428571 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.09874069850740082, + 0.10058492324781172, + 0.10000444977057138 + ], + "Solvent_smallest_replica_mixing": [ + 0.028816986855409503, + 0.036905965621840245, + 0.03134479271991911 + ] + }, + { + "ligand_a": "2c", + "ligand_b": "3flz", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.5158988319674405, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.2850508836539041, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -0.08449550525207378, + -0.23413778742540928, + -0.7039940560681466 + ], + "DGs_solvent": [ + -1.7574652826689614, + -2.0064037198855025, + -1.8064548420934867 + ], + "Complex_smallest_mbar_overlaps": [ + 0.09493899420676687, + 0.08608094086560737, + 0.08559133510256914 + ], + "Complex_smallest_replica_mixing": [ + 0.025278058645096056, + 0.021033013844515443, + 0.02571041948579161 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.05988698527481441, + 0.056405348658681895, + 0.06168639121814175 + ], + "Solvent_smallest_replica_mixing": [ + 0.011627906976744186, + 0.008594539939332659, + 0.01557711950970378 + ] + }, + { + "ligand_a": "2z", + "ligand_b": "3fly", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.2900713000325084, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 1.0093680161189547, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + 
"pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 22.57995565266321, + 21.21425661148975, + 21.728794840541894 + ], + "DGs_solvent": [ + 21.051181589494472, + 21.23076354056162, + 19.37084807454124 + ], + "Complex_smallest_mbar_overlaps": [ + 0.10198158347231295, + 0.11779626990385111, + 0.11409217368181884 + ], + "Complex_smallest_replica_mixing": [ + 0.06513083048919227, + 0.04120323559150657, + 0.06622851365015167 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.14067634362698161, + 0.14136370797084533, + 0.13750068245600816 + ], + "Solvent_smallest_replica_mixing": [ + 0.05358948432760364, + 0.06900910010111223, + 0.08291203235591507 + ] + }, + { + "ligand_a": "2u", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 2.3659953512711382, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.6625942186156518, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -72.98064194558562, + -71.75648162583873, + -73.08907113647442 + ], + "DGs_solvent": [ + -74.71168104128634, + -75.3490771649008, + -74.86342255552505 + ], + "Complex_smallest_mbar_overlaps": [ + 0.021875917527799573, + 0.019245028434510935, + 0.021101414251556608 + ], + "Complex_smallest_replica_mixing": [ + 0.0010111223458038423, + 0.0017064846416382253, + 0.0020222446916076846 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.021008255004268568, + 0.0168361993283318, + 0.021269924020818443 + ], + "Solvent_smallest_replica_mixing": [ + 0.0015321756894790602, + 0.0020855057351407717, + 0.0005055611729019212 + ] + }, + { + "ligand_a": "2g", + "ligand_b": "3flz", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.7441684233686107, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + 
"DDG_uncertainty": { + "magnitude": 0.05847724532123635, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 0.5606329734749874, + 0.43604510962689447, + 0.4964447140208148 + ], + "DGs_solvent": [ + -0.2823843834417408, + -0.24522553999496724, + -0.21177254954642713 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08701027227659307, + 0.08708465994862354, + 0.08718830124402377 + ], + "Complex_smallest_replica_mixing": [ + 0.08316481294236602, + 0.08707865168539326, + 0.09201213346814964 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.10821277326600037, + 0.10844806963778332, + 0.10855362203925409 + ], + "Solvent_smallest_replica_mixing": [ + 0.10439838220424671, + 0.11274014155712841, + 0.10945399393326592 + ] + }, + { + "ligand_a": "2aa", + "ligand_b": "3flq", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.6343180728372886, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.7754860263915238, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 173.50513952445394, + 172.2207822765914, + 173.87919562622804 + ], + "DGs_solvent": [ + 172.20764886468808, + 172.96719147956694, + 172.52732286450646 + ], + "Complex_smallest_mbar_overlaps": [ + 0.02540604991291027, + 0.0356571589582112, + 0.034771888478849974 + ], + "Complex_smallest_replica_mixing": [ + 0.0025278058645096056, + 0.003033367037411527, + 0.0025278058645096056 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.03313563392218073, + 0.05562981550728482, + 0.047768347783859785 + ], + "Solvent_smallest_replica_mixing": [ + 0.0016501650165016502, + 0.009605662285136502, + 0.005362614913176711 + ] + }, + { + "ligand_a": "2g", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 
0.24355532909243882, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.020771742068728907, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -0.031082372187482374, + -0.0007454682310518203, + -0.04905003295647761 + ], + "DGs_solvent": [ + -0.2721141264868519, + -0.26270084568191143, + -0.2767288884835649 + ], + "Complex_smallest_mbar_overlaps": [ + 0.09368289583299645, + 0.09499693421341417, + 0.09504351250631302 + ], + "Complex_smallest_replica_mixing": [ + 0.09482758620689655, + 0.09464902186421174, + 0.09956942949407965 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1061487788653746, + 0.10599238990094036, + 0.10615210383650002 + ], + "Solvent_smallest_replica_mixing": [ + 0.10465116279069768, + 0.10566228513650151, + 0.10717896865520728 + ] + }, + { + "ligand_a": "2ff", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.16733380944066312, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.13268756296587103, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 44.28310949928939, + 44.08978230425552, + 44.41080113619733 + ], + "DGs_solvent": [ + 44.443789361425495, + 44.43157646349521, + 44.41032854314354 + ], + "Complex_smallest_mbar_overlaps": [ + 0.15238647074547965, + 0.149532484295647, + 0.1510994350634236 + ], + "Complex_smallest_replica_mixing": [ + 0.07204246713852376, + 0.07352941176470588, + 0.06774519716885744 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.16626837925929416, + 0.16501689888936769, + 0.16616632229424247 + ], + "Solvent_smallest_replica_mixing": [ + 0.09934277047522751, + 0.09494324045407637, + 0.09150657229524772 + ] + }, + { + "ligand_a": "2aa", + 
"ligand_b": "3flw", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": -0.946018225104055, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.5204587957618652, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 53.01838639721525, + 51.93152887439266, + 52.13408897093109 + ], + "DGs_solvent": [ + 52.99720207668792, + 53.45122340006204, + 53.47363344110124 + ], + "Complex_smallest_mbar_overlaps": [ + 0.07070548660053211, + 0.08737785725201297, + 0.052200562148462026 + ], + "Complex_smallest_replica_mixing": [ + 0.02102803738317757, + 0.021961184882533197, + 0.007916241062308479 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.06533705153188253, + 0.06811759819748536, + 0.060268999776895746 + ], + "Solvent_smallest_replica_mixing": [ + 0.012639029322548028, + 0.015419615773508595, + 0.00910010111223458 + ] + }, + { + "ligand_a": "2ee", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 0.42221202536986135, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.3134227877392276, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + 1.7066008472588112, + 2.2283377950104994, + 2.4347185180928066 + ], + "DGs_solvent": [ + 1.787630281408768, + 1.6277491698734852, + 1.6876416329702786 + ], + "Complex_smallest_mbar_overlaps": [ + 0.08911706971671998, + 0.09680813588557652, + 0.09536571855108487 + ], + "Complex_smallest_replica_mixing": [ + 0.019211324570273004, + 0.02300303336703741, + 0.02704752275025278 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.11029268603292688, + 0.10988820046360007, + 0.11648113516319511 + ], + 
"Solvent_smallest_replica_mixing": [ + 0.03741152679474216, + 0.03488372093023256, + 0.032355915065722954 + ] + }, + { + "ligand_a": "2n", + "ligand_b": "3fln", + "system_group": "jacs_set", + "system_name": "p38", + "repeats": 3, + "DDG": { + "magnitude": 1.5375910719088495, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.12903156579787325, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -28.785797380214298, + -29.070091011211538, + -28.838709232486686 + ], + "DGs_solvent": [ + -30.392297699350312, + -30.483899498544318, + -30.431173641744447 + ], + "Complex_smallest_mbar_overlaps": [ + 0.1561638735794862, + 0.15303804457098705, + 0.15700556047939676 + ], + "Complex_smallest_replica_mixing": [ + 0.06774519716885744, + 0.07750301568154402, + 0.08315392895586653 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.1443289636456939, + 0.1426164620661198, + 0.14361441153485605 + ], + "Solvent_smallest_replica_mixing": [ + 0.0704070407040704, + 0.06648129423660262, + 0.06511746680286006 + ] + }, + { + "ligand_a": "17charg", + "ligand_b": "18", + "system_group": "charge_annihilation_set", + "system_name": "irak4_s2", + "repeats": 3, + "DDG": { + "magnitude": -1.9372416484584178, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.04856687578618938, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -63.93511307268735, + -63.923172804394014, + -63.96065970051045 + ], + "DGs_solvent": [ + -61.963891650164534, + -61.976290484242064, + -62.06703849780995 + ], + "Complex_smallest_mbar_overlaps": [ + 0.19958229830280627, + 0.20035168944620024, + 0.19943791085724064 + ], + "Complex_smallest_replica_mixing": [ + 0.13398838767306834, + 0.13591635916359163, + 
0.13392391870766024 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.19763453924636895, + 0.1972246864525244, + 0.19787666035336005 + ], + "Solvent_smallest_replica_mixing": [ + 0.146122758272291, + 0.1402500631472594, + 0.14037635766607728 + ] + }, + { + "ligand_a": "17charg", + "ligand_b": "14", + "system_group": "charge_annihilation_set", + "system_name": "irak4_s2", + "repeats": 3, + "DDG": { + "magnitude": -1.5855575662381938, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.10365831475160588, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -103.83377139243893, + -103.7157494161017, + -103.5919299729964 + ], + "DGs_solvent": [ + -102.17281658014271, + -102.10796596263567, + -102.10399554004411 + ], + "Complex_smallest_mbar_overlaps": [ + 0.20973569947016632, + 0.20861641871862596, + 0.21113770666940232 + ], + "Complex_smallest_replica_mixing": [ + 0.15599705665930833, + 0.1707026597856292, + 0.1574614422676115 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.20900382840303483, + 0.20924076403327505, + 0.20959892464022056 + ], + "Solvent_smallest_replica_mixing": [ + 0.17865429234338748, + 0.17391593508911946, + 0.18274816872947713 + ] + }, + { + "ligand_a": "6", + "ligand_b": "5charg", + "system_group": "charge_annihilation_set", + "system_name": "irak4_s2", + "repeats": 3, + "DDG": { + "magnitude": -1.7644282196229923, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.2612321514355722, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -15.191000217579752, + -14.844901100150738, + -15.365119220137847 + ], + "DGs_solvent": [ + -13.177348249866528, + -13.397197398290475, + -13.533190230842363 + ], + "Complex_smallest_mbar_overlaps": [ + 
0.16094949504190872, + 0.16167317536164355, + 0.161390964464272 + ], + "Complex_smallest_replica_mixing": [ + 0.07605576055760557, + 0.07244081360453485, + 0.06473643709118893 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.17018239939806348, + 0.1705214463555485, + 0.1765041282329322 + ], + "Solvent_smallest_replica_mixing": [ + 0.07299823187673656, + 0.07852768563409034, + 0.07956554685526648 + ] + }, + { + "ligand_a": "19charg", + "ligand_b": "27", + "system_group": "charge_annihilation_set", + "system_name": "irak4_s3", + "repeats": 3, + "DDG": { + "magnitude": 0.9319044745422005, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.08955729679735826, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -62.9795073505689, + -63.18245269976545, + -63.11612203391767 + ], + "DGs_solvent": [ + -63.98497407145467, + -64.05644546673825, + -64.03237596968569 + ], + "Complex_smallest_mbar_overlaps": [ + 0.22421724424089906, + 0.22318334343882593, + 0.22452215658896355 + ], + "Complex_smallest_replica_mixing": [ + 0.1728486646884273, + 0.17844677137870854, + 0.17726135889954148 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.21897308933328358, + 0.21965913079418262, + 0.2200542620136834 + ], + "Solvent_smallest_replica_mixing": [ + 0.19064157615559485, + 0.18565294266228846, + 0.1898873316845287 + ] + }, + { + "ligand_a": "28charg", + "ligand_b": "27", + "system_group": "charge_annihilation_set", + "system_name": "irak4_s3", + "repeats": 3, + "DDG": { + "magnitude": 0.3594317127982123, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.19405630055776088, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -53.47035224754216, + -53.03433515315095, + 
-53.09362923358137 + ], + "DGs_solvent": [ + -53.58048009869361, + -53.53350368554152, + -53.562627988433974 + ], + "Complex_smallest_mbar_overlaps": [ + 0.2154101933644808, + 0.2189790109622285, + 0.21657053648920255 + ], + "Complex_smallest_replica_mixing": [ + 0.15250105977108944, + 0.15138309965896174, + 0.13813989839781166 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.21509290243540863, + 0.21483008902978687, + 0.21335978741727524 + ], + "Solvent_smallest_replica_mixing": [ + 0.16304622379388733, + 0.15729982318767366, + 0.15136398080323313 + ] + }, + { + "ligand_a": "21", + "ligand_b": "27chargR", + "system_group": "charge_annihilation_set", + "system_name": "egfr", + "repeats": 3, + "DDG": { + "magnitude": 2.651682566162876, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.15286317169811045, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -93.748746155798, + -93.38558504874081, + -93.56651798083234 + ], + "DGs_solvent": [ + -96.23741915094435, + -96.25183437314833, + -96.16664335976705 + ], + "Complex_smallest_mbar_overlaps": [ + 0.20921215681999647, + 0.2072057022919036, + 0.2045842750861124 + ], + "Complex_smallest_replica_mixing": [ + 0.11737604212373848, + 0.12420304070622855, + 0.11647866601275135 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.20772780272574662, + 0.20860643996709452, + 0.20844668729904775 + ], + "Solvent_smallest_replica_mixing": [ + 0.1390502652184895, + 0.137536578877361, + 0.13684011113917657 + ] + }, + { + "ligand_a": "21", + "ligand_b": "30charg", + "system_group": "charge_annihilation_set", + "system_name": "egfr", + "repeats": 3, + "DDG": { + "magnitude": 2.5429536536557436, + "unit": "kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DDG_uncertainty": { + "magnitude": 0.08891180568534318, + "unit": 
"kilocalories_per_mole", + ":is_custom:": true, + "pint_unit_registry": "openff_units" + }, + "DGs_complex": [ + -107.38693197070442, + -107.5335235811763, + -107.34603009577637 + ], + "DGs_solvent": [ + -109.99643972625063, + -109.98690672329289, + -109.91200015908082 + ], + "Complex_smallest_mbar_overlaps": [ + 0.2024093830390034, + 0.2040563628169898, + 0.20305075892741042 + ], + "Complex_smallest_replica_mixing": [ + 0.10980490245122561, + 0.10141596175064362, + 0.10997559487492374 + ], + "Solvent_smallest_mbar_overlaps": [ + 0.20527006477192627, + 0.2052806822994185, + 0.20577167663361984 + ], + "Solvent_smallest_replica_mixing": [ + 0.13317757009345793, + 0.12503157362970446, + 0.12471583733265976 + ] + } + ] +} \ No newline at end of file diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml new file mode 100644 index 0000000..e5418f0 --- /dev/null +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -0,0 +1,32 @@ +# REQUIRED: unique, kebab-case identifier for this submission +submission_id: 2026-03-18-openmm-840-qa-testing + +# REQUIRED: short descriptive title +title: QA runs for OpenMM 8.4.0 using some JACS and Charge changing edges + +# REQUIRED: list of contributing authors (name, affiliation; ORCID optional) +authors: + - name: Joshua Horton + affiliation: OpenFE + +# REQUIRED: publication/submission date (ISO 8601) +date: 2026-03-18 + +# REQUIRED: OpenFE version used to produce the gathered reports +openfe_version: 1.8.0 + +# Recommended but useful: force field and partial charge descriptor +forcefield: openff-2.3.0 +partial_charges: nagl_openff-gnn-am1bcc-1.0.0.pt + +# REQUIRED: long-term archive pointer (at least doi or url) +archive: + doi: + archive_provider: + +# REQUIRED: license for the submission (e.g. 
CC-BY-4.0) +license: CC-BY-4.0 + +# RECOMMENDED / OPTIONAL metadata +summary: "RBFE runs for the TYK2 and P38 JACS sets and Charge changing edges from the egfr, irak_s2/s3 edges charge annihilation edges." +keywords: [charge_change, rbfe, benchmark, openfe, openmm-840] \ No newline at end of file From 4015e9b9764300f0d58dd6b5f987e3a65e329922 Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Wed, 18 Mar 2026 13:12:19 +0000 Subject: [PATCH 02/24] add example results generation script --- .../_example_generate_results_local.py | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 openfe_benchmarks/scripts/_example_generate_results_local.py diff --git a/openfe_benchmarks/scripts/_example_generate_results_local.py b/openfe_benchmarks/scripts/_example_generate_results_local.py new file mode 100644 index 0000000..e54eaca --- /dev/null +++ b/openfe_benchmarks/scripts/_example_generate_results_local.py @@ -0,0 +1,177 @@ +import json +import click +from gufe.tokenization import JSON_HANDLER +from gufe import AlchemicalNetwork, ProteinComponent +from openff.units import unit +import pathlib +from cinnabar import FEMap +from collections import defaultdict +import numpy as np +import zstandard as zstd +from openfecli.commands.gather import _get_names, _get_type + +def _get_simulation_key(result: dict) -> tuple[tuple[str, str], str]: + lig_a_name, lig_b_name = _get_names(result) + phase = _get_type(result) + return (lig_a_name, lig_b_name), phase + + +@click.command() +@click.option( + "--results_dir", + help="The directory containing the transformation results", + multiple=True, + type=click.Path(exists=True, dir_okay=True, file_okay=False, path_type=pathlib.Path) +) +@click.option( + "--network", help="The path to the alchemical network JSON file", + type=click.Path(exists=True, dir_okay=False, file_okay=True, path_type=pathlib.Path) +) +@click.option( + "--output_dir", help="The directory to write the archive to", + type=click.Path(exists=True, 
dir_okay=True, file_okay=False, path_type=pathlib.Path) +) +def main(results_dir, network, output_dir): + """ + Gather all the results for the transformations in the network and write the DDG/DG to a json file with units and metadata which can be used + for down stream analysis. + """ + # load the network to make sure all transformations are present in the results + network = AlchemicalNetwork.from_json(network.as_posix()) + # make a list of transformation keys using (ligand_a.name, ligand_b.name, phase) + transformations_to_run = set() + for transformation in network.edges: + ligand_a_name = transformation.mapping.componentA.name + ligand_b_name = transformation.mapping.componentB.name + # get the phase + if transformation.stateA.contains(ProteinComponent): + phase = "complex" + else: + phase = "solvent" + transformations_to_run.add((ligand_a_name, ligand_b_name, phase)) + + + # make a key using the (lig_a.name, lig_b.name) + raw_results = defaultdict(list) + for result_dir in results_dir: + # search for the results json files + for result_file in result_dir.glob("*.json.zst"): + with open(result_file, "rb") as f: + dctx = zstd.ZstdDecompressor() + with dctx.stream_reader(f) as reader: + result = json.load(reader, cls=JSON_HANDLER.decoder) + # make a key for this result + key, phase = _get_simulation_key(result) + raw_results[key].append((phase, result)) + + # now loop over the raw results and extract the ddg/dg and metadata + gathered_results = {"DG": [], "DDG": []} + # first workout the system group and system name which should be stored on all egdes + transformation = list(network.edges)[0] + mapping_annotations = transformation.mapping.annotations + system_group = mapping_annotations["system_group"] + system_name = mapping_annotations["system_name"] + + # check that all simulations in the alchemical network have an associated result + found_results = set() + for key, results in raw_results.items(): + lig_a_name, lig_b_name = key + for phase, result in results: 
+ key = (lig_a_name, lig_b_name, phase) + if key not in transformations_to_run: + raise ValueError(f"Found results for transformation {key} which is not in the alchemical network") + found_results.add((lig_a_name, lig_b_name, phase)) + + # now check that we have results for all transformations + missing_transformations = transformations_to_run - found_results + if missing_transformations: + raise ValueError(f"Missing results for transformations: {missing_transformations}") + + # also build a femap so we can get DGs if possible + fe_map = FEMap() + for key, results in raw_results.items(): + lig_a_name, lig_b_name = key + entry_data = { + "ligand_a": lig_a_name, + "ligand_b": lig_b_name, + "system_group": system_group, + "system_name": system_name, + # assuming this is the RBFE protocol we should have the same number of repeats for each phase + "repeats": len(results) // 2, + } + # group the results by phase + complex_results = [result for phase, result in results if phase == "complex"] + solvent_results = [result for phase, result in results if phase == "solvent"] + assert len(complex_results) == len(solvent_results), f"Found different number of complex and solvent results for {key}" + # get the estimated values for each repeat + complex_data = [result["estimate"].m_as(unit.kilocalories_per_mole) for result in complex_results] + complex_dg = np.mean(complex_data) * unit.kilocalories_per_mole + complex_dg_uncertainty = np.std(complex_data) * unit.kilocalories_per_mole + # get the solvent data + solvent_data = [result["estimate"].m_as(unit.kilocalories_per_mole) for result in solvent_results] + solvent_dg = np.mean(solvent_data) * unit.kilocalories_per_mole + solvent_dg_uncertainty = np.std(solvent_data) * unit.kilocalories_per_mole + + # get the combinded ddg and uncertainty + entry_data["DDG"] = complex_dg - solvent_dg + entry_data["DDG_uncertainty"] = np.sqrt(complex_dg_uncertainty**2 + solvent_dg_uncertainty**2) + + # add the raw values for debugging? 
+ entry_data["DGs_complex"] = complex_data + entry_data["DGs_solvent"] = solvent_data + + # calculate the smallest off diagonal element of the mabr overlap matrix and the replica mixing matrix for each result + for phase_results, label in zip([complex_results, solvent_results], ["Complex", "Solvent"]): + mbar_overlap_elements = [] + replica_mixing_elements = [] + for phase_result in phase_results: + # get the key for this protocol result + result_key = list(phase_result["protocol_result"]["data"].keys())[0] + # calculate the smallest overlap for the mbar overlap matrix + overlap_matrix = phase_result["protocol_result"]["data"][result_key][0]["outputs"]["unit_mbar_overlap"]["matrix"] + mbar_overlap_elements.append(np.diagonal(overlap_matrix, offset=1).min()) + + # calculate the smallest off diagonal element of the replica mixing matrix + mixing_matrix = phase_result["protocol_result"]["data"][result_key][0]["outputs"]["replica_exchange_statistics"]["matrix"] + replica_mixing_elements.append(np.diagonal(mixing_matrix, offset=1).min()) + + entry_data[f"{label}_smallest_mbar_overlaps"] = mbar_overlap_elements + entry_data[f"{label}_smallest_replica_mixing"] = replica_mixing_elements + + gathered_results["DDG"].append(entry_data) + + # also add the DDG data to the FEMAP + fe_map.add_relative_calculation( + labelA=lig_a_name, + labelB=lig_b_name, + value=entry_data["DDG"], + uncertainty=entry_data["DDG_uncertainty"] + ) + + # check if the network is connected and we can calculate the DGs + if fe_map.check_weakly_connected(): + # generate the absolute values for the map centered around zero these should be shifted when comparing with experiment + fe_map.generate_absolute_values() + abs_df = fe_map.get_absolute_dataframe() + for _, row in abs_df.iterrows(): + entry_data = { + "ligand": row["label"], + "DG": row["DG (kcal/mol)"] * unit.kilocalories_per_mole, + "DG_uncertainty": row["uncertainty (kcal/mol)"] * unit.kilocalories_per_mole, + "system_group": system_group, + 
"system_name": system_name, + "source": row["source"], + } + gathered_results["DG"].append(entry_data) + + # write out the data to a json file + output_file = output_dir / "computaional_results.json" + with open(output_file, "w") as w: + json.dump(gathered_results, w, cls=JSON_HANDLER.encoder, indent=4) + +if __name__ == "__main__": + main() + + + + From 13f4acc0d25f29fd1eff1dcc99d0fc9467336745 Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Thu, 19 Mar 2026 16:26:16 +0000 Subject: [PATCH 03/24] add results helper, add plotting example script and update submission.yaml --- openfe_benchmarks/data/_results_utils.py | 69 +++++++++++++++++++ .../submission.yaml | 26 +++++-- .../scripts/_example_plot_rbfe.py | 41 +++++++++++ 3 files changed, 130 insertions(+), 6 deletions(-) create mode 100644 openfe_benchmarks/data/_results_utils.py create mode 100644 openfe_benchmarks/scripts/_example_plot_rbfe.py diff --git a/openfe_benchmarks/data/_results_utils.py b/openfe_benchmarks/data/_results_utils.py new file mode 100644 index 0000000..9c31dfb --- /dev/null +++ b/openfe_benchmarks/data/_results_utils.py @@ -0,0 +1,69 @@ +from cinnabar import FEMap +from openfe_benchmarks.data._benchmark_systems import get_benchmark_data_system +from collections import defaultdict +import json +from gufe.tokenization import JSON_HANDLER +from openff.units import unit + +def build_femap_from_relative_results(results: list[dict]) -> dict[tuple[str, str], FEMap]: + """ + Build FEMaps for each of the unique combinations of system_group and system_name in the DDG results and add experimental data + for each of the ligands present in the DDG results. 
+ + Parameters + ---------- + results: list[dict] + A list of relative binding free energy estimates which should include at least the following entries: + - ligand_a: str + - ligand_b: str + - system_group: str + - system_name: str + - DDG: Quantity + - DDG_uncertainty: Quantity + + Returns + ------- + dict[tuple[str, str], FEMap] + A dictionary mapping each unique combination of system_group and system_name to an FEMap with calculated and experimental reference data. + """ + # get the unique combinations of system_group and system_name + results_by_system_key = defaultdict(list) + for result in results: + key = (result["system_group"], result["system_name"]) + results_by_system_key[key].append(result) + + femaps_by_system_key = {} + unique_ligands = set() + for system_key, system_results in results_by_system_key.items(): + system_group, system_name = system_key + benchmark_data = get_benchmark_data_system(system_group, system_name) + femap = FEMap() + for result in system_results: + ligand_a = result["ligand_a"] + ligand_b = result["ligand_b"] + # record the ligands added to the femap + unique_ligands.update([ligand_a, ligand_b]) + ddg = result["DDG"] + ddg_uncertainty = result["DDG_uncertainty"] + femap.add_relative_calculation( + labelA=ligand_a, + labelB=ligand_b, + value=ddg, + uncertainty=ddg_uncertainty, + ) + + # add experimental data for each of the ligands in the results + experimental_file = benchmark_data.reference_data["experimental_binding_data"] + experimental_data = json.load(open(experimental_file), cls=JSON_HANDLER.decoder) + + for ligand in unique_ligands: + exp_data = experimental_data.get(ligand, None) + if exp_data is not None: + femap.add_experimental_measurement( + label=ligand, + value=exp_data["dg"], + uncertainty=exp_data.get("uncertainty", 0 * unit.kilocalorie_per_mole), + ) + + femaps_by_system_key[system_key] = femap + return femaps_by_system_key \ No newline at end of file diff --git 
a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index e5418f0..e974466 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -2,22 +2,29 @@ submission_id: 2026-03-18-openmm-840-qa-testing # REQUIRED: short descriptive title -title: QA runs for OpenMM 8.4.0 using some JACS and Charge changing edges +title: QA runs for OpenMM 8.4.0 using JACS (P38 & TYK2) and Charge changing edges. +# REQUIRED: short descriptive summary (1-2 sentences) +summary: "RBFE runs for the TYK2 and P38 JACS sets and Charge changing edges from the egfr, irak_s2/s3 charge annihilation sets. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be +compared to other complete runs due to the missing edges." +# REQUIRED: list of keywords (from the controlled vocabulary if possible) +keywords: [charge_change, rbfe, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: - name: Joshua Horton - affiliation: OpenFE # REQUIRED: publication/submission date (ISO 8601) date: 2026-03-18 # REQUIRED: OpenFE version used to produce the gathered reports openfe_version: 1.8.0 +openmm_version: 8.4.0 +openff_toolkit_version: -# Recommended but useful: force field and partial charge descriptor +# Recommended but useful: force field, partial charge and network descriptors forcefield: openff-2.3.0 partial_charges: nagl_openff-gnn-am1bcc-1.0.0.pt +network: industry_benchmarks_network # REQUIRED: long-term archive pointer (at least doi or url) archive: @@ -27,6 +34,13 @@ archive: # REQUIRED: license for the submission (e.g. 
CC-BY-4.0) license: CC-BY-4.0 -# RECOMMENDED / OPTIONAL metadata -summary: "RBFE runs for the TYK2 and P38 JACS sets and Charge changing edges from the egfr, irak_s2/s3 edges charge annihilation edges." -keywords: [charge_change, rbfe, benchmark, openfe, openmm-840] \ No newline at end of file +# RECOMMENDED / OPTIONAL metadata which detail the protocols used and associated settings allowing for mixed protocol networks +protocol_settings: + - # list some default settings or anything interesting which has been changed such as the number of lambda windows or the schedule + - protocol: RelativeHybridTopologyProtocol + simulation_time: 5 ns + equilibration_time: 1 ns + timestep: 4 fs + temperature: 298.15 K + pressure: 1 atm + repeats: 3 diff --git a/openfe_benchmarks/scripts/_example_plot_rbfe.py b/openfe_benchmarks/scripts/_example_plot_rbfe.py new file mode 100644 index 0000000..a800724 --- /dev/null +++ b/openfe_benchmarks/scripts/_example_plot_rbfe.py @@ -0,0 +1,41 @@ +from openfe_benchmarks.data._results_utils import build_femap_from_relative_results +import json +from gufe.tokenization import JSON_HANDLER +from cinnabar import plotting +import pathlib + +RESULTS_FILE = "../results/2026-03-18-openmm-840-qa-testing/computational_results.json" +OUTPUT_DIR = "outputs" + +def main(): + """ + An example script which can load the calculated DDG values from RBFE calculations and plot vs experimental data for each system. 
+ + Notes: + - Does not plot the DG values + """ + + # load the results file + results = json.load(open(RESULTS_FILE), cls=JSON_HANDLER.decoder) + # check we have DDG values + if "DDG" not in results: + raise ValueError(f"Results file {RESULTS_FILE} does not contain 'DDG' values, cannot plot") + + # build FEMaps and load with experimental data + femaps_by_system = build_femap_from_relative_results(results=results["DDG"]) + + output_dir = pathlib.Path(OUTPUT_DIR) + output_dir.mkdir(parents=True, exist_ok=True) + # for each system plot the RBFE results + for (system_group, system_name), femap in femaps_by_system.items(): + leg_graph = femap.to_legacy_graph() + plotting.plot_DDGs( + graph=leg_graph, + title=f"{system_group}-{system_name}", + figsize=5, + scatter_kwargs={"s": 20, "marker": "o"}, + filename=(output_dir / f"{system_group}_{system_name}_DDG.png").as_posix() + ) + +if __name__ == "__main__": + main() \ No newline at end of file From 7102325a3f840b02edc2251517ef2c9800cc02bf Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Wed, 15 Apr 2026 15:16:26 -0400 Subject: [PATCH 04/24] Add automated submission documents --- .../scripts/prepare_metadata_submission.py | 1018 +++++++++++++++++ 1 file changed, 1018 insertions(+) create mode 100644 openfe_benchmarks/scripts/prepare_metadata_submission.py diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py new file mode 100644 index 0000000..0275070 --- /dev/null +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -0,0 +1,1018 @@ +#!/usr/bin/env python3 +"""Prepare benchmark submission artifacts from an AlchemicalArchive or AlchemicalNetwork JSON file. + +This module generates `submission.yaml` and `zenodo_description.md` from a JSON archive +exported by OpenFE/Alchemiscale. It no longer exposes a CLI; instead, call +`process_network(...)` directly from Python. 
+ +Example: + + from pathlib import Path + from openfe_benchmarks.scripts.prepare_metadata_submission import process_network + + process_network( + input_file=Path("archive.json.bz2"), + output_dir=Path("."), + submission_id="2026-04-15-example", + keywords="openfe,alchemicalarchive", + author=["Jane Doe"], + license="CC-BY-4.0", + ) +""" + +from __future__ import annotations + +import bz2 +import json +import re +import statistics +import textwrap +from dataclasses import dataclass, field +from datetime import date +from pathlib import Path +from typing import Any + + +@dataclass +class AutoMetadata: + openfe_version: str = "" + openmm_version: str = "" + openff_toolkit_version: str = "" + forcefield: str = "" + partial_charges: str = "" + network_descriptor: str = "" + benchmark_data_set: str = "" + benchmark_system: str = "" + protocol_settings: dict[str, str] = field(default_factory=dict) + + +def _require_ref_key(ref: dict[str, Any]) -> str: + key = ref.get(":gufe-key:") + if not key: + raise KeyError(f"Expected :gufe-key: reference, got: {ref}") + return key + + +def _get_network_key( + archive_obj: dict[str, Any] | None, network_obj: dict[str, Any] | None +) -> str: + if archive_obj is not None: + network_ref = archive_obj.get("network") + if isinstance(network_ref, dict): + return _require_ref_key(network_ref) + + if network_obj is not None: + name = network_obj.get("name") + if isinstance(name, str) and name: + return name + return "NA" + + raise ValueError("Could not determine network key from input file") + + +def _open_json_file(path: Path): + if path.suffix == ".bz2": + return bz2.open(path, "rt", encoding="utf-8") + return open(path, "rt", encoding="utf-8") + + +def _load_token_table( + input_path: Path, +) -> tuple[dict[str, dict[str, Any]], dict[str, Any] | None, dict[str, Any] | None]: + with _open_json_file(input_path) as f: + token_table = json.load(f) + + if isinstance(token_table, dict): + token_table = list(token_table.items()) + + if not 
isinstance(token_table, list): + raise ValueError("Unsupported JSON token table format") + + by_key: dict[str, dict[str, Any]] = {} + archive_obj: dict[str, Any] | None = None + network_obj: dict[str, Any] | None = None + + for item in token_table: + if not (isinstance(item, list) and len(item) == 2): + continue + table_key, payload = item + if not isinstance(payload, dict): + continue + + gufe_key = payload.get(":gufe-key:") or table_key + if isinstance(gufe_key, str): + by_key[gufe_key] = payload + + qualname = payload.get("__qualname__") + if qualname == "AlchemicalArchive": + archive_obj = payload + elif qualname == "AlchemicalNetwork": + network_obj = payload + + if archive_obj is None and network_obj is None: + raise ValueError( + "Could not find AlchemicalArchive or AlchemicalNetwork object in token table" + ) + + return by_key, archive_obj, network_obj + + +def _transformation_refs( + archive_obj: dict[str, Any] | None, network_obj: dict[str, Any] | None +) -> list[Any]: + if archive_obj is not None: + return [ + item[0] + for item in archive_obj.get("transformation_results", []) + if isinstance(item, list) and len(item) == 2 + ] + if network_obj is not None: + edges = network_obj.get("edges") or [] + if isinstance(edges, list): + return edges + return [] + + +def _detect_mode( + by_key: dict[str, dict[str, Any]], + archive_obj: dict[str, Any] | None, + network_obj: dict[str, Any] | None, +) -> str: + names: list[str] = [] + for transformation_ref in _transformation_refs(archive_obj, network_obj)[:20]: + transformation = by_key[_require_ref_key(transformation_ref)] + name = str(transformation.get("name") or "") + if name: + names.append(name) + + if any(n.startswith("complex_") or n.startswith("solvent_") for n in names): + return "rbfe" + return "asfe" + + +def _slugify(value: str) -> str: + return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + + +def _default_submission_id(network_key: str) -> str: + return 
f"{date.today().isoformat()}-{_slugify(network_key)}" + + +def _iter_nested_items(obj: Any) -> list[tuple[str, Any]]: + items: list[tuple[str, Any]] = [] + if isinstance(obj, dict): + for k, v in obj.items(): + items.append((str(k), v)) + items.extend(_iter_nested_items(v)) + elif isinstance(obj, list): + for v in obj: + items.extend(_iter_nested_items(v)) + return items + + +def _quantity_to_text(value: Any) -> str: + if isinstance(value, dict) and "magnitude" in value and "unit" in value: + return f"{value['magnitude']} {value['unit']}" + return str(value) + + +def _guess_network_descriptor(archive_stem: str, network_key: str) -> str: + if archive_stem.startswith(f"{network_key}-"): + return archive_stem[len(network_key) + 1 :] + if archive_stem.startswith("AlchemicalNetwork-"): + parts = archive_stem.split("-", maxsplit=2) + if len(parts) == 3: + return parts[2] + return archive_stem + + +def _infer_benchmark_data_set_system( + *, by_key: dict[str, dict[str, Any]], mode: str, archive_stem: str, network_key: str +) -> tuple[str, str]: + blob = json.dumps(list(by_key.values())).lower() + descriptor = _guess_network_descriptor(archive_stem, network_key).lower() + search_space = " ".join( + [blob, descriptor, archive_stem.lower(), network_key.lower()] + ) + + system = "" + for candidate in ("freesolv", "tyk2", "mnsol"): + if candidate in search_space: + system = candidate + break + + benchmark_set = "" + if "jacs_set" in search_space or "jacs" in search_space: + benchmark_set = "jacs_set" + elif "solvation_set" in search_space or mode == "asfe": + benchmark_set = "solvation_set" + + if not system and mode == "asfe": + system = "freesolv" + + return benchmark_set, system + + +def _extract_sim_times(settings_block: dict[str, Any]) -> tuple[str, str]: + equilibration = settings_block.get("equilibration_length") + production = settings_block.get("production_length") + return _quantity_to_text( + equilibration + ) if equilibration is not None else "", _quantity_to_text( 
+ production + ) if production is not None else "" + + +def _build_protocol_settings( + protocol_obj: dict[str, Any] | None, mode: str +) -> dict[str, str]: + protocol_name = ( + "AbsoluteSolvationProtocol" + if mode == "asfe" + else "RelativeHybridTopologyProtocol" + ) + out: dict[str, str] = {"protocol": protocol_name} + + if not protocol_obj: + out["notes"] = "Protocol settings unavailable in archive payload." + return out + + settings = protocol_obj.get("settings") or {} + + repeats = settings.get("protocol_repeats") + if repeats is not None: + out["repeats"] = str(repeats) + + integrator_settings = settings.get("integrator_settings") or {} + if isinstance(integrator_settings, dict): + timestep = integrator_settings.get("timestep") + if timestep is not None: + out["timestep"] = _quantity_to_text(timestep) + + thermo_settings = settings.get("thermo_settings") or {} + if isinstance(thermo_settings, dict): + temperature = thermo_settings.get("temperature") + pressure = thermo_settings.get("pressure") + if temperature is not None: + out["temperature"] = _quantity_to_text(temperature) + if pressure is not None: + out["pressure"] = _quantity_to_text(pressure) + + lambda_settings = settings.get("lambda_settings") or {} + if isinstance(lambda_settings, dict): + lambda_windows = lambda_settings.get("lambda_windows") + if lambda_windows is not None: + out["lambda_windows"] = str(lambda_windows) + else: + lambda_counts: list[str] = [] + for lambda_key in ("lambda_elec", "lambda_vdw", "lambda_restraints"): + values = lambda_settings.get(lambda_key) + if isinstance(values, list): + lambda_counts.append(f"{lambda_key}:{len(values)}") + if lambda_counts: + out["lambda_schedule"] = ", ".join(lambda_counts) + + # Protocol-specific handling: RBFE typically has a single simulation block; + # ASFE commonly has separate vacuum and solvent simulation settings. 
+ if mode == "rbfe": + sim = settings.get("simulation_settings") or {} + if isinstance(sim, dict): + eq, prod = _extract_sim_times(sim) + if eq: + out["equilibration_time"] = eq + if prod: + out["production_time"] = prod + else: + for prefix, key in ( + ("vacuum", "vacuum_simulation_settings"), + ("solvent", "solvent_simulation_settings"), + ): + sim = settings.get(key) or {} + if not isinstance(sim, dict): + continue + eq, prod = _extract_sim_times(sim) + if eq: + out[f"{prefix}_equilibration_time"] = eq + if prod: + out[f"{prefix}_production_time"] = prod + + if len(out) == 1: + out["notes"] = "Protocol class found, but detailed settings were unavailable." + + return out + + +def _render_protocol_settings_yaml(protocol_settings: dict[str, str]) -> str: + lines = ["protocol_settings:"] + protocol_name = protocol_settings.get("protocol", "") + lines.append(f" - protocol: {protocol_name}") + + preferred_order = [ + "production_time", + "equilibration_time", + "vacuum_production_time", + "vacuum_equilibration_time", + "solvent_production_time", + "solvent_equilibration_time", + "timestep", + "temperature", + "pressure", + "repeats", + "lambda_windows", + "lambda_schedule", + "notes", + ] + + for key in preferred_order: + if key in protocol_settings: + lines.append(f" {key}: {json.dumps(str(protocol_settings[key]))}") + + for key in sorted( + k for k in protocol_settings if k not in set(preferred_order) | {"protocol"} + ): + lines.append(f" {key}: {json.dumps(str(protocol_settings[key]))}") + + return "\n".join(lines) + + +def _resolve_payload( + by_key: dict[str, dict[str, Any]], ref: Any +) -> tuple[str | None, dict[str, Any] | None]: + if isinstance(ref, dict): + ref_key = ref.get(":gufe-key:") + if isinstance(ref_key, str): + return ref_key, by_key.get(ref_key) + return None, None + + +def _component_name(component: dict[str, Any], component_key: str) -> str: + molprops = component.get("molprops") or {} + if isinstance(molprops, dict): + ofe_name = 
molprops.get("ofe-name") + if ofe_name: + return str(ofe_name) + + if component.get("name"): + return str(component.get("name")) + + if component.get("smiles"): + return str(component.get("smiles")) + + if component.get("solvent_molecule"): + return str(component.get("solvent_molecule")) + + return component_key + + +def _component_atoms(component: dict[str, Any]) -> int: + atoms = component.get("atoms") + if isinstance(atoms, list): + return len(atoms) + return 0 + + +def _repeat_stats_summary(repeat_counts: list[int]) -> str: + if not repeat_counts: + return "repeats stats unavailable" + + mean_repeats = statistics.fmean(repeat_counts) + median_repeats = statistics.median(repeat_counts) + dist: dict[int, int] = {} + for count in repeat_counts: + dist[count] = dist.get(count, 0) + 1 + dist_text = ", ".join(f"{k}:{v}" for k, v in sorted(dist.items())) + return ( + f"min={min(repeat_counts)}, median={median_repeats:g}, mean={mean_repeats:.2f}, " + f"max={max(repeat_counts)}, distribution={{ {dist_text} }}" + ) + + +def _build_content_summary( + by_key: dict[str, dict[str, Any]], + archive_obj: dict[str, Any] | None, + network_obj: dict[str, Any] | None, + mode: str, + benchmark_data_set: str, + forcefield: str, + partial_charges: str, +) -> str: + transformation_results = ( + archive_obj.get("transformation_results", []) if archive_obj else [] + ) + repeat_counts: list[int] = [] + max_atoms_per_system = 0 + + solutes: set[str] = set() + solvents: set[str] = set() + + ligands: set[str] = set() + proteins: set[str] = set() + cofactors: set[str] = set() + systems_with_cofactors: set[str] = set() + + visited_systems_for_cofactors: set[str] = set() + + if archive_obj is not None: + transformation_refs = [ + item[0] + for item in transformation_results + if isinstance(item, list) and len(item) == 2 + ] + repeat_counts = [ + len(item[1]) + for item in transformation_results + if isinstance(item, list) and len(item) == 2 + ] + else: + transformation_refs = 
_transformation_refs(archive_obj, network_obj) + + transformation_count = len(transformation_refs) + + for transformation_ref in transformation_refs: + _, transformation = _resolve_payload(by_key, transformation_ref) + if not transformation: + continue + + for state_key in ("stateA", "stateB"): + cs_key, chemical_system = _resolve_payload( + by_key, transformation.get(state_key) + ) + if not chemical_system: + continue + + components = chemical_system.get("components") or {} + if not isinstance(components, dict): + continue + + system_atoms = 0 + local_cofactors: set[str] = set() + + for label, component_ref in components.items(): + comp_key, component = _resolve_payload(by_key, component_ref) + if not component: + continue + + label_l = str(label).lower() + qualname = str(component.get("__qualname__") or "") + comp_name = _component_name(component, comp_key or "unknown") + comp_atoms = _component_atoms(component) + system_atoms += comp_atoms + + if mode == "asfe": + if "solvent" in label_l or "solventcomponent" in qualname.lower(): + solvents.add(comp_name) + elif "solute" in label_l or qualname == "SmallMoleculeComponent": + solutes.add(comp_name) + else: + if "protein" in label_l or qualname == "ProteinComponent": + proteins.add(comp_name) + elif "ligand" in label_l: + ligands.add(comp_name) + elif "cofactor" in label_l: + cofactors.add(comp_name) + local_cofactors.add(comp_name) + elif ( + qualname == "SmallMoleculeComponent" + and "solvent" not in label_l + ): + # Non-solvent small molecules that are not explicit ligands are treated as cofactors. 
+ cofactors.add(comp_name) + local_cofactors.add(comp_name) + + max_atoms_per_system = max(max_atoms_per_system, system_atoms) + + if ( + mode == "rbfe" + and cs_key + and cs_key not in visited_systems_for_cofactors + ): + visited_systems_for_cofactors.add(cs_key) + if local_cofactors: + systems_with_cofactors.add(cs_key) + + repeats_text = _repeat_stats_summary(repeat_counts) + + subject = benchmark_data_set or "benchmark" + field_info = forcefield or "an unspecified force field" + charge_info = partial_charges or "unspecified partial charges" + if mode == "rbfe": + cofactor_list = ", ".join(sorted(cofactors)) if cofactors else "none" + summary_parts = [ + f"This submission describes the {subject} RBFE benchmark prepared with {field_info} and {charge_info}.", + f"The network contains {transformation_count} transformations across {len(ligands)} unique ligands and {len(proteins)} unique proteins.", + ] + if systems_with_cofactors: + summary_parts.append( + f"{len(systems_with_cofactors)} systems include cofactors ({cofactor_list})." + ) + else: + summary_parts = [ + f"This submission describes the {subject} ASFE benchmark prepared with {field_info} and {charge_info}.", + f"The archive contains {transformation_count} transformations across {len(solutes)} unique solutes and {len(solvents)} unique solvents.", + ] + + summary_parts.append( + f"The largest simulated chemical system contains {max_atoms_per_system} atoms, and repeat counts per transformation are {repeats_text}." + ) + summary_parts.append( + "Results are derived from archived Alchemiscale workflow data." 
+ ) + summary_text = " ".join(summary_parts) + return textwrap.fill(summary_text, width=100) + + +def _extract_auto_metadata( + *, + by_key: dict[str, dict[str, Any]], + mode: str, + archive_path: Path, + network_key: str, + archive_stem: str, +) -> AutoMetadata: + metadata = AutoMetadata() + metadata.network_descriptor = _guess_network_descriptor(archive_stem, network_key) + metadata.benchmark_data_set, metadata.benchmark_system = ( + _infer_benchmark_data_set_system( + by_key=by_key, + mode=mode, + archive_stem=archive_stem, + network_key=network_key, + ) + ) + + protocol_obj: dict[str, Any] | None = None + + for payload in by_key.values(): + qualname = payload.get("__qualname__") + if qualname in { + "RelativeHybridTopologyProtocol", + "AbsoluteSolvationProtocol", + "ASFEProtocol", + }: + protocol_obj = payload + + settings = payload.get("settings") or {} + forcefield_settings = ( + settings.get("forcefield_settings") + or settings.get("solvent_forcefield_settings") + or settings.get("vacuum_forcefield_settings") + or {} + ) + if not metadata.forcefield and isinstance(forcefield_settings, dict): + metadata.forcefield = str( + forcefield_settings.get("small_molecule_forcefield") or "" + ) + if not metadata.forcefield: + ffs = forcefield_settings.get("forcefields") + if isinstance(ffs, list) and ffs: + preferred = next( + ( + ff + for ff in ffs + if isinstance(ff, str) and "openff" in ff + ), + None, + ) + if preferred: + metadata.forcefield = str(preferred).replace(".offxml", "") + + partial_charge_settings = settings.get("partial_charge_settings") or {} + if not metadata.partial_charges and isinstance( + partial_charge_settings, dict + ): + method = partial_charge_settings.get("partial_charge_method") + nagl_model = partial_charge_settings.get("nagl_model") + if method and nagl_model: + metadata.partial_charges = f"{method} ({nagl_model})" + elif method: + metadata.partial_charges = str(method) + + for key, value in _iter_nested_items(payload): + if ( + not 
metadata.openmm_version + and key == "openmm_version" + and isinstance(value, str) + ): + metadata.openmm_version = value + if ( + not metadata.openfe_version + and key == "openfe_version" + and isinstance(value, str) + ): + metadata.openfe_version = value + if ( + not metadata.openff_toolkit_version + and key == "openff_toolkit_version" + and isinstance(value, str) + ): + metadata.openff_toolkit_version = value + + molprops = payload.get("molprops") + if isinstance(molprops, dict): + charge_provenance = molprops.get("charge_provenance") + if isinstance(charge_provenance, str): + try: + charge_provenance = json.loads(charge_provenance) + except json.JSONDecodeError: + charge_provenance = None + if isinstance(charge_provenance, dict): + if not metadata.openfe_version and isinstance( + charge_provenance.get("openfe_version"), str + ): + metadata.openfe_version = charge_provenance["openfe_version"] + if not metadata.openff_toolkit_version and isinstance( + charge_provenance.get("openff_toolkit_version"), str + ): + metadata.openff_toolkit_version = charge_provenance[ + "openff_toolkit_version" + ] + if not metadata.partial_charges: + charge_method = charge_provenance.get("charge_method") + nagl_model = charge_provenance.get("nagl_model") + if charge_method and nagl_model: + metadata.partial_charges = f"{charge_method} ({nagl_model})" + elif charge_method: + metadata.partial_charges = str(charge_method) + + metadata.protocol_settings = _build_protocol_settings(protocol_obj, mode) + return metadata + + +def _normalize_partial_charge_info(partial_charges: str) -> str: + value = partial_charges.strip() + if not value: + return "" + + lower = value.lower() + + # Canonical openfe-benchmarks style: nagl_.pt + model_match = re.search( + r"(openff-gnn-am1bcc-[0-9.]+\.0\.pt|openff-gnn-am1bcc-[0-9.]+\.pt)", lower + ) + if "nagl" in lower and model_match: + model = model_match.group(1) + return f"nagl_{model}" + + if lower in {"am1bcc", "am1-bcc"}: + return "am1bcc" + + 
normalized = re.sub(r"[^a-z0-9._-]+", "_", lower).strip("_") + return normalized + + +def _make_tags( + *, mode: str, forcefield: str, partial_charge_tag: str, user_keywords: list[str] +) -> list[str]: + tags: list[str] = [] + tags.append(mode) + if forcefield: + tags.append(forcefield) + if partial_charge_tag: + tags.append(partial_charge_tag) + tags.extend(user_keywords) + + # Deduplicate while preserving order. + out: list[str] = [] + seen: set[str] = set() + for tag in tags: + t = tag.strip() + if not t or t in seen: + continue + seen.add(t) + out.append(t) + return out + + +def _yaml_block(text: str, indent_spaces: int = 2) -> str: + indent = " " * indent_spaces + lines = text.splitlines() or [""] + return "\n".join(f"{indent}{line}" for line in lines) + + +def _make_submission_yaml( + *, + submission_id: str, + title: str, + summary: str, + tags: list[str], + authors: list[str], + openfe_version: str, + openmm_version: str, + openff_toolkit_version: str, + forcefield: str, + partial_charges: str, + network_descriptor: str, + benchmark_data_set: str, + benchmark_system: str, + archive_doi: str, + archive_provider: str, + license_name: str, + protocol_settings: dict[str, str], +) -> str: + if not authors: + authors = ["TODO: add author name"] + + tags_yaml = ", ".join(tags) + authors_yaml = "\n".join(f" - name: {name}" for name in authors) + protocol_settings_yaml = _render_protocol_settings_yaml(protocol_settings) + + return f"""# REQUIRED: unique, kebab-case identifier for this submission +submission_id: {submission_id} + +# REQUIRED: short descriptive title +title: {title} + +# REQUIRED: short descriptive summary (1-2 sentences) +summary: | +{_yaml_block(summary, 2)} + +# REQUIRED: list of submission tags +tags: [{tags_yaml}] + +# REQUIRED: list of contributing authors (name, affiliation; ORCID optional) +authors: +{authors_yaml} + +# REQUIRED: publication/submission date (ISO 8601) +date: {date.today().isoformat()} + +# REQUIRED: OpenFE/OpenMM/toolkit 
versions used to produce gathered reports +openfe_version: {openfe_version} +openmm_version: {openmm_version} +openff_toolkit_version: {openff_toolkit_version} + +# Recommended descriptors +forcefield: {forcefield} +partial_charges: {partial_charges} +network: {network_descriptor} + +# BenchmarkData provenance (from openfe-benchmarks planning script) +benchmark_data: + source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks + set: {benchmark_data_set} + system: {benchmark_system} + +# REQUIRED: long-term archive pointer (at least doi or url) +archive: + doi: {archive_doi} + archive_provider: {archive_provider} + +# REQUIRED: license for the submission +license: {license_name} + +# RECOMMENDED / OPTIONAL metadata for protocol settings +{protocol_settings_yaml} +""" + + +def _make_zenodo_description( + *, + title: str, + summary: str, + archive_filename: str, + network_key: str, + mode: str, + content_summary: str, + openfe_version: str, + openmm_version: str, + openff_toolkit_version: str, + forcefield: str, + partial_charges: str, + network_descriptor: str, + tags: list[str], + benchmark_data_set: str, + benchmark_system: str, + n_transformations: int, + submission_yaml_file: str, + license_name: str, + protocol_settings: dict[str, str], +) -> str: + content_kind = "ASFE" if mode == "asfe" else "RBFE" + openmm_display = openmm_version or "" + protocol_lines = ( + "\n".join( + [f"- {k}: {v}" for k, v in protocol_settings.items() if k != "protocol"] + ) + or "- notes: Protocol settings unavailable" + ) + + return f"""# {title} + +## Description + +## Overview + +{content_kind} benchmark results prepared from an AlchemicalArchive generated with OpenFE and Alchemiscale. 
+ +{content_summary} + +## Software versions + +- openfe_version: {openfe_version} +- openmm_version: {openmm_display} +- openff_toolkit_version: {openff_toolkit_version} + +## Recommended descriptors + +- forcefield: {forcefield} +- partial_charges: {partial_charges} +- network: {network_descriptor} + +## BenchmarkData provenance + +- source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks +- set: {benchmark_data_set} +- system: {benchmark_system} + +## Protocol settings + +- protocol: {protocol_settings.get("protocol", "unknown")} +{protocol_lines} + +- archive file: {archive_filename} +- network key: {network_key} + +## Contents + +### Data files + +- {submission_yaml_file}: submission metadata +- zenodo_description.md: Zenodo metadata description + +### Network summary + +- total transformations in archive: {n_transformations} + +## Simulation details + +- generation workflow: network prepared with plan_*.py scripts from openfe-benchmarks and then archived from Alchemiscale + +## Changelog + +- Generated by prepare_archive_submission.py on {date.today().isoformat()} + +## Rights + +- License: {license_name} +""" + + +def process_network( + input_file: Path, + output_dir: Path = Path("."), + submission_id: str | None = None, + keywords: str = "openfe,alchemicalarchive", + author: list[str] | None = None, + license: str = "CC-BY-4.0", +) -> tuple[Path, Path]: + """Generate submission metadata from an archived OpenFE JSON network. + + Parameters + ---------- + input_file: + Path to the AlchemicalArchive or AlchemicalNetwork JSON file. Supported + extensions are `.json`, `.bz2`, and `.json.bz2`. + output_dir: + Directory where `submission.yaml` and `zenodo_description.md` will be + written. Defaults to the current working directory. + submission_id: + Optional identifier to use in `submission.yaml`. If omitted, a default + value is generated from the current date and network key. 
+ keywords: + Comma-separated list of additional tags to include in the submission + metadata. The generated tag list also always includes the detected + `mode` (either ``asfe`` or ``rbfe``), the resolved forcefield string, + and normalized partial charge information. + author: + Optional list of author entries for the submission YAML. Each entry is + treated as a raw string and written to the `authors` section. + license: + License string to write into the submission metadata. + + Notes + ----- + The generated submission YAML will include placeholder values for archive + DOI and provider, which are intentionally not propagated into the Zenodo + description output. + + Returns + ------- + tuple[Path, Path] + Paths to the generated `submission.yaml` and `zenodo_description.md`. + """ + input_path = input_file.resolve() + if not input_path.exists(): + raise FileNotFoundError(f"Input file not found: {input_path}") + + out_dir = output_dir.resolve() + out_dir.mkdir(parents=True, exist_ok=True) + + by_key, archive_obj, network_obj = _load_token_table(input_path) + mode = _detect_mode(by_key, archive_obj, network_obj) + + network_key = _get_network_key(archive_obj, network_obj) + + archive_stem = input_path.name + for suffix in (".json.bz2", ".bz2", ".json"): + if archive_stem.endswith(suffix): + archive_stem = archive_stem[: -len(suffix)] + break + + auto_metadata = _extract_auto_metadata( + by_key=by_key, + mode=mode, + archive_path=input_path, + network_key=network_key, + archive_stem=archive_stem, + ) + + openfe_version = auto_metadata.openfe_version + openmm_version = auto_metadata.openmm_version + openff_toolkit_version = auto_metadata.openff_toolkit_version + forcefield = auto_metadata.forcefield + partial_charges_raw = auto_metadata.partial_charges + partial_charge_tag = _normalize_partial_charge_info(partial_charges_raw) + partial_charges = partial_charge_tag or partial_charges_raw + network_descriptor = auto_metadata.network_descriptor + + content_summary = 
_build_content_summary( + by_key, + archive_obj, + network_obj, + mode, + auto_metadata.benchmark_data_set, + forcefield, + partial_charges, + ) + + benchmark_data_set = auto_metadata.benchmark_data_set + benchmark_system = auto_metadata.benchmark_system + + # Requested behavior: always leave title for manual curation. + title = "TODO: add title" + + submission_yaml_filename = "submission.yaml" + zenodo_description_filename = "zenodo_description.md" + + submission_yaml_path = out_dir / submission_yaml_filename + zenodo_description_path = out_dir / zenodo_description_filename + + submission_id = submission_id or _default_submission_id(network_key) + keywords_list = [k.strip() for k in keywords.split(",") if k.strip()] + tags = _make_tags( + mode=mode, + forcefield=forcefield, + partial_charge_tag=partial_charges, + user_keywords=keywords_list, + ) + + submission_yaml_text = _make_submission_yaml( + submission_id=submission_id, + title=title, + summary=content_summary, + tags=tags, + authors=author or [], + openfe_version=openfe_version, + openmm_version=openmm_version, + openff_toolkit_version=openff_toolkit_version, + forcefield=forcefield, + partial_charges=partial_charges, + network_descriptor=network_descriptor, + benchmark_data_set=benchmark_data_set, + benchmark_system=benchmark_system, + archive_doi="TODO: add DOI", + archive_provider="TODO: add archive provider", + license_name=license, + protocol_settings=auto_metadata.protocol_settings, + ) + submission_yaml_path.write_text(submission_yaml_text) + + zenodo_description_text = _make_zenodo_description( + title=title, + summary=content_summary, + archive_filename=input_path.name, + network_key=network_key, + mode=mode, + content_summary=content_summary, + openfe_version=openfe_version, + openmm_version=openmm_version, + openff_toolkit_version=openff_toolkit_version, + forcefield=forcefield, + partial_charges=partial_charges, + network_descriptor=network_descriptor, + tags=tags, + 
benchmark_data_set=benchmark_data_set, + benchmark_system=benchmark_system, + n_transformations=len(_transformation_refs(archive_obj, network_obj)), + submission_yaml_file=submission_yaml_filename, + license_name=license, + protocol_settings=auto_metadata.protocol_settings, + ) + zenodo_description_path.write_text(zenodo_description_text) + + print(f"Input file: {input_path}") + print(f"Detected mode: {mode}") + print(f"Submission YAML: {submission_yaml_path}") + print(f"Zenodo description: {zenodo_description_path}") + + return submission_yaml_path, zenodo_description_path From 750b84435c7b4ca55fb71b014ad553b917273ac9 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Wed, 3 Jun 2026 11:01:18 -0400 Subject: [PATCH 05/24] Add annotations to transformations --- .../scripts/_example_plan_rbfe.py | 55 ++++++++++++++++++- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/openfe_benchmarks/scripts/_example_plan_rbfe.py b/openfe_benchmarks/scripts/_example_plan_rbfe.py index 01d6231..ba83846 100644 --- a/openfe_benchmarks/scripts/_example_plan_rbfe.py +++ b/openfe_benchmarks/scripts/_example_plan_rbfe.py @@ -86,7 +86,13 @@ def process_components(benchmark_sys): def compile_network_transformations( - ligand_network, solvent, ligands_by_name, protein, cofactors + ligand_network, + solvent, + ligands_by_name, + protein, + cofactors, + system_group, + system_name, ): """ Compile alchemical transformations for a given network. @@ -103,6 +109,10 @@ def compile_network_transformations( The protein component for the transformations. cofactors : list or None List of cofactor components, if any. + system_group : str + The benchmark system group name (e.g., "jacs_set", "mcs_docking_set"). + system_name : str + The benchmark system name (e.g., "p38", "hne"). 
Returns ------- @@ -111,11 +121,20 @@ def compile_network_transformations( """ transformations = [] for edge in ligand_network.edges: + # Add system_group and system_name to the mapping annotations + mapping_annotations = dict(edge.annotations) if edge.annotations else {} + mapping_annotations.update( + { + "system_group": system_group, + "system_name": system_name, + } + ) + new_edge = openfe.LigandAtomMapping( componentA=ligands_by_name[edge.componentA.name], componentB=ligands_by_name[edge.componentB.name], componentA_to_componentB=edge.componentA_to_componentB, - annotations=edge.annotations, + annotations=mapping_annotations, ) # create the transformations for the bound and solvent legs @@ -175,7 +194,13 @@ def main(): lig_network, ligand_dict, protein, cofactors = process_components(benchmark_sys) transformations = compile_network_transformations( - lig_network, SOLVENT, ligand_dict, protein, cofactors + lig_network, + SOLVENT, + ligand_dict, + protein, + cofactors, + BENCHMARK_SET, + BENCHMARK_SYS, ) # Can be used as input for Alchemiscale @@ -297,6 +322,30 @@ def validate_rbfe_network(network_file): except Exception as e: errors.append(f"Failed to create protocol for transformation '{name}': {e}") + # Validate mapping annotations + if transformation.mapping is None: + errors.append(f"Transformation '{name}' has no mapping") + else: + mapping_annot = transformation.mapping.annotations + if not mapping_annot: + errors.append(f"Transformation '{name}' mapping has no annotations") + else: + for required_key in ["system_group", "system_name"]: + if required_key not in mapping_annot: + errors.append( + f"Transformation '{name}' mapping missing '{required_key}' annotation" + ) + elif mapping_annot[required_key] != ( + BENCHMARK_SET + if required_key == "system_group" + else BENCHMARK_SYS + ): + errors.append( + f"Transformation '{name}' mapping has incorrect '{required_key}': " + f"expected '{BENCHMARK_SET if required_key == 'system_group' else BENCHMARK_SYS}', " 
+ f"got '{mapping_annot[required_key]}'" + ) + # Validate that we have both complex and solvent legs for each ligand pair expected_ligand_pairs = len(expected_lig_network.edges) if len(complex_legs) != expected_ligand_pairs: From 4760a3698d1c753e589db2859739096d701e9f8b Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Wed, 3 Jun 2026 11:03:08 -0400 Subject: [PATCH 06/24] Remove repeats and make all lowercase --- .../_example_generate_results_local.py | 98 ++++++++++++------- 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/openfe_benchmarks/scripts/_example_generate_results_local.py b/openfe_benchmarks/scripts/_example_generate_results_local.py index e54eaca..d085240 100644 --- a/openfe_benchmarks/scripts/_example_generate_results_local.py +++ b/openfe_benchmarks/scripts/_example_generate_results_local.py @@ -10,6 +10,7 @@ import zstandard as zstd from openfecli.commands.gather import _get_names, _get_type + def _get_simulation_key(result: dict) -> tuple[tuple[str, str], str]: lig_a_name, lig_b_name = _get_names(result) phase = _get_type(result) @@ -18,22 +19,30 @@ def _get_simulation_key(result: dict) -> tuple[tuple[str, str], str]: @click.command() @click.option( - "--results_dir", - help="The directory containing the transformation results", - multiple=True, - type=click.Path(exists=True, dir_okay=True, file_okay=False, path_type=pathlib.Path) + "--results_dir", + help="The directory containing the transformation results", + multiple=True, + type=click.Path( + exists=True, dir_okay=True, file_okay=False, path_type=pathlib.Path + ), ) @click.option( - "--network", help="The path to the alchemical network JSON file", - type=click.Path(exists=True, dir_okay=False, file_okay=True, path_type=pathlib.Path) + "--network", + help="The path to the alchemical network JSON file", + type=click.Path( + exists=True, dir_okay=False, file_okay=True, path_type=pathlib.Path + ), ) @click.option( - "--output_dir", help="The directory to write the archive to", - 
type=click.Path(exists=True, dir_okay=True, file_okay=False, path_type=pathlib.Path) + "--output_dir", + help="The directory to write the archive to", + type=click.Path( + exists=True, dir_okay=True, file_okay=False, path_type=pathlib.Path + ), ) def main(results_dir, network, output_dir): """ - Gather all the results for the transformations in the network and write the DDG/DG to a json file with units and metadata which can be used + Gather all the results for the transformations in the network and write the DDG/DG to a json file with units and metadata which can be used for down stream analysis. """ # load the network to make sure all transformations are present in the results @@ -50,7 +59,6 @@ def main(results_dir, network, output_dir): phase = "solvent" transformations_to_run.add((ligand_a_name, ligand_b_name, phase)) - # make a key using the (lig_a.name, lig_b.name) raw_results = defaultdict(list) for result_dir in results_dir: @@ -79,13 +87,17 @@ def main(results_dir, network, output_dir): for phase, result in results: key = (lig_a_name, lig_b_name, phase) if key not in transformations_to_run: - raise ValueError(f"Found results for transformation {key} which is not in the alchemical network") + raise ValueError( + f"Found results for transformation {key} which is not in the alchemical network" + ) found_results.add((lig_a_name, lig_b_name, phase)) # now check that we have results for all transformations missing_transformations = transformations_to_run - found_results if missing_transformations: - raise ValueError(f"Missing results for transformations: {missing_transformations}") + raise ValueError( + f"Missing results for transformations: {missing_transformations}" + ) # also build a femap so we can get DGs if possible fe_map = FEMap() @@ -96,44 +108,62 @@ def main(results_dir, network, output_dir): "ligand_b": lig_b_name, "system_group": system_group, "system_name": system_name, - # assuming this is the RBFE protocol we should have the same number of repeats 
for each phase - "repeats": len(results) // 2, } # group the results by phase complex_results = [result for phase, result in results if phase == "complex"] solvent_results = [result for phase, result in results if phase == "solvent"] - assert len(complex_results) == len(solvent_results), f"Found different number of complex and solvent results for {key}" + assert len(complex_results) == len(solvent_results), ( + f"Found different number of complex and solvent results for {key}" + ) # get the estimated values for each repeat - complex_data = [result["estimate"].m_as(unit.kilocalories_per_mole) for result in complex_results] + complex_data = [ + result["estimate"].m_as(unit.kilocalories_per_mole) + for result in complex_results + ] complex_dg = np.mean(complex_data) * unit.kilocalories_per_mole complex_dg_uncertainty = np.std(complex_data) * unit.kilocalories_per_mole # get the solvent data - solvent_data = [result["estimate"].m_as(unit.kilocalories_per_mole) for result in solvent_results] + solvent_data = [ + result["estimate"].m_as(unit.kilocalories_per_mole) + for result in solvent_results + ] solvent_dg = np.mean(solvent_data) * unit.kilocalories_per_mole solvent_dg_uncertainty = np.std(solvent_data) * unit.kilocalories_per_mole # get the combinded ddg and uncertainty - entry_data["DDG"] = complex_dg - solvent_dg - entry_data["DDG_uncertainty"] = np.sqrt(complex_dg_uncertainty**2 + solvent_dg_uncertainty**2) + entry_data["ddg"] = complex_dg - solvent_dg + entry_data["ddg_uncertainty"] = np.sqrt( + complex_dg_uncertainty**2 + solvent_dg_uncertainty**2 + ) # add the raw values for debugging? 
- entry_data["DGs_complex"] = complex_data - entry_data["DGs_solvent"] = solvent_data + entry_data["dgs_complex"] = complex_data + entry_data["dgs_solvent"] = solvent_data # calculate the smallest off diagonal element of the mabr overlap matrix and the replica mixing matrix for each result - for phase_results, label in zip([complex_results, solvent_results], ["Complex", "Solvent"]): + for phase_results, label in zip( + [complex_results, solvent_results], ["complex", "solvent"] + ): mbar_overlap_elements = [] replica_mixing_elements = [] for phase_result in phase_results: # get the key for this protocol result result_key = list(phase_result["protocol_result"]["data"].keys())[0] # calculate the smallest overlap for the mbar overlap matrix - overlap_matrix = phase_result["protocol_result"]["data"][result_key][0]["outputs"]["unit_mbar_overlap"]["matrix"] - mbar_overlap_elements.append(np.diagonal(overlap_matrix, offset=1).min()) + overlap_matrix = phase_result["protocol_result"]["data"][result_key][0][ + "outputs" + ]["unit_mbar_overlap"]["matrix"] + mbar_overlap_elements.append( + np.diagonal(overlap_matrix, offset=1).min() + ) # calculate the smallest off diagonal element of the replica mixing matrix - mixing_matrix = phase_result["protocol_result"]["data"][result_key][0]["outputs"]["replica_exchange_statistics"]["matrix"] - replica_mixing_elements.append(np.diagonal(mixing_matrix, offset=1).min()) + mixing_matrix = phase_result["protocol_result"]["data"][result_key][0][ + "outputs" + ]["replica_exchange_statistics"]["matrix"] + replica_mixing_elements.append( + np.diagonal(mixing_matrix, offset=1).min() + ) entry_data[f"{label}_smallest_mbar_overlaps"] = mbar_overlap_elements entry_data[f"{label}_smallest_replica_mixing"] = replica_mixing_elements @@ -145,7 +175,7 @@ def main(results_dir, network, output_dir): labelA=lig_a_name, labelB=lig_b_name, value=entry_data["DDG"], - uncertainty=entry_data["DDG_uncertainty"] + uncertainty=entry_data["DDG_uncertainty"], ) # 
check if the network is connected and we can calculate the DGs @@ -156,22 +186,20 @@ def main(results_dir, network, output_dir): for _, row in abs_df.iterrows(): entry_data = { "ligand": row["label"], - "DG": row["DG (kcal/mol)"] * unit.kilocalories_per_mole, - "DG_uncertainty": row["uncertainty (kcal/mol)"] * unit.kilocalories_per_mole, + "dg": row["DG (kcal/mol)"] * unit.kilocalories_per_mole, + "dg_uncertainty": row["uncertainty (kcal/mol)"] + * unit.kilocalories_per_mole, "system_group": system_group, "system_name": system_name, "source": row["source"], } gathered_results["DG"].append(entry_data) - + # write out the data to a json file - output_file = output_dir / "computaional_results.json" + output_file = output_dir / "computational_results.json" with open(output_file, "w") as w: json.dump(gathered_results, w, cls=JSON_HANDLER.encoder, indent=4) + if __name__ == "__main__": main() - - - - From ff629f163ee0fa17c7b4f9dd468ce601964cd9f3 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Wed, 3 Jun 2026 13:19:58 -0400 Subject: [PATCH 07/24] Fix submission generation --- .../run_prepare_metadata.sh | 24 + .../submission.yaml | 68 +- .../scripts/prepare_metadata_submission.py | 989 +++++++++++++++--- 3 files changed, 900 insertions(+), 181 deletions(-) create mode 100755 openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh new file mode 100755 index 0000000..09cc1f6 --- /dev/null +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Generate submission metadata for all networks (charge_changes and jacs_set) +# This script processes all networks from the ResultSubmission folder + +set -e # Exit on error + +echo "Generating submission metadata for all benchmark networks..." 
+echo "" + +micromamba run -n openfe-benchmarks python \ + /Users/jenniferclark/bin/openfe-benchmarks/openfe_benchmarks/scripts/prepare_metadata_submission.py \ + "/Users/jenniferclark/OMSF/OpenFE/BenchmarkRepo/ResultSubmission/networks/*/*/*alchemicalnetwork.json" \ + --output-dir . \ + --submission-id "2026-03-18-openmm-840-qa-testing" \ + --keywords "charge_change, rbfe, benchmark, openfe, openmm-840" \ + --author "Josh Horton" \ + --license "CC-BY-4.0" \ + --no-alchemiscale \ + --summary-suffix "Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges." + +echo "" +echo "Done! Check the files in this directory:" +echo " - submission.yaml" +echo " - zenodo_description.md" diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index e974466..b109f91 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -2,45 +2,67 @@ submission_id: 2026-03-18-openmm-840-qa-testing # REQUIRED: short descriptive title -title: QA runs for OpenMM 8.4.0 using JACS (P38 & TYK2) and Charge changing edges. +title: OpenFE RBFE - charge_annihilation_set, jacs_set (5 systems) - 2026-03-18-openmm-840-qa-testing + # REQUIRED: short descriptive summary (1-2 sentences) -summary: "RBFE runs for the TYK2 and P38 JACS sets and Charge changing edges from the egfr, irak_s2/s3 charge annihilation sets. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be -compared to other complete runs due to the missing edges." 
-# REQUIRED: list of keywords (from the controlled vocabulary if possible) -keywords: [charge_change, rbfe, benchmark, openfe, openmm-840] +summary: | + This submission describes the charge_annihilation_set, jacs_set RBFE benchmark + (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with openff-2.3.0 + and nagl_openff-gnn-am1bcc-1.0.0.pt. The submission contains 160 transformations, 61 unique ligands, + and 5 unique proteins. The largest simulated chemical system contains 6744 atoms. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. + +# REQUIRED: list of submission tags +tags: [rbfe, openff-2.3.0, nagl_openff-gnn-am1bcc-1.0.0.pt, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: - - name: Joshua Horton + - name: Josh Horton # REQUIRED: publication/submission date (ISO 8601) -date: 2026-03-18 +date: 2026-06-03 -# REQUIRED: OpenFE version used to produce the gathered reports +# REQUIRED: OpenFE/OpenMM/toolkit versions used to produce gathered reports openfe_version: 1.8.0 -openmm_version: 8.4.0 -openff_toolkit_version: +openmm_version: +openff_toolkit_version: 0.18.0 -# Recommended but useful: force field, partial charge and network descriptors +# Recommended descriptors forcefield: openff-2.3.0 partial_charges: nagl_openff-gnn-am1bcc-1.0.0.pt -network: industry_benchmarks_network + +# BenchmarkData provenance (from openfe-benchmarks planning script) +benchmark_data: + source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks + set: charge_annihilation_set, jacs_set + system: egfr, irak4_s2, irak4_s3, p38, tyk2 # REQUIRED: long-term archive pointer (at least doi or url) archive: - doi: - archive_provider: + doi: TODO add DOI + archive_provider: TODO add archive provider -# REQUIRED: license for the submission (e.g. 
CC-BY-4.0) +# REQUIRED: license for the submission license: CC-BY-4.0 -# RECOMMENDED / OPTIONAL metadata which detail the protocols used and associated settings allowing for mixed protocol networks +# RECOMMENDED / OPTIONAL metadata for protocol settings protocol_settings: - - # list some default settings or anything interesting which has been changed such as the number of lambda windows or the schedule - protocol: RelativeHybridTopologyProtocol - simulation_time: 5 ns - equilibration_time: 1 ns - timestep: 4 fs - temperature: 298.15 K - pressure: 1 atm - repeats: 3 + count: 3 + systems: "charge_annihilation_set/egfr, charge_annihilation_set/irak4_s2, charge_annihilation_set/irak4_s3" + files: "charge_annihilation_set_egfr_alchemicalnetwork.json, charge_annihilation_set_irak4_s2_alchemicalnetwork.json, charge_annihilation_set_irak4_s3_alchemicalnetwork.json" + production_time: "20 nanosecond" + equilibration_time: "1.0 nanosecond" + timestep: "4.0 femtosecond" + temperature: "298.15 kelvin" + pressure: "1 bar" + lambda_windows: "22" + - protocol: RelativeHybridTopologyProtocol + count: 2 + systems: "jacs_set/p38, jacs_set/tyk2" + files: "alchemicalnetwork.json" + production_time: "5.0 nanosecond" + equilibration_time: "1.0 nanosecond" + timestep: "4.0 femtosecond" + temperature: "298.15 kelvin" + pressure: "1 bar" + lambda_windows: "11" diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 0275070..2c268b9 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -1,38 +1,94 @@ #!/usr/bin/env python3 -"""Prepare benchmark submission artifacts from an AlchemicalArchive or AlchemicalNetwork JSON file. +"""Prepare benchmark submission artifacts from AlchemicalArchive or AlchemicalNetwork JSON files. 
-This module generates `submission.yaml` and `zenodo_description.md` from a JSON archive -exported by OpenFE/Alchemiscale. It no longer exposes a CLI; instead, call -`process_network(...)` directly from Python. +This module generates `submission.yaml` and `zenodo_description.md` from one or more JSON archives +exported by OpenFE/Alchemiscale. Supports single files, lists of files, or glob patterns with +potentially different protocol settings. -Example: +Example (Python API): from pathlib import Path from openfe_benchmarks.scripts.prepare_metadata_submission import process_network + # Using explicit file list process_network( - input_file=Path("archive.json.bz2"), + input_files=[Path("archive1.json.bz2"), Path("archive2.json.bz2")], output_dir=Path("."), submission_id="2026-04-15-example", keywords="openfe,alchemicalarchive", author=["Jane Doe"], license="CC-BY-4.0", ) + + # Using glob pattern + process_network( + input_files="networks/*/*.json", + output_dir=Path("."), + submission_id="2026-04-15-example", + keywords="openfe,alchemicalarchive", + author=["Jane Doe"], + license="CC-BY-4.0", + ) + +Example (CLI): + + # Explicit files + python prepare_metadata_submission.py archive1.json.bz2 archive2.json.bz2 \\ + --output-dir ./output \\ + --submission-id "2026-04-15-example" \\ + --keywords "openfe,alchemicalarchive" \\ + --author "Jane Doe" \\ + --license "CC-BY-4.0" + + # Glob pattern + python prepare_metadata_submission.py "networks/*/*.json" \\ + --output-dir ./output \\ + --submission-id "2026-04-15-example" """ from __future__ import annotations +import argparse import bz2 +import glob as glob_module import json import re import statistics +import sys import textwrap +from collections import defaultdict from dataclasses import dataclass, field from datetime import date from pathlib import Path from typing import Any +@dataclass +class ProtocolSettingsInfo: + """Container for protocol settings with source metadata.""" + + settings: dict[str, str] + 
source_file: str + benchmark_set: str + benchmark_system: str + network_key: str + + +@dataclass +class SystemInfo: + """Per-system information extracted from transformations.""" + + system_group: str + system_name: str + n_transformations: int + ligands: set[str] = field(default_factory=set) + proteins: set[str] = field(default_factory=set) + solutes: set[str] = field(default_factory=set) + solvents: set[str] = field(default_factory=set) + max_atoms: int = 0 + files: set[str] = field(default_factory=set) + + @dataclass class AutoMetadata: openfe_version: str = "" @@ -43,7 +99,8 @@ class AutoMetadata: network_descriptor: str = "" benchmark_data_set: str = "" benchmark_system: str = "" - protocol_settings: dict[str, str] = field(default_factory=dict) + protocol_settings_list: list[ProtocolSettingsInfo] = field(default_factory=list) + system_info_list: list[SystemInfo] = field(default_factory=list) def _require_ref_key(ref: dict[str, Any]) -> str: @@ -158,6 +215,51 @@ def _default_submission_id(network_key: str) -> str: return f"{date.today().isoformat()}-{_slugify(network_key)}" +def _generate_title( + mode: str, + benchmark_sets: list[str], + systems: list[str], + submission_id: str, +) -> str: + """ + Generate a descriptive title for the submission. 
+ + Format rules: + - Single set, 1-3 systems: "OpenFE RBFE - set - sys1, sys2, sys3 - submission_id" + - Single set, 4+ systems: "OpenFE RBFE - set (N systems) - submission_id" + - 2-3 sets, any systems: "OpenFE RBFE - set1, set2 (N systems) - submission_id" + - 4+ sets: "OpenFE RBFE - Multi-set Benchmark (N sets, M systems) - submission_id" + """ + calc_type = "ASFE" if mode == "asfe" else "RBFE" + + n_sets = len(benchmark_sets) + n_systems = len(systems) + + if n_sets == 0: + # Fallback if no benchmark set detected + return f"OpenFE {calc_type} Benchmark - {submission_id}" + + if n_sets == 1: + set_name = benchmark_sets[0] + if n_systems <= 3: + # List system names + systems_str = ", ".join(systems) + return f"OpenFE {calc_type} - {set_name} - {systems_str} - {submission_id}" + else: + # Use count + return f"OpenFE {calc_type} - {set_name} ({n_systems} systems) - {submission_id}" + + if n_sets <= 3: + # List set names with system count + sets_str = ", ".join(benchmark_sets) + return ( + f"OpenFE {calc_type} - {sets_str} ({n_systems} systems) - {submission_id}" + ) + + # Many sets - use multi-set notation + return f"OpenFE {calc_type} - Multi-set Benchmark ({n_sets} sets, {n_systems} systems) - {submission_id}" + + def _iter_nested_items(obj: Any) -> list[tuple[str, Any]]: items: list[tuple[str, Any]] = [] if isinstance(obj, dict): @@ -176,6 +278,39 @@ def _quantity_to_text(value: Any) -> str: return str(value) +def _extract_system_info_from_mapping( + by_key: dict[str, dict[str, Any]], transformation_ref: Any +) -> tuple[str, str]: + """ + Extract system_group and system_name from LigandAtomMapping annotations. 
+ + Returns: + (system_group, system_name) tuple, or ("", "") if not found + """ + transformation = by_key.get(_require_ref_key(transformation_ref), {}) + mapping_ref = transformation.get("mapping") + if not mapping_ref: + return ("", "") + + mapping = by_key.get(_require_ref_key(mapping_ref), {}) + if mapping.get("__qualname__") != "LigandAtomMapping": + return ("", "") + + annotations = mapping.get("annotations") + if isinstance(annotations, str): + try: + annotations = json.loads(annotations) + except json.JSONDecodeError: + return ("", "") + + if not isinstance(annotations, dict): + return ("", "") + + system_group = annotations.get("system_group", "") + system_name = annotations.get("system_name", "") + return (str(system_group), str(system_name)) + + def _guess_network_descriptor(archive_stem: str, network_key: str) -> str: if archive_stem.startswith(f"{network_key}-"): return archive_stem[len(network_key) + 1 :] @@ -239,10 +374,6 @@ def _build_protocol_settings( settings = protocol_obj.get("settings") or {} - repeats = settings.get("protocol_repeats") - if repeats is not None: - out["repeats"] = str(repeats) - integrator_settings = settings.get("integrator_settings") or {} if isinstance(integrator_settings, dict): timestep = integrator_settings.get("timestep") @@ -302,10 +433,22 @@ def _build_protocol_settings( return out -def _render_protocol_settings_yaml(protocol_settings: dict[str, str]) -> str: +def _render_protocol_settings_yaml( + protocol_settings_list: list[ProtocolSettingsInfo], +) -> str: + """Render protocol settings as YAML, grouping by unique setting combinations.""" + if not protocol_settings_list: + return "protocol_settings:\n - protocol: unknown\n notes: Protocol settings unavailable." 
+ lines = ["protocol_settings:"] - protocol_name = protocol_settings.get("protocol", "") - lines.append(f" - protocol: {protocol_name}") + + # Group identical protocol settings and collect source metadata + settings_groups: dict[str, list[ProtocolSettingsInfo]] = defaultdict(list) + + for info in protocol_settings_list: + # Create a hashable key from the settings only + settings_key = json.dumps(info.settings, sort_keys=True) + settings_groups[settings_key].append(info) preferred_order = [ "production_time", @@ -317,20 +460,49 @@ def _render_protocol_settings_yaml(protocol_settings: dict[str, str]) -> str: "timestep", "temperature", "pressure", - "repeats", "lambda_windows", "lambda_schedule", "notes", ] - for key in preferred_order: - if key in protocol_settings: - lines.append(f" {key}: {json.dumps(str(protocol_settings[key]))}") - - for key in sorted( - k for k in protocol_settings if k not in set(preferred_order) | {"protocol"} - ): - lines.append(f" {key}: {json.dumps(str(protocol_settings[key]))}") + for idx, (settings_key, info_list) in enumerate(settings_groups.items()): + settings = info_list[0].settings + protocol_name = settings.get("protocol", "") + lines.append(f" - protocol: {protocol_name}") + + # Add count and source information + if len(protocol_settings_list) > 1: + lines.append(f" count: {len(info_list)}") + + # Collect unique benchmark systems + systems = sorted( + set( + f"{info.benchmark_set}/{info.benchmark_system}" + for info in info_list + if info.benchmark_set or info.benchmark_system + ) + ) + if systems: + systems_str = ", ".join(systems) + lines.append(f" systems: {json.dumps(systems_str)}") + + # Collect source files (just filenames, not full paths) + files = sorted(set(Path(info.source_file).name for info in info_list)) + if files and len(files) <= 5: # Only show if not too many + files_str = ", ".join(files) + lines.append(f" files: {json.dumps(files_str)}") + elif files: + lines.append(f" files: {json.dumps(f'{len(files)} files')}") 
+ + # Add protocol settings + for key in preferred_order: + if key in settings: + lines.append(f" {key}: {json.dumps(str(settings[key]))}") + + for key in sorted( + k for k in settings if k not in set(preferred_order) | {"protocol"} + ): + lines.append(f" {key}: {json.dumps(str(settings[key]))}") return "\n".join(lines) @@ -373,7 +545,7 @@ def _component_atoms(component: dict[str, Any]) -> int: def _repeat_stats_summary(repeat_counts: list[int]) -> str: if not repeat_counts: - return "repeats stats unavailable" + return "described below" mean_repeats = statistics.fmean(repeat_counts) median_repeats = statistics.median(repeat_counts) @@ -389,42 +561,62 @@ def _repeat_stats_summary(repeat_counts: list[int]) -> str: def _build_content_summary( by_key: dict[str, dict[str, Any]], - archive_obj: dict[str, Any] | None, - network_obj: dict[str, Any] | None, + archive_objs: list[dict[str, Any]], + network_objs: list[dict[str, Any]], mode: str, benchmark_data_set: str, forcefield: str, partial_charges: str, -) -> str: - transformation_results = ( - archive_obj.get("transformation_results", []) if archive_obj else [] - ) - repeat_counts: list[int] = [] - max_atoms_per_system = 0 + used_alchemiscale: bool = True, + source_files: list[str] = None, +) -> tuple[str, list[SystemInfo]]: + """ + Build content summary and extract per-system information. 
- solutes: set[str] = set() - solvents: set[str] = set() + Parameters + ---------- + archive_objs: List of AlchemicalArchive objects + network_objs: List of AlchemicalNetwork objects - ligands: set[str] = set() - proteins: set[str] = set() - cofactors: set[str] = set() - systems_with_cofactors: set[str] = set() + Returns: + (summary_text, list of SystemInfo objects) + """ + if source_files is None: + source_files = [] - visited_systems_for_cofactors: set[str] = set() + repeat_counts: list[int] = [] + transformation_refs: list[Any] = [] - if archive_obj is not None: - transformation_refs = [ + # Collect transformation refs from all archives and networks + for archive_obj in archive_objs: + transformation_results = archive_obj.get("transformation_results", []) + trans_refs = [ item[0] for item in transformation_results if isinstance(item, list) and len(item) == 2 ] - repeat_counts = [ - len(item[1]) - for item in transformation_results - if isinstance(item, list) and len(item) == 2 - ] - else: - transformation_refs = _transformation_refs(archive_obj, network_obj) + transformation_refs.extend(trans_refs) + repeat_counts.extend( + [ + len(item[1]) + for item in transformation_results + if isinstance(item, list) and len(item) == 2 + ] + ) + + for network_obj in network_objs: + edges = network_obj.get("edges") or [] + if isinstance(edges, list): + transformation_refs.extend(edges) + + # Per-system tracking + system_data: dict[ + tuple[str, str], SystemInfo + ] = {} # key: (system_group, system_name) + + visited_systems_for_cofactors: set[str] = set() + all_cofactors: set[str] = set() + systems_with_cofactors: set[str] = set() transformation_count = len(transformation_refs) @@ -433,6 +625,23 @@ def _build_content_summary( if not transformation: continue + # Extract system info from mapping annotations + system_group, system_name = _extract_system_info_from_mapping( + by_key, transformation_ref + ) + + # Initialize SystemInfo if first time seeing this system + system_key = 
(system_group, system_name) + if system_key not in system_data: + system_data[system_key] = SystemInfo( + system_group=system_group, + system_name=system_name, + n_transformations=0, + ) + + system_info = system_data[system_key] + system_info.n_transformations += 1 + for state_key in ("stateA", "stateB"): cs_key, chemical_system = _resolve_payload( by_key, transformation.get(state_key) @@ -460,26 +669,26 @@ def _build_content_summary( if mode == "asfe": if "solvent" in label_l or "solventcomponent" in qualname.lower(): - solvents.add(comp_name) + system_info.solvents.add(comp_name) elif "solute" in label_l or qualname == "SmallMoleculeComponent": - solutes.add(comp_name) + system_info.solutes.add(comp_name) else: if "protein" in label_l or qualname == "ProteinComponent": - proteins.add(comp_name) + system_info.proteins.add(comp_name) elif "ligand" in label_l: - ligands.add(comp_name) + system_info.ligands.add(comp_name) elif "cofactor" in label_l: - cofactors.add(comp_name) + all_cofactors.add(comp_name) local_cofactors.add(comp_name) elif ( qualname == "SmallMoleculeComponent" and "solvent" not in label_l ): # Non-solvent small molecules that are not explicit ligands are treated as cofactors. 
- cofactors.add(comp_name) + all_cofactors.add(comp_name) local_cofactors.add(comp_name) - max_atoms_per_system = max(max_atoms_per_system, system_atoms) + system_info.max_atoms = max(system_info.max_atoms, system_atoms) if ( mode == "rbfe" @@ -490,35 +699,90 @@ def _build_content_summary( if local_cofactors: systems_with_cofactors.add(cs_key) - repeats_text = _repeat_stats_summary(repeat_counts) + # Add source files to system_info + for system_info in system_data.values(): + system_info.files = set(source_files) - subject = benchmark_data_set or "benchmark" field_info = forcefield or "an unspecified force field" charge_info = partial_charges or "unspecified partial charges" - if mode == "rbfe": - cofactor_list = ", ".join(sorted(cofactors)) if cofactors else "none" - summary_parts = [ - f"This submission describes the {subject} RBFE benchmark prepared with {field_info} and {charge_info}.", - f"The network contains {transformation_count} transformations across {len(ligands)} unique ligands and {len(proteins)} unique proteins.", + + # Group systems by benchmark set for explicit listing + sets_to_systems: dict[str, list[str]] = defaultdict(list) + for si in system_data.values(): + if si.system_group and si.system_name: + sets_to_systems[si.system_group].append(si.system_name) + + # Sort systems within each set + for systems_list in sets_to_systems.values(): + systems_list.sort() + + unique_sets = sorted(sets_to_systems.keys()) + + # Build descriptive subject line + if len(unique_sets) == 0: + subject = benchmark_data_set or "benchmark" + elif len(unique_sets) == 1: + subject = unique_sets[0] + else: + subject = ", ".join(unique_sets) + + # Build explicit systems description: "set1: sys1, sys2; set2: sys3, sys4" + if len(unique_sets) > 1: + set_descriptions = [ + f"{set_name}: {', '.join(sets_to_systems[set_name])}" + for set_name in unique_sets ] + systems_desc = "; ".join(set_descriptions) + elif len(unique_sets) == 1: + systems_desc = ", 
".join(sets_to_systems[unique_sets[0]]) + else: + systems_desc = f"{len(system_data)} systems" + + # Count totals across all systems + total_ligands = sum(len(si.ligands) for si in system_data.values()) + total_proteins = sum(len(si.proteins) for si in system_data.values()) + total_solutes = sum(len(si.solutes) for si in system_data.values()) + total_solvents = sum(len(si.solvents) for si in system_data.values()) + max_atoms_overall = max((si.max_atoms for si in system_data.values()), default=0) + + # Build summary + if mode == "rbfe": + cofactor_list = ", ".join(sorted(all_cofactors)) if all_cofactors else "none" + if len(system_data) > 1: + summary_parts = [ + f"This submission describes the {subject} RBFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", + f"The submission contains {transformation_count} transformations, {total_ligands} unique ligands, and {total_proteins} unique proteins.", + ] + else: + summary_parts = [ + f"This submission describes the {subject} RBFE benchmark prepared with {field_info} and {charge_info}.", + f"The network contains {transformation_count} transformations across {total_ligands} unique ligands and {total_proteins} unique proteins.", + ] if systems_with_cofactors: summary_parts.append( f"{len(systems_with_cofactors)} systems include cofactors ({cofactor_list})." 
) else: - summary_parts = [ - f"This submission describes the {subject} ASFE benchmark prepared with {field_info} and {charge_info}.", - f"The archive contains {transformation_count} transformations across {len(solutes)} unique solutes and {len(solvents)} unique solvents.", - ] + if len(system_data) > 1: + summary_parts = [ + f"This submission describes the {subject} ASFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", + f"The submission contains {transformation_count} transformations, {total_solutes} unique solutes, and {total_solvents} unique solvents.", + ] + else: + summary_parts = [ + f"This submission describes the {subject} ASFE benchmark prepared with {field_info} and {charge_info}.", + f"The archive contains {transformation_count} transformations across {total_solutes} unique solutes and {total_solvents} unique solvents.", + ] summary_parts.append( - f"The largest simulated chemical system contains {max_atoms_per_system} atoms, and repeat counts per transformation are {repeats_text}." - ) - summary_parts.append( - "Results are derived from archived Alchemiscale workflow data." + f"The largest simulated chemical system contains {max_atoms_overall} atoms." ) + if used_alchemiscale: + summary_parts.append( + "Results are derived from archived Alchemiscale workflow data." 
+ ) summary_text = " ".join(summary_parts) - return textwrap.fill(summary_text, width=100) + return textwrap.fill(summary_text, width=100), list(system_data.values()) def _extract_auto_metadata( @@ -634,7 +898,17 @@ def _extract_auto_metadata( elif charge_method: metadata.partial_charges = str(charge_method) - metadata.protocol_settings = _build_protocol_settings(protocol_obj, mode) + # Build protocol settings for this archive with source metadata + protocol_settings = _build_protocol_settings(protocol_obj, mode) + protocol_info = ProtocolSettingsInfo( + settings=protocol_settings, + source_file=str(archive_path), + benchmark_set=metadata.benchmark_data_set, + benchmark_system=metadata.benchmark_system, + network_key=network_key, + ) + metadata.protocol_settings_list = [protocol_info] + return metadata @@ -707,14 +981,23 @@ def _make_submission_yaml( archive_doi: str, archive_provider: str, license_name: str, - protocol_settings: dict[str, str], + protocol_settings_list: list[ProtocolSettingsInfo], + network_key_to_systems: dict[str, list[str]], ) -> str: if not authors: - authors = ["TODO: add author name"] + authors = ["TODO add author name"] tags_yaml = ", ".join(tags) authors_yaml = "\n".join(f" - name: {name}" for name in authors) - protocol_settings_yaml = _render_protocol_settings_yaml(protocol_settings) + protocol_settings_yaml = _render_protocol_settings_yaml(protocol_settings_list) + + # Render network_key to systems mapping + network_keys_yaml = "" + if network_key_to_systems: + network_keys_yaml = "\n# Network keys to systems mapping\nnetwork_keys:\n" + for network_key in sorted(network_key_to_systems.keys()): + systems = ", ".join(network_key_to_systems[network_key]) + network_keys_yaml += f" {json.dumps(network_key)}: {json.dumps(systems)}\n" return f"""# REQUIRED: unique, kebab-case identifier for this submission submission_id: {submission_id} @@ -744,14 +1027,13 @@ def _make_submission_yaml( # Recommended descriptors forcefield: {forcefield} 
partial_charges: {partial_charges} -network: {network_descriptor} # BenchmarkData provenance (from openfe-benchmarks planning script) benchmark_data: source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks set: {benchmark_data_set} system: {benchmark_system} - +{network_keys_yaml} # REQUIRED: long-term archive pointer (at least doi or url) archive: doi: {archive_doi} @@ -785,16 +1067,79 @@ def _make_zenodo_description( n_transformations: int, submission_yaml_file: str, license_name: str, - protocol_settings: dict[str, str], + protocol_settings_list: list[ProtocolSettingsInfo], + has_archive_objects: bool, + used_alchemiscale: bool, + system_info_list: list[SystemInfo] = None, + network_key_to_systems: dict[str, list[str]] = None, ) -> str: content_kind = "ASFE" if mode == "asfe" else "RBFE" - openmm_display = openmm_version or "" - protocol_lines = ( - "\n".join( - [f"- {k}: {v}" for k, v in protocol_settings.items() if k != "protocol"] + openmm_display = openmm_version or "" + + # Determine the source type for the overview text + if has_archive_objects: + source_description = "AlchemicalArchive" + else: + source_description = "AlchemicalNetwork" + + # Build workflow description + workflow_text = "OpenFE" + if used_alchemiscale: + workflow_text += " and Alchemiscale" + + if system_info_list is None: + system_info_list = [] + + # Group protocol settings by unique combinations + settings_groups: dict[str, list[ProtocolSettingsInfo]] = defaultdict(list) + for info in protocol_settings_list: + settings_key = json.dumps(info.settings, sort_keys=True) + settings_groups[settings_key].append(info) + + # Build protocol settings section + protocol_blocks: list[str] = [] + for idx, (settings_key, info_list) in enumerate(settings_groups.items()): + protocol_settings = info_list[0].settings + + if len(settings_groups) > 1: + protocol_blocks.append(f"\n### Protocol Settings Group {idx + 1}") + + # Add system information + systems = sorted( + set( + 
f"{info.benchmark_set}/{info.benchmark_system}" + for info in info_list + if info.benchmark_set or info.benchmark_system + ) ) - or "- notes: Protocol settings unavailable" - ) + if systems: + protocol_blocks.append( + f"**Systems ({len(info_list)} networks):** {', '.join(systems)}" + ) + + protocol_lines = "\n".join( + [f"- {k}: {v}" for k, v in protocol_settings.items()] + ) + if not protocol_lines: + protocol_lines = "- notes: Protocol settings unavailable" + protocol_blocks.append(protocol_lines) + + protocol_section = "\n".join(protocol_blocks) + + # Per-system details section removed per user request + system_details_section = "" + + # Build network keys to systems mapping section + # Only show for AlchemicalArchive files (network_key_to_systems will be populated) + # For AlchemicalNetwork files, the "name" field is not a meaningful network key + network_keys_section = "" + if network_key_to_systems: + network_keys_lines = [] + for network_key_item in sorted(network_key_to_systems.keys()): + systems = ", ".join(network_key_to_systems[network_key_item]) + network_keys_lines.append(f" - {network_key_item}: {systems}") + if network_keys_lines: + network_keys_section = "\n- network keys:\n" + "\n".join(network_keys_lines) return f"""# {title} @@ -802,7 +1147,7 @@ def _make_zenodo_description( ## Overview -{content_kind} benchmark results prepared from an AlchemicalArchive generated with OpenFE and Alchemiscale. +{content_kind} benchmark results prepared from {source_description} JSON file(s) generated with {workflow_text}. 
{content_summary} @@ -816,40 +1161,19 @@ def _make_zenodo_description( - forcefield: {forcefield} - partial_charges: {partial_charges} -- network: {network_descriptor} ## BenchmarkData provenance - source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks -- set: {benchmark_data_set} -- system: {benchmark_system} +- benchmark_set: {benchmark_data_set} +- systems: {benchmark_system} ## Protocol settings -- protocol: {protocol_settings.get("protocol", "unknown")} -{protocol_lines} - -- archive file: {archive_filename} -- network key: {network_key} - -## Contents +{protocol_section} -### Data files - -- {submission_yaml_file}: submission metadata -- zenodo_description.md: Zenodo metadata description - -### Network summary - -- total transformations in archive: {n_transformations} - -## Simulation details - -- generation workflow: network prepared with plan_*.py scripts from openfe-benchmarks and then archived from Alchemiscale - -## Changelog - -- Generated by prepare_archive_submission.py on {date.today().isoformat()} +- archive file: {archive_filename}{network_keys_section} +{system_details_section} ## Rights @@ -858,20 +1182,25 @@ def _make_zenodo_description( def process_network( - input_file: Path, + input_files: Path | list[Path] | str, output_dir: Path = Path("."), submission_id: str | None = None, keywords: str = "openfe,alchemicalarchive", author: list[str] | None = None, license: str = "CC-BY-4.0", + used_alchemiscale: bool = True, + summary_suffix: str | None = None, ) -> tuple[Path, Path]: - """Generate submission metadata from an archived OpenFE JSON network. + """Generate submission metadata from one or more archived OpenFE JSON networks. Parameters ---------- - input_file: - Path to the AlchemicalArchive or AlchemicalNetwork JSON file. Supported - extensions are `.json`, `.bz2`, and `.json.bz2`. 
+ input_files: + Path to a single AlchemicalArchive/AlchemicalNetwork JSON file, a list + of such files, or a glob pattern string (e.g., "networks/*/*.json"). + Supported extensions are `.json`, `.bz2`, and `.json.bz2`. + When multiple files are provided, protocol settings from each are collected + and grouped in the output. output_dir: Directory where `submission.yaml` and `zenodo_description.md` will be written. Defaults to the current working directory. @@ -888,6 +1217,22 @@ def process_network( treated as a raw string and written to the `authors` section. license: License string to write into the submission metadata. + used_alchemiscale: + Whether Alchemiscale was used to generate the results. If True, the + description will mention Alchemiscale. Defaults to True. + submission_id: + Optional identifier to use in `submission.yaml`. If omitted, a default + value is generated from the current date and network key. + keywords: + Comma-separated list of additional tags to include in the submission + metadata. The generated tag list also always includes the detected + `mode` (either ``asfe`` or ``rbfe``), the resolved forcefield string, + and normalized partial charge information. + author: + Optional list of author entries for the submission YAML. Each entry is + treated as a raw string and written to the `authors` section. + license: + License string to write into the submission metadata. Notes ----- @@ -900,56 +1245,237 @@ def process_network( tuple[Path, Path] Paths to the generated `submission.yaml` and `zenodo_description.md`. 
""" - input_path = input_file.resolve() - if not input_path.exists(): - raise FileNotFoundError(f"Input file not found: {input_path}") + # Normalize input to list and expand glob patterns + if isinstance(input_files, str): + # Glob pattern + matched_files = glob_module.glob(input_files, recursive=True) + if not matched_files: + raise ValueError(f"No files matched glob pattern: {input_files}") + input_paths = [Path(f) for f in sorted(matched_files)] + elif isinstance(input_files, Path): + input_paths = [input_files] + else: + input_paths = input_files + + if not input_paths: + raise ValueError("At least one input file must be provided") + + # Validate all input files exist + for input_path in input_paths: + resolved_path = input_path.resolve() + if not resolved_path.exists(): + raise FileNotFoundError(f"Input file not found: {resolved_path}") out_dir = output_dir.resolve() out_dir.mkdir(parents=True, exist_ok=True) - by_key, archive_obj, network_obj = _load_token_table(input_path) - mode = _detect_mode(by_key, archive_obj, network_obj) + # Process all input files and collect metadata + all_metadata: list[AutoMetadata] = [] + all_by_key: dict[str, dict[str, Any]] = {} + all_archive_objs: list[dict[str, Any]] = [] + all_network_objs: list[dict[str, Any]] = [] + all_network_keys: list[str] = [] + modes: set[str] = set() - network_key = _get_network_key(archive_obj, network_obj) + for input_path in input_paths: + resolved_path = input_path.resolve() - archive_stem = input_path.name - for suffix in (".json.bz2", ".bz2", ".json"): - if archive_stem.endswith(suffix): - archive_stem = archive_stem[: -len(suffix)] - break + by_key, archive_obj, network_obj = _load_token_table(resolved_path) + mode = _detect_mode(by_key, archive_obj, network_obj) + modes.add(mode) - auto_metadata = _extract_auto_metadata( - by_key=by_key, - mode=mode, - archive_path=input_path, - network_key=network_key, - archive_stem=archive_stem, - ) + network_key = _get_network_key(archive_obj, 
network_obj) + all_network_keys.append(network_key) + + archive_stem = resolved_path.name + for suffix in (".json.bz2", ".bz2", ".json"): + if archive_stem.endswith(suffix): + archive_stem = archive_stem[: -len(suffix)] + break + + metadata = _extract_auto_metadata( + by_key=by_key, + mode=mode, + archive_path=resolved_path, + network_key=network_key, + archive_stem=archive_stem, + ) + all_metadata.append(metadata) + + # Accumulate objects for summary generation + all_by_key.update(by_key) + if archive_obj: + all_archive_objs.append(archive_obj) + if network_obj: + all_network_objs.append(network_obj) - openfe_version = auto_metadata.openfe_version - openmm_version = auto_metadata.openmm_version - openff_toolkit_version = auto_metadata.openff_toolkit_version - forcefield = auto_metadata.forcefield - partial_charges_raw = auto_metadata.partial_charges + # Check consistency + if len(modes) > 1: + raise ValueError( + f"Mixed modes detected across input files: {modes}. All files must be either ASFE or RBFE." 
+ ) + + mode = modes.pop() + + # Merge metadata from all files + merged_metadata = AutoMetadata() + + # Collect protocol settings from all files + for metadata in all_metadata: + merged_metadata.protocol_settings_list.extend(metadata.protocol_settings_list) + + # Use first non-empty value for scalar fields + if not merged_metadata.openfe_version and metadata.openfe_version: + merged_metadata.openfe_version = metadata.openfe_version + if not merged_metadata.openmm_version and metadata.openmm_version: + merged_metadata.openmm_version = metadata.openmm_version + if ( + not merged_metadata.openff_toolkit_version + and metadata.openff_toolkit_version + ): + merged_metadata.openff_toolkit_version = metadata.openff_toolkit_version + if not merged_metadata.forcefield and metadata.forcefield: + merged_metadata.forcefield = metadata.forcefield + if not merged_metadata.partial_charges and metadata.partial_charges: + merged_metadata.partial_charges = metadata.partial_charges + if not merged_metadata.network_descriptor and metadata.network_descriptor: + merged_metadata.network_descriptor = metadata.network_descriptor + if not merged_metadata.benchmark_data_set and metadata.benchmark_data_set: + merged_metadata.benchmark_data_set = metadata.benchmark_data_set + if not merged_metadata.benchmark_system and metadata.benchmark_system: + merged_metadata.benchmark_system = metadata.benchmark_system + + # Use merged data for outputs + openfe_version = merged_metadata.openfe_version + openmm_version = merged_metadata.openmm_version + openff_toolkit_version = merged_metadata.openff_toolkit_version + forcefield = merged_metadata.forcefield + partial_charges_raw = merged_metadata.partial_charges partial_charge_tag = _normalize_partial_charge_info(partial_charges_raw) partial_charges = partial_charge_tag or partial_charges_raw - network_descriptor = auto_metadata.network_descriptor + network_descriptor = merged_metadata.network_descriptor - content_summary = _build_content_summary( - 
by_key, - archive_obj, - network_obj, + # Build content summary from combined data + # Get list of source file names + source_file_names = [p.name for p in input_paths] + + content_summary, system_info_list = _build_content_summary( + all_by_key, + all_archive_objs, + all_network_objs, mode, - auto_metadata.benchmark_data_set, + merged_metadata.benchmark_data_set, forcefield, partial_charges, + used_alchemiscale, + source_file_names, ) - benchmark_data_set = auto_metadata.benchmark_data_set - benchmark_system = auto_metadata.benchmark_system + # Append additional summary text if provided + if summary_suffix: + content_summary = content_summary.rstrip() + " " + summary_suffix.strip() + + # Store system_info_list in merged_metadata + merged_metadata.system_info_list = system_info_list + + # Build network_key to systems mapping (only for AlchemicalArchive files) + # For AlchemicalNetwork files, the "name" field is not a meaningful network key + network_key_to_systems: dict[str, list[str]] = defaultdict(list) + has_archive_objects = len(all_archive_objs) > 0 + + # Override benchmark_data_set and benchmark_system from system_info_list if available + if system_info_list: + # Use system_group from system info (more reliable than string matching) + system_groups = set( + si.system_group for si in system_info_list if si.system_group + ) + if len(system_groups) == 1: + merged_metadata.benchmark_data_set = system_groups.pop() + elif len(system_groups) > 1: + # Multiple groups - list them + merged_metadata.benchmark_data_set = ", ".join(sorted(system_groups)) + + # List all system names + system_names = sorted( + set(si.system_name for si in system_info_list if si.system_name) + ) + if system_names: + merged_metadata.benchmark_system = ", ".join(system_names) + + # Update protocol_settings_list with correct system info + # For each input file, extract system info from first transformation + file_to_system: dict[str, tuple[str, str]] = {} + + for idx, input_path in 
enumerate(input_paths): + by_key, archive_obj, network_obj = _load_token_table(input_path) + transformation_refs = _transformation_refs(archive_obj, network_obj) + if transformation_refs: + system_group, system_name = _extract_system_info_from_mapping( + by_key, transformation_refs[0] + ) + file_to_system[str(input_path)] = (system_group, system_name) + + # Build network_key to systems mapping only for AlchemicalArchive files + if has_archive_objects and archive_obj: + network_key = all_network_keys[idx] + system_path = ( + f"{system_group}/{system_name}" + if system_group and system_name + else system_name + ) + if ( + system_path + and system_path not in network_key_to_systems[network_key] + ): + network_key_to_systems[network_key].append(system_path) + + # Update each protocol settings info with correct system data + for protocol_info in merged_metadata.protocol_settings_list: + # protocol_info.source_file is the full path + if protocol_info.source_file in file_to_system: + group, name = file_to_system[protocol_info.source_file] + protocol_info.benchmark_set = group + protocol_info.benchmark_system = name + + # Extract these AFTER overriding from system_info_list + benchmark_data_set = merged_metadata.benchmark_data_set + benchmark_system = merged_metadata.benchmark_system + + # Calculate total transformations across all files + total_transformations = 0 + for archive_obj in all_archive_objs: + total_transformations += len(_transformation_refs(archive_obj, None)) + for network_obj in all_network_objs: + if not any( + net_obj == network_obj + for net_obj in [a.get("network") for a in all_archive_objs if a] + ): + total_transformations += len(_transformation_refs(None, network_obj)) + + # Use primary network key for submission ID + primary_network_key = all_network_keys[0] if all_network_keys else "unknown" + + # Generate a descriptive title + submission_id_str = submission_id or _default_submission_id(primary_network_key) + + # Extract unique benchmark sets and 
systems for title + unique_sets = [] + if system_info_list: + unique_sets = sorted( + set(si.system_group for si in system_info_list if si.system_group) + ) + if not unique_sets and benchmark_data_set: + unique_sets = [s.strip() for s in benchmark_data_set.split(",")] + + unique_systems = [] + if system_info_list: + unique_systems = sorted( + set(si.system_name for si in system_info_list if si.system_name) + ) + elif benchmark_system: + unique_systems = [s.strip() for s in benchmark_system.split(",")] - # Requested behavior: always leave title for manual curation. - title = "TODO: add title" + title = _generate_title(mode, unique_sets, unique_systems, submission_id_str) submission_yaml_filename = "submission.yaml" zenodo_description_filename = "zenodo_description.md" @@ -957,7 +1483,7 @@ def process_network( submission_yaml_path = out_dir / submission_yaml_filename zenodo_description_path = out_dir / zenodo_description_filename - submission_id = submission_id or _default_submission_id(network_key) + submission_id = submission_id or _default_submission_id(primary_network_key) keywords_list = [k.strip() for k in keywords.split(",") if k.strip()] tags = _make_tags( mode=mode, @@ -980,18 +1506,22 @@ def process_network( network_descriptor=network_descriptor, benchmark_data_set=benchmark_data_set, benchmark_system=benchmark_system, - archive_doi="TODO: add DOI", - archive_provider="TODO: add archive provider", + archive_doi="TODO add DOI", + archive_provider="TODO add archive provider", license_name=license, - protocol_settings=auto_metadata.protocol_settings, + protocol_settings_list=merged_metadata.protocol_settings_list, + network_key_to_systems=network_key_to_systems, ) submission_yaml_path.write_text(submission_yaml_text) + # For Zenodo description, list all input files + archive_filenames = ", ".join(p.name for p in input_paths) + zenodo_description_text = _make_zenodo_description( title=title, summary=content_summary, - archive_filename=input_path.name, - 
network_key=network_key, + archive_filename=archive_filenames, + network_key=primary_network_key, mode=mode, content_summary=content_summary, openfe_version=openfe_version, @@ -1003,16 +1533,159 @@ def process_network( tags=tags, benchmark_data_set=benchmark_data_set, benchmark_system=benchmark_system, - n_transformations=len(_transformation_refs(archive_obj, network_obj)), + n_transformations=total_transformations, submission_yaml_file=submission_yaml_filename, license_name=license, - protocol_settings=auto_metadata.protocol_settings, + protocol_settings_list=merged_metadata.protocol_settings_list, + has_archive_objects=has_archive_objects, + used_alchemiscale=used_alchemiscale, + system_info_list=system_info_list, + network_key_to_systems=network_key_to_systems, ) zenodo_description_path.write_text(zenodo_description_text) - print(f"Input file: {input_path}") + print(f"Processed {len(input_paths)} input file(s)") print(f"Detected mode: {mode}") print(f"Submission YAML: {submission_yaml_path}") print(f"Zenodo description: {zenodo_description_path}") return submission_yaml_path, zenodo_description_path + + +def main(): + """CLI entry point for prepare_metadata_submission.""" + parser = argparse.ArgumentParser( + description="Generate submission.yaml and zenodo_description.md from OpenFE JSON archives", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(""" + Examples: + # Single archive file + %(prog)s archive.json.bz2 + + # Multiple archive files + %(prog)s archive1.json.bz2 archive2.json.bz2 --output-dir ./results + + # Glob pattern + %(prog)s "networks/*/*.json" --output-dir ./results + + # Multiple glob patterns + %(prog)s "charge_changes/*/*.json" "jacs_set/*/*.json" + + # Full example with all options + %(prog)s "networks/*/*.json" \\ + --output-dir ./output \\ + --submission-id "2026-06-03-tyk2-rbfe" \\ + --keywords "openfe,rbfe,tyk2" \\ + --author "Jane Doe" \\ + --author "John Smith" \\ + --license "CC-BY-4.0" + """), + ) + + 
parser.add_argument( + "input_patterns", + type=str, + nargs="+", + metavar="INPUT", + help="One or more file paths or glob patterns (e.g., 'networks/*/*.json'). " + "Glob patterns support * and ** wildcards.", + ) + + parser.add_argument( + "-o", + "--output-dir", + type=Path, + default=Path("."), + help="Output directory for submission.yaml and zenodo_description.md (default: current directory)", + ) + + parser.add_argument( + "-s", + "--submission-id", + type=str, + default=None, + help="Submission ID (default: auto-generated from date and network key)", + ) + + parser.add_argument( + "-k", + "--keywords", + type=str, + default="openfe,alchemicalarchive", + help="Comma-separated keywords/tags (default: 'openfe,alchemicalarchive')", + ) + + parser.add_argument( + "-a", + "--author", + type=str, + action="append", + dest="authors", + help="Author name (can be specified multiple times)", + ) + + parser.add_argument( + "-l", + "--license", + type=str, + default="CC-BY-4.0", + help="License identifier (default: CC-BY-4.0)", + ) + + parser.add_argument( + "--no-alchemiscale", + action="store_true", + help="Indicate that Alchemiscale was NOT used to generate the results", + ) + + parser.add_argument( + "--summary-suffix", + type=str, + default=None, + help="Additional text to append to the auto-generated summary", + ) + + args = parser.parse_args() + + # Expand glob patterns and collect all matching files + all_files: list[Path] = [] + for pattern in args.input_patterns: + matched = glob_module.glob(pattern, recursive=True) + if matched: + all_files.extend(Path(f) for f in sorted(matched)) + else: + # If no glob match, treat as literal file path + all_files.append(Path(pattern)) + + if not all_files: + print("Error: No input files found", file=sys.stderr) + return 1 + + # Remove duplicates while preserving order + seen = set() + unique_files = [] + for f in all_files: + if f not in seen: + seen.add(f) + unique_files.append(f) + + try: + process_network( + 
input_files=unique_files, + output_dir=args.output_dir, + submission_id=args.submission_id, + keywords=args.keywords, + author=args.authors, + license=args.license, + used_alchemiscale=not args.no_alchemiscale, + summary_suffix=args.summary_suffix, + ) + print("\n✓ Successfully generated submission metadata") + return 0 + except Exception as e: + print(f"\n✗ Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From 2afa0b4f5ebe677b9f9691c55646019f968639a0 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Thu, 4 Jun 2026 10:10:59 -0400 Subject: [PATCH 08/24] Update submission scripts --- .../run_prepare_metadata.sh | 2 +- .../submission.yaml | 5 +- .../scripts/prepare_metadata_submission.py | 210 +++++++++--------- 3 files changed, 108 insertions(+), 109 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh index 09cc1f6..1990cc9 100755 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh @@ -12,7 +12,7 @@ micromamba run -n openfe-benchmarks python \ "/Users/jenniferclark/OMSF/OpenFE/BenchmarkRepo/ResultSubmission/networks/*/*/*alchemicalnetwork.json" \ --output-dir . 
\ --submission-id "2026-03-18-openmm-840-qa-testing" \ - --keywords "charge_change, rbfe, benchmark, openfe, openmm-840" \ + --tags "charge_change, rbfe, benchmark, openfe, openmm-840" \ --author "Josh Horton" \ --license "CC-BY-4.0" \ --no-alchemiscale \ diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index b109f91..cf1799c 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -19,7 +19,7 @@ authors: - name: Josh Horton # REQUIRED: publication/submission date (ISO 8601) -date: 2026-06-03 +date: 2026-06-04 # REQUIRED: OpenFE/OpenMM/toolkit versions used to produce gathered reports openfe_version: 1.8.0 @@ -36,6 +36,9 @@ benchmark_data: set: charge_annihilation_set, jacs_set system: egfr, irak4_s2, irak4_s3, p38, tyk2 +# REQUIRED: results file +results: computational_results.json + # REQUIRED: long-term archive pointer (at least doi or url) archive: doi: TODO add DOI diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 2c268b9..51a98a9 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -15,7 +15,7 @@ input_files=[Path("archive1.json.bz2"), Path("archive2.json.bz2")], output_dir=Path("."), submission_id="2026-04-15-example", - keywords="openfe,alchemicalarchive", + tags="openfe,alchemicalarchive", author=["Jane Doe"], license="CC-BY-4.0", ) @@ -25,7 +25,7 @@ input_files="networks/*/*.json", output_dir=Path("."), submission_id="2026-04-15-example", - keywords="openfe,alchemicalarchive", + tags="openfe,alchemicalarchive", author=["Jane Doe"], license="CC-BY-4.0", ) @@ -36,7 +36,7 @@ python prepare_metadata_submission.py archive1.json.bz2 archive2.json.bz2 \\ --output-dir 
./output \\ --submission-id "2026-04-15-example" \\ - --keywords "openfe,alchemicalarchive" \\ + --tags "openfe,alchemicalarchive" \\ --author "Jane Doe" \\ --license "CC-BY-4.0" @@ -53,7 +53,6 @@ import glob as glob_module import json import re -import statistics import sys import textwrap from collections import defaultdict @@ -62,6 +61,8 @@ from pathlib import Path from typing import Any +from openfe_benchmarks.data import BenchmarkIndex + @dataclass class ProtocolSettingsInfo: @@ -96,7 +97,6 @@ class AutoMetadata: openff_toolkit_version: str = "" forcefield: str = "" partial_charges: str = "" - network_descriptor: str = "" benchmark_data_set: str = "" benchmark_system: str = "" protocol_settings_list: list[ProtocolSettingsInfo] = field(default_factory=list) @@ -230,34 +230,31 @@ def _generate_title( - 2-3 sets, any systems: "OpenFE RBFE - set1, set2 (N systems) - submission_id" - 4+ sets: "OpenFE RBFE - Multi-set Benchmark (N sets, M systems) - submission_id" """ - calc_type = "ASFE" if mode == "asfe" else "RBFE" - + mode = mode.upper() n_sets = len(benchmark_sets) n_systems = len(systems) if n_sets == 0: # Fallback if no benchmark set detected - return f"OpenFE {calc_type} Benchmark - {submission_id}" + return f"OpenFE {mode} Benchmark - {submission_id}" if n_sets == 1: set_name = benchmark_sets[0] if n_systems <= 3: # List system names systems_str = ", ".join(systems) - return f"OpenFE {calc_type} - {set_name} - {systems_str} - {submission_id}" + return f"OpenFE {mode} - {set_name} - {systems_str} - {submission_id}" else: # Use count - return f"OpenFE {calc_type} - {set_name} ({n_systems} systems) - {submission_id}" + return f"OpenFE {mode} - {set_name} ({n_systems} systems) - {submission_id}" if n_sets <= 3: # List set names with system count sets_str = ", ".join(benchmark_sets) - return ( - f"OpenFE {calc_type} - {sets_str} ({n_systems} systems) - {submission_id}" - ) + return f"OpenFE {mode} - {sets_str} ({n_systems} systems) - {submission_id}" # Many sets 
- use multi-set notation - return f"OpenFE {calc_type} - Multi-set Benchmark ({n_sets} sets, {n_systems} systems) - {submission_id}" + return f"OpenFE {mode} - Multi-set Benchmark ({n_sets} sets, {n_systems} systems) - {submission_id}" def _iter_nested_items(obj: Any) -> list[tuple[str, Any]]: @@ -311,39 +308,49 @@ def _extract_system_info_from_mapping( return (str(system_group), str(system_name)) -def _guess_network_descriptor(archive_stem: str, network_key: str) -> str: - if archive_stem.startswith(f"{network_key}-"): - return archive_stem[len(network_key) + 1 :] - if archive_stem.startswith("AlchemicalNetwork-"): - parts = archive_stem.split("-", maxsplit=2) - if len(parts) == 3: - return parts[2] - return archive_stem - - def _infer_benchmark_data_set_system( *, by_key: dict[str, dict[str, Any]], mode: str, archive_stem: str, network_key: str ) -> tuple[str, str]: + """Infer benchmark set and system from file contents using BenchmarkIndex. + + This searches for any known benchmark set or system name in the file's + metadata (filename, network key, and JSON contents). 
+ + Returns: + (benchmark_set, system_name) tuple, or ("", "") if not found + """ blob = json.dumps(list(by_key.values())).lower() - descriptor = _guess_network_descriptor(archive_stem, network_key).lower() - search_space = " ".join( - [blob, descriptor, archive_stem.lower(), network_key.lower()] - ) + search_space = " ".join([blob, archive_stem.lower(), network_key.lower()]) - system = "" - for candidate in ("freesolv", "tyk2", "mnsol"): - if candidate in search_space: - system = candidate - break + # Get all known benchmark sets and systems from the index + index = BenchmarkIndex() + benchmark_sets = index.list_benchmark_sets() + # Check for benchmark set matches benchmark_set = "" - if "jacs_set" in search_space or "jacs" in search_space: - benchmark_set = "jacs_set" - elif "solvation_set" in search_space or mode == "asfe": - benchmark_set = "solvation_set" + for set_name in benchmark_sets: + if set_name.lower() in search_space: + benchmark_set = set_name + break + + # Check for system name matches within the found set (or all sets if no set found) + system = "" + sets_to_check = [benchmark_set] if benchmark_set else benchmark_sets - if not system and mode == "asfe": - system = "freesolv" + for set_name in sets_to_check: + try: + systems = index.list_systems_by_benchmark_set(set_name) + for system_name in systems: + if system_name.lower() in search_space: + system = system_name + if not benchmark_set: + benchmark_set = set_name + break + if system: + break + except ValueError: + # Skip if benchmark set doesn't exist + continue return benchmark_set, system @@ -361,17 +368,18 @@ def _extract_sim_times(settings_block: dict[str, Any]) -> tuple[str, str]: def _build_protocol_settings( protocol_obj: dict[str, Any] | None, mode: str ) -> dict[str, str]: + if not protocol_obj: + return { + "protocol": "unknown", + "notes": "Protocol settings unavailable in archive payload.", + } + + # Detect protocol name from the object protocol_name = ( - "AbsoluteSolvationProtocol" - 
if mode == "asfe" - else "RelativeHybridTopologyProtocol" + protocol_obj.get("__qualname__") or protocol_obj.get("qualname") or "unknown" ) out: dict[str, str] = {"protocol": protocol_name} - if not protocol_obj: - out["notes"] = "Protocol settings unavailable in archive payload." - return out - settings = protocol_obj.get("settings") or {} integrator_settings = settings.get("integrator_settings") or {} @@ -413,7 +421,7 @@ def _build_protocol_settings( out["equilibration_time"] = eq if prod: out["production_time"] = prod - else: + elif mode == "asfe": for prefix, key in ( ("vacuum", "vacuum_simulation_settings"), ("solvent", "solvent_simulation_settings"), @@ -426,6 +434,10 @@ def _build_protocol_settings( out[f"{prefix}_equilibration_time"] = eq if prod: out[f"{prefix}_production_time"] = prod + else: + ValueError( + f"Calculation type {mode} is not yet supported. Add capability to `_build_protocol_settings`" + ) if len(out) == 1: out["notes"] = "Protocol class found, but detailed settings were unavailable." 
@@ -465,7 +477,7 @@ def _render_protocol_settings_yaml( "notes", ] - for idx, (settings_key, info_list) in enumerate(settings_groups.items()): + for _, (settings_key, info_list) in enumerate(settings_groups.items()): settings = info_list[0].settings protocol_name = settings.get("protocol", "") lines.append(f" - protocol: {protocol_name}") @@ -543,22 +555,6 @@ def _component_atoms(component: dict[str, Any]) -> int: return 0 -def _repeat_stats_summary(repeat_counts: list[int]) -> str: - if not repeat_counts: - return "described below" - - mean_repeats = statistics.fmean(repeat_counts) - median_repeats = statistics.median(repeat_counts) - dist: dict[int, int] = {} - for count in repeat_counts: - dist[count] = dist.get(count, 0) + 1 - dist_text = ", ".join(f"{k}:{v}" for k, v in sorted(dist.items())) - return ( - f"min={min(repeat_counts)}, median={median_repeats:g}, mean={mean_repeats:.2f}, " - f"max={max(repeat_counts)}, distribution={{ {dist_text} }}" - ) - - def _build_content_summary( by_key: dict[str, dict[str, Any]], archive_objs: list[dict[str, Any]], @@ -672,7 +668,7 @@ def _build_content_summary( system_info.solvents.add(comp_name) elif "solute" in label_l or qualname == "SmallMoleculeComponent": system_info.solutes.add(comp_name) - else: + elif mode == "rbfe": if "protein" in label_l or qualname == "ProteinComponent": system_info.proteins.add(comp_name) elif "ligand" in label_l: @@ -687,6 +683,10 @@ def _build_content_summary( # Non-solvent small molecules that are not explicit ligands are treated as cofactors. all_cofactors.add(comp_name) local_cofactors.add(comp_name) + else: + ValueError( + f"Calculation type {mode} is not yet supported. 
Add capability to `_build_content_summary`" + ) system_info.max_atoms = max(system_info.max_atoms, system_atoms) @@ -794,7 +794,6 @@ def _extract_auto_metadata( archive_stem: str, ) -> AutoMetadata: metadata = AutoMetadata() - metadata.network_descriptor = _guess_network_descriptor(archive_stem, network_key) metadata.benchmark_data_set, metadata.benchmark_system = ( _infer_benchmark_data_set_system( by_key=by_key, @@ -975,7 +974,6 @@ def _make_submission_yaml( openff_toolkit_version: str, forcefield: str, partial_charges: str, - network_descriptor: str, benchmark_data_set: str, benchmark_system: str, archive_doi: str, @@ -983,6 +981,7 @@ def _make_submission_yaml( license_name: str, protocol_settings_list: list[ProtocolSettingsInfo], network_key_to_systems: dict[str, list[str]], + results_file: str, ) -> str: if not authors: authors = ["TODO add author name"] @@ -1034,6 +1033,9 @@ def _make_submission_yaml( set: {benchmark_data_set} system: {benchmark_system} {network_keys_yaml} +# REQUIRED: results file +results: {results_file} + # REQUIRED: long-term archive pointer (at least doi or url) archive: doi: {archive_doi} @@ -1050,9 +1052,7 @@ def _make_submission_yaml( def _make_zenodo_description( *, title: str, - summary: str, archive_filename: str, - network_key: str, mode: str, content_summary: str, openfe_version: str, @@ -1060,12 +1060,8 @@ def _make_zenodo_description( openff_toolkit_version: str, forcefield: str, partial_charges: str, - network_descriptor: str, - tags: list[str], benchmark_data_set: str, benchmark_system: str, - n_transformations: int, - submission_yaml_file: str, license_name: str, protocol_settings_list: list[ProtocolSettingsInfo], has_archive_objects: bool, @@ -1185,11 +1181,12 @@ def process_network( input_files: Path | list[Path] | str, output_dir: Path = Path("."), submission_id: str | None = None, - keywords: str = "openfe,alchemicalarchive", + tags: str = "openfe,alchemicalarchive", author: list[str] | None = None, license: str = 
"CC-BY-4.0", used_alchemiscale: bool = True, summary_suffix: str | None = None, + results_file: str = "computational_results.json", ) -> tuple[Path, Path]: """Generate submission metadata from one or more archived OpenFE JSON networks. @@ -1207,7 +1204,7 @@ def process_network( submission_id: Optional identifier to use in `submission.yaml`. If omitted, a default value is generated from the current date and network key. - keywords: + tags: Comma-separated list of additional tags to include in the submission metadata. The generated tag list also always includes the detected `mode` (either ``asfe`` or ``rbfe``), the resolved forcefield string, @@ -1220,19 +1217,11 @@ def process_network( used_alchemiscale: Whether Alchemiscale was used to generate the results. If True, the description will mention Alchemiscale. Defaults to True. - submission_id: - Optional identifier to use in `submission.yaml`. If omitted, a default - value is generated from the current date and network key. - keywords: - Comma-separated list of additional tags to include in the submission - metadata. The generated tag list also always includes the detected - `mode` (either ``asfe`` or ``rbfe``), the resolved forcefield string, - and normalized partial charge information. - author: - Optional list of author entries for the submission YAML. Each entry is - treated as a raw string and written to the `authors` section. - license: - License string to write into the submission metadata. + summary_suffix: + Optional text to append to the auto-generated summary. + results_file: + Name of the results file to reference in submission.yaml and validate + exists in output_dir. Defaults to 'computational_results.json'. 
Notes ----- @@ -1269,6 +1258,13 @@ def process_network( out_dir = output_dir.resolve() out_dir.mkdir(parents=True, exist_ok=True) + # Check for required results file + results_path = out_dir / results_file + if not results_path.exists(): + raise FileNotFoundError( + f"Required file '{results_file}' not found in output directory: {out_dir}" + ) + # Process all input files and collect metadata all_metadata: list[AutoMetadata] = [] all_by_key: dict[str, dict[str, Any]] = {} @@ -1338,8 +1334,6 @@ def process_network( merged_metadata.forcefield = metadata.forcefield if not merged_metadata.partial_charges and metadata.partial_charges: merged_metadata.partial_charges = metadata.partial_charges - if not merged_metadata.network_descriptor and metadata.network_descriptor: - merged_metadata.network_descriptor = metadata.network_descriptor if not merged_metadata.benchmark_data_set and metadata.benchmark_data_set: merged_metadata.benchmark_data_set = metadata.benchmark_data_set if not merged_metadata.benchmark_system and metadata.benchmark_system: @@ -1353,7 +1347,6 @@ def process_network( partial_charges_raw = merged_metadata.partial_charges partial_charge_tag = _normalize_partial_charge_info(partial_charges_raw) partial_charges = partial_charge_tag or partial_charges_raw - network_descriptor = merged_metadata.network_descriptor # Build content summary from combined data # Get list of source file names @@ -1484,26 +1477,25 @@ def process_network( zenodo_description_path = out_dir / zenodo_description_filename submission_id = submission_id or _default_submission_id(primary_network_key) - keywords_list = [k.strip() for k in keywords.split(",") if k.strip()] - tags = _make_tags( + tags_list = [k.strip() for k in tags.split(",") if k.strip()] + tags_final = _make_tags( mode=mode, forcefield=forcefield, partial_charge_tag=partial_charges, - user_keywords=keywords_list, + user_keywords=tags_list, ) submission_yaml_text = _make_submission_yaml( submission_id=submission_id, 
title=title, summary=content_summary, - tags=tags, + tags=tags_final, authors=author or [], openfe_version=openfe_version, openmm_version=openmm_version, openff_toolkit_version=openff_toolkit_version, forcefield=forcefield, partial_charges=partial_charges, - network_descriptor=network_descriptor, benchmark_data_set=benchmark_data_set, benchmark_system=benchmark_system, archive_doi="TODO add DOI", @@ -1511,6 +1503,7 @@ def process_network( license_name=license, protocol_settings_list=merged_metadata.protocol_settings_list, network_key_to_systems=network_key_to_systems, + results_file=results_file, ) submission_yaml_path.write_text(submission_yaml_text) @@ -1519,9 +1512,7 @@ def process_network( zenodo_description_text = _make_zenodo_description( title=title, - summary=content_summary, archive_filename=archive_filenames, - network_key=primary_network_key, mode=mode, content_summary=content_summary, openfe_version=openfe_version, @@ -1529,12 +1520,8 @@ def process_network( openff_toolkit_version=openff_toolkit_version, forcefield=forcefield, partial_charges=partial_charges, - network_descriptor=network_descriptor, - tags=tags, benchmark_data_set=benchmark_data_set, benchmark_system=benchmark_system, - n_transformations=total_transformations, - submission_yaml_file=submission_yaml_filename, license_name=license, protocol_settings_list=merged_metadata.protocol_settings_list, has_archive_objects=has_archive_objects, @@ -1575,7 +1562,7 @@ def main(): %(prog)s "networks/*/*.json" \\ --output-dir ./output \\ --submission-id "2026-06-03-tyk2-rbfe" \\ - --keywords "openfe,rbfe,tyk2" \\ + --tags "openfe,rbfe,tyk2" \\ --author "Jane Doe" \\ --author "John Smith" \\ --license "CC-BY-4.0" @@ -1608,11 +1595,11 @@ def main(): ) parser.add_argument( - "-k", - "--keywords", + "-t", + "--tags", type=str, default="openfe,alchemicalarchive", - help="Comma-separated keywords/tags (default: 'openfe,alchemicalarchive')", + help="Comma-separated tags (default: 'openfe,alchemicalarchive')", 
) parser.add_argument( @@ -1645,6 +1632,14 @@ def main(): help="Additional text to append to the auto-generated summary", ) + parser.add_argument( + "-r", + "--results-file", + type=str, + default="computational_results.json", + help="Name of the results file in output directory (default: computational_results.json)", + ) + args = parser.parse_args() # Expand glob patterns and collect all matching files @@ -1674,11 +1669,12 @@ def main(): input_files=unique_files, output_dir=args.output_dir, submission_id=args.submission_id, - keywords=args.keywords, + tags=args.tags, author=args.authors, license=args.license, used_alchemiscale=not args.no_alchemiscale, summary_suffix=args.summary_suffix, + results_file=args.results_file, ) print("\n✓ Successfully generated submission metadata") return 0 From bec0407b3a8184c93c67ab0cf5f16a86690d5903 Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Tue, 9 Jun 2026 14:46:52 +0100 Subject: [PATCH 09/24] generate results again, update yaml file, rename script --- .../computational_results.json | 1588 ++++++++--------- .../submission.yaml | 7 +- ...l.py => example_generate_results_local.py} | 12 +- 3 files changed, 764 insertions(+), 843 deletions(-) rename openfe_benchmarks/scripts/{_example_generate_results_local.py => example_generate_results_local.py} (96%) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json index fa5fc58..1aaa993 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/computational_results.json @@ -1,14 +1,14 @@ { - "DG": [ + "dg": [ { "ligand": "ejm_50", - "DG": { + "dg": { "magnitude": 0.29126535165543466, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.09917235228482976, 
"unit": "kilocalories_per_mole", ":is_custom:": true, @@ -20,13 +20,13 @@ }, { "ligand": "ejm_42", - "DG": { + "dg": { "magnitude": 0.19152705913028711, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.09494870673093252, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -38,13 +38,13 @@ }, { "ligand": "ejm_48", - "DG": { + "dg": { "magnitude": 0.6607629762131684, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.11518369119192944, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -56,13 +56,13 @@ }, { "ligand": "ejm_55", - "DG": { + "dg": { "magnitude": -0.5926799441635051, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.15362099228783124, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -74,13 +74,13 @@ }, { "ligand": "ejm_54", - "DG": { + "dg": { "magnitude": 0.17393058577592413, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.163023542671173, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -92,13 +92,13 @@ }, { "ligand": "jmc_23", - "DG": { + "dg": { "magnitude": -1.2246516284257254, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.0876729477724904, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -110,13 +110,13 @@ }, { "ligand": "jmc_28", - "DG": { + "dg": { "magnitude": -0.9062639493216924, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.10269441383959206, "unit": "kilocalories_per_mole", ":is_custom:": 
true, @@ -128,13 +128,13 @@ }, { "ligand": "ejm_31", - "DG": { + "dg": { "magnitude": -0.17096192538386168, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.054938658795406575, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -146,13 +146,13 @@ }, { "ligand": "jmc_27", - "DG": { + "dg": { "magnitude": -1.4147688972765025, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.08856330806651277, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -164,13 +164,13 @@ }, { "ligand": "jmc_30", - "DG": { + "dg": { "magnitude": -1.8757626097098932, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.07812592097861754, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -182,13 +182,13 @@ }, { "ligand": "ejm_46", - "DG": { + "dg": { "magnitude": -0.9398774595643715, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.07668191741075524, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -200,13 +200,13 @@ }, { "ligand": "ejm_45", - "DG": { + "dg": { "magnitude": 0.32064376361660196, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.10615715902554235, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -218,13 +218,13 @@ }, { "ligand": "ejm_44", - "DG": { + "dg": { "magnitude": 2.6820095113046794, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.15648448886217892, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -236,13 +236,13 @@ }, { 
"ligand": "ejm_47", - "DG": { + "dg": { "magnitude": 0.2943067305517464, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.0849718727731203, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -254,13 +254,13 @@ }, { "ligand": "ejm_49", - "DG": { + "dg": { "magnitude": 0.7265934300979362, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.1834104598705046, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -272,13 +272,13 @@ }, { "ligand": "ejm_43", - "DG": { + "dg": { "magnitude": 1.7839270054997698, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.13110488350666116, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -290,13 +290,13 @@ }, { "ligand": "2x", - "DG": { + "dg": { "magnitude": 1.402473784656189, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.16362990296451582, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -308,13 +308,13 @@ }, { "ligand": "2v", - "DG": { + "dg": { "magnitude": 3.0252709313609847, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.1392041952315424, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -326,13 +326,13 @@ }, { "ligand": "3fly", - "DG": { + "dg": { "magnitude": 0.9357154959044522, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.1517623776273203, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -344,13 +344,13 @@ }, { "ligand": "2z", - "DG": { + "dg": { "magnitude": 
1.3925012717986371, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.35808037761446715, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -362,13 +362,13 @@ }, { "ligand": "3fmk", - "DG": { + "dg": { "magnitude": -1.4174232029590677, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.3841937885880197, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -380,13 +380,13 @@ }, { "ligand": "2o", - "DG": { + "dg": { "magnitude": -1.6874291873757477, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.2748582320961915, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -398,13 +398,13 @@ }, { "ligand": "3fln", - "DG": { + "dg": { "magnitude": 0.25457879378399895, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.08616835674948678, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -416,13 +416,13 @@ }, { "ligand": "2n", - "DG": { + "dg": { "magnitude": -1.312922416351007, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.1407417414748107, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -434,13 +434,13 @@ }, { "ligand": "2e", - "DG": { + "dg": { "magnitude": 1.3736709114203682, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.10601823494148799, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -452,13 +452,13 @@ }, { "ligand": "2y", - "DG": { + "dg": { "magnitude": 0.9756250807961369, "unit": "kilocalories_per_mole", 
":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.5829998457516582, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -470,13 +470,13 @@ }, { "ligand": "2s", - "DG": { + "dg": { "magnitude": -1.0384922890247537, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.35262381430424494, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -488,13 +488,13 @@ }, { "ligand": "2r", - "DG": { + "dg": { "magnitude": -1.0151434977895408, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.3019397617598854, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -506,13 +506,13 @@ }, { "ligand": "2aa", - "DG": { + "dg": { "magnitude": -0.5233617987215631, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.45857184881898433, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -524,13 +524,13 @@ }, { "ligand": "3flq", - "DG": { + "dg": { "magnitude": -3.115753401372757, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.47470413795406696, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -542,13 +542,13 @@ }, { "ligand": "3flw", - "DG": { + "dg": { "magnitude": -0.9887012836898492, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.4047369355253599, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -560,13 +560,13 @@ }, { "ligand": "2k", - "DG": { + "dg": { "magnitude": 1.4945763706711874, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, 
- "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.09767713562392834, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -578,13 +578,13 @@ }, { "ligand": "2m", - "DG": { + "dg": { "magnitude": -1.2086131941648164, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.10904083414852378, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -596,13 +596,13 @@ }, { "ligand": "2u", - "DG": { + "dg": { "magnitude": -1.7993394986310383, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.3985649080205063, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -614,13 +614,13 @@ }, { "ligand": "2j", - "DG": { + "dg": { "magnitude": 0.907512517557072, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.12017116206119788, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -632,13 +632,13 @@ }, { "ligand": "2t", - "DG": { + "dg": { "magnitude": -1.6156093607059905, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.17889749921124085, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -650,13 +650,13 @@ }, { "ligand": "2ff", - "DG": { + "dg": { "magnitude": 0.20303907336629878, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.12815562906133746, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -668,13 +668,13 @@ }, { "ligand": "2p", - "DG": { + "dg": { "magnitude": -0.5655419132651249, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 
0.2905923411992188, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -686,13 +686,13 @@ }, { "ligand": "2ee", - "DG": { + "dg": { "magnitude": -0.13605074481316404, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.21521706815916217, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -704,13 +704,13 @@ }, { "ligand": "2gg", - "DG": { + "dg": { "magnitude": 1.2935202311363638, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.15776085370467832, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -722,13 +722,13 @@ }, { "ligand": "2l", - "DG": { + "dg": { "magnitude": -1.0740023190194963, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.11037515728210154, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -740,13 +740,13 @@ }, { "ligand": "2q", - "DG": { + "dg": { "magnitude": -1.7503584028809382, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.216980682113927, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -758,13 +758,13 @@ }, { "ligand": "2g", - "DG": { + "dg": { "magnitude": 0.010660091115846093, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.08774664983749976, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -776,13 +776,13 @@ }, { "ligand": "2c", - "DG": { + "dg": { "magnitude": -0.9139549473034174, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.17933551114643104, "unit": "kilocalories_per_mole", 
":is_custom:": true, @@ -794,13 +794,13 @@ }, { "ligand": "2i", - "DG": { + "dg": { "magnitude": 0.7837093233242358, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.11492535392427111, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -812,13 +812,13 @@ }, { "ligand": "2h", - "DG": { + "dg": { "magnitude": 1.140782212442983, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.09695123630030483, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -830,13 +830,13 @@ }, { "ligand": "3flz", - "DG": { + "dg": { "magnitude": 0.7458905687132686, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.09919668442106934, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -848,13 +848,13 @@ }, { "ligand": "2bb", - "DG": { + "dg": { "magnitude": 1.7304086098693254, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.6095674355362066, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -866,13 +866,13 @@ }, { "ligand": "3fmh", - "DG": { + "dg": { "magnitude": 1.1059067484454168, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.9826981846915885, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -884,13 +884,13 @@ }, { "ligand": "2f", - "DG": { + "dg": { "magnitude": 1.3868554417054604, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.10167116076869546, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -902,13 +902,13 @@ }, { "ligand": 
"19charg", - "DG": { + "dg": { "magnitude": -0.501459078762063, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.08802770108498328, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -920,13 +920,13 @@ }, { "ligand": "27", - "DG": { + "dg": { "magnitude": 0.4304453957801377, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.07124165229524881, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -938,13 +938,13 @@ }, { "ligand": "28charg", - "DG": { + "dg": { "magnitude": 0.07101368298192536, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.13277043703505112, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -956,13 +956,13 @@ }, { "ligand": "21", - "DG": { + "dg": { "magnitude": -1.731545406606206, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.05894673532740687, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -974,13 +974,13 @@ }, { "ligand": "27chargR", - "DG": { + "dg": { "magnitude": 0.9201371595566695, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.10613089729022374, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -992,13 +992,13 @@ }, { "ligand": "30charg", - "DG": { + "dg": { "magnitude": 0.811408247049536, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DG_uncertainty": { + "dg_uncertainty": { "magnitude": 0.07816534186689623, "unit": "kilocalories_per_mole", ":is_custom:": true, @@ -1009,51 +1009,50 @@ "source": "MLE" } ], - "DDG": [ + "ddg": [ { "ligand_a": 
"ejm_50", "ligand_b": "ejm_42", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.31190011815882457, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.18487315903557983, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -19.16080704943104, -19.111385364555613, -18.821940214304785 ], - "DGs_solvent": [ + "dgs_solvent": [ -18.571648686711896, -18.830385089422162, -18.756398497680898 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.13376294007626177, 0.13055139667336355, 0.12983153960084914 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08936825885978428, 0.09551208285385501, 0.09167750325097529 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1379351626786042, 0.13809103838868403, 0.13816936991084877 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.108948432760364, 0.10522066738428418, 0.1102123356926188 @@ -1064,45 +1063,44 @@ "ligand_b": "jmc_28", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.36623109022296063, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.11640533073579613, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 22.70408815873226, 22.458056284998342, 22.487929608010205 ], - "DGs_solvent": [ + "dgs_solvent": [ 22.197952240200127, 22.13034422061398, 22.223084320257822 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1416995738770979, 0.14116436430160711, 0.1419894952857548 ], - 
"Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.12060702875399361, 0.11719670200235571, 0.11852704257767549 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.14291634228515276, 0.14314490886512032, 0.1438190363250322 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.12048503611971104, 0.12386248736097068, 0.12462082912032356 @@ -1113,45 +1111,44 @@ "ligand_b": "ejm_46", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.7488952687425261, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.09016750292603079, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -29.01021257823298, -29.190678420782003, -29.134048123354756 ], - "DGs_solvent": [ + "dgs_solvent": [ -28.388934469690422, -28.29342124586621, -28.40589760058554 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.12391854414577823, 0.12728048114417317, 0.12432086578445879 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.09936406995230525, 0.08785192909280501, 0.09504550050556117 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.12081198774344082, 0.11714612296936465, 0.11981483589375609 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10313447927199192, 0.10338725985844287, 0.10273972602739725 @@ -1162,45 +1159,44 @@ "ligand_b": "ejm_46", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.4164254733740478, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.09876860452401351, "unit": 
"kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -8.474240892289592, -8.544392711253266, -8.70493330056103 ], - "DGs_solvent": [ + "dgs_solvent": [ -8.961963044813643, -9.009584892266643, -9.001295387145747 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.12075339102487868, 0.12075705754361211, 0.12094073669100758 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.11051502145922747, 0.09428715874620829, 0.0968149646107179 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10664733156507009, 0.10695795462700747, 0.10723994372650122 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.09453993933265925, 0.08442871587462084, 0.09479271991911022 @@ -1211,45 +1207,44 @@ "ligand_b": "ejm_48", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.5341463866436627, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.3094542970554522, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 50.71509551483679, 51.40136661180214, 51.0720664413858 ], - "DGs_solvent": [ + "dgs_solvent": [ 50.44195116282237, 50.71417320866378, 50.429965036607584 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08065189023653123, 0.091595613911839, 0.08620352393865843 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.013504611330698288, 0.024412855377008654, 0.013816475495307613 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.11718424478793976, 0.1053715857586385, 0.11340153536625901 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 
0.03973168214654283, 0.031682641107561235, 0.034055727554179564 @@ -1260,45 +1255,44 @@ "ligand_b": "ejm_46", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.9371399293961673, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.018851480589629665, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 25.578451505274504, 25.608444739174015, 25.617997436382662 ], - "DGs_solvent": [ + "dgs_solvent": [ 24.652597871398413, 24.671528498114434, 24.669347523129833 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.13723072503954822, 0.13700768022413606, 0.1394285680288189 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.10300316122233931, 0.09955995599559957, 0.10945399393326592 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.13412870322248974, 0.13238089160125308, 0.1316668208399034 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.09831460674157304, 0.10161779575328615, 0.1051567239635996 @@ -1309,45 +1303,44 @@ "ligand_b": "ejm_55", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.916084341967732, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.1345450147693899, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -44.17911361527046, -44.43549613530486, -44.43978996564394 ], - "DGs_solvent": [ + "dgs_solvent": [ -43.371586840561996, -43.509910749053915, -43.42464910070016 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1778754405760441, 
0.18017099898235453, 0.18082746173296835 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.10995955510616785, 0.11692015209125475, 0.12517146776406035 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.18600447312112164, 0.18568913856836897, 0.18224489428141152 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.12452471482889733, 0.12360970677451971, 0.11945500633713561 @@ -1358,45 +1351,44 @@ "ligand_b": "jmc_30", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.9313926243440456, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.10388639788020433, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -13.938623728206014, -13.825428209858975, -13.68632523402718 ], - "DGs_solvent": [ + "dgs_solvent": [ -12.869496729262291, -12.887935568457241, -12.898767001340497 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.12596583866728828, 0.12512502020949173, 0.1255163773029776 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.12338044758539458, 0.11348122866894197, 0.12851929092805006 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.12365957892116246, 0.1236959353984504, 0.12419263649950771 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.12431243124312431, 0.12260967379077616, 0.12664307381193124 @@ -1407,45 +1399,44 @@ "ligand_b": "ejm_42", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.6911130202134572, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 
0.1585637071928594, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -12.52053992688133, -12.693240399437823, -12.314087801885053 ], - "DGs_solvent": [ + "dgs_solvent": [ -13.15519160398313, -13.2351138993596, -13.210901685501845 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.10828688807734116, 0.10627674092590095, 0.10335060684463246 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.09505363528009535, 0.09234828496042216, 0.09007707129094412 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10237895001773795, 0.10314379891210053, 0.10306871655924223 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10616784630940344, 0.09479271991911022, 0.0968149646107179 @@ -1456,45 +1447,44 @@ "ligand_b": "ejm_31", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.102046405017669, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.14013398224300408, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 0.6728841405617866, 0.8746447362763453, 0.6488853414210868 ], - "DGs_solvent": [ + "dgs_solvent": [ -0.5004980363015142, -0.26868370266352204, -0.34054325782875244 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.131387850766123, 0.13001794428128802, 0.13286795091162407 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.07512315270935961, 0.07508731082654249, 0.09143686502177069 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1295014640191342, 0.1295744456404279, 0.13028602635435663 ], - "Solvent_smallest_replica_mixing": [ + 
"solvent_smallest_replica_mixing": [ 0.08410732714138287, 0.08561643835616438, 0.0801314459049545 @@ -1505,45 +1495,44 @@ "ligand_b": "ejm_50", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.11154255326255225, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.1561341109264418, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -45.59830621538094, -45.69408131533385, -45.334355266644614 ], - "DGs_solvent": [ + "dgs_solvent": [ -45.383858207141586, -45.468729400107065, -45.4395275303231 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11692312607980797, 0.11548443168722737, 0.11921158583831405 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0456656346749226, 0.04118873826903024, 0.038751345532831 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.12390168973071564, 0.12350746672722915, 0.1206565041080654 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.06066734074823053, 0.05388151174668029, 0.05864509605662285 @@ -1554,45 +1543,44 @@ "ligand_b": "ejm_54", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.2767266696811319, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5252882481323151, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 25.634803878041474, 25.602083303194657, 25.685287522883364 ], - "DGs_solvent": [ + "dgs_solvent": [ 24.644875425023372, 25.87938446521765, 25.567734804835073 ], - "Complex_smallest_mbar_overlaps": [ + 
"complex_smallest_mbar_overlaps": [ 0.16929610099601902, 0.16899159333662458, 0.17010329138196761 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.11410018552875696, 0.1319514661274014, 0.12873862158647595 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.17258574651118336, 0.17090107035868987, 0.1695965093480625 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.130927291886196, 0.13751263902932254, 0.14079878665318504 @@ -1603,45 +1591,44 @@ "ligand_b": "ejm_31", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.7586372523142302, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.27682570627770536, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 20.09825569520222, 20.008504205745187, 19.472740654113803 ], - "DGs_solvent": [ + "dgs_solvent": [ 20.63331175099524, 20.630773866318638, 20.59132669469003 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.07332183437429869, 0.07492693094392974, 0.06482688501987194 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.015175718849840255, 0.017743979721166033, 0.017543859649122806 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.11562292849318584, 0.1143871186444747, 0.11726054856438224 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.047269969666329625, 0.0389988358556461, 0.04024767801857585 @@ -1652,45 +1639,44 @@ "ligand_b": "jmc_27", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.1973578482554217, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - 
"DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.03475793921871755, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 10.190419798951348, 10.124141066265146, 10.188030155270068 ], - "DGs_solvent": [ + "dgs_solvent": [ 10.38470669078386, 10.344771473550882, 10.365186400918082 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.22307188221442528, 0.22190341106421838, 0.22378469859882863 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.19969666329625885, 0.19152196118488254, 0.19901112484548825 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.2219462934548013, 0.2230370973198237, 0.22118305193484997 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.19514661274014156, 0.1903437815975733, 0.1994438827098079 @@ -1701,45 +1687,44 @@ "ligand_b": "ejm_48", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.8066356116094369, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.12079847964548013, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -11.370590164887243, -11.15471756527737, -11.088011824247115 ], - "DGs_solvent": [ + "dgs_solvent": [ -12.00140140500631, -12.014746507838368, -12.017078476395364 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.10986296371232865, 0.10772783412679628, 0.10965586483680564 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.04764776839565742, 0.033646322378716745, 0.035636561479869426 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10850724789011988, 0.11020963737500898, 0.10518795030466864 ], 
- "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.06370070778564206, 0.05680359435173299, 0.06622851365015167 @@ -1750,45 +1735,44 @@ "ligand_b": "ejm_43", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.9901327297773079, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.1345112185742903, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 8.773485983373927, 8.868105232682508, 8.989095388945623 ], - "DGs_solvent": [ + "dgs_solvent": [ 9.728499391897751, 9.96896393341908, 9.903621469017146 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11753921768625564, 0.115147192474471, 0.11477594804400199 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08063328424153167, 0.09805924412665985, 0.09098018769551616 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10415303449480709, 0.10637207323089107, 0.1054800287810906 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.09984833164812942, 0.1051567239635996, 0.09630940343781598 @@ -1799,45 +1783,44 @@ "ligand_b": "ejm_42", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.6725148955864118, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.12548819230305017, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 9.65018756969153, 9.704017724888304, 9.777430707810066 ], - "DGs_solvent": [ + "dgs_solvent": [ 11.335158497570813, 11.273518263236435, 11.540503928341884 ], - "Complex_smallest_mbar_overlaps": 
[ + "complex_smallest_mbar_overlaps": [ 0.10151518300576419, 0.10000814732350109, 0.09919590183439851 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08436532507739938, 0.08818393480791618, 0.08472367049009384 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10532685314445385, 0.10566620001644182, 0.10493907898120303 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10288169868554095, 0.1102123356926188, 0.11017410228509249 @@ -1848,45 +1831,44 @@ "ligand_b": "ejm_31", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.42914217336133476, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.09009349080843024, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 16.66455004001092, 16.63353442917762, 16.83326455741535 ], - "DGs_solvent": [ + "dgs_solvent": [ 17.111252284275793, 17.158210675997353, 17.14931258641475 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11024742270989725, 0.10112960263552395, 0.10720304409173459 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.045536791314837156, 0.0629346314325452, 0.051414581066376495 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1089060268362668, 0.10895123768920253, 0.10938170960496879 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.07763023493360573, 0.07912032355915066, 0.08316481294236602 @@ -1897,45 +1879,44 @@ "ligand_b": "ejm_54", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.023743982859214086, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - 
"DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.15259445469110478, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -8.360484288773174, -8.454031987573893, -8.185657727668064 ], - "DGs_solvent": [ + "dgs_solvent": [ -8.442909272751802, -8.418426148937474, -8.210070530903497 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.17560885920989688, 0.17323639559229229, 0.17380179833081966 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.10218978102189781, 0.10824345146379044, 0.11437718277066357 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1871862158170948, 0.1865805956386249, 0.18718278636927746 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.1356691253951528, 0.13832077502691065, 0.13043478260869565 @@ -1946,45 +1927,44 @@ "ligand_b": "ejm_45", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.49160568900046187, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.0971072231071624, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 11.678225942983286, 11.677525451818473, 11.637780321936342 ], - "DGs_solvent": [ + "dgs_solvent": [ 11.038201975170825, 11.240504738871525, 11.240007935694361 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11381186226025179, 0.10908855008528516, 0.11603533611279043 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.04724964739069112, 0.0364293659621802, 0.042269187986651836 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.13962795637960182, 0.13767816442093675, 0.13972528037172557 ], - 
"Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.0788675429726997, 0.06825075834175935, 0.07381193124368049 @@ -1995,45 +1975,44 @@ "ligand_b": "ejm_44", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.3959460144061424, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.2997203710866624, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -35.776093436207546, -35.434665582008414, -36.13418486552902 ], - "DGs_solvent": [ + "dgs_solvent": [ -38.18637415571591, -38.06213775203912, -38.28427001920838 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.06532762740879944, 0.06611673723358033, 0.07269076770057471 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.007982583454281567, 0.014189886480908152, 0.018452982810920122 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.09391077249872196, 0.0912788342687875, 0.09003931739462981 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.03943377148634985, 0.041708796764408494, 0.037538304392236974 @@ -2044,45 +2023,44 @@ "ligand_b": "ejm_50", "system_group": "jacs_set", "system_name": "tyk2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.5524232311454753, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.25415370955825656, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -42.19690714784933, -42.17718911197918, -42.68689141840163 ], - "DGs_solvent": [ + "dgs_solvent": [ -41.91462882376703, -41.6823354632367, -41.806753697789986 ], - 
"Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.05507848847353458, 0.05023465533176064, 0.04805787954625746 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.006777893639207508, 0.007405515832482125, 0.006954102920723227 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.0793072362419808, 0.08145985410718955, 0.08210049410365801 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.016177957532861477, 0.02275025278058645, 0.019716885743174924 @@ -2093,45 +2071,44 @@ "ligand_b": "2v", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.5610649694077994, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.15476100551094377, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -6.34494429736851, -6.60766300709865, -6.286112159480605 ], - "DGs_solvent": [ + "dgs_solvent": [ -7.88804477688721, -7.9841319622821025, -8.049737633001852 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1316054919239175, 0.12843221035579228, 0.13342400217845768 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0551061678463094, 0.05643513789581205, 0.056875631951466124 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.12824740624039507, 0.1273139877010416, 0.12547744556705193 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.0551061678463094, 0.054600606673407485, 0.04802831142568251 @@ -2142,45 +2119,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.3531386988635425, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": 
"openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.36381157681129445, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -21.930982740496482, -21.308893015412934, -21.09798957133443 ], - "DGs_solvent": [ + "dgs_solvent": [ -23.860931464166917, -23.67811136215507, -23.85823859751249 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.13691182703746504, 0.15380374872015187, 0.14257429741826305 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.06485468245425188, 0.07355915065722952, 0.07482305358948432 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1258931861705904, 0.12744256288065467, 0.1198099867984415 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.04701718907987867, 0.06193124368048534, 0.040697674418604654 @@ -2191,45 +2167,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.3920696315652066, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5005215279211973, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -81.88638205994715, -81.46763597765408, -82.0253008735319 ], - "DGs_solvent": [ + "dgs_solvent": [ -83.92147634908416, -83.82778469677255, -84.80626675997203 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.0705065095217306, 0.06306680859533224, 0.052956948459663186 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.012639029322548028, 0.012125902992776058, 0.008682328907048007 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.04550025354232517, 0.04777658198040124, 
0.04821946920795428 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.00884732052578362, 0.008088978766430738, 0.007077856420626896 @@ -2240,45 +2215,44 @@ "ligand_b": "2v", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.0496458505648647, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5835605799207267, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 53.8909711552022, 53.549739007072745, 54.87149162165456 ], - "DGs_solvent": [ + "dgs_solvent": [ 51.84758921220694, 52.068872345343095, 52.246802674684886 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11959394856636371, 0.11858459466759336, 0.11585783085992414 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.03611971104231166, 0.03538928210313448, 0.03766430738119313 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1229722270415005, 0.12707488500063863, 0.12234808924055462 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.045881126173096975, 0.050050556117290194, 0.03766430738119313 @@ -2289,45 +2263,44 @@ "ligand_b": "2r", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.2898092332319564, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5347619226916517, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 35.695153893081745, 36.19157695645586, 36.78508846738147 ], - "DGs_solvent": [ + "dgs_solvent": [ 35.79705688680006, 35.660485069673626, 36.34484966074954 ], - 
"Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08876184109653515, 0.1139108526784562, 0.10997099117235347 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.01870576339737108, 0.03387259858442872, 0.024014155712841254 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1333006533934039, 0.12008372800931835, 0.14256896484602713 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.04929221435793731, 0.03575076608784474, 0.04802831142568251 @@ -2338,45 +2311,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.8585962397199935, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.8033756463612287, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 50.12916015324384, 49.59127503158945, 48.57048936590872 ], - "DGs_solvent": [ + "dgs_solvent": [ 50.05723640253965, 49.856040256697135, 50.953436610665214 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.09997391564086458, 0.12997867078283395, 0.08373472862433128 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.020980788675429726, 0.04044489383215369, 0.020728008088978768 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.08880821030479624, 0.12038653305140658, 0.08782577228758608 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.016683518705763397, 0.05055611729019211, 0.016683518705763397 @@ -2387,45 +2359,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.5529606809341203, "unit": "kilocalories_per_mole", ":is_custom:": true, 
"pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.45752269525274764, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -30.377087708939655, -29.436863252608635, -30.22507350753656 ], - "DGs_solvent": [ + "dgs_solvent": [ -31.297764997800158, -31.62722668783109, -31.77291482625596 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11012561275594637, 0.14355062938919508, 0.12334069966280345 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.040091930541368746, 0.05813953488372093, 0.05131445904954499 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.11730947093440383, 0.10438599980853329, 0.11445685486524394 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.04698672114402452, 0.03640040444893832, 0.035136501516683516 @@ -2436,45 +2407,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.2155262405028182, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.05571817961839361, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -33.835159947371984, -33.794365433499316, -33.91319778043779 ], - "DGs_solvent": [ + "dgs_solvent": [ -32.59875433289383, -32.66209447066243, -32.63529563624438 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.11918982323298731, 0.12380305733105947, 0.12043181959557284 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08245521601685986, 0.07686414708886619, 0.08670374115267948 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 
0.11654397510808602, 0.11618587369413037, 0.11646164615589488 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.08784473953013279, 0.07684529828109202, 0.09045881126173097 @@ -2485,45 +2455,44 @@ "ligand_b": "2e", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.11292214185462, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.08559294699837033, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 2.2458826877997855, 2.3089112575336435, 2.433991614598979 ], - "DGs_solvent": [ + "dgs_solvent": [ 1.1702830267024562, 1.254293474181807, 1.2254426334842845 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08833007640108602, 0.0885744750192168, 0.08878137885684231 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08719704952581665, 0.09175935288169869, 0.08796764408493428 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1065055284277504, 0.10760624748550335, 0.10706690168326397 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10111223458038422, 0.11071789686552073, 0.10566228513650151 @@ -2534,45 +2503,44 @@ "ligand_b": "2k", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 3.1146459148287207, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.502192790993695, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -40.13239071990043, -40.62478604191973, -41.1894139642321 ], - "DGs_solvent": [ + "dgs_solvent": [ -43.44518020617255, 
-43.772551120221635, -44.072797144144225 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.005719198797247576, 0.008464710237366582, 0.009128714267458364 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0, 0.0, 0.0 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.006283259434764158, 0.007285671013638708, 0.005224925304881056 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.0, 0.0005055611729019212, 0.0005107252298263534 @@ -2583,45 +2551,44 @@ "ligand_b": "2k", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.9434972459600459, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.21794995130944414, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 82.874077621677, 82.85525510600634, 82.46503371149598 ], - "DGs_solvent": [ + "dgs_solvent": [ 81.8187872305234, 81.90375524752986, 81.64133222324598 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1589788594726974, 0.16134309035122865, 0.15303654433640412 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.09049544994944388, 0.07103134479271991, 0.0659757330637007 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.16331142623057543, 0.1642617899067399, 0.16070212413935084 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.067926455566905, 0.0659541188738269, 0.08063700707785643 @@ -2632,45 +2599,44 @@ "ligand_b": "2k", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 3.302033353626129, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - 
"DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.22889522664628187, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 59.14486730704456, 59.17798593820995, 59.474593547094514 ], - "DGs_solvent": [ + "dgs_solvent": [ 56.19951432823593, 55.78309152230953, 55.9087408809252 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.0182879716410348, 0.020619218335067437, 0.022748249415143775 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0, 0.0025278058645096056, 0.0015166835187057635 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.0162873079171063, 0.011066463807387825, 0.014405531808085056 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.001053740779768177, 0.0005055611729019212, 0.0020222446916076846 @@ -2681,45 +2647,44 @@ "ligand_b": "2c", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.991933070834403, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.19493385259693377, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 0.8347104293437511, 0.5678376657292645, 0.39402904797113963 ], - "DGs_solvent": [ + "dgs_solvent": [ 1.596487881781548, 1.675709346506592, 1.5001791272592246 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08261222546481212, 0.0743940971583477, 0.08001043318424017 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.021233569261880688, 0.019716885743174924, 0.01744186046511628 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.061116778749775384, 0.06070230669329578, 0.05852224491627549 ], - 
"Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.014661274014155713, 0.01251276813074566, 0.014408493427704751 @@ -2730,45 +2695,44 @@ "ligand_b": "2t", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -2.5816132208357345, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.2289196627592395, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 19.162270427421415, 18.884163365384122, 19.424438796606967 ], - "DGs_solvent": [ + "dgs_solvent": [ 21.77989369161034, 21.65210598728651, 21.783712573022864 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.17537179017179927, 0.1768463158962125, 0.17899106432661108 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.14838220424671386, 0.14585439838220424, 0.14632277834525026 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1795891531240771, 0.18178630088805764, 0.18267107070929928 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.12462082912032356, 0.12386248736097068, 0.12967644084934277 @@ -2779,45 +2743,44 @@ "ligand_b": "2v", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.294862321491678, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.6117244609725263, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -145.68783975893388, -146.95050158113366, -145.84054069126964 ], - "DGs_solvent": [ + "dgs_solvent": [ -147.22836016180128, -147.78668536371592, -147.3484234702951 ], - 
"Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.03647662207854935, 0.033876682183452377, 0.03696355433096745 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.00281214848143982, 0.005308392315470172, 0.003033367037411527 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.03756058615424141, 0.0404006621468552, 0.04667265002229325 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.004044489383215369, 0.0020222446916076846, 0.00884732052578362 @@ -2828,45 +2791,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.3494651289803485, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.07853412354051971, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -22.34257710039029, -22.51253067319933, -22.431607936745575 ], - "DGs_solvent": [ + "dgs_solvent": [ -23.830305309565308, -23.7510059753539, -23.753799812357048 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1072562952018195, 0.1060597313394167, 0.10677830305215166 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.10214504596527069, 0.10743174924165824, 0.09934277047522751 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10360869650912934, 0.10358942345571326, 0.10367377465378276 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.09525025536261492, 0.09403437815975733, 0.10342185903983657 @@ -2877,45 +2839,44 @@ "ligand_b": "2ff", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.89255913242296, "unit": "kilocalories_per_mole", ":is_custom:": true, 
"pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.123001801406735, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 4.16678946485973, 3.88543458031491, 4.007052439077462 ], - "DGs_solvent": [ + "dgs_solvent": [ 4.942542112076134, 4.943007721092815, 4.851404048352033 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.14691349010242769, 0.15980248573305475, 0.15487107204964642 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0429726996966633, 0.0669050051072523, 0.07379979570990806 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.17276710725184072, 0.17179888338803603, 0.17359289141258769 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.07962588473205258, 0.0801314459049545, 0.08796764408493428 @@ -2926,45 +2887,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -2.082288017056463, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.08227902264320527, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 44.27073908032549, 44.19680159346747, 44.09362697932192 ], - "DGs_solvent": [ + "dgs_solvent": [ 46.26446627075027, 46.31893676935105, 46.22462866418295 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.10717043052220175, 0.11202763536309211, 0.11038523771920304 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08417593528816987, 0.08215369059656219, 0.0884732052578362 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10136315945686432, 0.10167389766919438, 
0.10319513039958948 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.08063700707785643, 0.08063700707785643, 0.07836198179979778 @@ -2975,45 +2935,44 @@ "ligand_b": "2z", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.4087645570897696, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.36145131399991265, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -9.892874241458605, -9.979276301080583, -9.458539529173535 ], - "DGs_solvent": [ + "dgs_solvent": [ -8.055561687373942, -8.312720462090859, -8.736114250978613 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08279378037904597, 0.08272448192906283, 0.08427059688456004 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.03317811408614668, 0.037278415015641296, 0.033367037411526794 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10042413125744344, 0.09674523521933583, 0.09331560533491669 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.027805864509605663, 0.024519716885743174, 0.03070175438596491 @@ -3024,45 +2983,44 @@ "ligand_b": "2v", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.8120211456761055, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.49570894943721877, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -16.225430552872243, -15.171875383610205, -16.216428791044336 ], - "DGs_solvent": [ + "dgs_solvent": [ -17.73097394082993, -17.664457321835656, -17.654366901889517 ], 
- "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08820837926380458, 0.09370609170991916, 0.0832907512519269 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.02578361981799798, 0.02561837455830389, 0.025025278058645097 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.08998308836114768, 0.09130979136294703, 0.09466409876563482 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.026036400404448937, 0.016511867905056758, 0.025530839231547017 @@ -3073,45 +3031,44 @@ "ligand_b": "2i", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.7656255644889072, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.08969102932576853, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -47.482797498508496, -47.678458021282104, -47.5502812168207 ], - "DGs_solvent": [ + "dgs_solvent": [ -48.326858196837584, -48.29468632935921, -48.3868689038812 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.16282462615228827, 0.16244206433972475, 0.16355033844778555 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.1435793731041456, 0.15419615773508594, 0.15874620829120323 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.17764719690889713, 0.17902068001928023, 0.1799486922495435 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.17315470171890798, 0.17696629213483145, 0.16615067079463364 @@ -3122,45 +3079,44 @@ "ligand_b": "2h", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.1319388232797394, "unit": "kilocalories_per_mole", ":is_custom:": true, 
"pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.04436919887828085, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -34.148206488067615, -34.13969691597984, -34.0822355885741 ], - "DGs_solvent": [ + "dgs_solvent": [ -35.24757366817628, -35.21893890812331, -35.29944288616117 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.19842712300230597, 0.20051192289840367, 0.20032215152508231 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0922649140546006, 0.09529828109201213, 0.089737108190091 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.20104792877645017, 0.19929432478488607, 0.1949387655899394 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.09175935288169869, 0.10540950455005056, 0.10869565217391304 @@ -3171,45 +3127,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.17019125254089218, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 1.000789886472866, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -23.78037157820324, -22.75466082653179, -25.070688580680823 ], - "DGs_solvent": [ + "dgs_solvent": [ -23.2750956292334, -23.764557670375837, -24.055493928183935 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.12261051647875143, 0.13823343532843832, 0.11204538070718037 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.04505005561735261, 0.053842264914054604, 0.038097886540600666 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.12232294802863337, 
0.12119490016137932, 0.12083691690922534 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.047522750252780584, 0.03993933265925177, 0.05232558139534884 @@ -3220,45 +3175,44 @@ "ligand_b": "2ee", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.4510333064765462, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.3870642543085633, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 29.780384886255423, 30.200221877736652, 29.257748982184292 ], - "DGs_solvent": [ + "dgs_solvent": [ 28.286620040468055, 28.257725395308263, 28.34091039097041 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.04026946298556667, 0.05743519638362556, 0.04535769295200803 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.004044489383215369, 0.01314459049544995, 0.007077856420626896 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.042539488458461484, 0.03550988882260297, 0.039589007964083794 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.00455005055611729, 0.0032861476238624874, 0.004044489383215369 @@ -3269,45 +3223,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.44577667307589763, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.09022465065997028, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 35.287625756730314, 35.3798139301352, 35.16314425781312 ], - "DGs_solvent": [ + "dgs_solvent": [ 35.7073051575959, 35.7448325820688, 
35.71577622424162 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.16062136915337166, 0.16200490981529853, 0.16051179314319758 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.10869565217391304, 0.12714863498483317, 0.12306501547987617 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.16459651249907142, 0.16383747508686122, 0.1635362202960148 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.13157894736842105, 0.1256319514661274, 0.12765419615773507 @@ -3318,45 +3271,44 @@ "ligand_b": "2n", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.2136687059875304, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.2992134951871598, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -3.0712804433461542, -2.4828457332240768, -2.432919573718693 ], - "DGs_solvent": [ + "dgs_solvent": [ -2.908805535900766, -2.945905648932593, -2.7733406834181547 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1240539560796338, 0.1256639479017017, 0.1388815803372485 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.03286147623862487, 0.040697674418604654, 0.04954499494438827 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.14025550612058116, 0.13964002697143446, 0.14173690146363263 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.05434782608695652, 0.052072800808897875, 0.05308392315470172 @@ -3367,45 +3319,44 @@ "ligand_b": "2v", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.8204976323403628, "unit": "kilocalories_per_mole", ":is_custom:": true, 
"pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.20007425050096114, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -63.99099035424003, -64.38939858917938, -64.3349996948822 ], - "DGs_solvent": [ + "dgs_solvent": [ -66.95076853440165, -67.04528015779668, -67.18083284312439 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.10673300420643717, 0.09652528229647978, 0.09686749406915138 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.034378159757330634, 0.029877425944841676, 0.0358948432760364 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.09947888708733973, 0.10043296025938654, 0.10514031365726278 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.03210313447927199, 0.03447395301327886, 0.029575328614762385 @@ -3416,45 +3367,44 @@ "ligand_b": "2e", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.008244785337959026, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.07658584575194212, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 2.4503060111676715, 2.2794497796290383, 2.2980189602593333 ], - "DGs_solvent": [ + "dgs_solvent": [ 2.354264008947156, 2.3479937955360275, 2.350251302586736 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.133101856074992, 0.12826582371072895, 0.13152501814760761 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.13321536905965622, 0.11931243680485339, 0.12799791449426487 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1223557696273997, 
0.12238991777700665, 0.12241700966578777 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.12740141557128412, 0.11956521739130435, 0.12259858442871588 @@ -3465,45 +3415,44 @@ "ligand_b": "2v", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 7.7681178560909245, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5506812494406506, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -196.14687024484002, -197.4562761072638, -196.75246009505236 ], - "DGs_solvent": [ + "dgs_solvent": [ -204.7355378260991, -204.48528266397082, -204.43913952535888 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.03274338907505308, 0.025550503910294, 0.025130126140115817 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0025278058645096056, 0.0025278058645096056, 0.0005107252298263534 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.037813153026277004, 0.041118665756337616, 0.04138514579797249 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.007077856420626896, 0.005055611729019211, 0.006066734074823054 @@ -3514,45 +3463,44 @@ "ligand_b": "2p", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.1246833925592803, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5742164408289936, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 30.227211494724973, 31.438234260291203, 30.215609990329888 ], - "DGs_solvent": [ + "dgs_solvent": [ 31.729220468173626, 
31.738024636258114, 31.787860818592172 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.05835661806578293, 0.05495498511171304, 0.05467981641159252 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.012133468149646108, 0.005857294994675187, 0.008088978766430738 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.04037229328045264, 0.04285584027388233, 0.04107730048055153 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.007330637007077857, 0.007481940144478844, 0.009521218715995648 @@ -3563,45 +3511,44 @@ "ligand_b": "2ee", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.8852394242798169, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.33493540787329246, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 46.97802669247554, 47.17143696491097, 46.44390773174861 ], - "DGs_solvent": [ + "dgs_solvent": [ 47.677456745694755, 47.93535046189015, 47.63628245438965 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08246326270238208, 0.09171727738860609, 0.09321368800539305 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0260577915376677, 0.024483133841131665, 0.028563656147986942 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10076242244654485, 0.09972621305624987, 0.0986291962527788 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.03134479271991911, 0.0275530839231547, 0.028058645096056625 @@ -3612,45 +3559,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.9352906416645936, "unit": 
"kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.33015977633312926, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 18.26315749225566, 18.984647719763544, 18.702625220779108 ], - "DGs_solvent": [ + "dgs_solvent": [ 17.510571738248274, 17.814281450825575, 17.81970531873069 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.05355479591822359, 0.06023923090052676, 0.06018001260231177 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.010111223458038422, 0.011627906976744186, 0.013902932254802831 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.04763974925036743, 0.04637671643260123, 0.04515891955973867 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.010616784630940344, 0.010469867211440245, 0.007077856420626896 @@ -3661,45 +3607,44 @@ "ligand_b": "3flz", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.6418389038140561, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.03221511038750755, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 0.8130363935499286, 0.8735733403390727, 0.8726810478075623 ], - "DGs_solvent": [ + "dgs_solvent": [ 1.4733083525484048, 1.507190167607758, 1.5043089729825692 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.09000095877570179, 0.08911297467221231, 0.08863154710267018 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08211678832116788, 0.09020902090209021, 0.08872598584428716 ], - "Solvent_smallest_mbar_overlaps": [ + 
"solvent_smallest_mbar_overlaps": [ 0.10739071802518425, 0.10724155390590813, 0.10764446083805117 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10288169868554095, 0.10869565217391304, 0.11046511627906977 @@ -3710,45 +3655,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.0070882773203014, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.14900468612140091, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -10.526953345474702, -10.513873733358677, -10.643582886444491 ], - "DGs_solvent": [ + "dgs_solvent": [ -9.74048603178427, -9.508529296726904, -9.41412980480579 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.09595243932461166, 0.09701626387708251, 0.09867696775959206 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.026036400404448937, 0.02247191011235955, 0.023735810113519093 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10570031962448566, 0.11241271186815495, 0.11288272828255076 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.027300303336703743, 0.03260869565217391, 0.037917087967644085 @@ -3759,45 +3703,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.816272439307344, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.21978345066677363, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 29.33350441670494, 29.790235752316335, 29.79821047801117 ], - "DGs_solvent": [ + 
"dgs_solvent": [ 27.777605675652058, 27.854591732262122, 27.840935921196234 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.14061143282678665, 0.14019122790894534, 0.13849995595636574 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.12308478038815117, 0.1327098078867543, 0.13776541961577352 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1342895251691245, 0.1344712138313741, 0.13145391909171364 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.1006066734074823, 0.11056105610561057, 0.10692618806875633 @@ -3808,45 +3751,44 @@ "ligand_b": "2i", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.33865029226697274, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.14129121831848232, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -10.246794192223922, -10.321577057085724, -10.054435155295705 ], - "DGs_solvent": [ + "dgs_solvent": [ -9.923539508798676, -9.935021496168842, -9.748294522836911 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.21584241114429792, 0.21422123124675468, 0.2086071130859728 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.17824310520939735, 0.1878159757330637, 0.1710929519918284 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.22584110258979875, 0.22425358708368043, 0.2256395620832567 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.20045500505561173, 0.1936299292214358, 0.19590495449949444 @@ -3857,45 +3799,44 @@ "ligand_b": "2gg", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.5596926107771054, 
"unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.3479402676176178, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 59.148358749576836, 59.97967934078981, 59.475059945589706 ], - "DGs_solvent": [ + "dgs_solvent": [ 58.95599059928364, 59.060959398808386, 58.907070205532996 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.09728504695255757, 0.09441146045145489, 0.09650982635728102 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.022244691607684528, 0.02578361981799798, 0.01693629929221436 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.09807516664142407, 0.09750653064513587, 0.09412756812443633 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.027805864509605663, 0.028311425682507583, 0.02578361981799798 @@ -3906,45 +3847,44 @@ "ligand_b": "2r", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.2103128576225206, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.30638245254173946, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -3.8880071139613444, -4.57631487453112, -3.992123130685379 ], - "DGs_solvent": [ + "dgs_solvent": [ -4.297796174964481, -4.390191389337642, -4.39939612774328 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.14360693554321288, 0.13576031324145255, 0.13381343218307346 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.06340378197997776, 0.05675954592363261, 0.04575328614762386 ], - "Solvent_smallest_mbar_overlaps": [ + 
"solvent_smallest_mbar_overlaps": [ 0.13531974184300477, 0.13459936802590508, 0.1333065640933696 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.04800817160367722, 0.04592363261093911, 0.06268958543983821 @@ -3955,45 +3895,44 @@ "ligand_b": "2s", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.42395259273920516, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.4005874404193791, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -31.66364623758417, -31.77106318509678, -32.12542239332285 ], - "DGs_solvent": [ + "dgs_solvent": [ -32.687521176737924, -31.835333911940342, -32.30913450554315 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.14362609505657245, 0.11451890326317474, 0.14126108878227422 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.05333670374115268, 0.036905965621840245, 0.05055611729019211 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.151422252389513, 0.15452128444338523, 0.15339472193056625 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.059403437815975735, 0.06648129423660262, 0.05669050051072523 @@ -4004,45 +3943,44 @@ "ligand_b": "2l", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.8599701392773937, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.18946081404759263, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 71.61191436949022, 71.24114239618838, 71.62318360277035 ], - "DGs_solvent": [ + 
"dgs_solvent": [ 73.41984401289169, 73.2621440150205, 73.37416275836897 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1678081508088681, 0.17260007817146267, 0.17183616752640993 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.15925176946410516, 0.15557275541795665, 0.15293225480283115 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.183883877172252, 0.18151891677531815, 0.18330966459566742 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.12082912032355915, 0.11558307533539731, 0.1243680485338726 @@ -4053,45 +3991,44 @@ "ligand_b": "2m", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -2.7031895648360003, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.04995896547285099, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 7.844849563289203, 7.724585646096473, 7.7676290383623225 ], - "DGs_solvent": [ + "dgs_solvent": [ 10.486534308906672, 10.484128143411292, 10.475970489938035 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.10532518577015855, 0.10103227418093964, 0.10214082620402833 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.03232533889468196, 0.03083923154701719, 0.03571428571428571 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.09874069850740082, 0.10058492324781172, 0.10000444977057138 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.028816986855409503, 0.036905965621840245, 0.03134479271991911 @@ -4102,45 +4039,44 @@ "ligand_b": "3flz", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.5158988319674405, 
"unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.2850508836539041, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -0.08449550525207378, -0.23413778742540928, -0.7039940560681466 ], - "DGs_solvent": [ + "dgs_solvent": [ -1.7574652826689614, -2.0064037198855025, -1.8064548420934867 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.09493899420676687, 0.08608094086560737, 0.08559133510256914 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.025278058645096056, 0.021033013844515443, 0.02571041948579161 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.05988698527481441, 0.056405348658681895, 0.06168639121814175 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.011627906976744186, 0.008594539939332659, 0.01557711950970378 @@ -4151,45 +4087,44 @@ "ligand_b": "3fly", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.2900713000325084, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 1.0093680161189547, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 22.57995565266321, 21.21425661148975, 21.728794840541894 ], - "DGs_solvent": [ + "dgs_solvent": [ 21.051181589494472, 21.23076354056162, 19.37084807454124 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.10198158347231295, 0.11779626990385111, 0.11409217368181884 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.06513083048919227, 0.04120323559150657, 0.06622851365015167 ], - "Solvent_smallest_mbar_overlaps": [ + 
"solvent_smallest_mbar_overlaps": [ 0.14067634362698161, 0.14136370797084533, 0.13750068245600816 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.05358948432760364, 0.06900910010111223, 0.08291203235591507 @@ -4200,45 +4135,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.3659953512711382, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.6625942186156518, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -72.98064194558562, -71.75648162583873, -73.08907113647442 ], - "DGs_solvent": [ + "dgs_solvent": [ -74.71168104128634, -75.3490771649008, -74.86342255552505 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.021875917527799573, 0.019245028434510935, 0.021101414251556608 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0010111223458038423, 0.0017064846416382253, 0.0020222446916076846 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.021008255004268568, 0.0168361993283318, 0.021269924020818443 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.0015321756894790602, 0.0020855057351407717, 0.0005055611729019212 @@ -4249,45 +4183,44 @@ "ligand_b": "3flz", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.7441684233686107, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.05847724532123635, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 0.5606329734749874, 0.43604510962689447, 0.4964447140208148 ], - 
"DGs_solvent": [ + "dgs_solvent": [ -0.2823843834417408, -0.24522553999496724, -0.21177254954642713 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08701027227659307, 0.08708465994862354, 0.08718830124402377 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.08316481294236602, 0.08707865168539326, 0.09201213346814964 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.10821277326600037, 0.10844806963778332, 0.10855362203925409 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10439838220424671, 0.11274014155712841, 0.10945399393326592 @@ -4298,45 +4231,44 @@ "ligand_b": "3flq", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.6343180728372886, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.7754860263915238, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 173.50513952445394, 172.2207822765914, 173.87919562622804 ], - "DGs_solvent": [ + "dgs_solvent": [ 172.20764886468808, 172.96719147956694, 172.52732286450646 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.02540604991291027, 0.0356571589582112, 0.034771888478849974 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.0025278058645096056, 0.003033367037411527, 0.0025278058645096056 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.03313563392218073, 0.05562981550728482, 0.047768347783859785 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.0016501650165016502, 0.009605662285136502, 0.005362614913176711 @@ -4347,45 +4279,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + 
"ddg": { "magnitude": 0.24355532909243882, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.020771742068728907, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -0.031082372187482374, -0.0007454682310518203, -0.04905003295647761 ], - "DGs_solvent": [ + "dgs_solvent": [ -0.2721141264868519, -0.26270084568191143, -0.2767288884835649 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.09368289583299645, 0.09499693421341417, 0.09504351250631302 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.09482758620689655, 0.09464902186421174, 0.09956942949407965 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1061487788653746, 0.10599238990094036, 0.10615210383650002 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.10465116279069768, 0.10566228513650151, 0.10717896865520728 @@ -4396,45 +4327,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.16733380944066312, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.13268756296587103, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 44.28310949928939, 44.08978230425552, 44.41080113619733 ], - "DGs_solvent": [ + "dgs_solvent": [ 44.443789361425495, 44.43157646349521, 44.41032854314354 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.15238647074547965, 0.149532484295647, 0.1510994350634236 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.07204246713852376, 0.07352941176470588, 0.06774519716885744 ], 
- "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.16626837925929416, 0.16501689888936769, 0.16616632229424247 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.09934277047522751, 0.09494324045407637, 0.09150657229524772 @@ -4445,45 +4375,44 @@ "ligand_b": "3flw", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -0.946018225104055, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.5204587957618652, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 53.01838639721525, 51.93152887439266, 52.13408897093109 ], - "DGs_solvent": [ + "dgs_solvent": [ 52.99720207668792, 53.45122340006204, 53.47363344110124 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.07070548660053211, 0.08737785725201297, 0.052200562148462026 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.02102803738317757, 0.021961184882533197, 0.007916241062308479 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.06533705153188253, 0.06811759819748536, 0.060268999776895746 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.012639029322548028, 0.015419615773508595, 0.00910010111223458 @@ -4494,45 +4423,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.42221202536986135, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.3134227877392276, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ 1.7066008472588112, 2.2283377950104994, 
2.4347185180928066 ], - "DGs_solvent": [ + "dgs_solvent": [ 1.787630281408768, 1.6277491698734852, 1.6876416329702786 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.08911706971671998, 0.09680813588557652, 0.09536571855108487 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.019211324570273004, 0.02300303336703741, 0.02704752275025278 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.11029268603292688, 0.10988820046360007, 0.11648113516319511 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.03741152679474216, 0.03488372093023256, 0.032355915065722954 @@ -4543,45 +4471,44 @@ "ligand_b": "3fln", "system_group": "jacs_set", "system_name": "p38", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 1.5375910719088495, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.12903156579787325, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -28.785797380214298, -29.070091011211538, -28.838709232486686 ], - "DGs_solvent": [ + "dgs_solvent": [ -30.392297699350312, -30.483899498544318, -30.431173641744447 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.1561638735794862, 0.15303804457098705, 0.15700556047939676 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.06774519716885744, 0.07750301568154402, 0.08315392895586653 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.1443289636456939, 0.1426164620661198, 0.14361441153485605 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.0704070407040704, 0.06648129423660262, 0.06511746680286006 @@ -4592,45 +4519,44 @@ "ligand_b": "18", "system_group": "charge_annihilation_set", "system_name": "irak4_s2", 
- "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.9372416484584178, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.04856687578618938, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -63.93511307268735, -63.923172804394014, -63.96065970051045 ], - "DGs_solvent": [ + "dgs_solvent": [ -61.963891650164534, -61.976290484242064, -62.06703849780995 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.19958229830280627, 0.20035168944620024, 0.19943791085724064 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.13398838767306834, 0.13591635916359163, 0.13392391870766024 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.19763453924636895, 0.1972246864525244, 0.19787666035336005 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.146122758272291, 0.1402500631472594, 0.14037635766607728 @@ -4641,45 +4567,44 @@ "ligand_b": "14", "system_group": "charge_annihilation_set", "system_name": "irak4_s2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.5855575662381938, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.10365831475160588, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -103.83377139243893, -103.7157494161017, -103.5919299729964 ], - "DGs_solvent": [ + "dgs_solvent": [ -102.17281658014271, -102.10796596263567, -102.10399554004411 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.20973569947016632, 0.20861641871862596, 0.21113770666940232 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.15599705665930833, 
0.1707026597856292, 0.1574614422676115 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.20900382840303483, 0.20924076403327505, 0.20959892464022056 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.17865429234338748, 0.17391593508911946, 0.18274816872947713 @@ -4690,45 +4615,44 @@ "ligand_b": "5charg", "system_group": "charge_annihilation_set", "system_name": "irak4_s2", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": -1.7644282196229923, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.2612321514355722, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -15.191000217579752, -14.844901100150738, -15.365119220137847 ], - "DGs_solvent": [ + "dgs_solvent": [ -13.177348249866528, -13.397197398290475, -13.533190230842363 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.16094949504190872, 0.16167317536164355, 0.161390964464272 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.07605576055760557, 0.07244081360453485, 0.06473643709118893 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.17018239939806348, 0.1705214463555485, 0.1765041282329322 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.07299823187673656, 0.07852768563409034, 0.07956554685526648 @@ -4739,45 +4663,44 @@ "ligand_b": "27", "system_group": "charge_annihilation_set", "system_name": "irak4_s3", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.9319044745422005, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.08955729679735826, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - 
"DGs_complex": [ + "dgs_complex": [ -62.9795073505689, -63.18245269976545, -63.11612203391767 ], - "DGs_solvent": [ + "dgs_solvent": [ -63.98497407145467, -64.05644546673825, -64.03237596968569 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.22421724424089906, 0.22318334343882593, 0.22452215658896355 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.1728486646884273, 0.17844677137870854, 0.17726135889954148 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.21897308933328358, 0.21965913079418262, 0.2200542620136834 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.19064157615559485, 0.18565294266228846, 0.1898873316845287 @@ -4788,45 +4711,44 @@ "ligand_b": "27", "system_group": "charge_annihilation_set", "system_name": "irak4_s3", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 0.3594317127982123, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.19405630055776088, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -53.47035224754216, -53.03433515315095, -53.09362923358137 ], - "DGs_solvent": [ + "dgs_solvent": [ -53.58048009869361, -53.53350368554152, -53.562627988433974 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.2154101933644808, 0.2189790109622285, 0.21657053648920255 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.15250105977108944, 0.15138309965896174, 0.13813989839781166 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.21509290243540863, 0.21483008902978687, 0.21335978741727524 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.16304622379388733, 0.15729982318767366, 0.15136398080323313 @@ -4837,45 +4759,44 @@ 
"ligand_b": "27chargR", "system_group": "charge_annihilation_set", "system_name": "egfr", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.651682566162876, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.15286317169811045, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -93.748746155798, -93.38558504874081, -93.56651798083234 ], - "DGs_solvent": [ + "dgs_solvent": [ -96.23741915094435, -96.25183437314833, -96.16664335976705 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.20921215681999647, 0.2072057022919036, 0.2045842750861124 ], - "Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.11737604212373848, 0.12420304070622855, 0.11647866601275135 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.20772780272574662, 0.20860643996709452, 0.20844668729904775 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.1390502652184895, 0.137536578877361, 0.13684011113917657 @@ -4886,45 +4807,44 @@ "ligand_b": "30charg", "system_group": "charge_annihilation_set", "system_name": "egfr", - "repeats": 3, - "DDG": { + "ddg": { "magnitude": 2.5429536536557436, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DDG_uncertainty": { + "ddg_uncertainty": { "magnitude": 0.08891180568534318, "unit": "kilocalories_per_mole", ":is_custom:": true, "pint_unit_registry": "openff_units" }, - "DGs_complex": [ + "dgs_complex": [ -107.38693197070442, -107.5335235811763, -107.34603009577637 ], - "DGs_solvent": [ + "dgs_solvent": [ -109.99643972625063, -109.98690672329289, -109.91200015908082 ], - "Complex_smallest_mbar_overlaps": [ + "complex_smallest_mbar_overlaps": [ 0.2024093830390034, 0.2040563628169898, 0.20305075892741042 ], - 
"Complex_smallest_replica_mixing": [ + "complex_smallest_replica_mixing": [ 0.10980490245122561, 0.10141596175064362, 0.10997559487492374 ], - "Solvent_smallest_mbar_overlaps": [ + "solvent_smallest_mbar_overlaps": [ 0.20527006477192627, 0.2052806822994185, 0.20577167663361984 ], - "Solvent_smallest_replica_mixing": [ + "solvent_smallest_replica_mixing": [ 0.13317757009345793, 0.12503157362970446, 0.12471583733265976 diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index cf1799c..a95ff7c 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -6,10 +6,11 @@ title: OpenFE RBFE - charge_annihilation_set, jacs_set (5 systems) - 2026-03-18- # REQUIRED: short descriptive summary (1-2 sentences) summary: | - This submission describes the charge_annihilation_set, jacs_set RBFE benchmark + This submission describes QA testing using openmm-8.4 with the charge_annihilation_set and jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with openff-2.3.0 and nagl_openff-gnn-am1bcc-1.0.0.pt. The submission contains 160 transformations, 61 unique ligands, and 5 unique proteins. The largest simulated chemical system contains 6744 atoms. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. + This submission was run locally on HPC3 and so no AlchemicalArchive is available but the raw outputs are included in the Zenodo archive. 
# REQUIRED: list of submission tags tags: [rbfe, openff-2.3.0, nagl_openff-gnn-am1bcc-1.0.0.pt, charge_change, benchmark, openfe, openmm-840] @@ -22,8 +23,8 @@ authors: date: 2026-06-04 # REQUIRED: OpenFE/OpenMM/toolkit versions used to produce gathered reports -openfe_version: 1.8.0 -openmm_version: +openfe_version: 1.9.1 +openmm_version: 8.4.0 openff_toolkit_version: 0.18.0 # Recommended descriptors diff --git a/openfe_benchmarks/scripts/_example_generate_results_local.py b/openfe_benchmarks/scripts/example_generate_results_local.py similarity index 96% rename from openfe_benchmarks/scripts/_example_generate_results_local.py rename to openfe_benchmarks/scripts/example_generate_results_local.py index d085240..73bd402 100644 --- a/openfe_benchmarks/scripts/_example_generate_results_local.py +++ b/openfe_benchmarks/scripts/example_generate_results_local.py @@ -73,7 +73,7 @@ def main(results_dir, network, output_dir): raw_results[key].append((phase, result)) # now loop over the raw results and extract the ddg/dg and metadata - gathered_results = {"DG": [], "DDG": []} + gathered_results = {"dg": [], "ddg": []} # first workout the system group and system name which should be stored on all egdes transformation = list(network.edges)[0] mapping_annotations = transformation.mapping.annotations @@ -130,7 +130,7 @@ def main(results_dir, network, output_dir): solvent_dg = np.mean(solvent_data) * unit.kilocalories_per_mole solvent_dg_uncertainty = np.std(solvent_data) * unit.kilocalories_per_mole - # get the combinded ddg and uncertainty + # get the combined ddg and uncertainty entry_data["ddg"] = complex_dg - solvent_dg entry_data["ddg_uncertainty"] = np.sqrt( complex_dg_uncertainty**2 + solvent_dg_uncertainty**2 @@ -168,14 +168,14 @@ def main(results_dir, network, output_dir): entry_data[f"{label}_smallest_mbar_overlaps"] = mbar_overlap_elements entry_data[f"{label}_smallest_replica_mixing"] = replica_mixing_elements - gathered_results["DDG"].append(entry_data) + 
gathered_results["ddg"].append(entry_data) # also add the DDG data to the FEMAP fe_map.add_relative_calculation( labelA=lig_a_name, labelB=lig_b_name, - value=entry_data["DDG"], - uncertainty=entry_data["DDG_uncertainty"], + value=entry_data["ddg"], + uncertainty=entry_data["ddg_uncertainty"], ) # check if the network is connected and we can calculate the DGs @@ -193,7 +193,7 @@ def main(results_dir, network, output_dir): "system_name": system_name, "source": row["source"], } - gathered_results["DG"].append(entry_data) + gathered_results["dg"].append(entry_data) # write out the data to a json file output_file = output_dir / "computational_results.json" From 74612c7eac3f654dc84e3dc35d904afa90dd5150 Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Tue, 9 Jun 2026 14:51:08 +0100 Subject: [PATCH 10/24] add transformation validation to planning script --- openfe_benchmarks/scripts/_example_plan_rbfe.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/openfe_benchmarks/scripts/_example_plan_rbfe.py b/openfe_benchmarks/scripts/_example_plan_rbfe.py index ba83846..60e754b 100644 --- a/openfe_benchmarks/scripts/_example_plan_rbfe.py +++ b/openfe_benchmarks/scripts/_example_plan_rbfe.py @@ -205,6 +205,11 @@ def main(): # Can be used as input for Alchemiscale alchem_network = openfe.AlchemicalNetwork(edges=transformations) + # check each edge can validated before trying to run + logger.info(f"Validating transformations for system {BENCHMARK_SET} {BENCHMARK_SYS}") + for edge in alchem_network.edges: + edge.create() + # save to file alchem_network.to_json(file=os.path.join(OUTPUT_DIR, FILENAME_ALCHEMICALNETWORK)) From a74851fdb5786a868cb1f39b03357423862f21c8 Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Tue, 9 Jun 2026 15:02:26 +0100 Subject: [PATCH 11/24] update for lower case results --- openfe_benchmarks/data/_results_utils.py | 8 ++++---- openfe_benchmarks/scripts/_example_plot_rbfe.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git 
a/openfe_benchmarks/data/_results_utils.py b/openfe_benchmarks/data/_results_utils.py index 9c31dfb..2f6e9a9 100644 --- a/openfe_benchmarks/data/_results_utils.py +++ b/openfe_benchmarks/data/_results_utils.py @@ -18,8 +18,8 @@ def build_femap_from_relative_results(results: list[dict]) -> dict[tuple[str, st - ligand_b: str - system_group: str - system_name: str - - DDG: Quantity - - DDG_uncertainty: Quantity + - ddg: Quantity + - ddg_uncertainty: Quantity Returns ------- @@ -43,8 +43,8 @@ def build_femap_from_relative_results(results: list[dict]) -> dict[tuple[str, st ligand_b = result["ligand_b"] # record the ligands added to the femap unique_ligands.update([ligand_a, ligand_b]) - ddg = result["DDG"] - ddg_uncertainty = result["DDG_uncertainty"] + ddg = result["ddg"] + ddg_uncertainty = result["ddg_uncertainty"] femap.add_relative_calculation( labelA=ligand_a, labelB=ligand_b, diff --git a/openfe_benchmarks/scripts/_example_plot_rbfe.py b/openfe_benchmarks/scripts/_example_plot_rbfe.py index a800724..ed895a6 100644 --- a/openfe_benchmarks/scripts/_example_plot_rbfe.py +++ b/openfe_benchmarks/scripts/_example_plot_rbfe.py @@ -18,11 +18,11 @@ def main(): # load the results file results = json.load(open(RESULTS_FILE), cls=JSON_HANDLER.decoder) # check we have DDG values - if "DDG" not in results: - raise ValueError(f"Results file {RESULTS_FILE} does not contain 'DDG' values, cannot plot") + if "ddg" not in results: + raise ValueError(f"Results file {RESULTS_FILE} does not contain 'ddg' values, cannot plot") # build FEMaps and load with experimental data - femaps_by_system = build_femap_from_relative_results(results=results["DDG"]) + femaps_by_system = build_femap_from_relative_results(results=results["ddg"]) output_dir = pathlib.Path(OUTPUT_DIR) output_dir.mkdir(parents=True, exist_ok=True) From 039f78f6beb59db931e6ba2f152f9ac198b240cd Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Thu, 11 Jun 2026 14:27:02 +0100 Subject: [PATCH 12/24] add zenodo DOI --- 
.../results/2026-03-18-openmm-840-qa-testing/submission.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index a95ff7c..012b5c6 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -42,8 +42,8 @@ results: computational_results.json # REQUIRED: long-term archive pointer (at least doi or url) archive: - doi: TODO add DOI - archive_provider: TODO add archive provider + doi: https://doi.org/10.5281/zenodo.20643703 + archive_provider: zenodo # REQUIRED: license for the submission license: CC-BY-4.0 From bf22a9e4c35f98440541b6f989395f40a02349fb Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Thu, 11 Jun 2026 15:38:30 -0400 Subject: [PATCH 13/24] Update submission yaml --- openfe_benchmarks/data/_benchmark_systems.py | 3 +- .../run_prepare_metadata.sh | 2 +- .../submission.yaml | 195 +- .../scripts/prepare_metadata_submission.py | 1865 ++++++++--------- 4 files changed, 1091 insertions(+), 974 deletions(-) diff --git a/openfe_benchmarks/data/_benchmark_systems.py b/openfe_benchmarks/data/_benchmark_systems.py index 7c16c93..1a0ba9e 100644 --- a/openfe_benchmarks/data/_benchmark_systems.py +++ b/openfe_benchmarks/data/_benchmark_systems.py @@ -75,7 +75,7 @@ def reload(self): logger.debug("Benchmark index successfully reloaded and validated.") - def list_systems_by_tag(self, tags: list[str]) -> list[tuple[str, str]]: + def list_systems_by_tag(self, tags: list[str] = []) -> list[tuple[str, str]]: """ Get all systems that match **all** of the provided tags. @@ -84,6 +84,7 @@ def list_systems_by_tag(self, tags: list[str]) -> list[tuple[str, str]]: tags : list[str] List of tags to filter by (e.g., ['protein', 'cofactors']). Only systems containing every tag in this list will be returned. 
+ Defaults to ``[]``. Returns ------- diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh index 1990cc9..436c132 100755 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/run_prepare_metadata.sh @@ -7,7 +7,7 @@ set -e # Exit on error echo "Generating submission metadata for all benchmark networks..." echo "" -micromamba run -n openfe-benchmarks python \ +micromamba run -n openfe-benchmarks-test python \ /Users/jenniferclark/bin/openfe-benchmarks/openfe_benchmarks/scripts/prepare_metadata_submission.py \ "/Users/jenniferclark/OMSF/OpenFE/BenchmarkRepo/ResultSubmission/networks/*/*/*alchemicalnetwork.json" \ --output-dir . \ diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 012b5c6..8b689d7 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -2,40 +2,51 @@ submission_id: 2026-03-18-openmm-840-qa-testing # REQUIRED: short descriptive title -title: OpenFE RBFE - charge_annihilation_set, jacs_set (5 systems) - 2026-03-18-openmm-840-qa-testing +title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 systems) - 2026-03-18-openmm-840-qa-testing # REQUIRED: short descriptive summary (1-2 sentences) summary: | - This submission describes QA testing using openmm-8.4 with the charge_annihilation_set and jacs_set RBFE benchmark - (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with openff-2.3.0 - and nagl_openff-gnn-am1bcc-1.0.0.pt. The submission contains 160 transformations, 61 unique ligands, - and 5 unique proteins. The largest simulated chemical system contains 6744 atoms. 
Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. - This submission was run locally on HPC3 and so no AlchemicalArchive is available but the raw outputs are included in the Zenodo archive. + This submission describes the charge_annihilation_set, jacs_set RBFE benchmark + (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with + tip3p_HFE_multivalent;amber/amber/tip3p_standard;amber/ff14SB/phosaa10;amber and am1bcc. The + submission contains 160 transformations, 45 unique ligands, and 1 unique proteins. Note this means + the charge annihilation sets are not complete compared to what is in that system and should not be + compared to other complete runs due to the missing edges. # REQUIRED: list of submission tags -tags: [rbfe, openff-2.3.0, nagl_openff-gnn-am1bcc-1.0.0.pt, charge_change, benchmark, openfe, openmm-840] +tags: [rbfe, tip3p_HFE_multivalent;amber, amber, tip3p_standard;amber, ff14SB, phosaa10;amber, am1bcc, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: - name: Josh Horton # REQUIRED: publication/submission date (ISO 8601) -date: 2026-06-04 +date: 2026-06-11 + +openfe_version: [] + +openmm_version: [] + +openff_toolkit_version: [] -# REQUIRED: OpenFE/OpenMM/toolkit versions used to produce gathered reports -openfe_version: 1.9.1 -openmm_version: 8.4.0 -openff_toolkit_version: 0.18.0 # Recommended descriptors -forcefield: openff-2.3.0 -partial_charges: nagl_openff-gnn-am1bcc-1.0.0.pt +forcefield: "amber/phosaa10;amber/tip3p_HFE_multivalent;amber/tip3p_standard;amber/ff14SB" + +partial_charges: "am1bcc" -# BenchmarkData provenance (from openfe-benchmarks planning script) + +# BenchmarkData provenance (from openfe-benchmarks planning script) with associated network key benchmark_data: source_repository: 
https://github.com/OpenFreeEnergy/openfe-benchmarks - set: charge_annihilation_set, jacs_set - system: egfr, irak4_s2, irak4_s3, p38, tyk2 + "charge_annihilation_set": + "egfr": AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 + "irak4_s2": AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 + "irak4_s3": AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a + "jacs_set": + "p38": AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 + "tyk2": AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 + # REQUIRED: results file results: computational_results.json @@ -50,23 +61,135 @@ license: CC-BY-4.0 # RECOMMENDED / OPTIONAL metadata for protocol settings protocol_settings: - - protocol: RelativeHybridTopologyProtocol - count: 3 - systems: "charge_annihilation_set/egfr, charge_annihilation_set/irak4_s2, charge_annihilation_set/irak4_s3" - files: "charge_annihilation_set_egfr_alchemicalnetwork.json, charge_annihilation_set_irak4_s2_alchemicalnetwork.json, charge_annihilation_set_irak4_s3_alchemicalnetwork.json" - production_time: "20 nanosecond" - equilibration_time: "1.0 nanosecond" - timestep: "4.0 femtosecond" - temperature: "298.15 kelvin" - pressure: "1 bar" - lambda_windows: "22" - - protocol: RelativeHybridTopologyProtocol - count: 2 - systems: "jacs_set/p38, jacs_set/tyk2" - files: "alchemicalnetwork.json" - production_time: "5.0 nanosecond" - equilibration_time: "1.0 nanosecond" - timestep: "4.0 femtosecond" - temperature: "298.15 kelvin" - pressure: "1 bar" + - protocol: "RelativeHybridTopologyProtocol" + calculation_mode: "rbfe" + timestep: "{'val': 4.0, 'unit': 'femtosecond'}" + temperature: "{'val': 298.15, 'unit': 'kelvin'}" + pressure: "{'val': 1, 'unit': 'bar'}" + forcefields: "amber/phosaa10;amber/tip3p_HFE_multivalent;amber/tip3p_standard;amber/ff14SB" + small_molecule_forcefield: "openff-2.3.0" + partial_charges: "am1bcc" + equilibration_time: "{'val': 1.0, 'unit': 'nanosecond'}" + production_time: "{'val': 5.0, 'unit': 'nanosecond'}" + 
vacuum_equilibration_time: + vacuum_production_time: + solvent_equilibration_time: + solvent_production_time: + lambda_functions: "default" lambda_windows: "11" + lambda_schedule: "" + notes: | + Applies to systems: + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ff, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2g, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2gg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2h, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2j, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2k, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2l, ligand_final=none, solvent=none, cofactors=none, protein=none + 
- AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2n, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2o, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2p, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2q, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2s, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2t, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2u, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2v, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2x, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2y, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2z, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fln, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flq, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: 
ligand_start=3flw, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmh, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmk, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_31, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_42, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_43, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_44, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_47, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_49, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_50, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_55, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_23, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_27, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: 
ligand_start=jmc_28, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_30, ligand_final=none, solvent=none, cofactors=none, protein=none + - protocol: "RelativeHybridTopologyProtocol" + notes: | + Detailed protocol settings differ: + - alchemical_settings.explicit_charge_correction: False -> True + - lambda_settings.lambda_windows: 11 -> 22 + - simulation_settings.n_replicas: 11 -> 22 + - simulation_settings.production_length.val: 5.0 -> 20 + Applies to transformations: + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein=none + - protocol: "RelativeHybridTopologyProtocol" + notes: | + Detailed protocol settings differ: + - alchemical_settings.explicit_charge_correction: False -> True + - lambda_settings.lambda_windows: 11 -> 22 + - simulation_settings.n_replicas: 11 -> 22 + - simulation_settings.production_length.val: 5.0 -> 20 + - solvation_settings.solvent_padding.val: 1.5 -> 1 + Applies to transformations: + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - 
AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - protocol: "RelativeHybridTopologyProtocol" + notes: | + Detailed protocol settings differ: + - solvation_settings.solvent_padding.val: 1.5 -> 1 + Applies to transformations: + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ff, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2g, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2gg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2h, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2j, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2k, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2l, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2n, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2o, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2p, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2q, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2s, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2t, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2u, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2v, ligand_final=none, solvent=none, cofactors=none, 
protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2x, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2y, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2z, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fln, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flq, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flw, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmh, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmk, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_31, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_42, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_43, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_44, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: 
ligand_start=ejm_47, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_49, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_50, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_55, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_23, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_27, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_28, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_30, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 51a98a9..c32d368 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -48,8 +48,9 @@ from __future__ import annotations +import os import argparse -import bz2 +import ast import glob as glob_module import json import re @@ -60,151 +61,278 @@ from datetime import date from pathlib import Path from typing import Any +import warnings +import pprint + +from gufe.archival import AlchemicalArchive +from gufe import AlchemicalNetwork +from gufe.transformations.transformation import Transformation from openfe_benchmarks.data import BenchmarkIndex +def 
def _add_value_with_keys(
    list_obj: list[tuple[Any, list[str]]],
    value: Any,
    keys: list[str],
) -> None:
    """Record that ``keys`` are associated with ``value`` in ``list_obj``.

    ``list_obj`` is a list of ``(value, keys)`` pairs that is updated in
    place.  Values are matched with ``==`` (not hashing), so unhashable values
    such as dataclass instances that define ``__eq__`` are supported.

    Parameters
    ----------
    list_obj : list[tuple[Any, list[str]]]
        Accumulator of ``(value, keys)`` pairs; mutated in place.
    value : Any
        Value to register.  If an equal value is already present its key list
        is extended, otherwise a new pair is appended.
    keys : list[str]
        Keys to attach to ``value``.  The caller's list is never stored
        directly, so later mutation of ``keys`` does not leak into
        ``list_obj``.
    """
    for existing_value, existing_keys in list_obj:
        if existing_value == value:
            target = existing_keys
            break
    else:
        target = []
        list_obj.append((value, target))

    # One de-duplicating path for both new and existing values, so a key is
    # never listed twice regardless of how the entry was created.
    for key in keys:
        if key not in target:
            target.append(key)
self.vacuum_equilibration_time == other.vacuum_equilibration_time + and self.vacuum_production_time == other.vacuum_production_time + and self.solvent_equilibration_time == other.solvent_equilibration_time + and self.solvent_production_time == other.solvent_production_time + ) @dataclass class SystemInfo: """Per-system information extracted from transformations.""" - system_group: str - system_name: str - n_transformations: int + benchmark_set: str + benchmark_system: str + calculation_mode: str + source_file: str + network_key: str ligands: set[str] = field(default_factory=set) proteins: set[str] = field(default_factory=set) - solutes: set[str] = field(default_factory=set) + cofactors: set[str] = field(default_factory=set) solvents: set[str] = field(default_factory=set) - max_atoms: int = 0 files: set[str] = field(default_factory=set) + openfe_version: list[tuple[str, list[str]]] = field(default_factory=list) + openmm_version: list[tuple[str, list[str]]] = field(default_factory=list) + openff_toolkit_version: list[tuple[str, list[str]]] = field(default_factory=list) + protocol_settings_list: list[tuple[ProtocolSettingsInfo, list[str]]] = field( + default_factory=list + ) + def make_key( + self, + network_key, + ligand_start, + cofactors, + solvent, + ligand_final=None, + protein=None, + ): + if self.calculation_mode == "rbfe": + return f"{network_key} {self.benchmark_set}-{self.benchmark_system}: ligand_start={ligand_start}, ligand_final={ligand_final}, solvent={solvent or 'none'}, cofactors={cofactors or 'none'}, protein={protein or 'none'}" + elif self.calculation_mode == "asfe": + if protein is not None or ligand_final is not None: + warnings.warn("ASFEs do not use final ligand or protein information.") + return f"{network_key} {self.benchmark_set}-{self.benchmark_system}: ligand_start={ligand_start}, solvent={solvent or 'none'}, cofactors={cofactors or 'none'}" + else: + raise ValueError( + "Set the calculation mode to a supported value: 'rbfe', 'asfe'" + ) 
@dataclass
class SystemInfo:
    """Per-system information extracted from transformations."""

    benchmark_set: str
    benchmark_system: str
    calculation_mode: str
    source_file: str
    network_key: str
    ligands: set[str] = field(default_factory=set)
    proteins: set[str] = field(default_factory=set)
    cofactors: set[str] = field(default_factory=set)
    solvents: set[str] = field(default_factory=set)
    files: set[str] = field(default_factory=set)
    # Each of the lists below holds (value, keys) pairs, where ``keys`` are
    # the calculation keys that were run with that value.
    openfe_version: list[tuple[str, list[str]]] = field(default_factory=list)
    openmm_version: list[tuple[str, list[str]]] = field(default_factory=list)
    openff_toolkit_version: list[tuple[str, list[str]]] = field(default_factory=list)
    protocol_settings_list: list[tuple[ProtocolSettingsInfo, list[str]]] = field(
        default_factory=list
    )

    def make_key(
        self,
        network_key: str,
        ligand_start: Any,
        cofactors: Any,
        solvent: Any,
        ligand_final: Any = None,
        protein: Any = None,
    ) -> str:
        """Build the human-readable key identifying one calculation.

        Parameters
        ----------
        network_key : str
            Key of the network the calculation belongs to.
        ligand_start : Any
            Starting ligand; formatted with ``str()``.
        cofactors, solvent : Any
            Formatted with ``str()``; falsy values are rendered as ``none``.
        ligand_final : Any, optional
            Final ligand (RBFE only); rendered as-is, so ``None`` appears as
            ``None``.
        protein : Any, optional
            Protein (RBFE only); falsy values are rendered as ``none``.

        Returns
        -------
        str
            Key of the form ``"<network_key> <set>-<system>: ligand_start=..."``.

        Raises
        ------
        ValueError
            If ``self.calculation_mode`` is neither ``"rbfe"`` nor ``"asfe"``.
        """
        mode = self.calculation_mode
        if mode not in ("rbfe", "asfe"):
            raise ValueError(
                "Set the calculation mode to a supported value: 'rbfe', 'asfe'"
            )

        head = (
            f"{network_key} {self.benchmark_set}-{self.benchmark_system}: "
            f"ligand_start={ligand_start}"
        )
        environment = f"solvent={solvent or 'none'}, cofactors={cofactors or 'none'}"

        if mode == "asfe":
            if protein is not None or ligand_final is not None:
                # stacklevel=2 attributes the warning to the caller that passed
                # the unused arguments rather than to this helper.
                warnings.warn(
                    "ASFEs do not use final ligand or protein information.",
                    stacklevel=2,
                )
            return f"{head}, {environment}"

        return (
            f"{head}, ligand_final={ligand_final}, {environment}, "
            f"protein={protein or 'none'}"
        )

    def add_version_setting(self, attribute: str, value: str, key: str) -> None:
        """Add a version attribute to the appropriate list

        Parameters
        ----------
        attribute : str
            Attribute of SystemInfo, one of openmm_version, openfe_version, or openff_toolkit_version
        value : str
            Version string
        key : str
            String representing the calculation run with this version
        """
        _add_value_with_keys(getattr(self, attribute), value, [key])

    def add_protocol_settings(
        self, protocol_settings: ProtocolSettingsInfo, key: str
    ) -> None:
        """Add or update protocol settings with associated transformation key.

        Stores unique ProtocolSettingsInfo objects with a list of transformation keys
        that use that protocol configuration.
        """
        _add_value_with_keys(self.protocol_settings_list, protocol_settings, [key])
@dataclass
class AutoMetadata:
    """Submission-wide metadata aggregated over every ``SystemInfo``.

    The list-valued fields hold ``(value, keys)`` pairs, where ``keys`` are
    the calculation keys that share that value.
    """

    calculation_mode: str = ""
    network_key: str = ""
    n_transformations: int = 0
    benchmark_sets_systems: list[tuple] = field(default_factory=list)
    system_info_dict: dict[tuple, SystemInfo] = field(default_factory=dict)
    openfe_version: list[tuple[str, list[str]]] = field(default_factory=list)
    openmm_version: list[tuple[str, list[str]]] = field(default_factory=list)
    openff_toolkit_version: list[tuple[str, list[str]]] = field(default_factory=list)
    protocols: list[tuple[str, list[str]]] = field(default_factory=list)
    forcefield: list[tuple[str, list[str]]] = field(default_factory=list)
    small_molecule_force_field: list[tuple[str, list[str]]] = field(
        default_factory=list
    )
    partial_charges: list[tuple[str, list[str]]] = field(default_factory=list)
    protocol_settings_list: list[tuple[ProtocolSettingsInfo, list[str]]] = field(
        default_factory=list
    )

    def update_from_system_info(self) -> None:
        """Update aggregated fields from the contained SystemInfo entries.

        Every aggregated list is rebuilt from scratch, so the method can be
        called again after ``system_info_dict`` changes without accumulating
        stale entries.
        """
        self.benchmark_sets_systems = list(self.system_info_dict)

        self.openfe_version = []
        self.openmm_version = []
        self.openff_toolkit_version = []
        self.protocols = []
        self.forcefield = []
        self.small_molecule_force_field = []
        self.partial_charges = []
        self.protocol_settings_list = []

        for system_info in self.system_info_dict.values():
            for version, keys in system_info.openfe_version:
                _add_value_with_keys(self.openfe_version, version, keys)
            for version, keys in system_info.openmm_version:
                _add_value_with_keys(self.openmm_version, version, keys)
            for version, keys in system_info.openff_toolkit_version:
                _add_value_with_keys(self.openff_toolkit_version, version, keys)

            for protocol_settings, keys in system_info.protocol_settings_list:
                derived = (
                    (self.protocols, protocol_settings.protocol),
                    (self.forcefield, protocol_settings.forcefields),
                    (
                        self.small_molecule_force_field,
                        protocol_settings.small_molecule_forcefield,
                    ),
                    (self.partial_charges, protocol_settings.partial_charges),
                )
                for target, value in derived:
                    # Empty strings mean "not reported" and are not aggregated.
                    if value:
                        _add_value_with_keys(target, value, keys)

                # Same merge-or-append semantics as every other list; use the
                # shared helper instead of a hand-rolled for/else loop.
                _add_value_with_keys(
                    self.protocol_settings_list, protocol_settings, keys
                )
Exception: + try: + alchemical_network = AlchemicalNetwork.from_json(file=input_path) + mode = "alchemicalnetwork" + except Exception: + raise ImportError( + f"Could not import file neither an AlchemicalArchive nor AlchemicalNetwork: {input_path}" + ) - gufe_key = payload.get(":gufe-key:") or table_key - if isinstance(gufe_key, str): - by_key[gufe_key] = payload + return alchemical_network, mode - qualname = payload.get("__qualname__") - if qualname == "AlchemicalArchive": - archive_obj = payload - elif qualname == "AlchemicalNetwork": - network_obj = payload - if archive_obj is None and network_obj is None: +def _get_network_key( + network_obj: AlchemicalArchive | AlchemicalNetwork, + mode: str, +) -> str: + if mode == "alchemicalarchive": + return network_obj.network.key + elif mode == "alchemicalnetwork": + return network_obj.key + else: raise ValueError( - "Could not find AlchemicalArchive or AlchemicalNetwork object in token table" + f"Network mode must be either 'alchemical network' or 'alchemicalarchive', not {mode}," ) - return by_key, archive_obj, network_obj - def _transformation_refs( - archive_obj: dict[str, Any] | None, network_obj: dict[str, Any] | None + network_obj: AlchemicalArchive | AlchemicalNetwork, + mode: str, ) -> list[Any]: - if archive_obj is not None: - return [ - item[0] - for item in archive_obj.get("transformation_results", []) - if isinstance(item, list) and len(item) == 2 - ] - if network_obj is not None: - edges = network_obj.get("edges") or [] - if isinstance(edges, list): - return edges - return [] + if mode == "alchemicalarchive": + return network_obj.transformation_results + elif mode == "alchemicalnetwork": + return network_obj.edges + else: + raise ValueError( + f"Network mode must be either 'alchemical network' or 'alchemicalarchive', not {mode}," + ) -def _detect_mode( - by_key: dict[str, dict[str, Any]], - archive_obj: dict[str, Any] | None, - network_obj: dict[str, Any] | None, +def _detect_calc_mode( + network_obj: 
def _generate_title(
    mode: str,
    benchmark_set_systems: list[tuple[str, str]],
    submission_id: str,
) -> str:
    """Generate a human-readable submission title.

    ``benchmark_set_systems`` is a list of ``(benchmark_set, system)`` pairs.
    With ``MODE`` being ``mode`` upper-cased, the title is:

    - no pairs: "OpenFE MODE Benchmark - submission_id"
    - one set, up to 3 systems: "OpenFE MODE - set - sys1, sys2 - submission_id"
    - one set, 4+ systems: "OpenFE MODE - set (N systems) - submission_id"
    - several sets, up to 3 pairs: "OpenFE MODE - set1/sys1, set2/sys2 - submission_id"
    - otherwise: "OpenFE MODE - Multi-set Benchmark (N sets, M systems) - submission_id",
      where N and M count distinct set names and distinct system names.
    """
    mode = mode.upper()
    n_set_systems = len(benchmark_set_systems)

    if n_set_systems == 0:
        # Fallback if no benchmark set detected
        return f"OpenFE {mode} Benchmark - {submission_id}"

    set_names = {pair[0] for pair in benchmark_set_systems}
    system_names = [pair[1] for pair in benchmark_set_systems]

    if len(set_names) == 1:
        set_name = benchmark_set_systems[0][0]
        if n_set_systems <= 3:
            # List system names
            systems_str = ", ".join(system_names)
            return f"OpenFE {mode} - {set_name} - {systems_str} - {submission_id}"
        # Use count
        return f"OpenFE {mode} - {set_name} ({n_set_systems} systems) - {submission_id}"

    if n_set_systems <= 3:
        pairs_str = ", ".join(f"{x}/{y}" for x, y in benchmark_set_systems)
        return f"OpenFE {mode} - {pairs_str} - {submission_id}"

    # Many sets - use multi-set notation
    return (
        f"OpenFE {mode} - Multi-set Benchmark ({len(set_names)} sets, "
        f"{len(set(system_names))} systems) - {submission_id}"
    )


def _infer_benchmark_data_set_system(
    trans: Transformation,
) -> tuple[str, str]:
    """Infer benchmark set and system from Transformation contents using BenchmarkIndex.

    The ``system_group`` and ``system_name`` annotations of the
    transformation's mapping are used.  If only the system name is annotated,
    the benchmark set is looked up in ``BenchmarkIndex`` and the first set
    containing a system of that name is used.

    Returns
    -------
    tuple[str, str]
        ``(benchmark_set, system_name)``.

    Raises
    ------
    ValueError
        If the system name is not annotated, or no benchmark set could be
        determined for it.
    """
    # NOTE(review): assumes ``trans`` carries a ``mapping`` with an
    # ``annotations`` dict; confirm for transformations without a mapping.
    annotations = trans.mapping.annotations
    benchmark_set = annotations.get("system_group", None)
    system = annotations.get("system_name", None)

    if benchmark_set is None and system is not None:
        # Get all known benchmark sets and systems from the index
        index = BenchmarkIndex()
        # Just take the first matching set.  ``None`` (not an empty list) when
        # nothing matches, so the check below actually rejects it.
        benchmark_set = next(
            (
                entry[0]
                for entry in index.list_systems_by_tag()
                if entry[1] == system
            ),
            None,
        )

    if system is None or benchmark_set is None:
        raise ValueError(
            f"Benchmark set / system could not be found for the transformation, {trans.name}. The following was found: {benchmark_set} / {system}. See valid combinations with `index = ofebm.BenchmarkIndex(); index.list_systems_by_tag()`"
        )

    return benchmark_set, system
+ else: + out["full_protocol_settings"] = pprint.pformat(settings) integrator_settings = settings.get("integrator_settings") or {} if isinstance(integrator_settings, dict): @@ -399,21 +472,49 @@ def _build_protocol_settings( lambda_settings = settings.get("lambda_settings") or {} if isinstance(lambda_settings, dict): + out["lambda_functions"] = lambda_settings.get("lambda_functions", "") lambda_windows = lambda_settings.get("lambda_windows") if lambda_windows is not None: out["lambda_windows"] = str(lambda_windows) else: lambda_counts: list[str] = [] - for lambda_key in ("lambda_elec", "lambda_vdw", "lambda_restraints"): - values = lambda_settings.get(lambda_key) - if isinstance(values, list): - lambda_counts.append(f"{lambda_key}:{len(values)}") + for lambda_key, values in lambda_settings.items(): + if lambda_key not in ["lambda_functions", "lambda_windows"]: + continue + lambda_counts.append(f"{lambda_key}:{len(values)}") if lambda_counts: out["lambda_schedule"] = ", ".join(lambda_counts) + forcefield_settings = ( + settings.get("forcefield_settings") + or settings.get("solvent_forcefield_settings") + or settings.get("vacuum_forcefield_settings") + or {} + ) + if forcefield_settings: + out["small_molecule_forcefield"] = str( + forcefield_settings.get("small_molecule_forcefield") or "" + ) + out["forcefields"] = "" + ffs = forcefield_settings.get("forcefields") + if isinstance(ffs, list) and ffs: + out["forcefields"] = ";".join(set([os.path.splitext(ff)[0] for ff in ffs])) + + partial_charge_settings = settings.get("partial_charge_settings") or {} + if partial_charge_settings: + method = partial_charge_settings.get("partial_charge_method") + nagl_model = partial_charge_settings.get("nagl_model") + if not method: + raise ValueError("Protocol does not have a parital charge method defined.") + + if method and nagl_model: + out["partial_charges"] = f"{method} ({nagl_model})" + elif method: + out["partial_charges"] = str(method) + # Protocol-specific handling: RBFE 
typically has a single simulation block; # ASFE commonly has separate vacuum and solvent simulation settings. - if mode == "rbfe": + if calc_mode == "rbfe": sim = settings.get("simulation_settings") or {} if isinstance(sim, dict): eq, prod = _extract_sim_times(sim) @@ -421,7 +522,7 @@ def _build_protocol_settings( out["equilibration_time"] = eq if prod: out["production_time"] = prod - elif mode == "asfe": + elif calc_mode == "asfe": for prefix, key in ( ("vacuum", "vacuum_simulation_settings"), ("solvent", "solvent_simulation_settings"), @@ -436,100 +537,13 @@ def _build_protocol_settings( out[f"{prefix}_production_time"] = prod else: ValueError( - f"Calculation type {mode} is not yet supported. Add capability to `_build_protocol_settings`" + f"Calculation type {calc_mode} is not yet supported. Add capability to `_build_protocol_settings`" ) - if len(out) == 1: - out["notes"] = "Protocol class found, but detailed settings were unavailable." - return out -def _render_protocol_settings_yaml( - protocol_settings_list: list[ProtocolSettingsInfo], -) -> str: - """Render protocol settings as YAML, grouping by unique setting combinations.""" - if not protocol_settings_list: - return "protocol_settings:\n - protocol: unknown\n notes: Protocol settings unavailable." 
- - lines = ["protocol_settings:"] - - # Group identical protocol settings and collect source metadata - settings_groups: dict[str, list[ProtocolSettingsInfo]] = defaultdict(list) - - for info in protocol_settings_list: - # Create a hashable key from the settings only - settings_key = json.dumps(info.settings, sort_keys=True) - settings_groups[settings_key].append(info) - - preferred_order = [ - "production_time", - "equilibration_time", - "vacuum_production_time", - "vacuum_equilibration_time", - "solvent_production_time", - "solvent_equilibration_time", - "timestep", - "temperature", - "pressure", - "lambda_windows", - "lambda_schedule", - "notes", - ] - - for _, (settings_key, info_list) in enumerate(settings_groups.items()): - settings = info_list[0].settings - protocol_name = settings.get("protocol", "") - lines.append(f" - protocol: {protocol_name}") - - # Add count and source information - if len(protocol_settings_list) > 1: - lines.append(f" count: {len(info_list)}") - - # Collect unique benchmark systems - systems = sorted( - set( - f"{info.benchmark_set}/{info.benchmark_system}" - for info in info_list - if info.benchmark_set or info.benchmark_system - ) - ) - if systems: - systems_str = ", ".join(systems) - lines.append(f" systems: {json.dumps(systems_str)}") - - # Collect source files (just filenames, not full paths) - files = sorted(set(Path(info.source_file).name for info in info_list)) - if files and len(files) <= 5: # Only show if not too many - files_str = ", ".join(files) - lines.append(f" files: {json.dumps(files_str)}") - elif files: - lines.append(f" files: {json.dumps(f'{len(files)} files')}") - - # Add protocol settings - for key in preferred_order: - if key in settings: - lines.append(f" {key}: {json.dumps(str(settings[key]))}") - - for key in sorted( - k for k in settings if k not in set(preferred_order) | {"protocol"} - ): - lines.append(f" {key}: {json.dumps(str(settings[key]))}") - - return "\n".join(lines) - - -def _resolve_payload( - 
by_key: dict[str, dict[str, Any]], ref: Any -) -> tuple[str | None, dict[str, Any] | None]: - if isinstance(ref, dict): - ref_key = ref.get(":gufe-key:") - if isinstance(ref_key, str): - return ref_key, by_key.get(ref_key) - return None, None - - -def _component_name(component: dict[str, Any], component_key: str) -> str: +def _component_name(component) -> str: molprops = component.get("molprops") or {} if isinstance(molprops, dict): ofe_name = molprops.get("ofe-name") @@ -545,172 +559,228 @@ def _component_name(component: dict[str, Any], component_key: str) -> str: if component.get("solvent_molecule"): return str(component.get("solvent_molecule")) - return component_key + return "unknown" -def _component_atoms(component: dict[str, Any]) -> int: - atoms = component.get("atoms") - if isinstance(atoms, list): - return len(atoms) - return 0 +def _get_system_info(trans, calc_mode) -> dict[str, set | list]: + # Per-system tracking + for state_key in ("stateA", "stateB"): + chemical_system = getattr(trans, state_key) + if not chemical_system: + continue + solvents = set() + proteins = set() + ligands = list() + cofactors = set() + for label, component in chemical_system.components.items(): + qualname = str(type(component)).rstrip("'>").split(".")[-1] + + component = component.to_dict() + comp_name = _component_name(component) + + if calc_mode == "asfe": + if "solvent" in label or "solventcomponent" in qualname.lower(): + solvents.add(comp_name) + elif "solute" in label or qualname == "SmallMoleculeComponent": + ligands.append(comp_name) + elif calc_mode == "rbfe": + if "protein" in label or qualname == "ProteinComponent": + proteins.add(comp_name) + elif "ligand" in label: + if comp_name not in ligands: + ligands.append(comp_name) + elif "cofactor" in label: + cofactors.add(comp_name) + elif qualname == "SmallMoleculeComponent" and "solvent" not in label: + # Non-solvent small molecules that are not explicit ligands are treated as cofactors. 
+ cofactors.add(comp_name) + else: + ValueError( + f"Calculation type {calc_mode} is not yet supported. Add capability to `_build_content_summary`" + ) -def _build_content_summary( - by_key: dict[str, dict[str, Any]], - archive_objs: list[dict[str, Any]], - network_objs: list[dict[str, Any]], - mode: str, - benchmark_data_set: str, - forcefield: str, - partial_charges: str, - used_alchemiscale: bool = True, - source_files: list[str] = None, -) -> tuple[str, list[SystemInfo]]: - """ - Build content summary and extract per-system information. + return { + "solvents": solvents, + "proteins": proteins, + "ligands": ligands, + "cofactors": cofactors, + } - Parameters - ---------- - archive_objs: List of AlchemicalArchive objects - network_objs: List of AlchemicalNetwork objects - Returns: - (summary_text, list of SystemInfo objects) - """ - if source_files is None: - source_files = [] - - repeat_counts: list[int] = [] - transformation_refs: list[Any] = [] - - # Collect transformation refs from all archives and networks - for archive_obj in archive_objs: - transformation_results = archive_obj.get("transformation_results", []) - trans_refs = [ - item[0] - for item in transformation_results - if isinstance(item, list) and len(item) == 2 - ] - transformation_refs.extend(trans_refs) - repeat_counts.extend( - [ - len(item[1]) - for item in transformation_results - if isinstance(item, list) and len(item) == 2 - ] +def _extract_auto_metadata( + network_obj: AlchemicalArchive | AlchemicalNetwork, + network_mode: str, + source_file: str, +) -> AutoMetadata: + metadata = AutoMetadata() + metadata.network_key = _get_network_key(network_obj, network_mode) + metadata.calculation_mode = _detect_calc_mode(network_obj, network_mode) + + transformations = _transformation_refs(network_obj, network_mode) + metadata.n_transformations = len(transformations) + for trans in transformations: + benchmark_set_system = _infer_benchmark_data_set_system(trans) + if benchmark_set_system not in 
metadata.system_info_dict: + metadata.system_info_dict[benchmark_set_system] = SystemInfo( + *benchmark_set_system, + metadata.calculation_mode, + source_file, + metadata.network_key, + ) + + system_info = _get_system_info(trans, metadata.calculation_mode) + for key, value in system_info.items(): + current_attr = getattr(metadata.system_info_dict[benchmark_set_system], key) + if isinstance(current_attr, set): + current_attr.update(value) + elif isinstance(current_attr, list): + current_attr.extend(value) + else: + raise TypeError( + f"Unsupported system_info attribute type for {key}: {type(current_attr)}" + ) + + if metadata.calculation_mode == "rbfe": + if len(system_info["ligands"]) == 0 or len(system_info["ligands"]) > 2: + raise ValueError( + f"Transformation detects a count other than one or two ligands: network_key={metadata.network_key}, set/system: {benchmark_set_system}, transformation: {trans.name}, ligands: {system_info['ligands']}" + ) + ligand_start = system_info["ligands"][0] + ligand_final = ( + system_info["ligands"][1] if len(system_info["ligands"]) > 1 else "none" + ) + elif metadata.calculation_mode == "asfe": + if len(system_info["ligands"]) < 1 or len(system_info["ligands"]) > 1: + raise ValueError( + f"Transformation detects a count other than one ligand: network_key={metadata.network_key}, set/system: {benchmark_set_system}, transformation: {trans.name}, ligands: {system_info['ligands']}" + ) + ligand_start = system_info["ligands"][0] + ligand_final = "none" + + key = metadata.system_info_dict[benchmark_set_system].make_key( + metadata.network_key, + ligand_start, + system_info["cofactors"], + system_info["solvents"], + ligand_final=ligand_final, + protein=system_info["proteins"], ) - for network_obj in network_objs: - edges = network_obj.get("edges") or [] - if isinstance(edges, list): - transformation_refs.extend(edges) + protocol_info = ProtocolSettingsInfo( + **_build_protocol_settings(trans.protocol, metadata.calculation_mode) + ) + 
metadata.system_info_dict[benchmark_set_system].add_protocol_settings( + protocol_info, key + ) - # Per-system tracking - system_data: dict[ - tuple[str, str], SystemInfo - ] = {} # key: (system_group, system_name) + annotations = trans.mapping.annotations + for key, value in annotations.items(): + if "openmm" in key: + metadata.system_info_dict[benchmark_set_system].add_version_setting( + "openmm_version", value, key + ) + if "openfe" in key: + metadata.system_info_dict[benchmark_set_system].add_version_setting( + "openfe_version", value, key + ) + if "openff" in key and "toolkit" in key: + metadata.system_info_dict[benchmark_set_system].add_version_setting( + "openff_toolkit_version", value, key + ) - visited_systems_for_cofactors: set[str] = set() - all_cofactors: set[str] = set() - systems_with_cofactors: set[str] = set() + metadata.update_from_system_info() - transformation_count = len(transformation_refs) + return metadata - for transformation_ref in transformation_refs: - _, transformation = _resolve_payload(by_key, transformation_ref) - if not transformation: - continue - # Extract system info from mapping annotations - system_group, system_name = _extract_system_info_from_mapping( - by_key, transformation_ref - ) +def _normalize_partial_charge_info(partial_charges: str) -> str: + value = partial_charges.strip() + if not value: + return "" - # Initialize SystemInfo if first time seeing this system - system_key = (system_group, system_name) - if system_key not in system_data: - system_data[system_key] = SystemInfo( - system_group=system_group, - system_name=system_name, - n_transformations=0, - ) + lower = value.lower() - system_info = system_data[system_key] - system_info.n_transformations += 1 + # Canonical openfe-benchmarks style: nagl_.pt + model_match = re.search( + r"(openff-gnn-am1bcc-[0-9.]+\.0\.pt|openff-gnn-am1bcc-[0-9.]+\.pt)", lower + ) + if "nagl" in lower and model_match: + model = model_match.group(1) + return f"nagl_{model}" - for state_key 
in ("stateA", "stateB"): - cs_key, chemical_system = _resolve_payload( - by_key, transformation.get(state_key) - ) - if not chemical_system: - continue + if lower in {"am1bcc", "am1-bcc"}: + return "am1bcc" - components = chemical_system.get("components") or {} - if not isinstance(components, dict): - continue + normalized = re.sub(r"[^a-z0-9._-]+", "_", lower).strip("_") + return normalized - system_atoms = 0 - local_cofactors: set[str] = set() - for label, component_ref in components.items(): - comp_key, component = _resolve_payload(by_key, component_ref) - if not component: - continue +def _make_tags( + *, + mode: str, + forcefield: list[tuple], + partial_charge_tag: list[tuple], + user_keywords: list[str], +) -> list[str]: + tags: list[str] = [] + tags.append(mode) + if forcefield: + tags.extend( + list(set(ff for ff_set, _ in forcefield for ff in ff_set.split("/"))) + ) + if partial_charge_tag: + tags.extend(list(set(x[0] for x in partial_charge_tag))) + tags.extend(user_keywords) - label_l = str(label).lower() - qualname = str(component.get("__qualname__") or "") - comp_name = _component_name(component, comp_key or "unknown") - comp_atoms = _component_atoms(component) - system_atoms += comp_atoms - - if mode == "asfe": - if "solvent" in label_l or "solventcomponent" in qualname.lower(): - system_info.solvents.add(comp_name) - elif "solute" in label_l or qualname == "SmallMoleculeComponent": - system_info.solutes.add(comp_name) - elif mode == "rbfe": - if "protein" in label_l or qualname == "ProteinComponent": - system_info.proteins.add(comp_name) - elif "ligand" in label_l: - system_info.ligands.add(comp_name) - elif "cofactor" in label_l: - all_cofactors.add(comp_name) - local_cofactors.add(comp_name) - elif ( - qualname == "SmallMoleculeComponent" - and "solvent" not in label_l - ): - # Non-solvent small molecules that are not explicit ligands are treated as cofactors. 
- all_cofactors.add(comp_name) - local_cofactors.add(comp_name) - else: - ValueError( - f"Calculation type {mode} is not yet supported. Add capability to `_build_content_summary`" - ) + # Deduplicate while preserving order. + out: list[str] = [] + seen: set[str] = set() + for tag in tags: + t = tag.strip() + if not t or t in seen: + continue + seen.add(t) + out.append(t) + return out + + +def _yaml_block(text: str, indent_spaces: int = 2) -> str: + indent = " " * indent_spaces + lines = text.splitlines() or [""] + return "\n".join(f"{indent}{line}" for line in lines) + + +def _build_content_summary( + metadata: AutoMetadata, + used_alchemiscale: bool = True, +) -> str: + """ + Build content summary and extract per-system information. - system_info.max_atoms = max(system_info.max_atoms, system_atoms) + Parameters + ---------- + metadata : AutoMetadata + Consilidated - if ( - mode == "rbfe" - and cs_key - and cs_key not in visited_systems_for_cofactors - ): - visited_systems_for_cofactors.add(cs_key) - if local_cofactors: - systems_with_cofactors.add(cs_key) + Returns: + (summary_text, list of SystemInfo objects) + """ - # Add source files to system_info - for system_info in system_data.values(): - system_info.files = set(source_files) + field_info = "/".join( + set(ff for ff_set, _ in metadata.forcefield for ff in ff_set.split("/")) + ) + if not field_info: + field_info = "an unspecified force field" - field_info = forcefield or "an unspecified force field" - charge_info = partial_charges or "unspecified partial charges" + charge_info = "/".join(x[0] for x in metadata.partial_charges) + if not field_info: + charge_info = "an unspecified partial charges" # Group systems by benchmark set for explicit listing sets_to_systems: dict[str, list[str]] = defaultdict(list) - for si in system_data.values(): - if si.system_group and si.system_name: - sets_to_systems[si.system_group].append(si.system_name) + for system_group, system_name in metadata.benchmark_sets_systems: + 
sets_to_systems[system_group].append(system_name) # Sort systems within each set for systems_list in sets_to_systems.values(): @@ -720,7 +790,7 @@ def _build_content_summary( # Build descriptive subject line if len(unique_sets) == 0: - subject = benchmark_data_set or "benchmark" + subject = "benchmark" elif len(unique_sets) == 1: subject = unique_sets[0] else: @@ -736,251 +806,341 @@ def _build_content_summary( elif len(unique_sets) == 1: systems_desc = ", ".join(sets_to_systems[unique_sets[0]]) else: - systems_desc = f"{len(system_data)} systems" + systems_desc = f"{len(metadata.benchmark_sets_systems)} systems" # Count totals across all systems - total_ligands = sum(len(si.ligands) for si in system_data.values()) - total_proteins = sum(len(si.proteins) for si in system_data.values()) - total_solutes = sum(len(si.solutes) for si in system_data.values()) - total_solvents = sum(len(si.solvents) for si in system_data.values()) - max_atoms_overall = max((si.max_atoms for si in system_data.values()), default=0) + all_structures = { + "ligands": set(), + "proteins": set(), + "cofactors": set(), + "solvents": set(), + } + systems_with_cofactors = [] + for si in metadata.system_info_dict.values(): + for key in all_structures.keys(): + all_structures[key].update(getattr(si, key)) + if si.cofactors: + systems_with_cofactors.append(f"{si.benchmark_set}/{si.benchmark_system}") # Build summary - if mode == "rbfe": - cofactor_list = ", ".join(sorted(all_cofactors)) if all_cofactors else "none" - if len(system_data) > 1: + if metadata.calculation_mode == "rbfe": + cofactor_list = ( + ", ".join(sorted(all_structures["cofactors"])) + if all_structures["cofactors"] + else "none" + ) + if len(unique_sets) > 1: summary_parts = [ f"This submission describes the {subject} RBFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", - f"The submission contains {transformation_count} transformations, {total_ligands} unique ligands, and {total_proteins} unique 
proteins.", + f"The submission contains {metadata.n_transformations} transformations, {len(all_structures['ligands'])} unique ligands, and {len(all_structures['proteins'])} unique proteins.", ] else: summary_parts = [ f"This submission describes the {subject} RBFE benchmark prepared with {field_info} and {charge_info}.", - f"The network contains {transformation_count} transformations across {total_ligands} unique ligands and {total_proteins} unique proteins.", + f"The network contains {metadata.n_transformations} transformations across {len(all_structures['ligands'])} unique ligands and {len(all_structures['proteins'])} unique proteins.", ] if systems_with_cofactors: summary_parts.append( f"{len(systems_with_cofactors)} systems include cofactors ({cofactor_list})." ) else: - if len(system_data) > 1: + if len(unique_sets) > 1: summary_parts = [ f"This submission describes the {subject} ASFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", - f"The submission contains {transformation_count} transformations, {total_solutes} unique solutes, and {total_solvents} unique solvents.", + f"The submission contains {metadata.n_transformations} transformations, {len(all_structures['ligands'])} unique solutes, and {len(all_structures['solvents'])} unique solvents.", ] else: summary_parts = [ f"This submission describes the {subject} ASFE benchmark prepared with {field_info} and {charge_info}.", - f"The archive contains {transformation_count} transformations across {total_solutes} unique solutes and {total_solvents} unique solvents.", + f"The archive contains {metadata.n_transformations} transformations across {len(all_structures['ligands'])} unique solutes and {len(all_structures['solvents'])} unique solvents.", ] - summary_parts.append( - f"The largest simulated chemical system contains {max_atoms_overall} atoms." - ) if used_alchemiscale: summary_parts.append( "Results are derived from archived Alchemiscale workflow data." 
) summary_text = " ".join(summary_parts) - return textwrap.fill(summary_text, width=100), list(system_data.values()) + return textwrap.fill(summary_text, width=100) -def _extract_auto_metadata( - *, - by_key: dict[str, dict[str, Any]], - mode: str, - archive_path: Path, - network_key: str, - archive_stem: str, -) -> AutoMetadata: - metadata = AutoMetadata() - metadata.benchmark_data_set, metadata.benchmark_system = ( - _infer_benchmark_data_set_system( - by_key=by_key, - mode=mode, - archive_stem=archive_stem, - network_key=network_key, - ) +def _render_protocol_settings_yaml( + protocol_settings_list: list[tuple[ProtocolSettingsInfo, list[str]]], +) -> str: + """Take a list of alchemical protocols pairs with strings identifying systems that use it + + All keys in the ProtocolSettingsInfo class are listed except for ``full_protocol_settings``. + + If only one protocol is used, it is labeled as the submission protocol and the notes specify "Applies to all + systems". If more than one protocol is present, the protocol that represents the largest number of systems is + listed last and notes specify as "All remaining systems". The other protocols are listed with notes containing + the list of identifying strings. + + Parameters + ---------- + protocol_settings_list : list[tuple[ProtocolSettingsInfo, list[str]]] + List of unique protocol settings paired with system identifiers that use that protocol. 
+ + Returns + ------- + str + Output yaml section + """ + if not protocol_settings_list: + return "protocol_settings: []\n" + + def _format_value(value: Any) -> str: + if value is None: + return "" + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, (int, float)): + return str(value) + return json.dumps(str(value)) + + def _format_identifier(identifier: Any) -> str: + if identifier is None: + return "None" + if isinstance(identifier, str): + return identifier + if isinstance(identifier, (list, tuple, set)): + return ", ".join(str(item) for item in identifier) + return str(identifier) + + ordered_settings = sorted( + enumerate(protocol_settings_list), + key=lambda item: (len(item[1]), item[0]), + ) + + primary_index = max( + range(len(protocol_settings_list)), + key=lambda i: (len(protocol_settings_list[i][1]), -i), ) + primary_settings, _ = protocol_settings_list[primary_index] - protocol_obj: dict[str, Any] | None = None - - for payload in by_key.values(): - qualname = payload.get("__qualname__") - if qualname in { - "RelativeHybridTopologyProtocol", - "AbsoluteSolvationProtocol", - "ASFEProtocol", - }: - protocol_obj = payload - - settings = payload.get("settings") or {} - forcefield_settings = ( - settings.get("forcefield_settings") - or settings.get("solvent_forcefield_settings") - or settings.get("vacuum_forcefield_settings") - or {} + def _parse_full_protocol_settings(value: str) -> Any: + try: + return ast.literal_eval(value) + except Exception: + return None + + def _format_path(path: list[str]) -> str: + return ".".join(path) + + def _compare_full_protocol_settings( + base: ProtocolSettingsInfo, other: ProtocolSettingsInfo + ) -> list[tuple[str, Any, Any]]: + base_obj = _parse_full_protocol_settings(base.full_protocol_settings) + other_obj = _parse_full_protocol_settings(other.full_protocol_settings) + diffs: list[tuple[str, Any, Any]] = [] + + def recurse(path: list[str], a: Any, b: Any) -> None: + if type(a) is not 
type(b): + diffs.append((_format_path(path), a, b)) + return + if isinstance(a, dict): + for key in sorted(set(a) | set(b)): + if key not in a: + diffs.append((_format_path(path + [key]), None, b[key])) + elif key not in b: + diffs.append((_format_path(path + [key]), a[key], None)) + else: + recurse(path + [key], a[key], b[key]) + elif isinstance(a, list): + if a != b: + diffs.append((_format_path(path), a, b)) + else: + if a != b: + diffs.append((_format_path(path), a, b)) + + if isinstance(base_obj, dict) and isinstance(other_obj, dict): + recurse([], base_obj, other_obj) + return diffs + + def _full_protocol_setting_notes( + base: ProtocolSettingsInfo, other: ProtocolSettingsInfo + ) -> list[str]: + diffs = _compare_full_protocol_settings(base, other) + if not diffs: + return [] + notes: list[str] = [ + "Detailed protocol settings differ:", + ] + for path, base_value, other_value in diffs: + notes.append(f"- {path}: {base_value!r} -> {other_value!r}") + return notes + + output_lines = ["protocol_settings:"] + multiple_protocols = len(protocol_settings_list) > 1 + field_names = [ + "calculation_mode", + "protocol", + "timestep", + "temperature", + "pressure", + "forcefields", + "small_molecule_forcefield", + "partial_charges", + "equilibration_time", + "production_time", + "vacuum_equilibration_time", + "vacuum_production_time", + "solvent_equilibration_time", + "solvent_production_time", + "lambda_functions", + "lambda_windows", + "lambda_schedule", + "notes", + ] + + primary_order = [primary_index] + [ + idx for idx, _ in ordered_settings if idx != primary_index + ] + for index in primary_order: + protocol_settings, identifiers = protocol_settings_list[index] + is_primary = index == primary_index + sorted_ids = sorted(_format_identifier(item) for item in identifiers) + + if is_primary: + if multiple_protocols: + notes_lines = ["Applies to systems:"] + [ + f"- {item}" for item in sorted_ids + ] + notes = "\n".join(notes_lines) + notes_is_multiline = True + else: 
+ notes = "Applies to all systems" + notes_is_multiline = False + else: + notes_lines = _full_protocol_setting_notes( + primary_settings, protocol_settings ) - if not metadata.forcefield and isinstance(forcefield_settings, dict): - metadata.forcefield = str( - forcefield_settings.get("small_molecule_forcefield") or "" + notes_lines += ["Applies to transformations:"] + [ + f"- {item}" for item in sorted_ids + ] + notes = "\n".join(notes_lines) + notes_is_multiline = True + + output_lines.append( + " - protocol: " + _format_value(protocol_settings.protocol) + ) + for field_name in field_names: + if field_name == "protocol": + continue + if field_name == "notes": + if notes_is_multiline: + output_lines.append(" notes: |") + for line in notes.splitlines(): + output_lines.append(" " + line) + else: + output_lines.append(" notes: " + _format_value(notes)) + continue + if is_primary: + output_lines.append( + f" {field_name}: {_format_value(getattr(protocol_settings, field_name))}" + ) + elif field_name == "protocol": + output_lines.append( + f" {field_name}: {_format_value(getattr(protocol_settings, field_name))}" ) - if not metadata.forcefield: - ffs = forcefield_settings.get("forcefields") - if isinstance(ffs, list) and ffs: - preferred = next( - ( - ff - for ff in ffs - if isinstance(ff, str) and "openff" in ff - ), - None, - ) - if preferred: - metadata.forcefield = str(preferred).replace(".offxml", "") - - partial_charge_settings = settings.get("partial_charge_settings") or {} - if not metadata.partial_charges and isinstance( - partial_charge_settings, dict - ): - method = partial_charge_settings.get("partial_charge_method") - nagl_model = partial_charge_settings.get("nagl_model") - if method and nagl_model: - metadata.partial_charges = f"{method} ({nagl_model})" - elif method: - metadata.partial_charges = str(method) - - for key, value in _iter_nested_items(payload): - if ( - not metadata.openmm_version - and key == "openmm_version" - and isinstance(value, str) - ): 
- metadata.openmm_version = value - if ( - not metadata.openfe_version - and key == "openfe_version" - and isinstance(value, str) - ): - metadata.openfe_version = value - if ( - not metadata.openff_toolkit_version - and key == "openff_toolkit_version" - and isinstance(value, str) - ): - metadata.openff_toolkit_version = value - - molprops = payload.get("molprops") - if isinstance(molprops, dict): - charge_provenance = molprops.get("charge_provenance") - if isinstance(charge_provenance, str): - try: - charge_provenance = json.loads(charge_provenance) - except json.JSONDecodeError: - charge_provenance = None - if isinstance(charge_provenance, dict): - if not metadata.openfe_version and isinstance( - charge_provenance.get("openfe_version"), str - ): - metadata.openfe_version = charge_provenance["openfe_version"] - if not metadata.openff_toolkit_version and isinstance( - charge_provenance.get("openff_toolkit_version"), str - ): - metadata.openff_toolkit_version = charge_provenance[ - "openff_toolkit_version" - ] - if not metadata.partial_charges: - charge_method = charge_provenance.get("charge_method") - nagl_model = charge_provenance.get("nagl_model") - if charge_method and nagl_model: - metadata.partial_charges = f"{charge_method} ({nagl_model})" - elif charge_method: - metadata.partial_charges = str(charge_method) - - # Build protocol settings for this archive with source metadata - protocol_settings = _build_protocol_settings(protocol_obj, mode) - protocol_info = ProtocolSettingsInfo( - settings=protocol_settings, - source_file=str(archive_path), - benchmark_set=metadata.benchmark_data_set, - benchmark_system=metadata.benchmark_system, - network_key=network_key, - ) - metadata.protocol_settings_list = [protocol_info] - return metadata + # For non-primary protocols, only protocol and notes are listed. 
+ return "\n".join(output_lines) + "\n" -def _normalize_partial_charge_info(partial_charges: str) -> str: - value = partial_charges.strip() - if not value: - return "" - lower = value.lower() +def _render_keyed_values_yaml( + section_name: str, + value_keys: list[tuple[Any, list[str]]], + value_label: str = "value", + keys_label: str = "systems", +) -> str: + """Render simple value-with-systems metadata into YAML. - # Canonical openfe-benchmarks style: nagl_.pt - model_match = re.search( - r"(openff-gnn-am1bcc-[0-9.]+\.0\.pt|openff-gnn-am1bcc-[0-9.]+\.pt)", lower + Parameters + ---------- + section_name: + YAML section name. + value_keys: + List of (value, system identifiers) pairs. + value_label: + Label to use for the scalar value. + keys_label: + Label to use for the identifying system keys. + + Returns + ------- + str + Output yaml section. + """ + if not value_keys: + return f"{section_name}: []\n" + if len(value_keys) == 1: + return f"{section_name}: {json.dumps(str(value_keys[0][0]))}\n" + + ordered_settings = sorted( + enumerate(value_keys), + key=lambda item: (len(item[1]), item[0]), ) - if "nagl" in lower and model_match: - model = model_match.group(1) - return f"nagl_{model}" - if lower in {"am1bcc", "am1-bcc"}: - return "am1bcc" + lines = [f"{section_name}:"] + for i, (value, keys) in ordered_settings: + lines.append(f" - {value_label}: {json.dumps(str(value))}") + if i != len(ordered_settings) - 1: + if keys: + lines.append(f" {keys_label}:") + for key in sorted(keys): + lines.append(f" - {json.dumps(str(key))}") + else: + lines.append(f" {keys_label}:") + lines.append(" - All remaining") - normalized = re.sub(r"[^a-z0-9._-]+", "_", lower).strip("_") - return normalized + return "\n".join(lines) + "\n" -def _make_tags( - *, mode: str, forcefield: str, partial_charge_tag: str, user_keywords: list[str] -) -> list[str]: - tags: list[str] = [] - tags.append(mode) - if forcefield: - tags.append(forcefield) - if partial_charge_tag: - 
tags.append(partial_charge_tag) - tags.extend(user_keywords) +def _render_benchmark_system_yaml(system_info_dict: dict[tuple, SystemInfo]) -> str: + """Render a list of SystemInfo objects into a list of network keys and the benchmark systems they contain - # Deduplicate while preserving order. - out: list[str] = [] - seen: set[str] = set() - for tag in tags: - t = tag.strip() - if not t or t in seen: - continue - seen.add(t) - out.append(t) - return out + Parameters + ---------- + system_info_dict : dict[tuple, SystemInfo] + List of unique protocol settings paired with system identifiers that use that protocol. + Returns + ------- + str + Output yaml section + """ -def _yaml_block(text: str, indent_spaces: int = 2) -> str: - indent = " " * indent_spaces - lines = text.splitlines() or [""] - return "\n".join(f"{indent}{line}" for line in lines) + network_breakdown = defaultdict(lambda: defaultdict(str)) + for si in system_info_dict.values(): + network_breakdown[si.benchmark_set][si.benchmark_system] = si.network_key + + benchmark_yaml = """ +# BenchmarkData provenance (from openfe-benchmarks planning script) with associated network key +benchmark_data: + source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks +""" + + for benchmark_set in sorted(network_breakdown): + benchmark_yaml += f" {json.dumps(benchmark_set)}:\n" + for benchmark_system, network_key in sorted( + network_breakdown[benchmark_set].items() + ): + benchmark_yaml += f" {json.dumps(benchmark_system)}: {network_key}\n" + + return benchmark_yaml def _make_submission_yaml( - *, + metadata: AutoMetadata, submission_id: str, title: str, summary: str, tags: list[str], authors: list[str], - openfe_version: str, - openmm_version: str, - openff_toolkit_version: str, - forcefield: str, - partial_charges: str, - benchmark_data_set: str, - benchmark_system: str, archive_doi: str, archive_provider: str, license_name: str, - protocol_settings_list: list[ProtocolSettingsInfo], - 
network_key_to_systems: dict[str, list[str]], results_file: str, ) -> str: if not authors: @@ -988,15 +1148,25 @@ def _make_submission_yaml( tags_yaml = ", ".join(tags) authors_yaml = "\n".join(f" - name: {name}" for name in authors) - protocol_settings_yaml = _render_protocol_settings_yaml(protocol_settings_list) - - # Render network_key to systems mapping - network_keys_yaml = "" - if network_key_to_systems: - network_keys_yaml = "\n# Network keys to systems mapping\nnetwork_keys:\n" - for network_key in sorted(network_key_to_systems.keys()): - systems = ", ".join(network_key_to_systems[network_key]) - network_keys_yaml += f" {json.dumps(network_key)}: {json.dumps(systems)}\n" + protocol_settings_yaml = _render_protocol_settings_yaml( + metadata.protocol_settings_list + ) + benchmark_system_yaml = _render_benchmark_system_yaml(metadata.system_info_dict) + openfe_version_yaml = _render_keyed_values_yaml( + "openfe_version", metadata.openfe_version, "version", "systems" + ) + openmm_version_yaml = _render_keyed_values_yaml( + "openmm_version", metadata.openmm_version, "version", "systems" + ) + openff_toolkit_version_yaml = _render_keyed_values_yaml( + "openff_toolkit_version", metadata.openff_toolkit_version, "version", "systems" + ) + forcefield_yaml = _render_keyed_values_yaml( + "forcefield", metadata.forcefield, "forcefield", "systems" + ) + partial_charges_yaml = _render_keyed_values_yaml( + "partial_charges", metadata.partial_charges, "partial_charges", "systems" + ) return f"""# REQUIRED: unique, kebab-case identifier for this submission submission_id: {submission_id} @@ -1018,21 +1188,15 @@ def _make_submission_yaml( # REQUIRED: publication/submission date (ISO 8601) date: {date.today().isoformat()} -# REQUIRED: OpenFE/OpenMM/toolkit versions used to produce gathered reports -openfe_version: {openfe_version} -openmm_version: {openmm_version} -openff_toolkit_version: {openff_toolkit_version} +{openfe_version_yaml} +{openmm_version_yaml} 
+{openff_toolkit_version_yaml} # Recommended descriptors -forcefield: {forcefield} -partial_charges: {partial_charges} +{forcefield_yaml} +{partial_charges_yaml} +{benchmark_system_yaml} -# BenchmarkData provenance (from openfe-benchmarks planning script) -benchmark_data: - source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks - set: {benchmark_data_set} - system: {benchmark_system} -{network_keys_yaml} # REQUIRED: results file results: {results_file} @@ -1050,92 +1214,73 @@ def _make_submission_yaml( def _make_zenodo_description( - *, + metadata: AutoMetadata, + network_mode: str, title: str, archive_filename: str, mode: str, content_summary: str, - openfe_version: str, - openmm_version: str, - openff_toolkit_version: str, - forcefield: str, - partial_charges: str, - benchmark_data_set: str, - benchmark_system: str, license_name: str, - protocol_settings_list: list[ProtocolSettingsInfo], - has_archive_objects: bool, used_alchemiscale: bool, - system_info_list: list[SystemInfo] = None, - network_key_to_systems: dict[str, list[str]] = None, ) -> str: content_kind = "ASFE" if mode == "asfe" else "RBFE" - openmm_display = openmm_version or "" # Determine the source type for the overview text - if has_archive_objects: + if network_mode == "alchemicalarchive": source_description = "AlchemicalArchive" - else: + elif network_mode == "alchemicalnetwork": source_description = "AlchemicalNetwork" + else: + source_description = "OpenFE archive" # Build workflow description workflow_text = "OpenFE" if used_alchemiscale: workflow_text += " and Alchemiscale" - if system_info_list is None: - system_info_list = [] - - # Group protocol settings by unique combinations - settings_groups: dict[str, list[ProtocolSettingsInfo]] = defaultdict(list) - for info in protocol_settings_list: - settings_key = json.dumps(info.settings, sort_keys=True) - settings_groups[settings_key].append(info) - - # Build protocol settings section - protocol_blocks: list[str] = [] - for idx, 
(settings_key, info_list) in enumerate(settings_groups.items()): - protocol_settings = info_list[0].settings - - if len(settings_groups) > 1: - protocol_blocks.append(f"\n### Protocol Settings Group {idx + 1}") - - # Add system information - systems = sorted( - set( - f"{info.benchmark_set}/{info.benchmark_system}" - for info in info_list - if info.benchmark_set or info.benchmark_system - ) - ) - if systems: - protocol_blocks.append( - f"**Systems ({len(info_list)} networks):** {', '.join(systems)}" - ) - - protocol_lines = "\n".join( - [f"- {k}: {v}" for k, v in protocol_settings.items()] - ) - if not protocol_lines: - protocol_lines = "- notes: Protocol settings unavailable" - protocol_blocks.append(protocol_lines) - - protocol_section = "\n".join(protocol_blocks) - - # Per-system details section removed per user request - system_details_section = "" + protocol_settings_yaml = _render_protocol_settings_yaml( + metadata.protocol_settings_list + ) + benchmark_system_yaml = _render_benchmark_system_yaml(metadata.system_info_dict) + openfe_version_yaml = _render_keyed_values_yaml( + "openfe_version", metadata.openfe_version, "version", "systems" + ) + openmm_version_yaml = _render_keyed_values_yaml( + "openmm_version", metadata.openmm_version, "version", "systems" + ) + openff_toolkit_version_yaml = _render_keyed_values_yaml( + "openff_toolkit_version", + metadata.openff_toolkit_version, + "version", + "systems", + ) + forcefield_yaml = _render_keyed_values_yaml( + "forcefield", metadata.forcefield, "forcefield", "systems" + ) + partial_charges_yaml = _render_keyed_values_yaml( + "partial_charges", + metadata.partial_charges, + "partial_charges", + "systems", + ) # Build network keys to systems mapping section - # Only show for AlchemicalArchive files (network_key_to_systems will be populated) - # For AlchemicalNetwork files, the "name" field is not a meaningful network key network_keys_section = "" - if network_key_to_systems: + network_breakdown = defaultdict(list) 
+ for si in metadata.system_info_dict.values(): + network_breakdown[si.network_key].append( + f"{si.benchmark_set}/{si.benchmark_system}" + ) + + if network_breakdown: network_keys_lines = [] - for network_key_item in sorted(network_key_to_systems.keys()): - systems = ", ".join(network_key_to_systems[network_key_item]) - network_keys_lines.append(f" - {network_key_item}: {systems}") - if network_keys_lines: - network_keys_section = "\n- network keys:\n" + "\n".join(network_keys_lines) + for network_key_item, systems in sorted(network_breakdown.items()): + network_keys_lines.append( + f" - {network_key_item}: {', '.join(sorted(set(systems)))}" + ) + network_keys_section = "## Alchemical Network Keys:\n" + "\n".join( + network_keys_lines + ) return f"""# {title} @@ -1149,30 +1294,24 @@ def _make_zenodo_description( ## Software versions -- openfe_version: {openfe_version} -- openmm_version: {openmm_display} -- openff_toolkit_version: {openff_toolkit_version} +{openfe_version_yaml} +{openmm_version_yaml} +{openff_toolkit_version_yaml} -## Recommended descriptors +{network_keys_section} -- forcefield: {forcefield} -- partial_charges: {partial_charges} +## Recommended descriptors -## BenchmarkData provenance +{forcefield_yaml} +{partial_charges_yaml} -- source_repository: https://github.com/OpenFreeEnergy/openfe-benchmarks -- benchmark_set: {benchmark_data_set} -- systems: {benchmark_system} +{benchmark_system_yaml} ## Protocol settings -{protocol_section} - -- archive file: {archive_filename}{network_keys_section} -{system_details_section} +{protocol_settings_yaml} ## Rights - - License: {license_name} """ @@ -1234,6 +1373,17 @@ def process_network( tuple[Path, Path] Paths to the generated `submission.yaml` and `zenodo_description.md`. 
""" + + out_dir = output_dir.resolve() + out_dir.mkdir(parents=True, exist_ok=True) + + # Check for required results file + results_path = out_dir / results_file + if not results_path.exists(): + raise FileNotFoundError( + f"Required file '{results_file}' not found in output directory: {out_dir}" + ) + # Normalize input to list and expand glob patterns if isinstance(input_files, str): # Glob pattern @@ -1255,220 +1405,86 @@ def process_network( if not resolved_path.exists(): raise FileNotFoundError(f"Input file not found: {resolved_path}") - out_dir = output_dir.resolve() - out_dir.mkdir(parents=True, exist_ok=True) - - # Check for required results file - results_path = out_dir / results_file - if not results_path.exists(): - raise FileNotFoundError( - f"Required file '{results_file}' not found in output directory: {out_dir}" - ) - # Process all input files and collect metadata - all_metadata: list[AutoMetadata] = [] - all_by_key: dict[str, dict[str, Any]] = {} - all_archive_objs: list[dict[str, Any]] = [] all_network_objs: list[dict[str, Any]] = [] - all_network_keys: list[str] = [] modes: set[str] = set() - + all_network_keys: list[str] = [] + all_metadata: list[AutoMetadata] = [] for input_path in input_paths: resolved_path = input_path.resolve() + network_obj, network_mode = _load_network(resolved_path) + all_network_objs.append(network_obj) - by_key, archive_obj, network_obj = _load_token_table(resolved_path) - mode = _detect_mode(by_key, archive_obj, network_obj) - modes.add(mode) - - network_key = _get_network_key(archive_obj, network_obj) - all_network_keys.append(network_key) - - archive_stem = resolved_path.name - for suffix in (".json.bz2", ".bz2", ".json"): - if archive_stem.endswith(suffix): - archive_stem = archive_stem[: -len(suffix)] - break - - metadata = _extract_auto_metadata( - by_key=by_key, - mode=mode, - archive_path=resolved_path, - network_key=network_key, - archive_stem=archive_stem, - ) + metadata = _extract_auto_metadata(network_obj, 
network_mode, str(resolved_path)) + modes.add(metadata.calculation_mode) + all_network_keys.append(metadata.network_key) all_metadata.append(metadata) - # Accumulate objects for summary generation - all_by_key.update(by_key) - if archive_obj: - all_archive_objs.append(archive_obj) - if network_obj: - all_network_objs.append(network_obj) - # Check consistency if len(modes) > 1: raise ValueError( f"Mixed modes detected across input files: {modes}. All files must be either ASFE or RBFE." ) - mode = modes.pop() # Merge metadata from all files merged_metadata = AutoMetadata() - - # Collect protocol settings from all files + merged_metadata.calculation_mode = mode for metadata in all_metadata: - merged_metadata.protocol_settings_list.extend(metadata.protocol_settings_list) + merged_metadata.n_transformations += metadata.n_transformations + if not merged_metadata.network_key: + merged_metadata.network_key = metadata.network_key + else: + merged_metadata.network_key += f", {metadata.network_key}" + merged_metadata.benchmark_sets_systems.extend(metadata.benchmark_sets_systems) # Use first non-empty value for scalar fields - if not merged_metadata.openfe_version and metadata.openfe_version: - merged_metadata.openfe_version = metadata.openfe_version - if not merged_metadata.openmm_version and metadata.openmm_version: - merged_metadata.openmm_version = metadata.openmm_version - if ( - not merged_metadata.openff_toolkit_version - and metadata.openff_toolkit_version + for key in [ + "openmm_version", + "openfe_version", + "openff_toolkit_version", + "forcefield", + "partial_charges", + "small_molecule_force_field", + "protocols", + "protocol_settings_list", + ]: + for value, keys in getattr(metadata, key): + _add_value_with_keys(getattr(merged_metadata, key), value, keys) + + if any( + [ + x in merged_metadata.system_info_dict + for x in metadata.system_info_dict.keys() + ] ): - merged_metadata.openff_toolkit_version = metadata.openff_toolkit_version - if not 
merged_metadata.forcefield and metadata.forcefield: - merged_metadata.forcefield = metadata.forcefield - if not merged_metadata.partial_charges and metadata.partial_charges: - merged_metadata.partial_charges = metadata.partial_charges - if not merged_metadata.benchmark_data_set and metadata.benchmark_data_set: - merged_metadata.benchmark_data_set = metadata.benchmark_data_set - if not merged_metadata.benchmark_system and metadata.benchmark_system: - merged_metadata.benchmark_system = metadata.benchmark_system - - # Use merged data for outputs - openfe_version = merged_metadata.openfe_version - openmm_version = merged_metadata.openmm_version - openff_toolkit_version = merged_metadata.openff_toolkit_version - forcefield = merged_metadata.forcefield - partial_charges_raw = merged_metadata.partial_charges - partial_charge_tag = _normalize_partial_charge_info(partial_charges_raw) - partial_charges = partial_charge_tag or partial_charges_raw + raise ValueError( + f"System is already documented: {[x for x in metadata.system_info_dict.keys() if x in merged_metadata.system_info_dict]}" + ) + + merged_metadata.system_info_dict.update(metadata.system_info_dict) # Build content summary from combined data # Get list of source file names - source_file_names = [p.name for p in input_paths] - - content_summary, system_info_list = _build_content_summary( - all_by_key, - all_archive_objs, - all_network_objs, - mode, - merged_metadata.benchmark_data_set, - forcefield, - partial_charges, + content_summary = _build_content_summary( + merged_metadata, used_alchemiscale, - source_file_names, ) # Append additional summary text if provided if summary_suffix: - content_summary = content_summary.rstrip() + " " + summary_suffix.strip() - - # Store system_info_list in merged_metadata - merged_metadata.system_info_list = system_info_list - - # Build network_key to systems mapping (only for AlchemicalArchive files) - # For AlchemicalNetwork files, the "name" field is not a meaningful network key 
- network_key_to_systems: dict[str, list[str]] = defaultdict(list) - has_archive_objects = len(all_archive_objs) > 0 - - # Override benchmark_data_set and benchmark_system from system_info_list if available - if system_info_list: - # Use system_group from system info (more reliable than string matching) - system_groups = set( - si.system_group for si in system_info_list if si.system_group - ) - if len(system_groups) == 1: - merged_metadata.benchmark_data_set = system_groups.pop() - elif len(system_groups) > 1: - # Multiple groups - list them - merged_metadata.benchmark_data_set = ", ".join(sorted(system_groups)) - - # List all system names - system_names = sorted( - set(si.system_name for si in system_info_list if si.system_name) + content_summary = textwrap.fill( + content_summary.rstrip() + " " + summary_suffix.strip(), width=100 ) - if system_names: - merged_metadata.benchmark_system = ", ".join(system_names) - - # Update protocol_settings_list with correct system info - # For each input file, extract system info from first transformation - file_to_system: dict[str, tuple[str, str]] = {} - - for idx, input_path in enumerate(input_paths): - by_key, archive_obj, network_obj = _load_token_table(input_path) - transformation_refs = _transformation_refs(archive_obj, network_obj) - if transformation_refs: - system_group, system_name = _extract_system_info_from_mapping( - by_key, transformation_refs[0] - ) - file_to_system[str(input_path)] = (system_group, system_name) - - # Build network_key to systems mapping only for AlchemicalArchive files - if has_archive_objects and archive_obj: - network_key = all_network_keys[idx] - system_path = ( - f"{system_group}/{system_name}" - if system_group and system_name - else system_name - ) - if ( - system_path - and system_path not in network_key_to_systems[network_key] - ): - network_key_to_systems[network_key].append(system_path) - - # Update each protocol settings info with correct system data - for protocol_info in 
merged_metadata.protocol_settings_list: - # protocol_info.source_file is the full path - if protocol_info.source_file in file_to_system: - group, name = file_to_system[protocol_info.source_file] - protocol_info.benchmark_set = group - protocol_info.benchmark_system = name - - # Extract these AFTER overriding from system_info_list - benchmark_data_set = merged_metadata.benchmark_data_set - benchmark_system = merged_metadata.benchmark_system - - # Calculate total transformations across all files - total_transformations = 0 - for archive_obj in all_archive_objs: - total_transformations += len(_transformation_refs(archive_obj, None)) - for network_obj in all_network_objs: - if not any( - net_obj == network_obj - for net_obj in [a.get("network") for a in all_archive_objs if a] - ): - total_transformations += len(_transformation_refs(None, network_obj)) - # Use primary network key for submission ID - primary_network_key = all_network_keys[0] if all_network_keys else "unknown" + sets_to_systems: dict[str, list[str]] = defaultdict(list) + for system_group, system_name in merged_metadata.benchmark_sets_systems: + sets_to_systems[system_group].append(system_name) # Generate a descriptive title - submission_id_str = submission_id or _default_submission_id(primary_network_key) - - # Extract unique benchmark sets and systems for title - unique_sets = [] - if system_info_list: - unique_sets = sorted( - set(si.system_group for si in system_info_list if si.system_group) - ) - if not unique_sets and benchmark_data_set: - unique_sets = [s.strip() for s in benchmark_data_set.split(",")] - - unique_systems = [] - if system_info_list: - unique_systems = sorted( - set(si.system_name for si in system_info_list if si.system_name) - ) - elif benchmark_system: - unique_systems = [s.strip() for s in benchmark_system.split(",")] + submission_id = submission_id or _default_submission_id("_".join(all_network_keys)) - title = _generate_title(mode, unique_sets, unique_systems, submission_id_str) 
+ title = _generate_title(mode, merged_metadata.benchmark_sets_systems, submission_id) submission_yaml_filename = "submission.yaml" zenodo_description_filename = "zenodo_description.md" @@ -1476,33 +1492,24 @@ def process_network( submission_yaml_path = out_dir / submission_yaml_filename zenodo_description_path = out_dir / zenodo_description_filename - submission_id = submission_id or _default_submission_id(primary_network_key) tags_list = [k.strip() for k in tags.split(",") if k.strip()] tags_final = _make_tags( mode=mode, - forcefield=forcefield, - partial_charge_tag=partial_charges, + forcefield=merged_metadata.forcefield, + partial_charge_tag=merged_metadata.partial_charges, user_keywords=tags_list, ) submission_yaml_text = _make_submission_yaml( + merged_metadata, submission_id=submission_id, title=title, summary=content_summary, tags=tags_final, authors=author or [], - openfe_version=openfe_version, - openmm_version=openmm_version, - openff_toolkit_version=openff_toolkit_version, - forcefield=forcefield, - partial_charges=partial_charges, - benchmark_data_set=benchmark_data_set, - benchmark_system=benchmark_system, archive_doi="TODO add DOI", archive_provider="TODO add archive provider", license_name=license, - protocol_settings_list=merged_metadata.protocol_settings_list, - network_key_to_systems=network_key_to_systems, results_file=results_file, ) submission_yaml_path.write_text(submission_yaml_text) @@ -1511,23 +1518,14 @@ def process_network( archive_filenames = ", ".join(p.name for p in input_paths) zenodo_description_text = _make_zenodo_description( + merged_metadata, + network_mode=network_mode, title=title, archive_filename=archive_filenames, mode=mode, content_summary=content_summary, - openfe_version=openfe_version, - openmm_version=openmm_version, - openff_toolkit_version=openff_toolkit_version, - forcefield=forcefield, - partial_charges=partial_charges, - benchmark_data_set=benchmark_data_set, - benchmark_system=benchmark_system, 
license_name=license, - protocol_settings_list=merged_metadata.protocol_settings_list, - has_archive_objects=has_archive_objects, used_alchemiscale=used_alchemiscale, - system_info_list=system_info_list, - network_key_to_systems=network_key_to_systems, ) zenodo_description_path.write_text(zenodo_description_text) @@ -1664,23 +1662,18 @@ def main(): seen.add(f) unique_files.append(f) - try: - process_network( - input_files=unique_files, - output_dir=args.output_dir, - submission_id=args.submission_id, - tags=args.tags, - author=args.authors, - license=args.license, - used_alchemiscale=not args.no_alchemiscale, - summary_suffix=args.summary_suffix, - results_file=args.results_file, - ) - print("\n✓ Successfully generated submission metadata") - return 0 - except Exception as e: - print(f"\n✗ Error: {e}", file=sys.stderr) - return 1 + process_network( + input_files=unique_files, + output_dir=args.output_dir, + submission_id=args.submission_id, + tags=args.tags, + author=args.authors, + license=args.license, + used_alchemiscale=not args.no_alchemiscale, + summary_suffix=args.summary_suffix, + results_file=args.results_file, + ) + print("\n✓ Successfully generated submission metadata") if __name__ == "__main__": From aeb168fb25d0b7483aeb8884c0e68d427b7f258f Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Fri, 12 Jun 2026 15:06:26 -0400 Subject: [PATCH 14/24] Update submission yaml generation --- .../submission.yaml | 142 +++------- .../scripts/prepare_metadata_submission.py | 249 +++++++++++------- 2 files changed, 194 insertions(+), 197 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 8b689d7..28133a8 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,33 +8,26 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 
systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - tip3p_HFE_multivalent;amber/amber/tip3p_standard;amber/ff14SB/phosaa10;amber and am1bcc. The - submission contains 160 transformations, 45 unique ligands, and 1 unique proteins. Note this means - the charge annihilation sets are not complete compared to what is in that system and should not be - compared to other complete runs due to the missing edges. + tip3p_HFE_multivalent/ff14SB/phosaa10/tip3p_standard and am1bcc_at. The submission contains 160 + edges, 45 unique ligands, and 1 unique proteins. Note this means the charge annihilation sets are + not complete compared to what is in that system and should not be compared to other complete runs + due to the missing edges. # REQUIRED: list of submission tags -tags: [rbfe, tip3p_HFE_multivalent;amber, amber, tip3p_standard;amber, ff14SB, phosaa10;amber, am1bcc, charge_change, benchmark, openfe, openmm-840] +tags: [rbfe, tip3p_HFE_multivalent, ff14SB, phosaa10, tip3p_standard, irak4_s3, egfr, p38, tyk2, charge_annihilation_set, irak4_s2, jacs_set, am1bcc_at, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: - name: Josh Horton # REQUIRED: publication/submission date (ISO 8601) -date: 2026-06-11 - -openfe_version: [] - -openmm_version: [] - -openff_toolkit_version: [] - - -# Recommended descriptors -forcefield: "amber/phosaa10;amber/tip3p_HFE_multivalent;amber/tip3p_standard;amber/ff14SB" - -partial_charges: "am1bcc" - +date: 2026-06-12 +openfe_version: TODO +openmm_version: TODO +openff_toolkit_version: TODO +mapper: "KartografAtomMapper 1.2.0 (LSA)" +forcefield: ["ff14SB", "phosaa10", "tip3p_HFE_multivalent", "tip3p_standard"] +partial_charges: "am1bcc_at" # BenchmarkData provenance (from openfe-benchmarks planning script) with 
associated network key benchmark_data: @@ -53,8 +46,8 @@ results: computational_results.json # REQUIRED: long-term archive pointer (at least doi or url) archive: - doi: https://doi.org/10.5281/zenodo.20643703 - archive_provider: zenodo + doi: TODO add DOI + archive_provider: TODO add archive provider # REQUIRED: license for the submission license: CC-BY-4.0 @@ -62,13 +55,12 @@ license: CC-BY-4.0 # RECOMMENDED / OPTIONAL metadata for protocol settings protocol_settings: - protocol: "RelativeHybridTopologyProtocol" - calculation_mode: "rbfe" timestep: "{'val': 4.0, 'unit': 'femtosecond'}" temperature: "{'val': 298.15, 'unit': 'kelvin'}" pressure: "{'val': 1, 'unit': 'bar'}" - forcefields: "amber/phosaa10;amber/tip3p_HFE_multivalent;amber/tip3p_standard;amber/ff14SB" + forcefields: ["ff14SB", "phosaa10", "tip3p_HFE_multivalent", "tip3p_standard"] small_molecule_forcefield: "openff-2.3.0" - partial_charges: "am1bcc" + partial_charges: "am1bcc_at" equilibration_time: "{'val': 1.0, 'unit': 'nanosecond'}" production_time: "{'val': 5.0, 'unit': 'nanosecond'}" vacuum_equilibration_time: @@ -79,47 +71,13 @@ protocol_settings: lambda_windows: "11" lambda_schedule: "" notes: | - Applies to systems: + Applies to 40 edges: - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none - - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ff, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2g, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2gg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2h, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2j, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2k, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2l, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2n, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2o, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2p, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2q, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2s, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2t, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: 
ligand_start=2u, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2v, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2x, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2y, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2z, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fln, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flq, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flw, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmh, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmk, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_31, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_42, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_43, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_44, ligand_final=none, 
solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_47, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_49, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_50, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_55, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_23, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_27, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_28, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_30, ligand_final=none, solvent=none, cofactors=none, protein=none + - etc. 
- protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: @@ -127,12 +85,13 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length.val: 5.0 -> 20 - Applies to transformations: - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein=none + - solvation_settings.solvent_padding.val: 1.5 -> 1 + Applies to 5 edges: + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 
charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: @@ -140,56 +99,21 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length.val: 5.0 -> 20 - - solvation_settings.solvent_padding.val: 1.5 -> 1 - Applies to transformations: - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + Applies to 5 edges: + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, 
ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein=none - protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: - solvation_settings.solvent_padding.val: 1.5 -> 1 - Applies to transformations: + Applies to 40 edges: - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ff, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2g, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2gg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2h, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2j, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2k, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2l, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2n, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2o, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2p, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2q, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2s, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2t, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2u, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2v, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2x, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2y, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2z, ligand_final=none, solvent=none, cofactors=none, 
protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fln, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flq, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3flw, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmh, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=3fmk, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_31, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_42, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_43, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_44, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_47, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_49, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=ejm_50, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: 
ligand_start=ejm_55, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_23, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_27, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_28, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-ed08b22af2e51875d23b0283d2080252 jacs_set-tyk2: ligand_start=jmc_30, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - etc. diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index c32d368..58bc14e 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -97,7 +97,7 @@ class ProtocolSettingsInfo: pressure: str lambda_functions: str small_molecule_forcefield: str - forcefields: str + forcefields: set[str] partial_charges: str lambda_windows: str = "" lambda_schedule: str = "" @@ -139,7 +139,7 @@ def __eq__(self, other: Any) -> bool: @dataclass class SystemInfo: - """Per-system information extracted from transformations.""" + """Per-system information extracted from edges.""" benchmark_set: str benchmark_system: str @@ -154,6 +154,7 @@ class SystemInfo: openfe_version: list[tuple[str, list[str]]] = field(default_factory=list) openmm_version: list[tuple[str, list[str]]] = field(default_factory=list) openff_toolkit_version: list[tuple[str, list[str]]] = field(default_factory=list) + mapper: list[tuple[str, list[str]]] = field(default_factory=list) protocol_settings_list: list[tuple[ProtocolSettingsInfo, list[str]]] = field( default_factory=list ) @@ -195,7 +196,7 @@ def add_version_setting(self, attribute, 
value, key): def add_protocol_settings(self, protocol_settings: ProtocolSettingsInfo, key): """Add or update protocol settings with associated transformation key. - Stores unique ProtocolSettingsInfo objects with a list of transformation keys + Stores unique ProtocolSettingsInfo objects with a list of edge keys that use that protocol configuration. """ _add_value_with_keys(self.protocol_settings_list, protocol_settings, [key]) @@ -211,6 +212,7 @@ class AutoMetadata: openfe_version: list[tuple[str, list[str]]] = field(default_factory=list) openmm_version: list[tuple[str, list[str]]] = field(default_factory=list) openff_toolkit_version: list[tuple[str, list[str]]] = field(default_factory=list) + mapper: list[tuple[str, list[str]]] = field(default_factory=list) protocols: list[tuple[str, list[str]]] = field(default_factory=list) forcefield: list[tuple[str, list[str]]] = field(default_factory=list) small_molecule_force_field: list[tuple[str, list[str]]] = field( @@ -228,6 +230,7 @@ def update_from_system_info(self) -> None: self.openfe_version = [] self.openmm_version = [] self.openff_toolkit_version = [] + self.mapper = [] self.protocols = [] self.forcefield = [] self.small_molecule_force_field = [] @@ -241,6 +244,8 @@ def update_from_system_info(self) -> None: _add_value_with_keys(self.openmm_version, version, keys) for version, keys in system_info.openff_toolkit_version: _add_value_with_keys(self.openff_toolkit_version, version, keys) + for mapper_info, keys in system_info.mapper: + _add_value_with_keys(self.mapper, mapper_info, keys) for protocol_settings, keys in system_info.protocol_settings_list: if protocol_settings.protocol: @@ -394,8 +399,8 @@ def _iter_nested_items(obj: Any) -> list[tuple[str, Any]]: def _quantity_to_text(value: Any) -> str: - if isinstance(value, dict) and "magnitude" in value and "unit" in value: - return f"{value['magnitude']} {value['unit']}" + if hasattr(value, "magnitude"): + return f"{value:#~}" return str(value) @@ -438,7 +443,7 @@ 
def _extract_sim_times(settings_block: dict[str, Any]) -> tuple[str, str]: ) if production is not None else "" -def _build_protocol_settings(protocol_obj, calc_mode) -> dict[str, str]: +def _build_protocol_settings(protocol_obj, calc_mode) -> dict[str, str | set(str)]: if not protocol_obj: return { "protocol": "unknown", @@ -495,22 +500,15 @@ def _build_protocol_settings(protocol_obj, calc_mode) -> dict[str, str]: out["small_molecule_forcefield"] = str( forcefield_settings.get("small_molecule_forcefield") or "" ) - out["forcefields"] = "" ffs = forcefield_settings.get("forcefields") if isinstance(ffs, list) and ffs: - out["forcefields"] = ";".join(set([os.path.splitext(ff)[0] for ff in ffs])) + out["forcefields"] = set( + sorted([os.path.splitext(ff.split("/")[1])[0] for ff in ffs]) + ) partial_charge_settings = settings.get("partial_charge_settings") or {} if partial_charge_settings: - method = partial_charge_settings.get("partial_charge_method") - nagl_model = partial_charge_settings.get("nagl_model") - if not method: - raise ValueError("Protocol does not have a parital charge method defined.") - - if method and nagl_model: - out["partial_charges"] = f"{method} ({nagl_model})" - elif method: - out["partial_charges"] = str(method) + out["partial_charges"] = _normalize_partial_charge_info(partial_charge_settings) # Protocol-specific handling: RBFE typically has a single simulation block; # ASFE commonly has separate vacuum and solvent simulation settings. 
@@ -675,18 +673,33 @@ def _extract_auto_metadata( ) annotations = trans.mapping.annotations - for key, value in annotations.items(): - if "openmm" in key: + + # Extract mapper info if available (Option 1: concatenated string) + if "mapper_settings" in annotations and "mapper_version" in annotations: + mapper_settings = annotations.get("mapper_settings") + mapper_version = annotations.get("mapper_version", "unknown") + if isinstance(mapper_settings, dict): + mapper_name = mapper_settings.get("__qualname__", "unknown").split(".")[ + -1 + ] + mapping_algorithm = mapper_settings.get("_mapping_algorithm", "unknown") + mapper_str = f"{mapper_name} {mapper_version} ({mapping_algorithm})" metadata.system_info_dict[benchmark_set_system].add_version_setting( - "openmm_version", value, key + "mapper", mapper_str, key ) - if "openfe" in key: + + for annotation_key, value in annotations.items(): + if "openmm" in annotation_key: + metadata.system_info_dict[benchmark_set_system].add_version_setting( + "openmm_version", value, annotation_key + ) + if "openfe" in annotation_key: metadata.system_info_dict[benchmark_set_system].add_version_setting( - "openfe_version", value, key + "openfe_version", value, annotation_key ) - if "openff" in key and "toolkit" in key: + if "openff" in annotation_key and "toolkit" in annotation_key: metadata.system_info_dict[benchmark_set_system].add_version_setting( - "openff_toolkit_version", value, key + "openff_toolkit_version", value, annotation_key ) metadata.update_from_system_info() @@ -694,26 +707,60 @@ def _extract_auto_metadata( return metadata -def _normalize_partial_charge_info(partial_charges: str) -> str: - value = partial_charges.strip() - if not value: - return "" +def _normalize_partial_charge_info(partial_charge_settings: dict) -> str: + """Normalize partial charge settings to standardized method tags. 
- lower = value.lower() + Maps protocol charge method names to the standard method names used in + openfe_benchmarks.data.data_generation.charge_molecules: + - am1bcc_at (AM1BCC with AmberTools) + - am1bcc_oe (AM1BCC with OpenEye) + - am1bccelf10_oe (AM1BCC ELF10 with OpenEye) + - nagl_off (NAGL with OpenFF Toolkit) - # Canonical openfe-benchmarks style: nagl_.pt - model_match = re.search( - r"(openff-gnn-am1bcc-[0-9.]+\.0\.pt|openff-gnn-am1bcc-[0-9.]+\.pt)", lower - ) - if "nagl" in lower and model_match: - model = model_match.group(1) - return f"nagl_{model}" + For nagl_off, appends the model name if available. + + Parameters + ---------- + partial_charge_settings : dict + Protocol partial charge settings dict containing 'partial_charge_method', + optionally 'off_toolkit_backend', and optionally 'nagl_model'. + + Returns + ------- + str + Normalized method tag, e.g., "nagl_off_openff-gnn-am1bcc-1.0.0.pt" or "am1bccelf10_oe". + """ + if not partial_charge_settings or not isinstance(partial_charge_settings, dict): + return "" - if lower in {"am1bcc", "am1-bcc"}: - return "am1bcc" + method = partial_charge_settings.get("partial_charge_method", "").lower().strip() + if not method: + return "" - normalized = re.sub(r"[^a-z0-9._-]+", "_", lower).strip("_") - return normalized + # Map method names to standardized tags matching charge_molecules.py + if "nagl" in method: + # nagl_off with optional model + nagl_model = partial_charge_settings.get("nagl_model", "").strip() + if nagl_model: + # Extract just the filename if it's a path + nagl_model = nagl_model.split("/")[-1].split("\\")[-1] + return f"nagl_off_{nagl_model}" + return "nagl_off" + elif "am1bccelf10" in method or "elf10" in method: + return "am1bccelf10_oe" + elif "am1bcc" in method: + # Check toolkit backend to determine if AmberTools or OpenEye + backend = partial_charge_settings.get("off_toolkit_backend", "").lower().strip() + if backend == "ambertools": + return "am1bcc_at" + elif backend == "openeye": + 
return "am1bcc_oe" + else: + raise ValueError("Unknown charge backend") + else: + # Fallback: normalize any other method name + normalized = re.sub(r"[^a-z0-9._-]+", "_", method).strip("_") + return normalized def _make_tags( @@ -721,14 +768,15 @@ def _make_tags( mode: str, forcefield: list[tuple], partial_charge_tag: list[tuple], + benchmark_data: list[tuple], user_keywords: list[str], ) -> list[str]: tags: list[str] = [] tags.append(mode) if forcefield: - tags.extend( - list(set(ff for ff_set, _ in forcefield for ff in ff_set.split("/"))) - ) + tags.extend(list(set(ff for ff_set, _ in forcefield for ff in ff_set))) + if benchmark_data: + tags.extend(list(set(y for x in benchmark_data for y in x))) if partial_charge_tag: tags.extend(list(set(x[0] for x in partial_charge_tag))) tags.extend(user_keywords) @@ -767,14 +815,12 @@ def _build_content_summary( (summary_text, list of SystemInfo objects) """ - field_info = "/".join( - set(ff for ff_set, _ in metadata.forcefield for ff in ff_set.split("/")) - ) + field_info = "/".join(set(ff for ff_set, _ in metadata.forcefield for ff in ff_set)) if not field_info: field_info = "an unspecified force field" - charge_info = "/".join(x[0] for x in metadata.partial_charges) - if not field_info: + charge_info = "/".join(set(x[0] for x in metadata.partial_charges)) + if not charge_info: charge_info = "an unspecified partial charges" # Group systems by benchmark set for explicit listing @@ -806,9 +852,9 @@ def _build_content_summary( elif len(unique_sets) == 1: systems_desc = ", ".join(sets_to_systems[unique_sets[0]]) else: - systems_desc = f"{len(metadata.benchmark_sets_systems)} systems" + systems_desc = f"{len(metadata.benchmark_sets_systems)} edges" - # Count totals across all systems + # Count totals across all edges all_structures = { "ligands": set(), "proteins": set(), @@ -832,12 +878,12 @@ def _build_content_summary( if len(unique_sets) > 1: summary_parts = [ f"This submission describes the {subject} RBFE benchmark 
({systems_desc}) prepared with {field_info} and {charge_info}.", - f"The submission contains {metadata.n_transformations} transformations, {len(all_structures['ligands'])} unique ligands, and {len(all_structures['proteins'])} unique proteins.", + f"The submission contains {metadata.n_transformations} edges, {len(all_structures['ligands'])} unique ligands, and {len(all_structures['proteins'])} unique proteins.", ] else: summary_parts = [ f"This submission describes the {subject} RBFE benchmark prepared with {field_info} and {charge_info}.", - f"The network contains {metadata.n_transformations} transformations across {len(all_structures['ligands'])} unique ligands and {len(all_structures['proteins'])} unique proteins.", + f"The network contains {metadata.n_transformations} edges across {len(all_structures['ligands'])} unique ligands and {len(all_structures['proteins'])} unique proteins.", ] if systems_with_cofactors: summary_parts.append( @@ -847,12 +893,12 @@ def _build_content_summary( if len(unique_sets) > 1: summary_parts = [ f"This submission describes the {subject} ASFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", - f"The submission contains {metadata.n_transformations} transformations, {len(all_structures['ligands'])} unique solutes, and {len(all_structures['solvents'])} unique solvents.", + f"The submission contains {metadata.n_transformations} edges, {len(all_structures['ligands'])} unique solutes, and {len(all_structures['solvents'])} unique solvents.", ] else: summary_parts = [ f"This submission describes the {subject} ASFE benchmark prepared with {field_info} and {charge_info}.", - f"The archive contains {metadata.n_transformations} transformations across {len(all_structures['ligands'])} unique solutes and {len(all_structures['solvents'])} unique solvents.", + f"The archive contains {metadata.n_transformations} edges across {len(all_structures['ligands'])} unique solutes and {len(all_structures['solvents'])} unique 
solvents.", ] if used_alchemiscale: @@ -871,8 +917,8 @@ def _render_protocol_settings_yaml( All keys in the ProtocolSettingsInfo class are listed except for ``full_protocol_settings``. If only one protocol is used, it is labeled as the submission protocol and the notes specify "Applies to all - systems". If more than one protocol is present, the protocol that represents the largest number of systems is - listed last and notes specify as "All remaining systems". The other protocols are listed with notes containing + edges". If more than one protocol is present, the protocol that represents the largest number of systems is + listed last and notes specify as "All remaining edges". The other protocols are listed with notes containing the list of identifying strings. Parameters @@ -889,6 +935,7 @@ def _render_protocol_settings_yaml( return "protocol_settings: []\n" def _format_value(value: Any) -> str: + """Format a value for YAML. Quantity fields (with units) are rendered unquoted.""" if value is None: return "" if isinstance(value, bool): @@ -972,7 +1019,6 @@ def _full_protocol_setting_notes( output_lines = ["protocol_settings:"] multiple_protocols = len(protocol_settings_list) > 1 field_names = [ - "calculation_mode", "protocol", "timestep", "temperature", @@ -1002,22 +1048,26 @@ def _full_protocol_setting_notes( if is_primary: if multiple_protocols: - notes_lines = ["Applies to systems:"] + [ - f"- {item}" for item in sorted_ids + notes_lines = [f"Applies to {len(sorted_ids)} edges:"] + [ + f"- {item}" for item in sorted_ids[:5] ] + if len(notes_lines) - 1 != len(sorted_ids): + notes_lines.append("- etc.") notes = "\n".join(notes_lines) notes_is_multiline = True else: - notes = "Applies to all systems" + notes = "Applies to all edges" notes_is_multiline = False else: notes_lines = _full_protocol_setting_notes( primary_settings, protocol_settings ) - notes_lines += ["Applies to transformations:"] + [ - f"- {item}" for item in sorted_ids + trans_lines = [f"Applies to 
{len(sorted_ids)} edges:"] + [ + f"- {item}" for item in sorted_ids[:5] ] - notes = "\n".join(notes_lines) + if len(trans_lines) - 1 != len(sorted_ids): + trans_lines.append("- etc.") + notes = "\n".join(notes_lines + trans_lines) notes_is_multiline = True output_lines.append( @@ -1034,6 +1084,14 @@ def _full_protocol_setting_notes( else: output_lines.append(" notes: " + _format_value(notes)) continue + # Special handling for forcefields: render as JSON array + if field_name == "forcefields": + ff_value = getattr(protocol_settings, field_name) + if isinstance(ff_value, (list, tuple, set)) and ff_value: + items = [json.dumps(str(x)) for x in sorted(ff_value)] + if is_primary: + output_lines.append(f" {field_name}: [{', '.join(items)}]") + continue if is_primary: output_lines.append( f" {field_name}: {_format_value(getattr(protocol_settings, field_name))}" @@ -1052,7 +1110,7 @@ def _render_keyed_values_yaml( section_name: str, value_keys: list[tuple[Any, list[str]]], value_label: str = "value", - keys_label: str = "systems", + keys_label: str = "edges", ) -> str: """Render simple value-with-systems metadata into YAML. @@ -1073,9 +1131,15 @@ def _render_keyed_values_yaml( Output yaml section. 
""" if not value_keys: - return f"{section_name}: []\n" + return f"{section_name}: TODO" if len(value_keys) == 1: - return f"{section_name}: {json.dumps(str(value_keys[0][0]))}\n" + if isinstance(value_keys[0][0], str): + return f"{section_name}: {json.dumps(str(value_keys[0][0]))}" + elif isinstance(value_keys[0][0], (list, tuple, set)): + items = [json.dumps(str(x)) for x in sorted(value_keys[0][0])] + return f"{section_name}: [{', '.join(items)}]" + else: + raise ValueError(f"Unknown value type to print: {value_keys[0][0]}") ordered_settings = sorted( enumerate(value_keys), @@ -1083,18 +1147,24 @@ def _render_keyed_values_yaml( ) lines = [f"{section_name}:"] - for i, (value, keys) in ordered_settings: - lines.append(f" - {value_label}: {json.dumps(str(value))}") - if i != len(ordered_settings) - 1: - if keys: - lines.append(f" {keys_label}:") - for key in sorted(keys): - lines.append(f" - {json.dumps(str(key))}") + for _, (value, keys) in ordered_settings: + if isinstance(value, str): + lines.append(f" - {value_label}: {json.dumps(str(value))}") + elif isinstance(value, (list, tuple, set)): + items = [json.dumps(str(x)) for x in sorted(value)] + lines.append(f" - {value_label}: [{', '.join(items)}]") else: + raise ValueError(f"Unknown value type to print: {value_label}") + if keys: lines.append(f" {keys_label}:") - lines.append(" - All remaining") + for i, key in enumerate(sorted(keys)): + if i < 5: + lines.append(f" - {json.dumps(str(key))}") + else: + lines.append(" - etc.") + break - return "\n".join(lines) + "\n" + return "\n".join(lines) def _render_benchmark_system_yaml(system_info_dict: dict[tuple, SystemInfo]) -> str: @@ -1153,19 +1223,22 @@ def _make_submission_yaml( ) benchmark_system_yaml = _render_benchmark_system_yaml(metadata.system_info_dict) openfe_version_yaml = _render_keyed_values_yaml( - "openfe_version", metadata.openfe_version, "version", "systems" + "openfe_version", metadata.openfe_version, "version", "edges" ) openmm_version_yaml = 
_render_keyed_values_yaml( - "openmm_version", metadata.openmm_version, "version", "systems" + "openmm_version", metadata.openmm_version, "version", "edges" ) openff_toolkit_version_yaml = _render_keyed_values_yaml( - "openff_toolkit_version", metadata.openff_toolkit_version, "version", "systems" + "openff_toolkit_version", metadata.openff_toolkit_version, "version", "edges" + ) + mapper_yaml = _render_keyed_values_yaml( + "mapper", metadata.mapper, "mapper", "edges" ) forcefield_yaml = _render_keyed_values_yaml( - "forcefield", metadata.forcefield, "forcefield", "systems" + "forcefield", metadata.forcefield, "forcefield", "edges" ) partial_charges_yaml = _render_keyed_values_yaml( - "partial_charges", metadata.partial_charges, "partial_charges", "systems" + "partial_charges", metadata.partial_charges, "partial_charges", "edges" ) return f"""# REQUIRED: unique, kebab-case identifier for this submission @@ -1187,12 +1260,10 @@ def _make_submission_yaml( # REQUIRED: publication/submission date (ISO 8601) date: {date.today().isoformat()} - {openfe_version_yaml} {openmm_version_yaml} {openff_toolkit_version_yaml} - -# Recommended descriptors +{mapper_yaml} {forcefield_yaml} {partial_charges_yaml} {benchmark_system_yaml} @@ -1243,25 +1314,25 @@ def _make_zenodo_description( ) benchmark_system_yaml = _render_benchmark_system_yaml(metadata.system_info_dict) openfe_version_yaml = _render_keyed_values_yaml( - "openfe_version", metadata.openfe_version, "version", "systems" + "openfe_version", metadata.openfe_version, "version", "edges" ) openmm_version_yaml = _render_keyed_values_yaml( - "openmm_version", metadata.openmm_version, "version", "systems" + "openmm_version", metadata.openmm_version, "version", "edges" ) openff_toolkit_version_yaml = _render_keyed_values_yaml( "openff_toolkit_version", metadata.openff_toolkit_version, "version", - "systems", + "edges", ) forcefield_yaml = _render_keyed_values_yaml( - "forcefield", metadata.forcefield, "forcefield", "systems" + 
"forcefield", metadata.forcefield, "forcefield", "edges" ) partial_charges_yaml = _render_keyed_values_yaml( "partial_charges", metadata.partial_charges, "partial_charges", - "systems", + "edges", ) # Build network keys to systems mapping section @@ -1448,6 +1519,7 @@ def process_network( "small_molecule_force_field", "protocols", "protocol_settings_list", + "mapper", ]: for value, keys in getattr(metadata, key): _add_value_with_keys(getattr(merged_metadata, key), value, keys) @@ -1497,6 +1569,7 @@ def process_network( mode=mode, forcefield=merged_metadata.forcefield, partial_charge_tag=merged_metadata.partial_charges, + benchmark_data=merged_metadata.benchmark_sets_systems, user_keywords=tags_list, ) From 5998eaf1cb26838dcb65fb8f40c07c68192aaeee Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Fri, 12 Jun 2026 15:17:42 -0400 Subject: [PATCH 15/24] Update processing pint quantities --- devtools/conda-envs/environment.yml | 1 + .../submission.yaml | 38 +++++++++---------- .../scripts/prepare_metadata_submission.py | 13 +++++-- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/devtools/conda-envs/environment.yml b/devtools/conda-envs/environment.yml index dd4cb83..6d243ca 100644 --- a/devtools/conda-envs/environment.yml +++ b/devtools/conda-envs/environment.yml @@ -7,3 +7,4 @@ dependencies: - pytest - pre-commit - pontibus + - pint diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 28133a8..de5a4b8 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,13 +8,13 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - 
tip3p_HFE_multivalent/ff14SB/phosaa10/tip3p_standard and am1bcc_at. The submission contains 160 + ff14SB/phosaa10/tip3p_HFE_multivalent/tip3p_standard and am1bcc_at. The submission contains 160 edges, 45 unique ligands, and 1 unique proteins. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. # REQUIRED: list of submission tags -tags: [rbfe, tip3p_HFE_multivalent, ff14SB, phosaa10, tip3p_standard, irak4_s3, egfr, p38, tyk2, charge_annihilation_set, irak4_s2, jacs_set, am1bcc_at, charge_change, benchmark, openfe, openmm-840] +tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, irak4_s3, tyk2, irak4_s2, jacs_set, charge_annihilation_set, egfr, p38, am1bcc_at, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: @@ -55,14 +55,14 @@ license: CC-BY-4.0 # RECOMMENDED / OPTIONAL metadata for protocol settings protocol_settings: - protocol: "RelativeHybridTopologyProtocol" - timestep: "{'val': 4.0, 'unit': 'femtosecond'}" - temperature: "{'val': 298.15, 'unit': 'kelvin'}" - pressure: "{'val': 1, 'unit': 'bar'}" + timestep: "4.0 fs" + temperature: "298.15 K" + pressure: "1 bar" forcefields: ["ff14SB", "phosaa10", "tip3p_HFE_multivalent", "tip3p_standard"] small_molecule_forcefield: "openff-2.3.0" partial_charges: "am1bcc_at" - equilibration_time: "{'val': 1.0, 'unit': 'nanosecond'}" - production_time: "{'val': 5.0, 'unit': 'nanosecond'}" + equilibration_time: "1.0 ns" + production_time: "5.0 ns" vacuum_equilibration_time: vacuum_production_time: solvent_equilibration_time: @@ -72,11 +72,11 @@ protocol_settings: lambda_schedule: "" notes: | Applies to 40 edges: - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none - - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - etc. 
- protocol: "RelativeHybridTopologyProtocol" notes: | @@ -85,7 +85,6 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length.val: 5.0 -> 20 - - solvation_settings.solvent_padding.val: 1.5 -> 1 Applies to 5 edges: - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} @@ -99,6 +98,7 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length.val: 5.0 -> 20 + - solvation_settings.solvent_padding.val: 1 -> 1.5 Applies to 5 edges: - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none @@ -108,12 +108,12 @@ protocol_settings: - protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: - - solvation_settings.solvent_padding.val: 1.5 -> 1 + - solvation_settings.solvent_padding.val: 1 -> 1.5 Applies to 40 edges: - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none - etc. 
diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 58bc14e..5b69eca 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -64,12 +64,16 @@ import warnings import pprint +from pint import UnitRegistry + from gufe.archival import AlchemicalArchive from gufe import AlchemicalNetwork from gufe.transformations.transformation import Transformation from openfe_benchmarks.data import BenchmarkIndex +ureg = UnitRegistry() + def _add_value_with_keys( list_obj: list[tuple[Any, list[str]]], @@ -399,9 +403,12 @@ def _iter_nested_items(obj: Any) -> list[tuple[str, Any]]: def _quantity_to_text(value: Any) -> str: - if hasattr(value, "magnitude"): - return f"{value:#~}" - return str(value) + # For pint quantities that have been processed by pydantic model_dump() + if isinstance(value, dict) and "unit" in value: + q = value["val"] * ureg.parse_expression(value["unit"]) + return f"{q:#~}" + else: + return str(value) def _infer_benchmark_data_set_system( From 0fa17f374c06c46baada49b14b0e8dd9cbe9b52e Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Fri, 12 Jun 2026 15:21:42 -0400 Subject: [PATCH 16/24] Update set order --- .../results/2026-03-18-openmm-840-qa-testing/submission.yaml | 4 ++-- openfe_benchmarks/scripts/prepare_metadata_submission.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index de5a4b8..7fbde20 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,13 +8,13 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set 
RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - ff14SB/phosaa10/tip3p_HFE_multivalent/tip3p_standard and am1bcc_at. The submission contains 160 + tip3p_HFE_multivalent/ff14SB/tip3p_standard/phosaa10 and am1bcc_at. The submission contains 160 edges, 45 unique ligands, and 1 unique proteins. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. # REQUIRED: list of submission tags -tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, irak4_s3, tyk2, irak4_s2, jacs_set, charge_annihilation_set, egfr, p38, am1bcc_at, charge_change, benchmark, openfe, openmm-840] +tags: [rbfe, tip3p_HFE_multivalent, ff14SB, tip3p_standard, phosaa10, charge_annihilation_set, jacs_set, tyk2, egfr, p38, irak4_s2, irak4_s3, am1bcc_at, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 5b69eca..ef009e4 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -510,7 +510,7 @@ def _build_protocol_settings(protocol_obj, calc_mode) -> dict[str, str | set(str ffs = forcefield_settings.get("forcefields") if isinstance(ffs, list) and ffs: out["forcefields"] = set( - sorted([os.path.splitext(ff.split("/")[1])[0] for ff in ffs]) + sorted(os.path.splitext(ff.split("/")[1])[0] for ff in ffs) ) partial_charge_settings = settings.get("partial_charge_settings") or {} From 09f9e065b0c027969b23ad3effd3f52a3ff55c26 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Fri, 12 Jun 2026 15:45:17 -0400 Subject: [PATCH 17/24] Update pint use --- .../submission.yaml | 28 +++++++++---------- .../scripts/prepare_metadata_submission.py | 20 
+++++++------ 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 7fbde20..0bf0977 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,13 +8,13 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - tip3p_HFE_multivalent/ff14SB/tip3p_standard/phosaa10 and am1bcc_at. The submission contains 160 + tip3p_HFE_multivalent/ff14SB/phosaa10/tip3p_standard and am1bcc_at. The submission contains 160 edges, 45 unique ligands, and 1 unique proteins. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. 
# REQUIRED: list of submission tags -tags: [rbfe, tip3p_HFE_multivalent, ff14SB, tip3p_standard, phosaa10, charge_annihilation_set, jacs_set, tyk2, egfr, p38, irak4_s2, irak4_s3, am1bcc_at, charge_change, benchmark, openfe, openmm-840] +tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, charge_annihilation_set, egfr, irak4_s2, irak4_s3, jacs_set, p38, tyk2, am1bcc_at, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: @@ -72,11 +72,11 @@ protocol_settings: lambda_schedule: "" notes: | Applies to 40 edges: - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none + - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none - etc. - protocol: "RelativeHybridTopologyProtocol" notes: | @@ -85,6 +85,7 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length.val: 5.0 -> 20 + - solvation_settings.solvent_padding.val: 1.5 -> 1 Applies to 5 edges: - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} @@ -98,7 +99,6 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length.val: 5.0 -> 20 - - solvation_settings.solvent_padding.val: 1 -> 1.5 Applies to 5 edges: - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none @@ -108,12 +108,12 @@ protocol_settings: - protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: - - solvation_settings.solvent_padding.val: 1 -> 1.5 + - solvation_settings.solvent_padding.val: 1.5 -> 1 Applies to 40 edges: - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none - - 
AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - etc. 
diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index ef009e4..8ab9c80 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -64,7 +64,7 @@ import warnings import pprint -from pint import UnitRegistry +from pint import Quantity from gufe.archival import AlchemicalArchive from gufe import AlchemicalNetwork @@ -72,8 +72,6 @@ from openfe_benchmarks.data import BenchmarkIndex -ureg = UnitRegistry() - def _add_value_with_keys( list_obj: list[tuple[Any, list[str]]], @@ -405,7 +403,7 @@ def _iter_nested_items(obj: Any) -> list[tuple[str, Any]]: def _quantity_to_text(value: Any) -> str: # For pint quantities that have been processed by pydantic model_dump() if isinstance(value, dict) and "unit" in value: - q = value["val"] * ureg.parse_expression(value["unit"]) + q = Quantity(value["val"], value["unit"]) return f"{q:#~}" else: return str(value) @@ -781,11 +779,11 @@ def _make_tags( tags: list[str] = [] tags.append(mode) if forcefield: - tags.extend(list(set(ff for ff_set, _ in forcefield for ff in ff_set))) + tags.extend(sorted(list(set(ff for ff_set, _ in forcefield for ff in ff_set)))) if benchmark_data: - tags.extend(list(set(y for x in benchmark_data for y in x))) + tags.extend(sorted(list(set(y for x in benchmark_data for y in x)))) if partial_charge_tag: - tags.extend(list(set(x[0] for x in partial_charge_tag))) + tags.extend(sorted(list(set(x[0] for x in partial_charge_tag)))) tags.extend(user_keywords) # Deduplicate while preserving order. 
@@ -962,7 +960,7 @@ def _format_identifier(identifier: Any) -> str: ordered_settings = sorted( enumerate(protocol_settings_list), - key=lambda item: (len(item[1]), item[0]), + key=lambda item: (len(item[1][1]), str(item[1][0].protocol), item[0]), ) primary_index = max( @@ -1150,7 +1148,7 @@ def _render_keyed_values_yaml( ordered_settings = sorted( enumerate(value_keys), - key=lambda item: (len(item[1]), item[0]), + key=lambda item: (len(item[1][1]), str(item[1][0]), item[0]), ) lines = [f"{section_name}:"] @@ -1341,6 +1339,9 @@ def _make_zenodo_description( "partial_charges", "edges", ) + mapper_yaml = _render_keyed_values_yaml( + "mapper", metadata.mapper, "mapper", "edges" + ) # Build network keys to systems mapping section network_keys_section = "" @@ -1382,6 +1383,7 @@ def _make_zenodo_description( {forcefield_yaml} {partial_charges_yaml} +{mapper_yaml} {benchmark_system_yaml} From 5467495872d198e8f74cde62b0f1e4ae7300709c Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Thu, 18 Jun 2026 12:18:10 -0400 Subject: [PATCH 18/24] Address reviewer comments --- .../submission.yaml | 63 ++++++++++-------- .../scripts/prepare_metadata_submission.py | 66 +++++++++++++------ 2 files changed, 79 insertions(+), 50 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 0bf0977..432be77 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,10 +8,10 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - tip3p_HFE_multivalent/ff14SB/phosaa10/tip3p_standard and am1bcc_at. 
The submission contains 160 - edges, 45 unique ligands, and 1 unique proteins. Note this means the charge annihilation sets are - not complete compared to what is in that system and should not be compared to other complete runs - due to the missing edges. + ff14SB/tip3p_HFE_multivalent/tip3p_standard/phosaa10 for proteins and solvents, and openff-2.3.0 + with am1bcc_at for ligands, solutes, and cofactors. The submission contains 160 edges, 45 unique + ligands. Note this means the charge annihilation sets are not complete compared to what is in that + system and should not be compared to other complete runs due to the missing edges. # REQUIRED: list of submission tags tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, charge_annihilation_set, egfr, irak4_s2, irak4_s3, jacs_set, p38, tyk2, am1bcc_at, charge_change, benchmark, openfe, openmm-840] @@ -21,12 +21,13 @@ authors: - name: Josh Horton # REQUIRED: publication/submission date (ISO 8601) -date: 2026-06-12 +date: 2026-06-18 openfe_version: TODO openmm_version: TODO openff_toolkit_version: TODO mapper: "KartografAtomMapper 1.2.0 (LSA)" forcefield: ["ff14SB", "phosaa10", "tip3p_HFE_multivalent", "tip3p_standard"] +small_molecule_forcefield: "openff-2.3.0" partial_charges: "am1bcc_at" # BenchmarkData provenance (from openfe-benchmarks planning script) with associated network key @@ -72,48 +73,52 @@ protocol_settings: lambda_schedule: "" notes: | Applies to 40 edges: - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, 
ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - etc. - protocol: "RelativeHybridTopologyProtocol" + production_time: "20 ns" + lambda_windows: "22" notes: | Detailed protocol settings differ: - alchemical_settings.explicit_charge_correction: False -> True - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - - simulation_settings.production_length.val: 5.0 -> 20 - - solvation_settings.solvent_padding.val: 1.5 -> 1 + - simulation_settings.production_length: 5.0 -> 20 + - solvation_settings.solvent_padding: 1 -> 1.5 Applies to 5 edges: - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, 
solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein=none - protocol: "RelativeHybridTopologyProtocol" + production_time: "20 ns" + lambda_windows: "22" notes: | Detailed protocol settings differ: - alchemical_settings.explicit_charge_correction: False -> True - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - - simulation_settings.production_length.val: 5.0 -> 20 + - simulation_settings.production_length: 5.0 -> 20 Applies to 5 edges: - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, 
cofactors=none, protein=none - - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: - - solvation_settings.solvent_padding.val: 1.5 -> 1 + - solvation_settings.solvent_padding: 1 -> 1.5 Applies to 40 edges: - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 
jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2aa, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2bb, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2c, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2ee, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-034843fe04133cac65ec83f9b73fc191 jacs_set-p38: ligand_start=2f, ligand_final=none, solvent=none, cofactors=none, protein=none - etc. 
diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 8ab9c80..5166b08 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -98,8 +98,10 @@ class ProtocolSettingsInfo: temperature: str pressure: str lambda_functions: str - small_molecule_forcefield: str - forcefields: set[str] + small_molecule_forcefield: ( + str # mirrors definition in OpenMMSystemGeneratorFFSettings + ) + forcefields: set[str] # mirrors definition in OpenMMSystemGeneratorFFSettings partial_charges: str lambda_windows: str = "" lambda_schedule: str = "" @@ -217,9 +219,7 @@ class AutoMetadata: mapper: list[tuple[str, list[str]]] = field(default_factory=list) protocols: list[tuple[str, list[str]]] = field(default_factory=list) forcefield: list[tuple[str, list[str]]] = field(default_factory=list) - small_molecule_force_field: list[tuple[str, list[str]]] = field( - default_factory=list - ) + small_molecule_forcefield: list[tuple[str, list[str]]] = field(default_factory=list) partial_charges: list[tuple[str, list[str]]] = field(default_factory=list) protocol_settings_list: list[tuple[ProtocolSettingsInfo, list[str]]] = field( default_factory=list @@ -235,7 +235,7 @@ def update_from_system_info(self) -> None: self.mapper = [] self.protocols = [] self.forcefield = [] - self.small_molecule_force_field = [] + self.small_molecule_forcefield = [] self.partial_charges = [] self.protocol_settings_list = [] @@ -260,7 +260,7 @@ def update_from_system_info(self) -> None: ) if protocol_settings.small_molecule_forcefield: _add_value_with_keys( - self.small_molecule_force_field, + self.small_molecule_forcefield, protocol_settings.small_molecule_forcefield, keys, ) @@ -824,6 +824,10 @@ def _build_content_summary( if not field_info: field_info = "an unspecified force field" + small_mol_ff_info = "/".join(set(x[0] for x in 
metadata.small_molecule_forcefield)) + if not small_mol_ff_info: + small_mol_ff_info = "an unspecified small molecule force field" + charge_info = "/".join(set(x[0] for x in metadata.partial_charges)) if not charge_info: charge_info = "an unspecified partial charges" @@ -882,13 +886,13 @@ def _build_content_summary( ) if len(unique_sets) > 1: summary_parts = [ - f"This submission describes the {subject} RBFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", - f"The submission contains {metadata.n_transformations} edges, {len(all_structures['ligands'])} unique ligands, and {len(all_structures['proteins'])} unique proteins.", + f"This submission describes the {subject} RBFE benchmark ({systems_desc}) prepared with {field_info} for proteins and solvents, and {small_mol_ff_info} with {charge_info} for ligands, solutes, and cofactors.", + f"The submission contains {metadata.n_transformations} edges, {len(all_structures['ligands'])} unique ligands.", ] else: summary_parts = [ - f"This submission describes the {subject} RBFE benchmark prepared with {field_info} and {charge_info}.", - f"The network contains {metadata.n_transformations} edges across {len(all_structures['ligands'])} unique ligands and {len(all_structures['proteins'])} unique proteins.", + f"This submission describes the {subject} RBFE benchmark prepared with {field_info} for proteins and solvents, and {small_mol_ff_info} with {charge_info} for ligands, solutes, and cofactors.", + f"The network contains {metadata.n_transformations} edges across {len(all_structures['ligands'])} unique ligands.", ] if systems_with_cofactors: summary_parts.append( @@ -897,12 +901,12 @@ def _build_content_summary( else: if len(unique_sets) > 1: summary_parts = [ - f"This submission describes the {subject} ASFE benchmark ({systems_desc}) prepared with {field_info} and {charge_info}.", + f"This submission describes the {subject} ASFE benchmark ({systems_desc}) prepared with {field_info} for solvents, and 
{small_mol_ff_info} with {charge_info} for solutes and cofactors.", f"The submission contains {metadata.n_transformations} edges, {len(all_structures['ligands'])} unique solutes, and {len(all_structures['solvents'])} unique solvents.", ] else: summary_parts = [ - f"This submission describes the {subject} ASFE benchmark prepared with {field_info} and {charge_info}.", + f"This submission describes the {subject} ASFE benchmark prepared with {field_info} for solvents, and {small_mol_ff_info} with {charge_info} for solutes and cofactors.", f"The archive contains {metadata.n_transformations} edges across {len(all_structures['ligands'])} unique solutes and {len(all_structures['solvents'])} unique solvents.", ] @@ -976,7 +980,11 @@ def _parse_full_protocol_settings(value: str) -> Any: return None def _format_path(path: list[str]) -> str: - return ".".join(path) + formatted = ".".join(path) + # Strip .val suffix for pint Quantities + if formatted.endswith(".val"): + formatted = formatted[:-4] + return formatted def _compare_full_protocol_settings( base: ProtocolSettingsInfo, other: ProtocolSettingsInfo @@ -1096,17 +1104,26 @@ def _full_protocol_setting_notes( items = [json.dumps(str(x)) for x in sorted(ff_value)] if is_primary: output_lines.append(f" {field_name}: [{', '.join(items)}]") + else: + # For non-primary, output if different from primary + primary_ff_value = getattr(primary_settings, field_name, None) + if primary_ff_value != ff_value: + output_lines.append( + f" {field_name}: [{', '.join(items)}]" + ) continue if is_primary: output_lines.append( f" {field_name}: {_format_value(getattr(protocol_settings, field_name))}" ) - elif field_name == "protocol": - output_lines.append( - f" {field_name}: {_format_value(getattr(protocol_settings, field_name))}" - ) - - # For non-primary protocols, only protocol and notes are listed. 
+ else: + # For non-primary protocols, output field if it differs from primary + current_value = getattr(protocol_settings, field_name) + primary_value = getattr(primary_settings, field_name, None) + if current_value != primary_value: + output_lines.append( + f" {field_name}: {_format_value(current_value)}" + ) return "\n".join(output_lines) + "\n" @@ -1242,6 +1259,12 @@ def _make_submission_yaml( forcefield_yaml = _render_keyed_values_yaml( "forcefield", metadata.forcefield, "forcefield", "edges" ) + small_molecule_forcefield_yaml = _render_keyed_values_yaml( + "small_molecule_forcefield", + metadata.small_molecule_forcefield, + "small_molecule_forcefield", + "edges", + ) partial_charges_yaml = _render_keyed_values_yaml( "partial_charges", metadata.partial_charges, "partial_charges", "edges" ) @@ -1270,6 +1293,7 @@ def _make_submission_yaml( {openff_toolkit_version_yaml} {mapper_yaml} {forcefield_yaml} +{small_molecule_forcefield_yaml} {partial_charges_yaml} {benchmark_system_yaml} @@ -1525,7 +1549,7 @@ def process_network( "openff_toolkit_version", "forcefield", "partial_charges", - "small_molecule_force_field", + "small_molecule_forcefield", "protocols", "protocol_settings_list", "mapper", From cba07feb85d1389100b9391564bf3f865bc26b10 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Fri, 19 Jun 2026 08:47:21 -0400 Subject: [PATCH 19/24] Update descriptor in zenodo --- .../2026-03-18-openmm-840-qa-testing/submission.yaml | 4 ++-- openfe_benchmarks/scripts/prepare_metadata_submission.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 432be77..57efbd5 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,7 +8,7 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 
systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - ff14SB/tip3p_HFE_multivalent/tip3p_standard/phosaa10 for proteins and solvents, and openff-2.3.0 + ff14SB/tip3p_standard/phosaa10/tip3p_HFE_multivalent for proteins and solvents, and openff-2.3.0 with am1bcc_at for ligands, solutes, and cofactors. The submission contains 160 edges, 45 unique ligands. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. @@ -21,7 +21,7 @@ authors: - name: Josh Horton # REQUIRED: publication/submission date (ISO 8601) -date: 2026-06-18 +date: 2026-06-19 openfe_version: TODO openmm_version: TODO openff_toolkit_version: TODO diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 5166b08..7382e7d 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -1366,6 +1366,12 @@ def _make_zenodo_description( mapper_yaml = _render_keyed_values_yaml( "mapper", metadata.mapper, "mapper", "edges" ) + small_molecule_forcefield_yaml = _render_keyed_values_yaml( + "small_molecule_forcefield", + metadata.small_molecule_forcefield, + "small_molecule_forcefield", + "edges", + ) # Build network keys to systems mapping section network_keys_section = "" @@ -1406,6 +1412,7 @@ def _make_zenodo_description( ## Recommended descriptors {forcefield_yaml} +{small_molecule_forcefield_yaml} {partial_charges_yaml} {mapper_yaml} From bab9f3c89bc8d6e81e2a4623a39622a3b9a9a6ee Mon Sep 17 00:00:00 2001 From: Josh Horton Date: Fri, 19 Jun 2026 16:51:44 +0100 Subject: [PATCH 20/24] make charges user defined, update yaml file --- .../submission.yaml | 18 +++++++++--------- 
.../scripts/prepare_metadata_submission.py | 18 +++++++----------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 57efbd5..5ce934a 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -9,12 +9,12 @@ summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark (charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with ff14SB/tip3p_standard/phosaa10/tip3p_HFE_multivalent for proteins and solvents, and openff-2.3.0 - with am1bcc_at for ligands, solutes, and cofactors. The submission contains 160 edges, 45 unique + with nagl_openff-gnn-am1bcc-1.0.0.pt for ligands, solutes, and cofactors. The submission contains 160 edges, 45 unique ligands. Note this means the charge annihilation sets are not complete compared to what is in that system and should not be compared to other complete runs due to the missing edges. 
# REQUIRED: list of submission tags -tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, charge_annihilation_set, egfr, irak4_s2, irak4_s3, jacs_set, p38, tyk2, am1bcc_at, charge_change, benchmark, openfe, openmm-840] +tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, charge_annihilation_set, egfr, irak4_s2, irak4_s3, jacs_set, p38, tyk2, nagl_openff-gnn-am1bcc-1.0.0.pt, charge_change, benchmark, openfe, openmm-840] # REQUIRED: list of contributing authors (name, affiliation; ORCID optional) authors: @@ -22,13 +22,13 @@ authors: # REQUIRED: publication/submission date (ISO 8601) date: 2026-06-19 -openfe_version: TODO -openmm_version: TODO -openff_toolkit_version: TODO +openfe_version: 1.9.1 +openmm_version: 8.4.0 +openff_toolkit_version: 0.18.0 mapper: "KartografAtomMapper 1.2.0 (LSA)" forcefield: ["ff14SB", "phosaa10", "tip3p_HFE_multivalent", "tip3p_standard"] small_molecule_forcefield: "openff-2.3.0" -partial_charges: "am1bcc_at" +partial_charges: "nagl_openff-gnn-am1bcc-1.0.0.pt" # BenchmarkData provenance (from openfe-benchmarks planning script) with associated network key benchmark_data: @@ -47,8 +47,8 @@ results: computational_results.json # REQUIRED: long-term archive pointer (at least doi or url) archive: - doi: TODO add DOI - archive_provider: TODO add archive provider + doi: https://doi.org/10.5281/zenodo.20643703 + archive_provider: zenodo # REQUIRED: license for the submission license: CC-BY-4.0 @@ -61,7 +61,7 @@ protocol_settings: pressure: "1 bar" forcefields: ["ff14SB", "phosaa10", "tip3p_HFE_multivalent", "tip3p_standard"] small_molecule_forcefield: "openff-2.3.0" - partial_charges: "am1bcc_at" + partial_charges: "nagl_openff-gnn-am1bcc-1.0.0.pt" equilibration_time: "1.0 ns" production_time: "5.0 ns" vacuum_equilibration_time: diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 7382e7d..7c79f60 100644 --- 
a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -352,7 +352,7 @@ def _default_submission_id(network_key: str) -> str: def _generate_title( mode: str, - benchmark_set_systems: list[tuple(str, str)], + benchmark_set_systems: list[tuple[str, str]], submission_id: str, ) -> str: """ @@ -1265,9 +1265,8 @@ def _make_submission_yaml( "small_molecule_forcefield", "edges", ) - partial_charges_yaml = _render_keyed_values_yaml( - "partial_charges", metadata.partial_charges, "partial_charges", "edges" - ) + # make the user add the charges manually as ligand charges might take priority over those in the protocol settings + partial_charges_yaml = "partial_charges: TODO" return f"""# REQUIRED: unique, kebab-case identifier for this submission submission_id: {submission_id} @@ -1357,12 +1356,9 @@ def _make_zenodo_description( forcefield_yaml = _render_keyed_values_yaml( "forcefield", metadata.forcefield, "forcefield", "edges" ) - partial_charges_yaml = _render_keyed_values_yaml( - "partial_charges", - metadata.partial_charges, - "partial_charges", - "edges", - ) + + # make the user add the charges manually as ligand charges might take priority over those in the protocol settings + partial_charges_yaml = "partial_charges: TODO" mapper_yaml = _render_keyed_values_yaml( "mapper", metadata.mapper, "mapper", "edges" ) @@ -1608,7 +1604,7 @@ def process_network( tags_final = _make_tags( mode=mode, forcefield=merged_metadata.forcefield, - partial_charge_tag=merged_metadata.partial_charges, + partial_charge_tag=[], benchmark_data=merged_metadata.benchmark_sets_systems, user_keywords=tags_list, ) From 20dbef3a86b321f53563e715e7cb2038d74e1f5f Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Mon, 22 Jun 2026 11:40:30 -0400 Subject: [PATCH 21/24] Fix charge discrepancy --- .../scripts/prepare_metadata_submission.py | 258 +++++++++++++++++- 1 file changed, 256 insertions(+), 2 deletions(-) diff --git 
a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 7c79f60..3093480 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -87,6 +87,103 @@ def _add_value_with_keys( list_obj.append((value, list(keys))) +def _extract_charge_provenance(component_dict: dict) -> dict | None: + """Extract and parse charge_provenance from component molprops. + + Parameters + ---------- + component_dict : dict + Component dictionary (after .to_dict() conversion) + + Returns + ------- + dict | None + Parsed charge_provenance dict if present and valid, None otherwise + """ + molprops = component_dict.get("molprops") or {} + if not isinstance(molprops, dict): + return None + + charge_provenance_str = molprops.get("charge_provenance") + if not charge_provenance_str: + return None + + try: + # charge_provenance is stored as a JSON string in molprops + provenance_dict = json.loads(charge_provenance_str) + return provenance_dict + except (json.JSONDecodeError, TypeError) as e: + warnings.warn( + f"Failed to parse charge_provenance for component {component_dict.get('name', 'unknown')}: {e}", + category=UserWarning, + ) + return None + + +def _extract_charges_from_transformation(trans, calc_mode: str) -> dict: + """Extract charge_provenance from ligand and cofactor components in both states. 
+ + Parameters + ---------- + trans : Transformation + Transformation object + calc_mode : str + Calculation mode ("rbfe" or "asfe") + + Returns + ------- + dict + Structure: { + "stateA": { + "ligand": {...provenance dict...} or None, + "cofactor": {...provenance dict...} or None + }, + "stateB": { + "ligand": {...provenance dict...} or None, + "cofactor": {...provenance dict...} or None + } + } + """ + result = { + "stateA": {"ligand": None, "cofactor": None}, + "stateB": {"ligand": None, "cofactor": None}, + } + + for state_key in ("stateA", "stateB"): + chemical_system = getattr(trans, state_key) + if not chemical_system: + continue + + for label, component in chemical_system.components.items(): + qualname = str(type(component)).rstrip("'>").split(".")[-1] + component_dict = component.to_dict() + + # Determine component type based on label and qualname + if calc_mode == "asfe": + # In ASFE mode, we care about "solute" (ligand) + if "solute" in label or qualname == "SmallMoleculeComponent": + provenance = _extract_charge_provenance(component_dict) + if provenance: + result[state_key]["ligand"] = provenance + elif calc_mode == "rbfe": + # In RBFE mode, we care about "ligand" and "cofactor" + if "ligand" in label: + provenance = _extract_charge_provenance(component_dict) + if provenance: + result[state_key]["ligand"] = provenance + elif "cofactor" in label: + provenance = _extract_charge_provenance(component_dict) + if provenance: + result[state_key]["cofactor"] = provenance + elif qualname == "SmallMoleculeComponent" and "solvent" not in label: + # Non-solvent small molecules that are not explicit ligands are treated as cofactors + provenance = _extract_charge_provenance(component_dict) + if provenance: + result[state_key]["cofactor"] = provenance + + return result + + @dataclass class ProtocolSettingsInfo: """Container for protocol settings with source metadata.""" @@ -546,6 +643,121 @@ def _build_protocol_settings(protocol_obj, calc_mode) -> dict[str, str | 
set(str return out +def _charge_method_from_provenance(provenance_dict: dict | None) -> str: + """Construct normalized charge method tag from charge_provenance dict. + + Maps provenance charge_method to standardized tags matching _normalize_partial_charge_info(): + - am1bcc_at (AM1BCC with AmberTools) + - am1bcc_oe (AM1BCC with OpenEye) + - am1bccelf10_oe (AM1BCC ELF10 with OpenEye) + - nagl_off (NAGL with OpenFF Toolkit) + + For nagl_off, appends the model name if available in provenance. + + Parameters + ---------- + provenance_dict : dict | None + Provenance dict extracted from charge_provenance molprop + + Returns + ------- + str + Normalized method tag, e.g., "nagl_off_openff-gnn-am1bcc-1.0.0.pt" or "am1bccelf10_oe", + or empty string if provenance_dict is None + """ + if not provenance_dict or not isinstance(provenance_dict, dict): + return "" + + charge_method = provenance_dict.get("charge_method", "").lower().strip() + if not charge_method: + return "" + + # Map method names to standardized tags + if "nagl" in charge_method: + nagl_model = provenance_dict.get("nagl_model", "").strip() + if nagl_model: + nagl_model = nagl_model.split("/")[-1].split("\\")[-1] + return f"nagl_off_{nagl_model}" + return "nagl_off" + elif "am1bccelf10" in charge_method or "elf10" in charge_method: + return "am1bccelf10_oe" + elif "am1bcc" in charge_method: + # Determine toolkit backend from provenance + if "ambertools_version" in provenance_dict: + return "am1bcc_at" + elif "oeomega" in provenance_dict or "oequacpac" in provenance_dict: + return "am1bcc_oe" + else: + # Fallback: try to infer from charge_method name + if "ambertools" in charge_method.lower(): + return "am1bcc_at" + else: + return "am1bcc_oe" + else: + # Fallback: normalize any other method name + normalized = re.sub(r"[^a-z0-9._-]+", "_", charge_method).strip("_") + return normalized + + +def _reconcile_component_charges( + ligand_provenance: dict | None, + cofactor_provenance: dict | None, + trans_name: str, + 
state_label: str, +) -> tuple[str, bool]: + """Compare ligand vs cofactor charges and return preferred method with warning flag. + + Logic: + - If both present and differ: issue warning, return ligand method, has_warning=True + - If both present and same: return method, has_warning=False + - If only ligand present: return ligand method, has_warning=False + - If only cofactor present: return cofactor method, has_warning=False + - If neither: return empty string, has_warning=False + + Parameters + ---------- + ligand_provenance : dict | None + Provenance dict for ligand component + cofactor_provenance : dict | None + Provenance dict for cofactor component + trans_name : str + Transformation name for warning message + state_label : str + State label (e.g., "stateA", "stateB") for warning message + + Returns + ------- + tuple[str, bool] + (preferred_charge_method_tag, has_warning) + """ + ligand_method = ( + _charge_method_from_provenance(ligand_provenance) if ligand_provenance else "" + ) + cofactor_method = ( + _charge_method_from_provenance(cofactor_provenance) + if cofactor_provenance + else "" + ) + + if ligand_method and cofactor_method: + if ligand_method != cofactor_method: + warnings.warn( + f"Ligand and cofactor in {trans_name}[{state_label}] have different charges: " + f"ligand={ligand_method}, cofactor={cofactor_method}. 
Using ligand charges.", + category=UserWarning, + ) + return ligand_method, True + else: + # Both present and same + return ligand_method, False + elif ligand_method: + return ligand_method, False + elif cofactor_method: + return cofactor_method, False + else: + return "", False + + def _component_name(component) -> str: molprops = component.get("molprops") or {} if isinstance(molprops, dict): @@ -670,9 +882,51 @@ def _extract_auto_metadata( protein=system_info["proteins"], ) - protocol_info = ProtocolSettingsInfo( - **_build_protocol_settings(trans.protocol, metadata.calculation_mode) + trans_charge_provenance = _extract_charges_from_transformation( + trans, metadata.calculation_mode + ) + + # Detect ligand-cofactor charge mismatches + molprops_charge_methods = {} + for state_key in ("stateA", "stateB"): + charge_method, _ = _reconcile_component_charges( + trans_charge_provenance[state_key]["ligand"], + trans_charge_provenance[state_key]["cofactor"], + trans.name, + state_key, + ) + molprops_charge_methods[state_key] = charge_method + + # Use the first available charge method from molprops (prefer stateA, fallback to stateB) + molprops_charge_method = ( + molprops_charge_methods["stateA"] + or molprops_charge_methods["stateB"] + or None + ) + + # Build protocol settings + protocol_settings_dict = _build_protocol_settings( + trans.protocol, metadata.calculation_mode ) + if molprops_charge_method: # preferred source + protocol_settings_dict["partial_charges"] = molprops_charge_method + elif not protocol_settings_dict.get("partial_charges"): + protocol_settings_dict["partial_charges"] = "TODO" + else: + has_provenance = any( + trans_charge_provenance[state_key]["ligand"] + or trans_charge_provenance[state_key]["cofactor"] + for state_key in ("stateA", "stateB") + ) + if not has_provenance: + warnings.warn( + f"Transformation '{trans.name}' lacks charge_provenance in molprops. " + f"Falling back to protocol settings: {protocol_settings_dict['partial_charges']}. 
" + f"This may indicate molecules were not charged via charge_molecules.py.", + category=UserWarning, + ) + + protocol_info = ProtocolSettingsInfo(**protocol_settings_dict) metadata.system_info_dict[benchmark_set_system].add_protocol_settings( protocol_info, key ) From 2bbeeadea1d16b0cc07e6c9ea16a2353db9af359 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Mon, 22 Jun 2026 11:41:28 -0400 Subject: [PATCH 22/24] Fix alchemical trans ordering --- .../submission.yaml | 22 +++++++++---------- .../scripts/prepare_metadata_submission.py | 10 +++++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 5ce934a..941fefd 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -88,13 +88,12 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length: 5.0 -> 20 - - solvation_settings.solvent_padding: 1 -> 1.5 Applies to 5 edges: - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein=none - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, 
ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - protocol: "RelativeHybridTopologyProtocol" production_time: "20 ns" lambda_windows: "22" @@ -104,12 +103,13 @@ protocol_settings: - lambda_settings.lambda_windows: 11 -> 22 - simulation_settings.n_replicas: 11 -> 22 - simulation_settings.production_length: 5.0 -> 20 + - solvation_settings.solvent_padding: 1 -> 1.5 Applies to 5 edges: - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} - - 
AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein={'unknown'} + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=19charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-6c2af871ad714bf6cda4368eb34fd06a charge_annihilation_set-irak4_s3: ligand_start=28charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-b15bcfadae8a358cee9a7dcb9f5445d6 charge_annihilation_set-egfr: ligand_start=21, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=17charg, ligand_final=none, solvent=none, cofactors=none, protein=none + - AlchemicalNetwork-e516aadebadf1c036b5930cf7838c6d9 charge_annihilation_set-irak4_s2: ligand_start=6, ligand_final=none, solvent=none, cofactors=none, protein=none - protocol: "RelativeHybridTopologyProtocol" notes: | Detailed protocol settings differ: diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 3093480..bfc7136 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -198,7 +198,7 @@ class ProtocolSettingsInfo: small_molecule_forcefield: ( str # mirrors definition in OpenMMSystemGeneratorFFSettings ) - forcefields: set[str] # mirrors definition in OpenMMSystemGeneratorFFSettings + forcefields: tuple[str, ...] 
# sorted tuple for deterministic ordering partial_charges: str lambda_windows: str = "" lambda_schedule: str = "" @@ -604,7 +604,7 @@ def _build_protocol_settings(protocol_obj, calc_mode) -> dict[str, str | set(str ) ffs = forcefield_settings.get("forcefields") if isinstance(ffs, list) and ffs: - out["forcefields"] = set( + out["forcefields"] = tuple( sorted(os.path.splitext(ff.split("/")[1])[0] for ff in ffs) ) @@ -1074,7 +1074,9 @@ def _build_content_summary( (summary_text, list of SystemInfo objects) """ - field_info = "/".join(set(ff for ff_set, _ in metadata.forcefield for ff in ff_set)) + field_info = "/".join( + sorted(set(ff for ff_set, _ in metadata.forcefield for ff in ff_set)) + ) if not field_info: field_info = "an unspecified force field" @@ -1213,7 +1215,7 @@ def _format_identifier(identifier: Any) -> str: if isinstance(identifier, str): return identifier if isinstance(identifier, (list, tuple, set)): - return ", ".join(str(item) for item in identifier) + return ", ".join(str(item) for item in sorted(identifier)) return str(identifier) ordered_settings = sorted( From 375400543435f9ecb4d86454da575b17ea24d129 Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Mon, 22 Jun 2026 11:42:21 -0400 Subject: [PATCH 23/24] Fix nagl charge name --- .../2026-03-18-openmm-840-qa-testing/submission.yaml | 9 +++++---- openfe_benchmarks/scripts/prepare_metadata_submission.py | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml index 941fefd..155f4f0 100644 --- a/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml +++ b/openfe_benchmarks/results/2026-03-18-openmm-840-qa-testing/submission.yaml @@ -8,10 +8,11 @@ title: OpenFE RBFE - Multi-set Benchmark (2 sets, 5 systems) - 2026-03-18-openmm summary: | This submission describes the charge_annihilation_set, jacs_set RBFE benchmark 
(charge_annihilation_set: egfr, irak4_s2, irak4_s3; jacs_set: p38, tyk2) prepared with - ff14SB/tip3p_standard/phosaa10/tip3p_HFE_multivalent for proteins and solvents, and openff-2.3.0 - with nagl_openff-gnn-am1bcc-1.0.0.pt for ligands, solutes, and cofactors. The submission contains 160 edges, 45 unique - ligands. Note this means the charge annihilation sets are not complete compared to what is in that - system and should not be compared to other complete runs due to the missing edges. + ff14SB/phosaa10/tip3p_HFE_multivalent/tip3p_standard for proteins and solvents, and openff-2.3.0 + with nagl_openff-gnn-am1bcc-1.0.0.pt for ligands, solutes, and cofactors. The submission contains + 160 edges, 45 unique ligands. Note this means the charge annihilation sets are not complete compared + to what is in that system and should not be compared to other complete runs due to the missing + edges. # REQUIRED: list of submission tags tags: [rbfe, ff14SB, phosaa10, tip3p_HFE_multivalent, tip3p_standard, charge_annihilation_set, egfr, irak4_s2, irak4_s3, jacs_set, p38, tyk2, nagl_openff-gnn-am1bcc-1.0.0.pt, charge_change, benchmark, openfe, openmm-840] diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index bfc7136..82faeb3 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -662,7 +662,7 @@ def _charge_method_from_provenance(provenance_dict: dict | None) -> str: Returns ------- str - Normalized method tag, e.g., "nagl_off_openff-gnn-am1bcc-1.0.0.pt" or "am1bccelf10_oe", + Normalized method tag, e.g., "nagl_openff-gnn-am1bcc-1.0.0.pt" or "am1bccelf10_oe", or empty string if provenance_dict is None """ if not provenance_dict or not isinstance(provenance_dict, dict): @@ -677,7 +677,7 @@ def _charge_method_from_provenance(provenance_dict: dict | None) -> str: nagl_model = provenance_dict.get("nagl_model", "").strip() if 
nagl_model: nagl_model = nagl_model.split("/")[-1].split("\\")[-1] - return f"nagl_off_{nagl_model}" + return f"nagl_{nagl_model}" return "nagl_off" elif "am1bccelf10" in charge_method or "elf10" in charge_method: return "am1bccelf10_oe" @@ -987,7 +987,7 @@ def _normalize_partial_charge_info(partial_charge_settings: dict) -> str: Returns ------- str - Normalized method tag, e.g., "nagl_off_openff-gnn-am1bcc-1.0.0.pt" or "am1bccelf10_oe". + Normalized method tag, e.g., "nagl_openff-gnn-am1bcc-1.0.0.pt" or "am1bccelf10_oe". """ if not partial_charge_settings or not isinstance(partial_charge_settings, dict): return "" @@ -1003,7 +1003,7 @@ def _normalize_partial_charge_info(partial_charge_settings: dict) -> str: if nagl_model: # Extract just the filename if it's a path nagl_model = nagl_model.split("/")[-1].split("\\")[-1] - return f"nagl_off_{nagl_model}" + return f"nagl_{nagl_model}" return "nagl_off" elif "am1bccelf10" in method or "elf10" in method: return "am1bccelf10_oe" From 9fc3d64b02737ef3c8e0a3b5d50e7090f462d10a Mon Sep 17 00:00:00 2001 From: jaclark5 Date: Mon, 22 Jun 2026 11:55:06 -0400 Subject: [PATCH 24/24] Use partial charge information --- .../scripts/prepare_metadata_submission.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/openfe_benchmarks/scripts/prepare_metadata_submission.py b/openfe_benchmarks/scripts/prepare_metadata_submission.py index 82faeb3..cd058ed 100644 --- a/openfe_benchmarks/scripts/prepare_metadata_submission.py +++ b/openfe_benchmarks/scripts/prepare_metadata_submission.py @@ -1521,8 +1521,12 @@ def _make_submission_yaml( "small_molecule_forcefield", "edges", ) - # make the user add the charges manually as ligand charges might take priority over those in the protocol settings - partial_charges_yaml = "partial_charges: TODO" + partial_charges_yaml = _render_keyed_values_yaml( + "partial_charges", + metadata.partial_charges, + "partial_charges", + "edges", + ) return f"""# REQUIRED: unique, kebab-case 
identifier for this submission submission_id: {submission_id} @@ -1860,7 +1864,7 @@ def process_network( tags_final = _make_tags( mode=mode, forcefield=merged_metadata.forcefield, - partial_charge_tag=[], + partial_charge_tag=merged_metadata.partial_charges, benchmark_data=merged_metadata.benchmark_sets_systems, user_keywords=tags_list, )