From 5230fd58d1ace765886ed8c59a396e314373e551 Mon Sep 17 00:00:00 2001 From: ardunn Date: Wed, 13 May 2020 12:12:05 -0700 Subject: [PATCH 1/7] rm individual pmg requirement and put all pmg requirement onto matminer --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8bf88cb5..b6ef2635 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ # Direct requirements of this project alone matminer==0.6.2 -pymatgen==2020.01.28 xgboost==0.80 tpot==0.11.0 skrebate==0.6 From c2a6ae4ed2d6e6ba26fcc11c61ba274fa105ce7b Mon Sep 17 00:00:00 2001 From: ardunn Date: Tue, 7 Jul 2020 17:01:40 -0700 Subject: [PATCH 2/7] fix automatminer_dev matbench files --- automatminer_dev/matbench/expt_gap.py | 4 ++++ automatminer_dev/matbench/expt_is_metal.py | 4 ++++ automatminer_dev/matbench/steels.py | 2 ++ 3 files changed, 10 insertions(+) diff --git a/automatminer_dev/matbench/expt_gap.py b/automatminer_dev/matbench/expt_gap.py index 9c79b767..c2907651 100644 --- a/automatminer_dev/matbench/expt_gap.py +++ b/automatminer_dev/matbench/expt_gap.py @@ -73,6 +73,10 @@ df_new = df_new.reset_index(drop=True) +# you need to manually change GaAs0.1P0.9G1128 to its correct composition, which +# is GaAs0.1P0.9 from Solid Solutions in Semiconducting Systems.Handbook, +# M., Nauka 1978, 200 p. and was gathered from http://bg.imet-db.ru + store_dataframe_as_json(df_new, "expt_gap.json.gz", compression="gz") print(df_new) diff --git a/automatminer_dev/matbench/expt_is_metal.py b/automatminer_dev/matbench/expt_is_metal.py index 5cafe935..a33c802c 100644 --- a/automatminer_dev/matbench/expt_is_metal.py +++ b/automatminer_dev/matbench/expt_is_metal.py @@ -74,6 +74,10 @@ df_new["is_metal"] = df_new["is_metal"] == 1 +# you need to manually change GaAs0.1P0.9G1128 to its correct composition, which +# is GaAs0.1P0.9 from Solid Solutions in Semiconducting Systems.Handbook, +# M., Nauka 1978, 200 p. 
and was gathered from http://bg.imet-db.ru + store_dataframe_as_json(df_new, "expt_is_metal.json.gz", compression="gz") print(df_new) diff --git a/automatminer_dev/matbench/steels.py b/automatminer_dev/matbench/steels.py index bc0233cb..71a10114 100644 --- a/automatminer_dev/matbench/steels.py +++ b/automatminer_dev/matbench/steels.py @@ -8,6 +8,8 @@ from matminer.datasets.dataset_retrieval import load_dataset + +# Note the units are in MPa, NOT GPa if __name__ == "__main__": df = load_dataset("steel_strength") df = df[["formula", "yield strength"]] From 20723109cabc1ad7a03b2ff613ca876596b4e966 Mon Sep 17 00:00:00 2001 From: ardunn Date: Tue, 7 Jul 2020 18:36:26 -0700 Subject: [PATCH 3/7] update docs --- docs/_sources/datasets.rst.txt | 2 +- docs/automatminer.preprocessing.html | 2 +- docs/datasets.html | 2 +- docs/searchindex.js | 2 +- docs/source/datasets.rst | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/_sources/datasets.rst.txt b/docs/_sources/datasets.rst.txt index 6f52dcd9..c74ec98a 100644 --- a/docs/_sources/datasets.rst.txt +++ b/docs/_sources/datasets.rst.txt @@ -131,7 +131,7 @@ procedures, etc.) on a dataset with :code:`matminer.datasets.get_all_dataset_inf Description: Matbench v0.1 dataset for predicting steel yield strengths from chemical composition alone. Retrieved from Citrine informatics. Deduplicated. Columns: composition: Chemical formula. - yield strength: Target variable. Experimentally measured steel yield strengths, in GPa. + yield strength: Target variable. Experimentally measured steel yield strengths, in MPa. 
Num Entries: 312 Reference: https://citrination.com/datasets/153092/ Bibtex citations: ['@misc{Citrine Informatics,\ntitle = {Mechanical properties of some steels},\nhowpublished = {\\url{https://citrination.com/datasets/153092/},\n}'] diff --git a/docs/automatminer.preprocessing.html b/docs/automatminer.preprocessing.html index 55e7ea82..c77af6b1 100644 --- a/docs/automatminer.preprocessing.html +++ b/docs/automatminer.preprocessing.html @@ -554,7 +554,7 @@

Submodules diff --git a/docs/datasets.html b/docs/datasets.html index e8beba2a..a41c8bf5 100644 --- a/docs/datasets.html +++ b/docs/datasets.html @@ -205,7 +205,7 @@

Getting dataset info Date: Mon, 10 Aug 2020 12:09:21 -0700 Subject: [PATCH 4/7] fix support forum --- docs/_sources/index.rst.txt | 2 +- docs/index.html | 2 +- docs/source/index.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt index e460e525..b41163ad 100644 --- a/docs/_sources/index.rst.txt +++ b/docs/_sources/index.rst.txt @@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are: - Contribute code! You can do this by forking `Automatminer on Github `_ and submitting a pull request. -- Post to our `support forum `_. Don't be shy, we look forward to feedback! +- Post to our `support forum `_. Don't be shy, we look forward to feedback! See our `contribution guidelines `_ diff --git a/docs/index.html b/docs/index.html index a877f7d8..2cd37dc6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -210,7 +210,7 @@

Contributing / Contact / SupportAutomatminer on Github and submitting a pull request.

-
  • Post to our support forum. Don’t be shy, we look forward to feedback!

  • +
  • Post to our support forum. Don’t be shy, we look forward to feedback!

  • See our contribution guidelines for more inspect. For a list of contributors, see our diff --git a/docs/source/index.rst b/docs/source/index.rst index e460e525..b41163ad 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are: - Contribute code! You can do this by forking `Automatminer on Github `_ and submitting a pull request. -- Post to our `support forum `_. Don't be shy, we look forward to feedback! +- Post to our `support forum `_. Don't be shy, we look forward to feedback! See our `contribution guidelines `_ From 4fb77dc0561097d1530611570dcd5009a09027df Mon Sep 17 00:00:00 2001 From: ardunn Date: Thu, 10 Sep 2020 20:54:06 -0700 Subject: [PATCH 5/7] update dev --- automatminer_dev/config.py | 13 ++ .../matbench/dataset_creation/__init__.py | 0 .../{ => dataset_creation}/castelli.py | 0 .../{ => dataset_creation}/dielectric.py | 0 .../{ => dataset_creation}/expt_gap.py | 0 .../{ => dataset_creation}/expt_is_metal.py | 0 .../matbench/{ => dataset_creation}/glass.py | 0 .../matbench/{ => dataset_creation}/jdft2d.py | 0 .../{ => dataset_creation}/mp_eform.py | 0 .../{ => dataset_creation}/mp_elasticity.py | 0 .../{ => dataset_creation}/mp_gaps.py | 0 .../{ => dataset_creation}/phonons.py | 0 .../matbench/{ => dataset_creation}/steels.py | 0 automatminer_dev/matbench/docs/__init__.py | 0 .../formatting_matbench_table.py} | 8 + .../matbench/mpcontribs/__init__.py | 0 .../matbench/mpcontribs/upload.py | 138 ++++++++++++++++++ requirements_dev.txt | 2 + 18 files changed, 161 insertions(+) create mode 100644 automatminer_dev/matbench/dataset_creation/__init__.py rename automatminer_dev/matbench/{ => dataset_creation}/castelli.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/dielectric.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/expt_gap.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/expt_is_metal.py (100%) 
rename automatminer_dev/matbench/{ => dataset_creation}/glass.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/jdft2d.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/mp_eform.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/mp_elasticity.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/mp_gaps.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/phonons.py (100%) rename automatminer_dev/matbench/{ => dataset_creation}/steels.py (100%) create mode 100644 automatminer_dev/matbench/docs/__init__.py rename automatminer_dev/matbench/{get_info.py => docs/formatting_matbench_table.py} (87%) create mode 100644 automatminer_dev/matbench/mpcontribs/__init__.py create mode 100644 automatminer_dev/matbench/mpcontribs/upload.py diff --git a/automatminer_dev/config.py b/automatminer_dev/config.py index 2ce6044b..01d522ed 100644 --- a/automatminer_dev/config.py +++ b/automatminer_dev/config.py @@ -29,6 +29,7 @@ "target": "log10(K_VRH)", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": None, } LOG_GVRH = { @@ -37,6 +38,7 @@ "target": "log10(G_VRH)", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": None, } DIELECTRIC = { @@ -45,6 +47,7 @@ "target": "n", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": None, } JDFT2D = { @@ -53,6 +56,7 @@ "target": "exfoliation_en", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "meV/atom" } MP_GAP = { @@ -61,6 +65,7 @@ "target": "gap pbe", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV" } MP_IS_METAL = { @@ -69,6 +74,7 @@ "target": "is_metal", "problem_type": AMM_CLF_NAME, "clf_pos_label": True, + "unit": None } MP_E_FORM = { @@ -77,6 +83,7 @@ "target": "e_form", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV/atom" } PEROVSKITES = { @@ -85,6 +92,7 @@ "target": "e_form", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV" } GLASS = { @@ -93,6 +101,7 @@ 
"target": "gfa", "problem_type": AMM_CLF_NAME, "clf_pos_label": True, + "unit": None } EXPT_IS_METAL = { @@ -101,6 +110,7 @@ "target": "is_metal", "problem_type": AMM_CLF_NAME, "clf_pos_label": True, + "unit": None } EXPT_GAP = { @@ -109,6 +119,7 @@ "target": "gap expt", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "eV" } PHONONS = { @@ -117,6 +128,7 @@ "target": "last phdos peak", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "cm^-1" } STEELS = { @@ -125,6 +137,7 @@ "target": "yield strength", "problem_type": AMM_REG_NAME, "clf_pos_label": None, + "unit": "MPa" } BENCHMARK_DEBUG_SET = [JDFT2D, PHONONS, EXPT_IS_METAL, STEELS] diff --git a/automatminer_dev/matbench/dataset_creation/__init__.py b/automatminer_dev/matbench/dataset_creation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automatminer_dev/matbench/castelli.py b/automatminer_dev/matbench/dataset_creation/castelli.py similarity index 100% rename from automatminer_dev/matbench/castelli.py rename to automatminer_dev/matbench/dataset_creation/castelli.py diff --git a/automatminer_dev/matbench/dielectric.py b/automatminer_dev/matbench/dataset_creation/dielectric.py similarity index 100% rename from automatminer_dev/matbench/dielectric.py rename to automatminer_dev/matbench/dataset_creation/dielectric.py diff --git a/automatminer_dev/matbench/expt_gap.py b/automatminer_dev/matbench/dataset_creation/expt_gap.py similarity index 100% rename from automatminer_dev/matbench/expt_gap.py rename to automatminer_dev/matbench/dataset_creation/expt_gap.py diff --git a/automatminer_dev/matbench/expt_is_metal.py b/automatminer_dev/matbench/dataset_creation/expt_is_metal.py similarity index 100% rename from automatminer_dev/matbench/expt_is_metal.py rename to automatminer_dev/matbench/dataset_creation/expt_is_metal.py diff --git a/automatminer_dev/matbench/glass.py b/automatminer_dev/matbench/dataset_creation/glass.py similarity index 100% rename from 
automatminer_dev/matbench/glass.py rename to automatminer_dev/matbench/dataset_creation/glass.py diff --git a/automatminer_dev/matbench/jdft2d.py b/automatminer_dev/matbench/dataset_creation/jdft2d.py similarity index 100% rename from automatminer_dev/matbench/jdft2d.py rename to automatminer_dev/matbench/dataset_creation/jdft2d.py diff --git a/automatminer_dev/matbench/mp_eform.py b/automatminer_dev/matbench/dataset_creation/mp_eform.py similarity index 100% rename from automatminer_dev/matbench/mp_eform.py rename to automatminer_dev/matbench/dataset_creation/mp_eform.py diff --git a/automatminer_dev/matbench/mp_elasticity.py b/automatminer_dev/matbench/dataset_creation/mp_elasticity.py similarity index 100% rename from automatminer_dev/matbench/mp_elasticity.py rename to automatminer_dev/matbench/dataset_creation/mp_elasticity.py diff --git a/automatminer_dev/matbench/mp_gaps.py b/automatminer_dev/matbench/dataset_creation/mp_gaps.py similarity index 100% rename from automatminer_dev/matbench/mp_gaps.py rename to automatminer_dev/matbench/dataset_creation/mp_gaps.py diff --git a/automatminer_dev/matbench/phonons.py b/automatminer_dev/matbench/dataset_creation/phonons.py similarity index 100% rename from automatminer_dev/matbench/phonons.py rename to automatminer_dev/matbench/dataset_creation/phonons.py diff --git a/automatminer_dev/matbench/steels.py b/automatminer_dev/matbench/dataset_creation/steels.py similarity index 100% rename from automatminer_dev/matbench/steels.py rename to automatminer_dev/matbench/dataset_creation/steels.py diff --git a/automatminer_dev/matbench/docs/__init__.py b/automatminer_dev/matbench/docs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automatminer_dev/matbench/get_info.py b/automatminer_dev/matbench/docs/formatting_matbench_table.py similarity index 87% rename from automatminer_dev/matbench/get_info.py rename to automatminer_dev/matbench/docs/formatting_matbench_table.py index 8a14ecfe..bd1ee747 100644 --- 
a/automatminer_dev/matbench/get_info.py +++ b/automatminer_dev/matbench/docs/formatting_matbench_table.py @@ -1,4 +1,12 @@ from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets, get_all_dataset_info + + +''' + +Helper function to format matbench documentation page. +''' + + datasets = get_available_datasets(print_format=None) for dataset in datasets: diff --git a/automatminer_dev/matbench/mpcontribs/__init__.py b/automatminer_dev/matbench/mpcontribs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py new file mode 100644 index 00000000..fda5d3cc --- /dev/null +++ b/automatminer_dev/matbench/mpcontribs/upload.py @@ -0,0 +1,138 @@ +import wget, json, os +from string import capwords +from pybtex.database import parse_string +import pybtex.errors +from mpcontribs.client import Client +from pymatgen import MPRester +import tqdm + + +# from matminer.datasets.dataset_retrieval import ( +# get_all_dataset_info, +# get_available_datasets, +# load_dataset, +# ) + +from matminer.datasets import load_dataset + +from automatminer_dev.config import DIELECTRIC + + +pybtex.errors.set_strict_mode(False) +api_key = os.environ["MPCONTRIBS_API_KEY"] +client = Client(api_key, host='ml-api.materialsproject.cloud') +mprester = MPRester() + + +# client.get_project("matbench_steels").pretty() + + +fn = 'dataset_metadata.json' +if not os.path.exists(fn): + wget.download(f'https://raw.githubusercontent.com/hackingmaterials/matminer/master/matminer/datasets/{fn}') +metadata = json.load(open(fn, 'r')) +metadata = {k: d for k, d in metadata.items() if "matbench" in k} + + + +# Creating new projects +####################### +# todo: might not have access to add new projects +# for name, info in metadata.items(): +# if "phonons" not in name: +# continue +# +# print(f"Uploading {name}") +# +# columns = {} +# for col, text in info['columns'].items(): +# k 
= col.replace('_', '|').replace('-', '|').replace('(', ' ').replace( +# ')', '') +# columns[k] = text +# +# project = { +# 'is_public': True, +# 'owner': 'ardunn@lbl.gov', +# "name": name, +# 'title': name, # TODO update and set long_title +# 'authors': 'A. Dunn, A. Jain', +# 'description': info['description'], +# 'other': { +# 'columns': columns, +# 'entries': info['num_entries'] +# }, +# 'references': [] +# } +# +# for ref in info['bibtex_refs']: +# +# if name == "matbench_phonons": +# ref = ref.replace( +# "petretto_dwaraknath_miranda_winston_giantomassi_rignanese_van setten_gonze_persson_hautier_2018", +# "petretto2018") +# +# bib = parse_string(ref, 'bibtex') +# for key, entry in bib.entries.items(): +# key_is_doi = key.startswith('doi:') +# url = 'https://doi.org/' + key.split(':', 1)[ +# -1] if key_is_doi else entry.fields.get('url') +# k = 'Zhuo2018' if key_is_doi else capwords(key.replace('_', '')) +# if k.startswith('C2'): +# k = 'Castelli2012' +# elif k.startswith('Landolt'): +# k = 'LB1997' +# elif k == 'Citrine': +# url = 'https://www.citrination.com' +# +# if len(k) > 8: +# k = k[:4] + k[-4:] +# project['references'].append({"label": k, "url": url}) +# +# try: +# print(client.projects.create_entry(project=project).result()) +# except Exception as ex: +# print( +# ex) # TODO should use get_entry to check existence -> use update_entry if project exists + + + + +# Entering all contributions to projects +######################################## + +LIMIT = 100 + + + +ds_config = DIELECTRIC +name = "matbench_" + ds_config["name"] +client.delete_contributions(name) +print(f"Loading {name}") +df = load_dataset(name) +target = ds_config["target"] +unit = f" {ds_config['unit']}" if ds_config["unit"] else "" + + +#todo: PROBLEMATIC ENTRY IS DIELECTRIC INDEX 105 + +# df = df.iloc[104:] + +chunks = (df.shape[0] - 1) // LIMIT + 1 +for j in range(chunks): + print(f"\tBatch {j} of {chunks}") + batch = df.iloc[j * LIMIT:(j + 1) * LIMIT] + + contributions = [] + + for 
i, row in enumerate(batch.iterrows()): + entry = row[1] + contrib = {'project': name, 'is_public': True, 'structures': []} + s = entry.loc["structure"] + c = s.composition.get_integer_formula_and_factor()[0] + identifier = f"mb-{ds_config['name']}-{(j - 1) * LIMIT + i}" + contrib["identifier"] = identifier + contrib["data"] = {target: f"{entry.loc[target]}{unit}"} + contrib["formula"] = c + contrib["structures"].append(s) + contributions.append(contrib) + client.submit_contributions(contributions) \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt index 19506027..37ca2ad9 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -7,3 +7,5 @@ isort==4.3.21 pre-commit==1.18.3 paramiko==2.6.0 scp==0.13.2 +mpcontribs-client==3.3.0 +wget==3.2 \ No newline at end of file From 6c1ce01d0de9e6d6ae2d2d23c0c2ac043ff45c36 Mon Sep 17 00:00:00 2001 From: ardunn Date: Thu, 17 Sep 2020 16:51:10 -0700 Subject: [PATCH 6/7] update mpcontribs upload --- automatminer_dev/config.py | 14 ++ .../matbench/mpcontribs/upload.py | 138 ++++++++++++++---- 2 files changed, 126 insertions(+), 26 deletions(-) diff --git a/automatminer_dev/config.py b/automatminer_dev/config.py index 01d522ed..945b8c43 100644 --- a/automatminer_dev/config.py +++ b/automatminer_dev/config.py @@ -156,3 +156,17 @@ STEELS, PHONONS, ] + +HAS_STRUCTURE = [ + LOG_KVRH, + LOG_GVRH, + DIELECTRIC, + JDFT2D, + MP_GAP, + MP_IS_METAL, + MP_E_FORM, + PEROVSKITES, + PHONONS +] + +BENCHMARK_DICT = {ds["name"]: ds for ds in BENCHMARK_FULL_SET} \ No newline at end of file diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py index fda5d3cc..0d576f4d 100644 --- a/automatminer_dev/matbench/mpcontribs/upload.py +++ b/automatminer_dev/matbench/mpcontribs/upload.py @@ -1,11 +1,11 @@ -import wget, json, os +import wget, json, os, math from string import capwords from pybtex.database import parse_string import pybtex.errors from mpcontribs.client 
import Client -from pymatgen import MPRester +from pymatgen import MPRester, Structure import tqdm - +import pprint # from matminer.datasets.dataset_retrieval import ( # get_all_dataset_info, @@ -15,7 +15,7 @@ from matminer.datasets import load_dataset -from automatminer_dev.config import DIELECTRIC +from automatminer_dev.config import DIELECTRIC, JDFT2D, PEROVSKITES, STEELS, BENCHMARK_FULL_SET, BENCHMARK_DICT, HAS_STRUCTURE pybtex.errors.set_strict_mode(False) @@ -39,8 +39,6 @@ ####################### # todo: might not have access to add new projects # for name, info in metadata.items(): -# if "phonons" not in name: -# continue # # print(f"Uploading {name}") # @@ -97,42 +95,130 @@ + +# Map of canonical yet non-mpcontribs-compatible target names to compatible (unicode, no punctuation) target names +target_map = { + "yield strength": "σᵧ", + "log10(K_VRH)": "log₁₀Kᵛʳʰ", + "log10(G_VRH)": "log₁₀Gᵛʳʰ", + "n": "𝑛", + "exfoliation_en": "Eˣ", + "gap pbe": "Eᵍ", + "is_metal": "metallic", + "e_form": "Eᶠ", + "gfa": "glass", + "gap expt": "Eᵍ", + "last phdos peak": "ωᵐᵃˣ", +} + + +# # Getting project-level metadata in order +# ######################################### +# +# # Add warning to mpcontribs since the results will be stored out of order. +# # Also, fix columns for new mpcontribs deployment +# for name, info in metadata.items(): +# mb_shortname = name.replace("matbench_", "") +# +# description = info["description"] + f" If you are viewing this on MPContribs-ML interactively, please ensure the order of the identifiers is sequential (mb-{mb_shortname}-0001, mb-{mb_shortname}-0002, etc.) before benchmarking." 
+# if "For benchmarking" not in description: +# print(name, description) +# +# has_structure = mb_shortname in [ds["name"] for ds in HAS_STRUCTURE] +# primitive_key = "structure" if has_structure else "composition" +# target = BENCHMARK_DICT[mb_shortname]["target"] +# +# print(client.projects.update_entry( +# pk=name, +# project={ +# "description": description, +# 'other.columns': { +# target_map[target]: metadata[name]["columns"][target], +# primitive_key: metadata[name]["columns"][primitive_key] +# } +# }).result()) + + + + # Entering all contributions to projects ######################################## -LIMIT = 100 + +# steels.........X +# log_kvrh....... +# log_gvrh....... +# dielectric..... +# jdft2d.........X +# expt_gap.......X +# expt_is_metal..X +# phonons........ +# mp_is_metal.... +# mp_gap......... +# glass..........X +# mp_e_form...... +# perovskites.... + -ds_config = DIELECTRIC +ds_config = BENCHMARK_DICT["jdft2d"] + name = "matbench_" + ds_config["name"] -client.delete_contributions(name) print(f"Loading {name}") df = load_dataset(name) target = ds_config["target"] unit = f" {ds_config['unit']}" if ds_config["unit"] else "" -#todo: PROBLEMATIC ENTRY IS DIELECTRIC INDEX 105 +# print(f"Updating 'other' column entries of {name} with unicode.") +# print(client.projects.update_entry(pk=name, project={ +# 'other.columns': { +# target_map[target]: metadata[name]["columns"][target], +# "structure": metadata[name]["columns"]["structure"] +# # "composition": metadata[name]["columns"]["composition"] +# } +# }).result()) + + + +print(f"Deleting contributions of {name}") +client.delete_contributions(name) + -# df = df.iloc[104:] -chunks = (df.shape[0] - 1) // LIMIT + 1 -for j in range(chunks): - print(f"\tBatch {j} of {chunks}") - batch = df.iloc[j * LIMIT:(j + 1) * LIMIT] +print(f"Assembling and uploading contributions for {name}") +structure_filename = "/Users/ardunn/Downloads/outfile.cif" +contributions = [] +id_prefix = df.shape[0] - contributions = [] - 
for i, row in enumerate(batch.iterrows()): - entry = row[1] - contrib = {'project': name, 'is_public': True, 'structures': []} +id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1 + +df = df.iloc[:2] + +for i, row in tqdm.tqdm(enumerate(df.iterrows())): + entry = row[1] + contrib = {'project': name, 'is_public': True} + + if "structure" in entry.index: + structures = [] s = entry.loc["structure"] + s.to("cif", structure_filename) + s = Structure.from_file(structure_filename) c = s.composition.get_integer_formula_and_factor()[0] - identifier = f"mb-{ds_config['name']}-{(j - 1) * LIMIT + i}" - contrib["identifier"] = identifier - contrib["data"] = {target: f"{entry.loc[target]}{unit}"} - contrib["formula"] = c - contrib["structures"].append(s) - contributions.append(contrib) - client.submit_contributions(contributions) \ No newline at end of file + contrib["structures"] = [s] + + else: + c = entry["composition"] + + id_number = f"{i+1:0{id_n_zeros}d}" + identifier = f"mb-{ds_config['name']}-{id_number}" + contrib["identifier"] = identifier + + contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"} + contrib["formula"] = c + contributions.append(contrib) + +pprint.pprint(contributions) +client.submit_contributions(contributions) \ No newline at end of file From 5baa4ef2b355fa31344a8371dc488a4ad05571e4 Mon Sep 17 00:00:00 2001 From: ardunn Date: Mon, 21 Sep 2020 20:07:22 -0700 Subject: [PATCH 7/7] update upload to mpcontribs --- .../matbench/mpcontribs/upload.py | 87 +++++++++---------- 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py index 0d576f4d..bca132a7 100644 --- a/automatminer_dev/matbench/mpcontribs/upload.py +++ b/automatminer_dev/matbench/mpcontribs/upload.py @@ -160,65 +160,60 @@ # perovskites.... 
+for ds in ["dielectric", "phonons", "mp_gap", "mp_is_metal", "perovskites", "mp_e_form"]: + ds_config = BENCHMARK_DICT[ds] -ds_config = BENCHMARK_DICT["jdft2d"] + name = "matbench_" + ds_config["name"] + print(f"Loading {name}") + df = load_dataset(name) + target = ds_config["target"] + unit = f" {ds_config['unit']}" if ds_config["unit"] else "" -name = "matbench_" + ds_config["name"] -print(f"Loading {name}") -df = load_dataset(name) -target = ds_config["target"] -unit = f" {ds_config['unit']}" if ds_config["unit"] else "" - -# print(f"Updating 'other' column entries of {name} with unicode.") -# print(client.projects.update_entry(pk=name, project={ -# 'other.columns': { -# target_map[target]: metadata[name]["columns"][target], -# "structure": metadata[name]["columns"]["structure"] -# # "composition": metadata[name]["columns"]["composition"] -# } -# }).result()) - - - -print(f"Deleting contributions of {name}") -client.delete_contributions(name) + # print(f"Updating 'other' column entries of {name} with unicode.") + # print(client.projects.update_entry(pk=name, project={ + # 'other.columns': { + # target_map[target]: metadata[name]["columns"][target], + # "structure": metadata[name]["columns"]["structure"] + # # "composition": metadata[name]["columns"]["composition"] + # } + # }).result()) -print(f"Assembling and uploading contributions for {name}") -structure_filename = "/Users/ardunn/Downloads/outfile.cif" -contributions = [] -id_prefix = df.shape[0] + # print(f"Deleting contributions of {name}") + # client.delete_contributions(name) -id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1 + print(f"Assembling and uploading contributions for {name}") + structure_filename = "/Users/ardunn/Downloads/outfile.cif" + contributions = [] + id_prefix = df.shape[0] -df = df.iloc[:2] -for i, row in tqdm.tqdm(enumerate(df.iterrows())): - entry = row[1] - contrib = {'project': name, 'is_public': True} + id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1 + for i, row in 
tqdm.tqdm(enumerate(df.iterrows())): + entry = row[1] + contrib = {'project': name, 'is_public': True} - if "structure" in entry.index: - structures = [] - s = entry.loc["structure"] - s.to("cif", structure_filename) - s = Structure.from_file(structure_filename) - c = s.composition.get_integer_formula_and_factor()[0] - contrib["structures"] = [s] + if "structure" in entry.index: + structures = [] + s = entry.loc["structure"] + s.to("cif", structure_filename) + s = Structure.from_file(structure_filename) + c = s.composition.get_integer_formula_and_factor()[0] + contrib["structures"] = [s] - else: - c = entry["composition"] + else: + c = entry["composition"] - id_number = f"{i+1:0{id_n_zeros}d}" - identifier = f"mb-{ds_config['name']}-{id_number}" - contrib["identifier"] = identifier + id_number = f"{i+1:0{id_n_zeros}d}" + identifier = f"mb-{ds_config['name']}-{id_number}" + contrib["identifier"] = identifier - contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"} - contrib["formula"] = c - contributions.append(contrib) + contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"} + contrib["formula"] = c + contributions.append(contrib) -pprint.pprint(contributions) -client.submit_contributions(contributions) \ No newline at end of file + client.submit_contributions(contributions, per_page=10) \ No newline at end of file