From 5230fd58d1ace765886ed8c59a396e314373e551 Mon Sep 17 00:00:00 2001
From: ardunn
Date: Wed, 13 May 2020 12:12:05 -0700
Subject: [PATCH 1/7] rm individual pmg requirement and put all pmg requirement
onto matminer
---
requirements.txt | 1 -
1 file changed, 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 8bf88cb5..b6ef2635 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
# Direct requirements of this project alone
matminer==0.6.2
-pymatgen==2020.01.28
xgboost==0.80
tpot==0.11.0
skrebate==0.6
From c2a6ae4ed2d6e6ba26fcc11c61ba274fa105ce7b Mon Sep 17 00:00:00 2001
From: ardunn
Date: Tue, 7 Jul 2020 17:01:40 -0700
Subject: [PATCH 2/7] fix automatminer_dev matbench files
---
automatminer_dev/matbench/expt_gap.py | 4 ++++
automatminer_dev/matbench/expt_is_metal.py | 4 ++++
automatminer_dev/matbench/steels.py | 2 ++
3 files changed, 10 insertions(+)
diff --git a/automatminer_dev/matbench/expt_gap.py b/automatminer_dev/matbench/expt_gap.py
index 9c79b767..c2907651 100644
--- a/automatminer_dev/matbench/expt_gap.py
+++ b/automatminer_dev/matbench/expt_gap.py
@@ -73,6 +73,10 @@
df_new = df_new.reset_index(drop=True)
+# NOTE: the entry "GaAs0.1P0.9G1128" must be manually changed to its correct
+# composition, GaAs0.1P0.9 (source: Solid Solutions in Semiconducting Systems,
+# Handbook, M., Nauka 1978, 200 p.; data gathered from http://bg.imet-db.ru).
+
store_dataframe_as_json(df_new, "expt_gap.json.gz", compression="gz")
print(df_new)
diff --git a/automatminer_dev/matbench/expt_is_metal.py b/automatminer_dev/matbench/expt_is_metal.py
index 5cafe935..a33c802c 100644
--- a/automatminer_dev/matbench/expt_is_metal.py
+++ b/automatminer_dev/matbench/expt_is_metal.py
@@ -74,6 +74,10 @@
df_new["is_metal"] = df_new["is_metal"] == 1
+# NOTE: the entry "GaAs0.1P0.9G1128" must be manually changed to its correct
+# composition, GaAs0.1P0.9 (source: Solid Solutions in Semiconducting Systems,
+# Handbook, M., Nauka 1978, 200 p.; data gathered from http://bg.imet-db.ru).
+
store_dataframe_as_json(df_new, "expt_is_metal.json.gz", compression="gz")
print(df_new)
diff --git a/automatminer_dev/matbench/steels.py b/automatminer_dev/matbench/steels.py
index bc0233cb..71a10114 100644
--- a/automatminer_dev/matbench/steels.py
+++ b/automatminer_dev/matbench/steels.py
@@ -8,6 +8,8 @@
from matminer.datasets.dataset_retrieval import load_dataset
+
+# Note the units are in MPa, NOT GPa
if __name__ == "__main__":
df = load_dataset("steel_strength")
df = df[["formula", "yield strength"]]
From 20723109cabc1ad7a03b2ff613ca876596b4e966 Mon Sep 17 00:00:00 2001
From: ardunn
Date: Tue, 7 Jul 2020 18:36:26 -0700
Subject: [PATCH 3/7] update docs
---
docs/_sources/datasets.rst.txt | 2 +-
docs/automatminer.preprocessing.html | 2 +-
docs/datasets.html | 2 +-
docs/searchindex.js | 2 +-
docs/source/datasets.rst | 2 +-
5 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/docs/_sources/datasets.rst.txt b/docs/_sources/datasets.rst.txt
index 6f52dcd9..c74ec98a 100644
--- a/docs/_sources/datasets.rst.txt
+++ b/docs/_sources/datasets.rst.txt
@@ -131,7 +131,7 @@ procedures, etc.) on a dataset with :code:`matminer.datasets.get_all_dataset_inf
Description: Matbench v0.1 dataset for predicting steel yield strengths from chemical composition alone. Retrieved from Citrine informatics. Deduplicated.
Columns:
composition: Chemical formula.
- yield strength: Target variable. Experimentally measured steel yield strengths, in GPa.
+ yield strength: Target variable. Experimentally measured steel yield strengths, in MPa.
Num Entries: 312
Reference: https://citrination.com/datasets/153092/
Bibtex citations: ['@misc{Citrine Informatics,\ntitle = {Mechanical properties of some steels},\nhowpublished = {\\url{https://citrination.com/datasets/153092/},\n}']
diff --git a/docs/automatminer.preprocessing.html b/docs/automatminer.preprocessing.html
index 55e7ea82..c77af6b1 100644
--- a/docs/automatminer.preprocessing.html
+++ b/docs/automatminer.preprocessing.html
@@ -554,7 +554,7 @@ Submodules
-Post to our support forum. Don’t be shy, we look forward to feedback!
+Post to our support forum. Don’t be shy, we look forward to feedback!
See our contribution guidelines
for more inspect. For a list of contributors, see our
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e460e525..b41163ad 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -150,7 +150,7 @@ Want to see something added or changed? Some ways to get involved are:
- Contribute code! You can do this by forking
`Automatminer on Github `_
and submitting a pull request.
-- Post to our `support forum `_. Don't be shy, we look forward to feedback!
+- Post to our `support forum `_. Don't be shy, we look forward to feedback!
See our `contribution guidelines
`_
From 4fb77dc0561097d1530611570dcd5009a09027df Mon Sep 17 00:00:00 2001
From: ardunn
Date: Thu, 10 Sep 2020 20:54:06 -0700
Subject: [PATCH 5/7] update dev
---
automatminer_dev/config.py | 13 ++
.../matbench/dataset_creation/__init__.py | 0
.../{ => dataset_creation}/castelli.py | 0
.../{ => dataset_creation}/dielectric.py | 0
.../{ => dataset_creation}/expt_gap.py | 0
.../{ => dataset_creation}/expt_is_metal.py | 0
.../matbench/{ => dataset_creation}/glass.py | 0
.../matbench/{ => dataset_creation}/jdft2d.py | 0
.../{ => dataset_creation}/mp_eform.py | 0
.../{ => dataset_creation}/mp_elasticity.py | 0
.../{ => dataset_creation}/mp_gaps.py | 0
.../{ => dataset_creation}/phonons.py | 0
.../matbench/{ => dataset_creation}/steels.py | 0
automatminer_dev/matbench/docs/__init__.py | 0
.../formatting_matbench_table.py} | 8 +
.../matbench/mpcontribs/__init__.py | 0
.../matbench/mpcontribs/upload.py | 138 ++++++++++++++++++
requirements_dev.txt | 2 +
18 files changed, 161 insertions(+)
create mode 100644 automatminer_dev/matbench/dataset_creation/__init__.py
rename automatminer_dev/matbench/{ => dataset_creation}/castelli.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/dielectric.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/expt_gap.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/expt_is_metal.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/glass.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/jdft2d.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/mp_eform.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/mp_elasticity.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/mp_gaps.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/phonons.py (100%)
rename automatminer_dev/matbench/{ => dataset_creation}/steels.py (100%)
create mode 100644 automatminer_dev/matbench/docs/__init__.py
rename automatminer_dev/matbench/{get_info.py => docs/formatting_matbench_table.py} (87%)
create mode 100644 automatminer_dev/matbench/mpcontribs/__init__.py
create mode 100644 automatminer_dev/matbench/mpcontribs/upload.py
diff --git a/automatminer_dev/config.py b/automatminer_dev/config.py
index 2ce6044b..01d522ed 100644
--- a/automatminer_dev/config.py
+++ b/automatminer_dev/config.py
@@ -29,6 +29,7 @@
"target": "log10(K_VRH)",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": None,
}
LOG_GVRH = {
@@ -37,6 +38,7 @@
"target": "log10(G_VRH)",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": None,
}
DIELECTRIC = {
@@ -45,6 +47,7 @@
"target": "n",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": None,
}
JDFT2D = {
@@ -53,6 +56,7 @@
"target": "exfoliation_en",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "meV/atom"
}
MP_GAP = {
@@ -61,6 +65,7 @@
"target": "gap pbe",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV"
}
MP_IS_METAL = {
@@ -69,6 +74,7 @@
"target": "is_metal",
"problem_type": AMM_CLF_NAME,
"clf_pos_label": True,
+ "unit": None
}
MP_E_FORM = {
@@ -77,6 +83,7 @@
"target": "e_form",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV/atom"
}
PEROVSKITES = {
@@ -85,6 +92,7 @@
"target": "e_form",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV"
}
GLASS = {
@@ -93,6 +101,7 @@
"target": "gfa",
"problem_type": AMM_CLF_NAME,
"clf_pos_label": True,
+ "unit": None
}
EXPT_IS_METAL = {
@@ -101,6 +110,7 @@
"target": "is_metal",
"problem_type": AMM_CLF_NAME,
"clf_pos_label": True,
+ "unit": None
}
EXPT_GAP = {
@@ -109,6 +119,7 @@
"target": "gap expt",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "eV"
}
PHONONS = {
@@ -117,6 +128,7 @@
"target": "last phdos peak",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "cm^-1"
}
STEELS = {
@@ -125,6 +137,7 @@
"target": "yield strength",
"problem_type": AMM_REG_NAME,
"clf_pos_label": None,
+ "unit": "MPa"
}
BENCHMARK_DEBUG_SET = [JDFT2D, PHONONS, EXPT_IS_METAL, STEELS]
diff --git a/automatminer_dev/matbench/dataset_creation/__init__.py b/automatminer_dev/matbench/dataset_creation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/automatminer_dev/matbench/castelli.py b/automatminer_dev/matbench/dataset_creation/castelli.py
similarity index 100%
rename from automatminer_dev/matbench/castelli.py
rename to automatminer_dev/matbench/dataset_creation/castelli.py
diff --git a/automatminer_dev/matbench/dielectric.py b/automatminer_dev/matbench/dataset_creation/dielectric.py
similarity index 100%
rename from automatminer_dev/matbench/dielectric.py
rename to automatminer_dev/matbench/dataset_creation/dielectric.py
diff --git a/automatminer_dev/matbench/expt_gap.py b/automatminer_dev/matbench/dataset_creation/expt_gap.py
similarity index 100%
rename from automatminer_dev/matbench/expt_gap.py
rename to automatminer_dev/matbench/dataset_creation/expt_gap.py
diff --git a/automatminer_dev/matbench/expt_is_metal.py b/automatminer_dev/matbench/dataset_creation/expt_is_metal.py
similarity index 100%
rename from automatminer_dev/matbench/expt_is_metal.py
rename to automatminer_dev/matbench/dataset_creation/expt_is_metal.py
diff --git a/automatminer_dev/matbench/glass.py b/automatminer_dev/matbench/dataset_creation/glass.py
similarity index 100%
rename from automatminer_dev/matbench/glass.py
rename to automatminer_dev/matbench/dataset_creation/glass.py
diff --git a/automatminer_dev/matbench/jdft2d.py b/automatminer_dev/matbench/dataset_creation/jdft2d.py
similarity index 100%
rename from automatminer_dev/matbench/jdft2d.py
rename to automatminer_dev/matbench/dataset_creation/jdft2d.py
diff --git a/automatminer_dev/matbench/mp_eform.py b/automatminer_dev/matbench/dataset_creation/mp_eform.py
similarity index 100%
rename from automatminer_dev/matbench/mp_eform.py
rename to automatminer_dev/matbench/dataset_creation/mp_eform.py
diff --git a/automatminer_dev/matbench/mp_elasticity.py b/automatminer_dev/matbench/dataset_creation/mp_elasticity.py
similarity index 100%
rename from automatminer_dev/matbench/mp_elasticity.py
rename to automatminer_dev/matbench/dataset_creation/mp_elasticity.py
diff --git a/automatminer_dev/matbench/mp_gaps.py b/automatminer_dev/matbench/dataset_creation/mp_gaps.py
similarity index 100%
rename from automatminer_dev/matbench/mp_gaps.py
rename to automatminer_dev/matbench/dataset_creation/mp_gaps.py
diff --git a/automatminer_dev/matbench/phonons.py b/automatminer_dev/matbench/dataset_creation/phonons.py
similarity index 100%
rename from automatminer_dev/matbench/phonons.py
rename to automatminer_dev/matbench/dataset_creation/phonons.py
diff --git a/automatminer_dev/matbench/steels.py b/automatminer_dev/matbench/dataset_creation/steels.py
similarity index 100%
rename from automatminer_dev/matbench/steels.py
rename to automatminer_dev/matbench/dataset_creation/steels.py
diff --git a/automatminer_dev/matbench/docs/__init__.py b/automatminer_dev/matbench/docs/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/automatminer_dev/matbench/get_info.py b/automatminer_dev/matbench/docs/formatting_matbench_table.py
similarity index 87%
rename from automatminer_dev/matbench/get_info.py
rename to automatminer_dev/matbench/docs/formatting_matbench_table.py
index 8a14ecfe..bd1ee747 100644
--- a/automatminer_dev/matbench/get_info.py
+++ b/automatminer_dev/matbench/docs/formatting_matbench_table.py
@@ -1,4 +1,12 @@
from matminer.datasets.dataset_retrieval import load_dataset, get_available_datasets, get_all_dataset_info
+
+
+'''
+
+Helper script to format the matbench documentation page.
+'''
+
+
datasets = get_available_datasets(print_format=None)
for dataset in datasets:
diff --git a/automatminer_dev/matbench/mpcontribs/__init__.py b/automatminer_dev/matbench/mpcontribs/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py
new file mode 100644
index 00000000..fda5d3cc
--- /dev/null
+++ b/automatminer_dev/matbench/mpcontribs/upload.py
@@ -0,0 +1,138 @@
+import wget, json, os
+from string import capwords
+from pybtex.database import parse_string
+import pybtex.errors
+from mpcontribs.client import Client
+from pymatgen import MPRester
+import tqdm
+
+
+# from matminer.datasets.dataset_retrieval import (
+# get_all_dataset_info,
+# get_available_datasets,
+# load_dataset,
+# )
+
+from matminer.datasets import load_dataset
+
+from automatminer_dev.config import DIELECTRIC
+
+
+pybtex.errors.set_strict_mode(False)
+api_key = os.environ["MPCONTRIBS_API_KEY"]
+client = Client(api_key, host='ml-api.materialsproject.cloud')
+mprester = MPRester()
+
+
+# client.get_project("matbench_steels").pretty()
+
+
+fn = 'dataset_metadata.json'
+if not os.path.exists(fn):
+ wget.download(f'https://raw.githubusercontent.com/hackingmaterials/matminer/master/matminer/datasets/{fn}')
+metadata = json.load(open(fn, 'r'))
+metadata = {k: d for k, d in metadata.items() if "matbench" in k}
+
+
+
+# Creating new projects
+#######################
+# todo: might not have access to add new projects
+# for name, info in metadata.items():
+# if "phonons" not in name:
+# continue
+#
+# print(f"Uploading {name}")
+#
+# columns = {}
+# for col, text in info['columns'].items():
+# k = col.replace('_', '|').replace('-', '|').replace('(', ' ').replace(
+# ')', '')
+# columns[k] = text
+#
+# project = {
+# 'is_public': True,
+# 'owner': 'ardunn@lbl.gov',
+# "name": name,
+# 'title': name, # TODO update and set long_title
+# 'authors': 'A. Dunn, A. Jain',
+# 'description': info['description'],
+# 'other': {
+# 'columns': columns,
+# 'entries': info['num_entries']
+# },
+# 'references': []
+# }
+#
+# for ref in info['bibtex_refs']:
+#
+# if name == "matbench_phonons":
+# ref = ref.replace(
+# "petretto_dwaraknath_miranda_winston_giantomassi_rignanese_van setten_gonze_persson_hautier_2018",
+# "petretto2018")
+#
+# bib = parse_string(ref, 'bibtex')
+# for key, entry in bib.entries.items():
+# key_is_doi = key.startswith('doi:')
+# url = 'https://doi.org/' + key.split(':', 1)[
+# -1] if key_is_doi else entry.fields.get('url')
+# k = 'Zhuo2018' if key_is_doi else capwords(key.replace('_', ''))
+# if k.startswith('C2'):
+# k = 'Castelli2012'
+# elif k.startswith('Landolt'):
+# k = 'LB1997'
+# elif k == 'Citrine':
+# url = 'https://www.citrination.com'
+#
+# if len(k) > 8:
+# k = k[:4] + k[-4:]
+# project['references'].append({"label": k, "url": url})
+#
+# try:
+# print(client.projects.create_entry(project=project).result())
+# except Exception as ex:
+# print(
+# ex) # TODO should use get_entry to check existence -> use update_entry if project exists
+
+
+
+
+# Entering all contributions to projects
+########################################
+
+LIMIT = 100
+
+
+
+ds_config = DIELECTRIC
+name = "matbench_" + ds_config["name"]
+client.delete_contributions(name)
+print(f"Loading {name}")
+df = load_dataset(name)
+target = ds_config["target"]
+unit = f" {ds_config['unit']}" if ds_config["unit"] else ""
+
+
+#todo: PROBLEMATIC ENTRY IS DIELECTRIC INDEX 105
+
+# df = df.iloc[104:]
+
+chunks = (df.shape[0] - 1) // LIMIT + 1
+for j in range(chunks):
+ print(f"\tBatch {j} of {chunks}")
+ batch = df.iloc[j * LIMIT:(j + 1) * LIMIT]
+
+ contributions = []
+
+ for i, row in enumerate(batch.iterrows()):
+ entry = row[1]
+ contrib = {'project': name, 'is_public': True, 'structures': []}
+ s = entry.loc["structure"]
+ c = s.composition.get_integer_formula_and_factor()[0]
+ identifier = f"mb-{ds_config['name']}-{(j - 1) * LIMIT + i}"
+ contrib["identifier"] = identifier
+ contrib["data"] = {target: f"{entry.loc[target]}{unit}"}
+ contrib["formula"] = c
+ contrib["structures"].append(s)
+ contributions.append(contrib)
+ client.submit_contributions(contributions)
\ No newline at end of file
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 19506027..37ca2ad9 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -7,3 +7,5 @@ isort==4.3.21
pre-commit==1.18.3
paramiko==2.6.0
scp==0.13.2
+mpcontribs-client==3.3.0
+wget==3.2
\ No newline at end of file
From 6c1ce01d0de9e6d6ae2d2d23c0c2ac043ff45c36 Mon Sep 17 00:00:00 2001
From: ardunn
Date: Thu, 17 Sep 2020 16:51:10 -0700
Subject: [PATCH 6/7] update mpcontribs upload
---
automatminer_dev/config.py | 14 ++
.../matbench/mpcontribs/upload.py | 138 ++++++++++++++----
2 files changed, 126 insertions(+), 26 deletions(-)
diff --git a/automatminer_dev/config.py b/automatminer_dev/config.py
index 01d522ed..945b8c43 100644
--- a/automatminer_dev/config.py
+++ b/automatminer_dev/config.py
@@ -156,3 +156,17 @@
STEELS,
PHONONS,
]
+
+HAS_STRUCTURE = [
+ LOG_KVRH,
+ LOG_GVRH,
+ DIELECTRIC,
+ JDFT2D,
+ MP_GAP,
+ MP_IS_METAL,
+ MP_E_FORM,
+ PEROVSKITES,
+ PHONONS
+]
+
+BENCHMARK_DICT = {ds["name"]: ds for ds in BENCHMARK_FULL_SET}
\ No newline at end of file
diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py
index fda5d3cc..0d576f4d 100644
--- a/automatminer_dev/matbench/mpcontribs/upload.py
+++ b/automatminer_dev/matbench/mpcontribs/upload.py
@@ -1,11 +1,11 @@
-import wget, json, os
+import wget, json, os, math
from string import capwords
from pybtex.database import parse_string
import pybtex.errors
from mpcontribs.client import Client
-from pymatgen import MPRester
+from pymatgen import MPRester, Structure
import tqdm
-
+import pprint
# from matminer.datasets.dataset_retrieval import (
# get_all_dataset_info,
@@ -15,7 +15,7 @@
from matminer.datasets import load_dataset
-from automatminer_dev.config import DIELECTRIC
+from automatminer_dev.config import DIELECTRIC, JDFT2D, PEROVSKITES, STEELS, BENCHMARK_FULL_SET, BENCHMARK_DICT, HAS_STRUCTURE
pybtex.errors.set_strict_mode(False)
@@ -39,8 +39,6 @@
#######################
# todo: might not have access to add new projects
# for name, info in metadata.items():
-# if "phonons" not in name:
-# continue
#
# print(f"Uploading {name}")
#
@@ -97,42 +95,130 @@
+
+# Map of canonical yet non-mpcontribs-compatible target names to compatible (unicode, no punctuation) target names
+target_map = {
+ "yield strength": "σᵧ",
+ "log10(K_VRH)": "log₁₀Kᵛʳʰ",
+ "log10(G_VRH)": "log₁₀Gᵛʳʰ",
+ "n": "𝑛",
+ "exfoliation_en": "Eˣ",
+ "gap pbe": "Eᵍ",
+ "is_metal": "metallic",
+ "e_form": "Eᶠ",
+ "gfa": "glass",
+ "gap expt": "Eᵍ",
+ "last phdos peak": "ωᵐᵃˣ",
+}
+
+
+# # Getting project-level metadata in order
+# #########################################
+#
+# # Add warning to mpcontribs since the results will be stored out of order.
+# # Also, fix columns for new mpcontribs deployment
+# for name, info in metadata.items():
+# mb_shortname = name.replace("matbench_", "")
+#
+# description = info["description"] + f" If you are viewing this on MPContribs-ML interactively, please ensure the order of the identifiers is sequential (mb-{mb_shortname}-0001, mb-{mb_shortname}-0002, etc.) before benchmarking."
+# if "For benchmarking" not in description:
+# print(name, description)
+#
+# has_structure = mb_shortname in [ds["name"] for ds in HAS_STRUCTURE]
+# primitive_key = "structure" if has_structure else "composition"
+# target = BENCHMARK_DICT[mb_shortname]["target"]
+#
+# print(client.projects.update_entry(
+# pk=name,
+# project={
+# "description": description,
+# 'other.columns': {
+# target_map[target]: metadata[name]["columns"][target],
+# primitive_key: metadata[name]["columns"][primitive_key]
+# }
+# }).result())
+
+
+
+
# Entering all contributions to projects
########################################
-LIMIT = 100
+
+# steels.........X
+# log_kvrh.......
+# log_gvrh.......
+# dielectric.....
+# jdft2d.........X
+# expt_gap.......X
+# expt_is_metal..X
+# phonons........
+# mp_is_metal....
+# mp_gap.........
+# glass..........X
+# mp_e_form......
+# perovskites....
+
-ds_config = DIELECTRIC
+ds_config = BENCHMARK_DICT["jdft2d"]
+
name = "matbench_" + ds_config["name"]
-client.delete_contributions(name)
print(f"Loading {name}")
df = load_dataset(name)
target = ds_config["target"]
unit = f" {ds_config['unit']}" if ds_config["unit"] else ""
-#todo: PROBLEMATIC ENTRY IS DIELECTRIC INDEX 105
+# print(f"Updating 'other' column entries of {name} with unicode.")
+# print(client.projects.update_entry(pk=name, project={
+# 'other.columns': {
+# target_map[target]: metadata[name]["columns"][target],
+# "structure": metadata[name]["columns"]["structure"]
+# # "composition": metadata[name]["columns"]["composition"]
+# }
+# }).result())
+
+
+
+print(f"Deleting contributions of {name}")
+client.delete_contributions(name)
+
-# df = df.iloc[104:]
-chunks = (df.shape[0] - 1) // LIMIT + 1
-for j in range(chunks):
- print(f"\tBatch {j} of {chunks}")
- batch = df.iloc[j * LIMIT:(j + 1) * LIMIT]
+print(f"Assembling and uploading contributions for {name}")
+structure_filename = "/Users/ardunn/Downloads/outfile.cif"
+contributions = []
+id_prefix = df.shape[0]
- contributions = []
- for i, row in enumerate(batch.iterrows()):
- entry = row[1]
- contrib = {'project': name, 'is_public': True, 'structures': []}
+id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1
+
+df = df.iloc[:2]
+
+for i, row in tqdm.tqdm(enumerate(df.iterrows())):
+ entry = row[1]
+ contrib = {'project': name, 'is_public': True}
+
+ if "structure" in entry.index:
+ structures = []
s = entry.loc["structure"]
+ s.to("cif", structure_filename)
+ s = Structure.from_file(structure_filename)
c = s.composition.get_integer_formula_and_factor()[0]
- identifier = f"mb-{ds_config['name']}-{(j - 1) * LIMIT + i}"
- contrib["identifier"] = identifier
- contrib["data"] = {target: f"{entry.loc[target]}{unit}"}
- contrib["formula"] = c
- contrib["structures"].append(s)
- contributions.append(contrib)
- client.submit_contributions(contributions)
\ No newline at end of file
+ contrib["structures"] = [s]
+
+ else:
+ c = entry["composition"]
+
+ id_number = f"{i+1:0{id_n_zeros}d}"
+ identifier = f"mb-{ds_config['name']}-{id_number}"
+ contrib["identifier"] = identifier
+
+ contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"}
+ contrib["formula"] = c
+ contributions.append(contrib)
+
+pprint.pprint(contributions)
+client.submit_contributions(contributions)
\ No newline at end of file
From 5baa4ef2b355fa31344a8371dc488a4ad05571e4 Mon Sep 17 00:00:00 2001
From: ardunn
Date: Mon, 21 Sep 2020 20:07:22 -0700
Subject: [PATCH 7/7] update upload to mpcontribs
---
.../matbench/mpcontribs/upload.py | 87 +++++++++----------
1 file changed, 41 insertions(+), 46 deletions(-)
diff --git a/automatminer_dev/matbench/mpcontribs/upload.py b/automatminer_dev/matbench/mpcontribs/upload.py
index 0d576f4d..bca132a7 100644
--- a/automatminer_dev/matbench/mpcontribs/upload.py
+++ b/automatminer_dev/matbench/mpcontribs/upload.py
@@ -160,65 +160,60 @@
# perovskites....
+for ds in ["dielectric", "phonons", "mp_gap", "mp_is_metal", "perovskites", "mp_e_form"]:
+ ds_config = BENCHMARK_DICT[ds]
-ds_config = BENCHMARK_DICT["jdft2d"]
+ name = "matbench_" + ds_config["name"]
+ print(f"Loading {name}")
+ df = load_dataset(name)
+ target = ds_config["target"]
+ unit = f" {ds_config['unit']}" if ds_config["unit"] else ""
-name = "matbench_" + ds_config["name"]
-print(f"Loading {name}")
-df = load_dataset(name)
-target = ds_config["target"]
-unit = f" {ds_config['unit']}" if ds_config["unit"] else ""
-
-# print(f"Updating 'other' column entries of {name} with unicode.")
-# print(client.projects.update_entry(pk=name, project={
-# 'other.columns': {
-# target_map[target]: metadata[name]["columns"][target],
-# "structure": metadata[name]["columns"]["structure"]
-# # "composition": metadata[name]["columns"]["composition"]
-# }
-# }).result())
-
-
-
-print(f"Deleting contributions of {name}")
-client.delete_contributions(name)
+ # print(f"Updating 'other' column entries of {name} with unicode.")
+ # print(client.projects.update_entry(pk=name, project={
+ # 'other.columns': {
+ # target_map[target]: metadata[name]["columns"][target],
+ # "structure": metadata[name]["columns"]["structure"]
+ # # "composition": metadata[name]["columns"]["composition"]
+ # }
+ # }).result())
-print(f"Assembling and uploading contributions for {name}")
-structure_filename = "/Users/ardunn/Downloads/outfile.cif"
-contributions = []
-id_prefix = df.shape[0]
+ # print(f"Deleting contributions of {name}")
+ # client.delete_contributions(name)
-id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1
+ print(f"Assembling and uploading contributions for {name}")
+ structure_filename = "/Users/ardunn/Downloads/outfile.cif"
+ contributions = []
+ id_prefix = df.shape[0]
-df = df.iloc[:2]
-for i, row in tqdm.tqdm(enumerate(df.iterrows())):
- entry = row[1]
- contrib = {'project': name, 'is_public': True}
+ id_n_zeros = math.floor(math.log(df.shape[0], 10)) + 1
+ for i, row in tqdm.tqdm(enumerate(df.iterrows())):
+ entry = row[1]
+ contrib = {'project': name, 'is_public': True}
- if "structure" in entry.index:
- structures = []
- s = entry.loc["structure"]
- s.to("cif", structure_filename)
- s = Structure.from_file(structure_filename)
- c = s.composition.get_integer_formula_and_factor()[0]
- contrib["structures"] = [s]
+ if "structure" in entry.index:
+ structures = []
+ s = entry.loc["structure"]
+ s.to("cif", structure_filename)
+ s = Structure.from_file(structure_filename)
+ c = s.composition.get_integer_formula_and_factor()[0]
+ contrib["structures"] = [s]
- else:
- c = entry["composition"]
+ else:
+ c = entry["composition"]
- id_number = f"{i+1:0{id_n_zeros}d}"
- identifier = f"mb-{ds_config['name']}-{id_number}"
- contrib["identifier"] = identifier
+ id_number = f"{i+1:0{id_n_zeros}d}"
+ identifier = f"mb-{ds_config['name']}-{id_number}"
+ contrib["identifier"] = identifier
- contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"}
- contrib["formula"] = c
- contributions.append(contrib)
+ contrib["data"] = {target_map[target]: f"{entry.loc[target]}{unit}"}
+ contrib["formula"] = c
+ contributions.append(contrib)
-pprint.pprint(contributions)
-client.submit_contributions(contributions)
\ No newline at end of file
+ client.submit_contributions(contributions, per_page=10)
\ No newline at end of file