shenwanxiang · grumpybavarian · Oct 23, 2020 · Oct 23, 2020 · Oct 27, 2020 · Oct 27, 2020
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-In case you would like to cite this: 
+In case you would like to cite this:
 
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4054866.svg)](https://doi.org/10.5281/zenodo.4054866)
 
@@ -157,12 +157,10 @@ These benchmark datasets and the split induces have benn generated in this repo,
 ```bash
 git clone https://github.com/shenwanxiang/ChemBench.git
 cd ChemBench
-# add to PYTHONPATH
-echo export PYTHONPATH="\$PYTHONPATH:`pwd`" >> ~/.bashrc
-source ~/.bashrc
+pip install -e .
 ```
 
-### Usage-1: Load the Dataset and  MoleculeNet's Split Induces  
+### Usage-1: Load the Dataset and MoleculeNet's Split Indices
 
 ```python
 from chembench import load_data
@@ -175,7 +173,7 @@ train_idx, valid_idx, test_idx = induces[2]
 ```
 ----
 
-### Usage-2: Load Dataset As Data Object 
+### Usage-2: Load Dataset As Data Object
 
 ```python
 from chembench import dataset
@@ -185,7 +183,7 @@ data.y
 data.description
 
 
-## regression 
+## regression
 dataset.load_Lipop()
 dataset.load_ESOL()
 dataset.load_FreeSolv()
@@ -223,8 +221,8 @@ print(len(induces1))
 print(len(induces2))
 ```
 
-For example, the chemical space of the ESOL dataset using 5fold cluster split : 
+For example, the chemical space of the ESOL dataset using a 5-fold cluster split:
 ![ESOL split chemical space](https://github.com/shenwanxiang/ChemBench/blob/master/chembench/cluster/cluster_split/cluster_split_results/ESOL/ESOL.png)
 
-the Kolmogorov-Smirnov statistic on the distribution for the pairwise groups(clusters): 
+The Kolmogorov-Smirnov statistic on the distribution for the pairwise groups (clusters):
 ![ESOL split distribution test](https://github.com/shenwanxiang/ChemBench/blob/master/chembench/cluster/cluster_split/cluster_split_results/ESOL/ESOL_stat_test.png)
diff --git a/chembench/__init__.py b/chembench/__init__.py
@@ -1,3 +1,5 @@
+__version__ = '0.1.0'  # Read by setup.py with a regex; keep it a plain quoted string literal.
+
 import os
 from glob import glob
 import pandas as pd

diff --git a/setup.py b/setup.py
@@ -0,0 +1,68 @@
+"""Packaging script for ChemBench (install with ``pip install -e .``)."""
+import glob
+import io
+import os
+import re
+
+from setuptools import find_packages, setup
+
+# Resolve every path against the directory holding this file so the script
+# behaves the same regardless of the current working directory.
+this_directory = os.path.dirname(os.path.realpath(__file__))
+package_directory = os.path.join(this_directory, 'chembench')
+
+# Get the version from chembench/__init__.py
+# Adapted from https://stackoverflow.com/a/39671214
+with io.open(os.path.join(package_directory, '__init__.py'),
+             encoding='utf_8_sig') as fh:
+    version_matches = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
+                                fh.read())
+if version_matches is None:
+    raise RuntimeError('Could not determine CHEMBENCH version from __init__.py')
+__version__ = version_matches.group(1)
+
+with io.open(os.path.join(this_directory, 'README.md'), encoding='utf-8') as fh:
+    long_description = fh.read()
+
+# Collect bundled data files from inside the package only.  Globbing from the
+# working directory would also pick up copies under build/ or dist/ and turn
+# them into '../...' entries once made relative to the package.
+# '.csv.gz' is listed separately because the '*.csv' pattern does not match it.
+data_file_endings = ['.pkl', '.csv', '.idx', '.sdf', '.csv.gz']
+package_data = sorted(
+    # setuptools expects '/'-separated paths in package_data on every platform.
+    os.path.relpath(path, package_directory).replace(os.sep, '/')
+    for file_ending in data_file_endings
+    for path in glob.glob(os.path.join(package_directory, '**', f'*{file_ending}'),
+                          recursive=True)
+)
+
+setup(name='ChemBench',
+      version=__version__,
+      author='Shen Wanxiang',
+      author_email='[email protected]',
+      description='MoleculeNet benchmark dataset & MolMapNet dataset',
+      long_description=long_description,
+      long_description_content_type='text/markdown',
+      url='https://github.com/shenwanxiang/ChemBench',
+
+      packages=find_packages(),
+
+      # This script uses f-strings, so it cannot run on interpreters older than 3.6.
+      python_requires='>=3.6',
+      install_requires=[
+          'numpy>=1.16.3',
+          'tqdm>=4.32.1',
+          'pandas>=0.24.2',
+      ],
+      include_package_data=True,
+      package_data={'chembench': package_data},
+      # NOTE(review): chembench/__init__.py imports os and glob, which suggests data
+      # is read from real file paths; if so zip_safe should be False -- confirm.
+      zip_safe=True,
+
+      # setuptools expects a list here, not a tuple.
+      classifiers=[
+          'Programming Language :: Python :: 3',
+          'Operating System :: OS Independent',
+      ],
+      )