This repository was archived by the owner on Nov 15, 2019. It is now read-only.

Commit 1c45df3: Make pip installable
Parent: d5c856f

11 files changed (+107, -51 lines)

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,9 @@ atlassian-ide-plugin.xml
 # Virtualenv
 .venv
 
+# Development build artifacts
+*.egg-info
+
 # OS X metadata files
 .DS_Store
 

.travis.yml

Lines changed: 8 additions & 1 deletion
@@ -5,6 +5,13 @@ before_install:
 - openssl aes-256-cbc -K $encrypted_e70c9f59db9f_key -iv $encrypted_e70c9f59db9f_iv
   -in client-secret.json.enc -out client-secret.json -d
 install:
-- pip install -r requirements-dev.txt
+- make develop
 script:
 - make test
+deploy:
+  provider: pypi
+  on:
+    tags: true
+  user: jessebrennan
+  password:
+    secure: eBqClaTiltLnIC/lwqAqQQpi5Qrb3cpKn666JQPIYUA5Nn36SqLmX8QxrN1lqc9rbE/H1CufMhdwQPgpM7D5n7nQ0e4o0dXU2UsggIuEyaD4ZHliRveogbj6rrk+zrvqczZXkvNp22nefoi4ehk9wYhYQL1SAA4ytgsLJm0Svd1X/Gm6RmLuvafwKdsXwFkSm2ihJIMb7VPZa8vQ+EX0TMllX4on+6P5lQNmaCo/9pdocnM3HQTPJohZ2lx6EfUMLaX/gkC5akqqJ5MHCcbCNezJpP/MC0JibF08GDcwUy7zc79f0mIqc4rpbyqPWjZeuBUFrkwh67v8BRWcC7al3r1L7xbQRXpL00CbS2ySUNOfXgmV2M4UUUSgga4TdA28tUuP/lxgcS7tUT07ccSpo+RlD+8xLWs28oBsTJ+J7ebRrKAdTKu4X2Qts8LklxWbYIfIy2XELth+GpxR9mHaIoGDpV7E+wHSFqwGQekLMiZyuzDKKw/pG2taMq2EK6JnUO/470hc7FZf6gBIDlUMFFBffoArPrEhC4LdCwxqtAlG2Vy/UBAYqLMcTYgm7IGHQAwt1pHqMwHfb3pVsFbHyJ7vH7HJMknWHmx7yCXv+/pE6zTRNrANEPlhkVNrNAve++PIIZvBIY7K213SmUwxdmftfaxKaaAMj8xSpe6xrKk=

MANIFEST.in

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+include README.md VERSION release.py
+include *.txt

Makefile

Lines changed: 12 additions & 2 deletions
@@ -10,15 +10,25 @@ lint:
 mypy:
 	mypy --ignore-missing-imports $(MODULES)
 
+check_readme:
+	python setup.py check -s
+
 tests:=$(wildcard tests/test_*.py)
 
 # A pattern rule that runs a single test module, for example:
 # make tests/test_gen3_input_json.py
 
-$(tests): %.py : mypy lint
+$(tests): %.py : mypy lint check_readme
 	python -m unittest --verbose $*.py
 
 test: $(tests)
 
-.PHONY: all lint mypy test
+develop:
+	pip install -e .
+	pip install -r requirements-dev.txt
 
+undevelop:
+	python setup.py develop --uninstall
+	pip uninstall -y -r requirements-dev.txt
+
+.PHONY: all lint mypy test

README.md

Lines changed: 27 additions & 32 deletions
@@ -2,65 +2,60 @@
 Simple data loader for CGP HCA Data Store
 
 ## Common Setup
-1. Clone the repo:
+1. **(optional)** We recommend using a Python 3
+   [virtual environment](https://docs.python.org/3/tutorial/venv.html).
 
-   `git clone https://github.com/DataBiosphere/cgp-dss-data-loader.git`
+1. Run:
 
-2. Go to the root directory of the cloned project:
-
-   `cd cgp-dss-data-loader`
+   `pip3 install cgp-dss-data-loader`
 
-3. Run (ideally in a new [virtual environment](https://docs.python.org/3/tutorial/venv.html)):
+## setup for development
+1. clone the repo:
 
-   `pip install -r requirements.txt`
+   `git clone https://github.com/databiosphere/cgp-dss-data-loader.git`
 
-## Setup for Development
-1. Clone the repo:
+1. go to the root directory of the cloned project:
 
-   `git clone https://github.com/DataBiosphere/cgp-dss-data-loader.git`
-
-2. Go to the root directory of the cloned project:
-
    `cd cgp-dss-data-loader`
-
-3. Make sure you are on the branch `develop`.
-
-4. Run (ideally in a new [virtual environment](https://docs.python.org/3/tutorial/venv.html)):
 
-   `pip install -r requirements-dev.txt`
+1. make sure you are on the branch `develop`.
 
-## Running Tests
-Run:
+1. run (ideally in a new [virtual environment](https://docs.python.org/3/tutorial/venv.html)):
+
+   `make develop`
+
+## running tests
+run:
 
 `make test`
 
-## Getting data from Gen3 and Loading it
+## getting data from gen3 and loading it
 
-1. The first step is to extract the Gen3 data you want using the
-   [sheepdog exporter](https://github.com/david4096/sheepdog-exporter). The TopMed public data extracted
+1. the first step is to extract the gen3 data you want using the
+   [sheepdog exporter](https://github.com/david4096/sheepdog-exporter). the topmed public data extracted
    from sheepdog is available [on the release page](https://github.com/david4096/sheepdog-exporter/releases/tag/0.3.1)
-   under Assets. Assuming you use this data, you will now have a file called `topmed-public.json`
-
-2. Make sure you are running the virtual environment you set up in the **Setup** instructions.
+   under assets. assuming you use this data, you will now have a file called `topmed-public.json`
+
+1. make sure you are running the virtual environment you set up in the **setup** instructions.
 
-3. Now we need to transform the data. We can transform to the outdated gen3 format, or to the new standard format.
+1. now we need to transform the data. we can transform to the outdated gen3 format, or to the new standard format.
 
    - for the standard format, follow instructions at
     [newt-transformer](https://github.com/jessebrennan/newt-transformer#transforming-data-from-sheepdog-exporter).
 
   - for the old gen3 format
-    From the root of the project run:
+    from the root of the project run:
 
   ```
  python transformer/gen3_transformer.py /path/to/topmed_public.json --output-json transformed-topmed-public.json
  ```
 
-4. Now that we have our new transformed output we can run it with the loader.
+1. now that we have our new transformed output we can run it with the loader.
 
-   If you used the standard transformer use the command:
+   if you used the standard transformer use the command:
 
  ```
-  python scripts/cgp_data_loader.py --no-dry-run --dss-endpoint MY_DSS_ENDPOINT --staging-bucket NAME_OF_MY_S3_BUCKET standard --json-input-file transformed-topmed-public.json
+  python scripts/cgp_data_loader.py --no-dry-run --dss-endpoint my_dss_endpoint --staging-bucket name_of_my_s3_bucket standard --json-input-file transformed-topmed-public.json
 ```
 
  otherwise for the outdated gen3 format run:
@@ -69,4 +64,4 @@ Run:
 python scripts/cgp_data_loader.py --no-dry-run --dss-endpoint MY_DSS_ENDPOINT --staging-bucket NAME_OF_MY_S3_BUCKET gen3 --json-input-file transformed-topmed-public.json
 ```
 
-5. You did it!
+1. You did it!

VERSION

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+0.0.1

requirements-dev.txt

Lines changed: 0 additions & 1 deletion
@@ -1,3 +1,2 @@
 flake8
 mypy >= 0.600
--r requirements.txt

requirements.txt

Lines changed: 1 addition & 13 deletions
@@ -1,13 +1 @@
-boto3 >= 1.6.0, < 2
-cloud-blobstore >= 2.1.1, < 3
-crcmod >= 1.7, < 2
-dcplib >= 1.1.0, < 2
-google-cloud-storage >= 1.9.0, < 2
-hca >= 3.5.1, < 4
-requests >= 2.18.4, < 3
-
-# topmed metadata exporter
-lifelines >= 0.14.2, < 1
-numpy >= 1.14.3, < 2
-scipy >= 1.1.0, < 2
-matplotlib >= 2.2.2, < 3
+.

scripts/__init__.py

Whitespace-only changes.

scripts/cgp_data_loader.py

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
 GOOGLE_PROJECT_ID = "platform-dev-178517" # For requester pays buckets
 
 
-def main(argv):
+def main(argv=sys.argv[1:]):
     import argparse
     parser = argparse.ArgumentParser(description=__doc__)
     dry_run_group = parser.add_mutually_exclusive_group(required=True)
@@ -74,4 +74,4 @@ def main(argv):
 
 
 if __name__ == '__main__':
-    main(sys.argv[1:])
+    main()
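Why the default argument matters: the `dssload` console script declared in `setup.py` below points at `scripts.cgp_data_loader:main`, and setuptools console scripts call the target function with no arguments, so `main` now falls back to `sys.argv[1:]` on its own. A minimal sketch of the pattern in a standalone, hypothetical script (only the `--dss-endpoint` option name is taken from the README above):

```python
import argparse
import sys


def main(argv=sys.argv[1:]):  # default is captured at import time, as in the diff above
    # Hypothetical parser; the real loader defines many more options.
    parser = argparse.ArgumentParser(description='demo CLI using the same pattern')
    parser.add_argument('--dss-endpoint')
    args = parser.parse_args(argv)
    print(args.dss_endpoint)


if __name__ == '__main__':
    main()  # works both when run directly and when invoked via a console-script entry point
```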

setup.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+import os
+
+from setuptools import setup, find_packages
+
+VERSION_FILE = 'VERSION'
+
+
+def read_version():
+    with open(VERSION_FILE, 'r') as fp:
+        return tuple(map(int, fp.read().split('.')))
+
+
+def read(fname):
+    return open(os.path.join(os.path.dirname(__file__), fname)).read()
+
+
+setup(
+    name="cgp-dss-data-loader",
+    description="Simple data loader for CGP HCA Data Store",
+    packages=find_packages(exclude=('datasets', 'tests', 'transformer')), # include all packages
+    url="https://github.com/DataBiosphere/cgp-dss-data-loader",
+    entry_points={
+        'console_scripts': [
+            'dssload=scripts.cgp_data_loader:main'
+        ]
+    },
+    long_description=read('README.md'),
+    long_description_content_type="text/markdown",
+    install_requires=['boto3 >= 1.6.0, < 2',
+                      'cloud-blobstore >= 2.1.1, < 3',
+                      'crcmod >= 1.7, < 2',
+                      'dcplib >= 1.1.0, < 2',
+                      'google-cloud-storage >= 1.9.0, < 2',
+                      'hca >= 3.5.1, < 4',
+                      'requests >= 2.18.4, < 3'],
+    license='Apache License 2.0',
+    include_package_data=True,
+    zip_safe=True,
+    author="Jesse Brennan",
+    author_email="[email protected]",
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Natural Language :: English',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Scientific/Engineering :: Bio-Informatics',
+    ],
+    version='{}.{}.{}'.format(*read_version()),
+    keywords=['genomics', 'metadata', 'loading', 'NIHDataCommons'],
+)
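A note on the version plumbing above: `read_version()` parses the new `VERSION` file (`0.0.1`) into an integer tuple, and `setup()` re-joins it into a dotted version string; `make develop` then installs the package in editable mode, which also registers the `dssload` command. A minimal standalone sketch of that round trip (it does not import the project):

```python
def parse_version(text):
    # mirrors read_version() in setup.py: '0.0.1' -> (0, 0, 1)
    return tuple(map(int, text.strip().split('.')))


version_tuple = parse_version('0.0.1')  # the contents of the VERSION file added in this commit
version_string = '{}.{}.{}'.format(*version_tuple)
assert version_string == '0.0.1'
```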
