This repository was archived by the owner on Nov 15, 2019. It is now read-only.

Commit 9e791ae

Merge branch 'release/0.1.0'

2 parents 775e202 + b2f9de7

37 files changed, 25028 insertions(+), 0 deletions(-)

.flake8

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length=120
+ignore: E301, E302, E401, E261, E265, E226, F401, E501
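The `ignore:` line above uses a colon rather than `=`; both are valid INI key/value delimiters, so flake8 reads them identically. A quick check with Python's `configparser` (illustrative snippet, not part of the commit):

```python
import configparser

# flake8 reads its settings with an INI-style parser, which accepts
# both "key = value" and "key: value" delimiters.
config = configparser.ConfigParser()
config.read_string("""
[flake8]
max-line-length=120
ignore: E301, E302, E401, E261, E265, E226, F401, E501
""")

ignored = [code.strip() for code in config["flake8"]["ignore"].split(",")]
print(config["flake8"]["max-line-length"])  # 120
print(ignored[0], ignored[-1])              # E301 E501
```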

.gitignore

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+
+# Reminder:
+# - A leading slash means the pattern is anchored at the root.
+# - No leading slash means the pattern matches at any depth.
+
+# Python files
+*.pyc
+__pycache__/
+.mypy_cache/
+
+# IntelliJ IDEA / PyCharm project files
+/.idea
+/*.iml
+
+# MyPy
+.mypy_cache
+
+# PyCharm JIRA plugin
+atlassian-ide-plugin.xml
+
+# Virtualenv
+.venv
+
+# Development build artifacts
+*.egg-info
+
+# OS X metadata files
+.DS_Store
+
+# Temporary folder
+/tmp
+
+# Travis-ci (don't want to accidentally commit google key)
+client-secret.json
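The anchoring rule stated in the file's leading comments can be sketched with a toy matcher (hypothetical helper; real gitignore semantics cover many more cases, such as directory-only patterns and `**`):

```python
from fnmatch import fnmatch

def matches(pattern: str, path: str) -> bool:
    """Toy gitignore-style match: a leading slash anchors the pattern
    at the repository root; an unanchored pattern may match a path
    component at any depth."""
    if pattern.startswith("/"):
        return fnmatch(path, pattern[1:])
    return any(fnmatch(part, pattern) for part in path.split("/"))

print(matches("/tmp", "tmp"))           # True  (anchored, matches at root)
print(matches("/tmp", "src/tmp"))       # False (anchored, so no match deeper)
print(matches("*.pyc", "pkg/mod.pyc"))  # True  (unanchored, any depth)
```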

.travis.yml

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+language: python
+python:
+- '3.6'
+before_install:
+- openssl aes-256-cbc -K $encrypted_e70c9f59db9f_key -iv $encrypted_e70c9f59db9f_iv
+  -in client-secret.json.enc -out client-secret.json -d
+install:
+- make develop
+script:
+- make test
+deploy:
+  provider: pypi
+  on:
+    tags: true
+  user: jessebrennan
+  password:
+    secure: NTzkGJ6KUlyVxkyD5DjnnpwwT4mKCfaFzsSrLv9TWBlpk0YF0xBiOSLoK1yegLfWjPendPMfx+k54BOv9WZbZV95BFxWXdk0WpeZhfw2qoqzddPZtkWXXgU926kwM/DXb1X117iUzfG26oRoRfciccEiNgFq9ikEY0xDKJEyo3IquOqPpn6GYbTD6WcsDOoMbk24KXI1l/BGOsG93yfDCYg8iEIqGjY1SioUO5vAoggwY+rV/MAt0GpRM5zPh2XycbAjI1MBNwxIq5kc+Q0y2sOi5Cnj0EN+QpuLoUrpwOKEC7VJk0BaOzqDKvOrQYT6g6bFpT8u2Ry8ekggusbUQ7O3W2fnjoapWqPfbC3Q8+rqf8K1dsWeSv0j9zlTWNEtowaoPc5tenSiTntS9iHlP1Z+TlKvlo9bTif97PsZ0HNsjV2aReRlbUusSsQl6lU2XIs4TbOIesf5+/ju4LzacbLws8bvKpGdRJL1T5Qu6IVIk3Wk4Nv4EHMPJKovw0Yomrpa4ccmv2nQ5J3e7nU52DxkRPh6sZLQaKafuETYbcMN5EZI6RsmQ7cPMr3uaGzJHuRDEgIwTVcpC1tXAtTTLjEMMLs8TPU6rCTKdGi1MMe1+72sPjipNJWA0ZMMAZHkhKTmBV0FwfMOuDhR0ZBvW3OzbxZtIZdoMgoygFh3hSE=

MANIFEST.in

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+include README.md VERSION release.py
+include *.txt

Makefile

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+
+include common.mk
+MODULES=loader transformer scripts tests datasets/topmed/topmed_107_open_access
+
+all: test
+
+lint:
+	flake8 $(MODULES)
+
+mypy:
+	mypy --ignore-missing-imports $(MODULES)
+
+check_readme:
+	python setup.py check -s
+
+tests:=$(wildcard tests/test_*.py)
+
+# A pattern rule that runs a single test module, for example:
+# make tests/test_gen3_input_json.py
+
+$(tests): %.py : mypy lint check_readme
+	python -m unittest --verbose $*.py
+
+test: $(tests)
+
+develop:
+	pip install -e .
+	pip install -r requirements-dev.txt
+
+undevelop:
+	python setup.py develop --uninstall
+	pip uninstall -y -r requirements-dev.txt
+
+.PHONY: all lint mypy test
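The `tests:=$(wildcard tests/test_*.py)` assignment plus the static pattern rule means `make test` runs every discovered test module as its own unittest invocation. The same discovery could be sketched in Python (illustrative, assuming the repo layout above; `module_name` is a hypothetical helper):

```python
import glob
import subprocess

def module_name(path: str) -> str:
    """tests/test_x.py -> tests.test_x (dotted form for `python -m unittest`)."""
    return path[:-3].replace("/", ".")

def run_each_test_module(pattern: str = "tests/test_*.py") -> None:
    # Mimics `test: $(tests)`: one unittest run per matching module,
    # like the Makefile's static pattern rule, failing fast on error.
    for path in sorted(glob.glob(pattern)):
        subprocess.run(
            ["python", "-m", "unittest", "--verbose", module_name(path)],
            check=True,
        )

print(module_name("tests/test_gen3_input_json.py"))  # tests.test_gen3_input_json
```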

README.md

Lines changed: 79 additions & 0 deletions
@@ -1,2 +1,81 @@
 # cgp-dss-data-loader
 Simple data loader for CGP HCA Data Store
+
+## Common Setup
+1. **(optional)** We recommend using a Python 3
+   [virtual environment](https://docs.python.org/3/tutorial/venv.html).
+
+1. Run:
+
+   `pip3 install cgp-dss-data-loader`
+
+## Setup for Development
+1. Clone the repo:
+
+   `git clone https://github.com/DataBiosphere/cgp-dss-data-loader.git`
+
+1. Go to the root directory of the cloned project:
+
+   `cd cgp-dss-data-loader`
+
+1. Make sure you are on the branch `develop`.
+
+1. Run (ideally in a new [virtual environment](https://docs.python.org/3/tutorial/venv.html)):
+
+   `make develop`
+
+## Cloud Credentials Setup
+Because this program uses Amazon Web Services and Google Cloud Platform, you will need to set up credentials
+for both of these before you can run the program.
+
+### AWS credentials
+1. If you haven't already you will need to make an IAM user and create a new access key. Instructions are
+   [here](https://docs.aws.amazon.com/general/latest/gr/managing-aws-access-keys.html).
+
+1. Next you will need to store your credentials so that Boto can access them. Instructions are
+   [here](https://boto3.readthedocs.io/en/latest/guide/configuration.html).
+
+### GCP credentials
+1. Follow the steps [here](https://cloud.google.com/docs/authentication/getting-started) to set up your Google
+   Credentials.
+
+## Running Tests
+Run:
+
+`make test`
+
+## Getting Data from Gen3 and Loading it
+
+1. The first step is to extract the Gen3 data you want using the
+   [sheepdog exporter](https://github.com/david4096/sheepdog-exporter). The TopMed public data extracted
+   from sheepdog is available [on the release page](https://github.com/david4096/sheepdog-exporter/releases/tag/0.3.1)
+   under Assets. Assuming you use this data, you will now have a file called `topmed-public.json`
+
+1. Make sure you are running the virtual environment you set up in the **Setup** instructions.
+
+1. Now we need to transform the data. We can transform to the outdated gen3 format, or to the new standard format.
+
+   - For the standard format, follow instructions at
+     [newt-transformer](https://github.com/jessebrennan/newt-transformer#transforming-data-from-sheepdog-exporter).
+
+   - For the old Gen3 format, run this from the root of the project:
+
+     ```
+     python transformer/gen3_transformer.py /path/to/topmed_public.json --output-json transformed-topmed-public.json
+     ```
+
+1. Now that we have our new transformed output we can run it with the loader.
+
+   If you used the standard transformer use the command:
+
+   ```
+   dssload --no-dry-run --dss-endpoint MY_DSS_ENDPOINT --staging-bucket NAME_OF_MY_S3_BUCKET standard --json-input-file transformed-topmed-public.json
+   ```
+
+   Otherwise for the outdated gen3 format run:
+
+   ```
+   dssload --no-dry-run --dss-endpoint MY_DSS_ENDPOINT --staging-bucket NAME_OF_MY_S3_BUCKET gen3 --json-input-file transformed-topmed-public.json
+   ```
+
+1. You did it!

VERSION

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+0.1.0

client-secret.json.enc

2.33 KB
Binary file not shown.

common.mk

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+SHELL=/bin/bash
+
+ifeq ($(findstring Python 3.6, $(shell python --version 2>&1)),)
+$(error Please run make commands from a Python 3.6 virtualenv)
+endif
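The `findstring` guard in `common.mk` aborts any make invocation whose interpreter is not a 3.6.x Python. The check is a plain substring test, which can be mirrored in Python (illustrative; `is_python_36` is a hypothetical helper):

```python
def is_python_36(version_output: str) -> bool:
    """Mirror common.mk's $(findstring Python 3.6, ...) guard:
    a substring test that passes for any 3.6.x interpreter."""
    return "Python 3.6" in version_output

# `python --version` wrote to stderr on Python 2 and stdout on newer
# releases, which is why the Makefile redirects with 2>&1 first.
print(is_python_36("Python 3.6.5"))  # True
print(is_python_36("Python 3.7.0"))  # False
```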

datasets/topmed/topmed_107_open_access/__init__.py

Whitespace-only changes.
