From 25b532d1c421191943ebb84dde2f20c322cf25e6 Mon Sep 17 00:00:00 2001 From: Chris Dilger Date: Thu, 1 Aug 2019 23:29:15 +1000 Subject: [PATCH 1/5] Do a full_table refresh using version messages On the assignments schema, which fails to delete old messages --- .gitignore | 2 ++ DOCUMENTATION.md | 47 ++++++++++++++++++++++--- README.md | 58 +++++++++++++++++++++++++------ tap_harvest_forecast/.DS_Store | Bin 6148 -> 0 bytes tap_harvest_forecast/__init__.py | 36 +++++++++++++------ 5 files changed, 117 insertions(+), 26 deletions(-) delete mode 100644 tap_harvest_forecast/.DS_Store diff --git a/.gitignore b/.gitignore index 0f78b91..4788c32 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,5 @@ venv.bak/ # Config for script config.json +tap_config.json +*.DS_Store diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index a71663c..af80647 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -4,22 +4,59 @@ ### Requirements -To set up this Harvest Forecast in Stitch, you need to create a personal access token in the developer tools section of Harvest's website. +To set up this Harvest Forecast in Stitch, you need to create a personal access +token in the developer tools section of Harvest's website. ### Setup -Visit the [developer tools](https://id.getharvest.com/developers) page on Harvest's website and create a new personal access token. Create a `config.json` file in the following format, where `account_id` and `access_token` are the credentials you just created, and `start_date` is the date you want to start the sync from: +1. Install + + Clone this repository, and then install using setup.py. 
We recommend using a virtualenv: + + ```bash + virtualenv -p python3 ~/virtualenvs/tap-harvest-forecast + source ~/virtualenvs/tap-harvest-forecast/bin/activate + python setup.py install + ``` + + We will also install a target, which isn't required but will let us save the + output nicely + ```bash + virtualenv -p python3 ~/virtualenvs/target-csv + source ~/virtualenvs/target-csv/bin/activate + python setup.py install + ``` + The reason for this is that taps and targets both need their own + environments to work properly. More on development is [in the singer + docs](https://github.com/singer-io/getting-started/blob/master/docs/RUNNING_AND_DEVELOPING.md) + +2. Create your tap's `tap_config.json` file which should look like the following: +Create a `config.json` file in +the following format, where `client_id`, `client_secret` and `refresh_token` are the credentials +you just created in the [Requirements](#requirements), and `start_date` is the date you want to start the sync from ```json { + "client_id": "OAUTH_CLIENT_ID", + "client_secret": "OAUTH_CLIENT_SECRET", + "refresh_token": "YOUR_OAUTH_REFRESH_TOKEN", "start_date": "2017-04-19T13:37:30Z", - "account_id": "HARVEST_FORECAST_ACCOUNT_ID", - "access_token": "HARVEST_FORECAST_PERSONAL_ACCESSS_TOKEN" + "user_agent": "tap-harvest-forecast (your.email@example.com)" } ``` --- +## Developing on Windows + +Windows users will need to install WSL and develop within a linux environment as +a result of a formatting issue on windows, see [this +issue](https://github.com/singer-io/singer-python/issues/86) + +Singer.io should work inside the bash environment as it would on linux + +--- + ## Harvest Forecast Replication - All available data from the Harvest Forecast API is replicated. Currently, this includes information returned from the assignments, clients, milestones, people, and projects endpoints. @@ -34,7 +71,7 @@ Each header denotes the table name. ### assigments - Description: Assigments of projects to users. 
- Primary key column(s): id -- Replicated fully or incrementally _(uses a bookmark to maintain state)_: Incrementally +- Replicated fully or incrementally _(uses a bookmark to maintain state)_: Fully - Bookmark column: `updated_at` - Link to API endpoint documentation: https://help.getharvest.com/forecast/faqs/faq-list/api/ diff --git a/README.md b/README.md index abf181e..5751edb 100644 --- a/README.md +++ b/README.md @@ -13,22 +13,36 @@ Author: Robert Benjamin ([@robertbenjamin](https://github.com/robertbenjamin)) Clone this repository, and then install using setup.py. We recommend using a virtualenv: ```bash - > virtualenv -p python 3 venv - > source venv/bin/activate - > python setup.py install + virtualenv -p python3 venv + source venv/bin/activate + python setup.py install ``` -2. Create your tap's config file which should look like the following: +2. Retrieve your oauth credentials from the Harvest Forecast API + + Visit the [developer tools](https://id.getharvest.com/developers) page on + Harvest's website and create a new oauth token + + Paste the Client ID you got from the above page in the url of a browser like + `https://id.getharvest.com/oauth2/authorize?client_id={OAUTH_CLIENT_ID}&response_type=code`. Now you're + able to login, click 'authorize app' and then are redirected to a url like + this + `https://id.getharvest.com/oauth2/authorize?code={OAUTH_REFRESH_TOKEN}&scope=all`. + You will use this `OAUTH_REFRESH_TOKEN` in the following step to configure + the oauth application + +3. Create your tap's `tap_config.json` file which should look like the following: ```json { + "client_id": "OAUTH_CLIENT_ID", + "client_secret": "OAUTH_CLIENT_SECRET", + "refresh_token": "OAUTH_REFRESH_TOKEN", "start_date": "2017-04-19T13:37:30Z", - "account_id": "HARVEST_FORECAST_ACCOUNT_ID", - "access_token": "HARVEST_FORECAST_PERSONAL_ACCESSS_TOKEN" + "user_agent": "tap-harvest-forecast (your.email@example.com)" } - ``` -3. [Optional] Create the initial state file +4. 
[Optional] Create the initial state file ```json { @@ -40,14 +54,36 @@ Author: Robert Benjamin ([@robertbenjamin](https://github.com/robertbenjamin)) } ``` -4. Run the application - +5. Setup the catalog + `tap-harvest-forecast` can be run with: ```bash - tap-harvest-forecast --config config.json [--state state.json] + tap-harvest-forecast --config tap_config.json [--state state.json] + ``` + + Run the tap in discovery mode to obtain the catalog: + + ```bash + tap-harvest-forecast --config tap_config.json --discover > catalog.json ``` + You will need to add metadata in the catalog for stream/field selection, by + adding `"selected": true` to the `metadata` for each stream you wish to + select in `tap_config.json` + + +6. Run the application + + Run the Tap in sync mode: + + ```bash + tap-harvest-forecast --config tap_config.json --catalog catalog.json + ``` + + The output should consist of SCHEMA, RECORD, STATE, and METRIC messages. + If you wish to test the tap with a target, see the [documentation](DOCUMENTATION.md) + --- Copyright © 2018 Stitch diff --git a/tap_harvest_forecast/.DS_Store b/tap_harvest_forecast/.DS_Store deleted file mode 100644 index e169fad4c9c1bf5a1c3e3064c4e9fb402110af54..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOG-mQ5UkdK0XJD@IalxoLx?BH1tJ>aLL(T=`mH>dM@#hwF}!3WxRGk;u9=>$ zdAwS@eht7DA5V9{48WA`h))k=^Kv@{8Qa}nw0VyB_q`)r~@ZL+C-XU!ajm~>bTA68GcnoumB&ih-G!+N5k6p#X^ z3S8%Q=KcSkeqsJUC21!Gq`<#Yz$VMpa=}-s-a2|Y@3oD-rhCm7-Hq#@Fhn~hMmy%l f+wnY#vab1>=e=-93_9~cC+cUwb&*MdzgFM~*jyEu diff --git a/tap_harvest_forecast/__init__.py b/tap_harvest_forecast/__init__.py index cdc682b..2e8e87b 100644 --- a/tap_harvest_forecast/__init__.py +++ b/tap_harvest_forecast/__init__.py @@ -10,6 +10,7 @@ from singer import metadata from singer import Transformer, utils import backoff +import time LOGGER = singer.get_logger() SESSION = requests.Session() @@ -128,23 +129,33 @@ def request(url, params = None): 
resp.raise_for_status() return resp.json() + +def get_stream_version(tap_stream_id): + return int(time.time() * 1000) + def append_times_to_dates(item, date_fields): if date_fields: for date_field in date_fields: if item.get(date_field): item[date_field] += "T00:00:00Z" -def sync_endpoint(endpoint, schema, mdata, date_fields = None): - singer.write_schema(endpoint, +def sync_endpoint(catalog_entry, schema, mdata, date_fields = None): + singer.write_schema(catalog_entry.tap_stream_id, schema, [PRIMARY_KEY], bookmark_properties = [REPLICATION_KEY]) - start = get_start(endpoint) - url = get_url(endpoint) - data = request(url)[endpoint] + start = get_start(catalog_entry.tap_stream_id) + url = get_url(catalog_entry.tap_stream_id) + data = request(url)[catalog_entry.tap_stream_id] time_extracted = utils.now() + stream_version = get_stream_version(catalog_entry.tap_stream_id) + activate_version_message = singer.ActivateVersionMessage( + stream=catalog_entry.stream, + version=stream_version + ) + for row in data: with Transformer() as transformer: rec = transformer.transform(row, schema, mdata) @@ -152,12 +163,17 @@ def sync_endpoint(endpoint, schema, mdata, date_fields = None): updated_at = rec[REPLICATION_KEY] if updated_at >= start: - singer.write_record(endpoint, - rec, - time_extracted = time_extracted) - utils.update_state(STATE, endpoint, updated_at) + new_record = singer.RecordMessage( + stream=catalog_entry.stream, + record=rec, + version=stream_version, + time_extracted=time_extracted) + singer.write_message(new_record) + + utils.update_state(STATE, catalog_entry.tap_stream_id, updated_at) singer.write_state(STATE) + singer.write_message(activate_version_message) def do_sync(catalog): LOGGER.info("Starting sync") @@ -166,7 +182,7 @@ def do_sync(catalog): mdata = metadata.to_map(stream.metadata) is_selected = metadata.get(mdata, (), 'selected') if is_selected: - sync_endpoint(stream.tap_stream_id, stream.schema.to_dict(), mdata) + sync_endpoint(stream, 
stream.schema.to_dict(), mdata) LOGGER.info("Sync complete") From 2c1569e980837fca74dea1afd4c56973049d4938 Mon Sep 17 00:00:00 2001 From: cdilga Date: Thu, 14 Nov 2019 12:05:44 +1100 Subject: [PATCH 2/5] Include role schemas when building package --- tap_harvest_forecast/schemas/roles.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tap_harvest_forecast/schemas/roles.json b/tap_harvest_forecast/schemas/roles.json index 08b312d..f29a7c9 100644 --- a/tap_harvest_forecast/schemas/roles.json +++ b/tap_harvest_forecast/schemas/roles.json @@ -25,6 +25,19 @@ "type": "integer" } }, + "updated_at": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "updated_by_id": { + "type": [ + "null", + "integer" + ] + }, "placeholder_ids": { "type": "array", "items": { From a2ea5dc238d94789b6d7aff207fc83db8d1a8321 Mon Sep 17 00:00:00 2001 From: cdilga Date: Thu, 14 Nov 2019 12:10:58 +1100 Subject: [PATCH 3/5] Add nullable definition to schema to handle real world case of nulls --- tap_harvest_forecast/schemas/people.json | 35 +++++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/tap_harvest_forecast/schemas/people.json b/tap_harvest_forecast/schemas/people.json index 1894d8a..98cff52 100644 --- a/tap_harvest_forecast/schemas/people.json +++ b/tap_harvest_forecast/schemas/people.json @@ -90,25 +90,46 @@ "type": "object", "properties": { "monday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] }, "tuesday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] }, "wednesday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] }, "thursday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] }, "friday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] }, "saturday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] }, "sunday": { - "type": "boolean" + "type": [ + "boolean", + "null" + ] } }, "additionalProperties": false From 
55bafe2a23b01eebd9d45f0684fe99dd351ba7af Mon Sep 17 00:00:00 2001 From: cdilger Date: Mon, 25 Nov 2019 14:55:21 +1100 Subject: [PATCH 4/5] Add try catch to handle case of no updated_at --- .gitignore | 4 ++++ setup.py | 3 ++- tap_harvest_forecast/__init__.py | 11 +++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 4788c32..858b7a7 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,7 @@ venv.bak/ config.json tap_config.json *.DS_Store + +catalog.json + +*.json diff --git a/setup.py b/setup.py index 74cbb02..95287ed 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,8 @@ "clients.json", "milestones.json", "people.json", - "projects.json" + "projects.json", + "roles.json" ], }, include_package_data=True diff --git a/tap_harvest_forecast/__init__.py b/tap_harvest_forecast/__init__.py index 2e8e87b..df269fe 100644 --- a/tap_harvest_forecast/__init__.py +++ b/tap_harvest_forecast/__init__.py @@ -28,12 +28,11 @@ "milestones", "people", "projects", - "placeholders", "roles" ] PRIMARY_KEY = "id" -REPLICATION_KEY = 'updated_at' +REPLICATION_KEY = "updated_at" BASE_URL = "https://api.forecastapp.com/" BASE_ID_URL = "https://id.getharvest.com/api/v2/" @@ -161,7 +160,11 @@ def sync_endpoint(catalog_entry, schema, mdata, date_fields = None): rec = transformer.transform(row, schema, mdata) append_times_to_dates(rec, date_fields) - updated_at = rec[REPLICATION_KEY] + try: + updated_at = rec[REPLICATION_KEY] + except KeyError: + updated_at = start + if updated_at >= start: new_record = singer.RecordMessage( stream=catalog_entry.stream, @@ -194,7 +197,7 @@ def do_discover(): mdata = metadata.new() mdata = metadata.write(mdata, (), 'table-key-properties', [PRIMARY_KEY]) - mdata = metadata.write(mdata, (), 'valid-replication-keys', [REPLICATION_KEY]) + mdata = metadata.write(mdata, (), 'valid-replication-keys', schema.replication_keys) for field_name in schema['properties'].keys(): if field_name == PRIMARY_KEY or field_name == 
REPLICATION_KEY: From 70a1b66a9e2b4a4850f2b596da32527b8360398f Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 1 Apr 2021 06:28:33 +1100 Subject: [PATCH 5/5] Update README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 5751edb..a862d8a 100644 --- a/README.md +++ b/README.md @@ -86,4 +86,11 @@ Author: Robert Benjamin ([@robertbenjamin](https://github.com/robertbenjamin)) --- +## Common Issues +If you see a completely blank run, like this: +``` +``` +This is caused by the catalog.json not containing `"selected": true` in the metadata sections for each stream. +Just add that and you're good to go + Copyright © 2018 Stitch