diff --git a/knesset/join_maya_resources.py b/knesset/join_maya_resources.py
new file mode 100644
index 0000000..8d1d205
--- /dev/null
+++ b/knesset/join_maya_resources.py
@@ -0,0 +1,43 @@
+from datapackage_pipelines.wrapper import ingest, spew
+import logging
+
+
+# ingest() returns a 3-tuple; the extra dict appended here serves as the stats object.
+parameters, datapackage, resources, stats = ingest() + ({},)
+
+
+# Resource names in descriptor order, captured before the datapackage is rewritten below.
+resource_names = [descriptor["name"] for descriptor in datapackage["resources"]]
+
+
+def get_row(resource_name, row):
+    """Count the row, rename its "" column to "_" and add the year and rownum (1-based) columns."""
+    stats["num rows"] += 1
+    # rename only when the empty-named column exists, mirroring the guarded schema rename below
+    if "" in row:
+        row["_"] = row.pop("")
+    row["year"] = resource_name
+    row["rownum"] = stats["num rows"]
+    return row
+
+
+def get_resource():
+    """Yield the rows of all input resources as one stream, in resource order."""
+    stats["num rows"] = 0
+    for resource_name, resource in zip(resource_names, resources):
+        for row in resource:
+            yield get_row(resource_name, row)
+
+
+# Keep only the first resource descriptor and turn it into the merged "maya" resource.
+datapackage = dict(datapackage, resources=[datapackage["resources"][0]])
+datapackage["resources"][0].update(name='maya', path='maya.csv')
+for field in datapackage["resources"][0]["schema"]["fields"]:
+    if field["name"] == "":
+        field["name"] = "_"
+datapackage["resources"][0]["schema"]["fields"] += [{"name": "year", "type": "string"},
+                                                    {"name": "rownum", "type": "integer"}]
+datapackage["resources"][0]["schema"]["primaryKey"] = ["rownum"]
+
+
+spew(datapackage, [get_resource()], stats)
diff --git a/knesset/knesset.source-spec.yaml b/knesset/knesset.source-spec.yaml
index 982648c..7421514 100644
--- a/knesset/knesset.source-spec.yaml
+++ b/knesset/knesset.source-spec.yaml
@@ -63,3 +63,47 @@ kns_knessetdates:
         source: "{name}"
         type: datetime
         description: תאריך עדכון אחרון
+
+
+# loads the maya "slim" zip, joins its resources into one and rewrites the maya_slim_feb table
+dump-maya-slim-to-db:
+  pipeline:
+  - run: load_resource
+    parameters:
+      url: https://storage.googleapis.com/knesset-data-pipelines/external-data/maya_slim_feb25_18.zip
+      resource: .*
+  - run: join_maya_resources
+#  - run: dump.to_path
+#    parameters:
+#      out-path: mayaslim
+  - run: dump.to_sql
+    parameters:
+      # set the following env var to connect to oknesset DB from oknesset-db1 server
+      # DPP_DB_ENGINE=postgresql://oknesset:${PGPASSWORD}@localhost:5432/oknesset
+      engine: env://DPP_DB_ENGINE
+      tables:
+        maya_slim_feb:
+          resource-name: maya
+          mode: rewrite
+
+
+# loads the maya "full" zip, joins its resources into one and rewrites the maya_full_feb table
+dump-maya-full-to-db:
+  pipeline:
+  - run: load_resource
+    parameters:
+      url: https://storage.googleapis.com/knesset-data-pipelines/external-data/maya_full_feb25_18.zip
+      resource: .*
+  - run: join_maya_resources
+#  - run: dump.to_path
+#    parameters:
+#      out-path: mayafull
+  - run: dump.to_sql
+    parameters:
+      # set the following env var to connect to oknesset DB from oknesset-db1 server
+      # DPP_DB_ENGINE=postgresql://oknesset:${PGPASSWORD}@localhost:5432/oknesset
+      engine: env://DPP_DB_ENGINE
+      tables:
+        maya_full_feb:
+          resource-name: maya
+          mode: rewrite
diff --git a/pipelines_script.sh b/pipelines_script.sh
index afa07d9..cf082af 100755
--- a/pipelines_script.sh
+++ b/pipelines_script.sh
@@ -27,6 +27,23 @@ if [ "${1}" == "--dump-to-db" ]; then
     " && echo "failed to grant permissions to redash" && RES=1
     echo Great Success!
 
+elif [ "${1}" == "--dump-maya-to-db" ]; then
+    # run the pipelines matching ./knesset/dump-maya-% against the DB, then grant redash read access
+    DB_USER="${DB_USER:-oknesset}"
+    DB_HOST="${DB_HOST:-localhost}"
+    DB_PORT="${DB_PORT:-5432}"
+    DB_NAME="${DB_NAME:-oknesset}"
+    # DB_PASS has no default above, so it must be provided by the environment
+    ( [ -z "${DB_USER}" ] || [ -z "${DB_PASS}" ] || [ -z "${DB_HOST}" ] || [ -z "${DB_PORT}" ] || [ -z "${DB_NAME}" ] ) \
+        && echo "missing required env vars" && exit 1
+    ! DPP_DB_ENGINE="postgresql://${DB_USER}:${DB_PASS}@${DB_HOST}:${DB_PORT}/${DB_NAME}" dpp run ./knesset/dump-maya-% \
+        && echo "failed to dump maya to db" && RES=1
+    ! PGPASSWORD="${DB_PASS}" psql -h $DB_HOST -U $DB_USER -p $DB_PORT -d $DB_NAME -c "
+        grant select on maya_slim_feb to redash_reader;
+        grant select on maya_full_feb to redash_reader;
+    " && echo "failed to grant permissions to redash" && RES=1
+    echo Great Success!
+
 elif [ "${PIPELINES_BATCH_NAME}" == "dataservices1" ]; then
     ! $RUN_PIPELINE_CMD ./committees/kns_committee && RES=1
     ! $RUN_PIPELINE_CMD ./committees/kns_jointcommittee && RES=1