Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ tests:
poetry run pytest ${args}

lint:
poetry run autoflake --recursive --in-place --remove-all-unused-imports --remove-unused-variables query
poetry run isort --profile black query
poetry run black query
poetry run autoflake --recursive --in-place --remove-all-unused-imports --remove-unused-variables query scripts
poetry run isort --profile black query scripts
poetry run black query scripts

# Refresh the countries.json file from the ProductOwner taxonomy
refresh_countries:
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ services:
- "${POSTGRES_EXPOSE:-5512}:5432"
volumes:
- dbdata:/var/lib/postgresql/data
# a shared folder makes it easy to share data for import / exports
- ./data:/opt/data
networks:
- common_net

Expand Down
3 changes: 2 additions & 1 deletion query/services/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
INGREDIENTS_TAG,
create_ingredients_from_staging,
)
from ..tables.product_tags import COUNTRIES_TAG, TAG_TABLES, create_tags_from_staging
from ..tables.product_tags import create_tags_from_staging
from ..tables.product_tags_list import COUNTRIES_TAG, TAG_TABLES
from ..tables.settings import (
get_last_updated,
set_last_updated,
Expand Down
72 changes: 1 addition & 71 deletions query/tables/product_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,77 +2,7 @@
The order of tags is not preserved"""

from ..database import create_record, get_rows_affected

COUNTRIES_TAG = "countries_tags"
tag_tables_v1 = {
"_keywords": "product_keywords_tag",
"additives_tags": "product_additives_tag",
"allergens_tags": "product_allergens_tag",
"amino_acids_tags": "product_amino_acids_tag",
"brands_tags": "product_brands_tag",
"categories_properties_tags": "product_categories_properites_tag",
"categories_tags": "product_categories_tag",
"checkers_tags": "product_checkers_tag",
"cities_tags": "product_cities_tag",
"codes_tags": "product_codes_tag",
"correctors_tags": "product_correctors_tag",
COUNTRIES_TAG: "product_countries_tag",
"data_quality_bugs_tags": "product_data_quality_bugs_tag",
"data_quality_errors_tags": "product_data_quality_errors_tag",
"data_quality_tags": "product_data_quality_tag",
"data_quality_warnings_tags": "product_data_quality_warnings_tag",
"data_sources_tags": "product_data_sources_tag",
"debug_tags": "product_debug_tag",
"ecoscore_tags": "product_ecoscore_tag",
"editors_tags": "product_editors_tag",
"emb_codes_tags": "product_emb_codes_tag",
"entry_dates_tags": "product_entry_dates_tag",
"food_groups_tags": "product_food_groups_tag",
"informers_tags": "product_informers_tag",
"ingredients_analysis_tags": "product_ingredients_analysis_tag",
"ingredients_from_palm_oil_tags": "product_ingredients_from_palm_oil_tag",
"ingredients_n_tags": "product_ingredients_ntag",
"ingredients_original_tags": "product_ingredients_original_tag",
"ingredients_tags": "product_ingredients_tag",
"ingredients_that_may_be_from_palm_oil_tags": "product_ingredients_that_may_be_from_palm_oil_tag",
"labels_tags": "product_labels_tag",
"languages_tags": "product_languages_tag",
"last_check_dates_tags": "product_last_check_dates_tag",
"last_edit_dates_tags": "product_last_edit_dates_tag",
"last_image_dates_tags": "product_latest_image_dates_tag",
"manufacturing_places_tags": "product_manufacturing_places_tag",
"minerals_tags": "product_minerals_tag",
"misc_tags": "product_misc_tag",
"nova_groups_tags": "product_nova_groups_tag",
"nucleotides_tags": "product_nucleotides_tag",
"nutrient_levels_tags": "product_nutrient_levels_tag",
"nutriscore_2021_tags": "product_nutriscore2021tag",
"nutriscore_2023_tags": "product_nutriscore2023tag",
"nutriscore_tags": "product_nutriscore_tag",
"nutrition_grades_tags": "product_nutrition_grades_tag",
"origins_tags": "product_origins_tag",
"other_nutritional_substances_tags": "product_other_nutritional_substances_tag",
"packaging_materials_tags": "product_packaging_materials_tag",
"packaging_recycling_tags": "product_packaging_recycling_tag",
"packaging_shapes_tags": "product_packaging_shapes_tag",
"packaging_tags": "product_packaging_tag",
"periods_after_opening_tags": "product_periods_after_opening_tag",
"photographers_tags": "product_photographers_tag",
"pnns_groups_1_tags": "product_pnns_groups1tag",
"pnns_groups_2_tags": "product_pnns_groups2tag",
"popularity_tags": "product_popularity_tag",
"purchase_places_tags": "product_purchase_places_tag",
"states_tags": "product_states_tag",
"stores_tags": "product_stores_tag",
"teams_tags": "product_teams_tag",
"traces_tags": "product_traces_tag",
"unknown_nutrients_tags": "product_unknown_nutrients_tag",
"vitamins_tags": "product_vitamins_tag",
"weighers_tags": "product_weighers_tag",
}

# Append additional tag tables to this list when we introduce them and then add a migration to create the new tables
TAG_TABLES = tag_tables_v1
from .product_tags_list import TAG_TABLES, tag_tables_v1


async def create_tables(transaction, tag_tables):
Expand Down
75 changes: 75 additions & 0 deletions query/tables/product_tags_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""List of tags tables"""

# we keep it in this simple module to enable easy import eg. for export script.

# Name of the countries tag; reused below as a key of tag_tables_v1 so that
# other modules can refer to this entry without repeating the literal.
COUNTRIES_TAG = "countries_tags"
# First version of the tag tables mapping: each key is a tag name and each
# value is the name of the table associated with that tag.
# presumably the keys are the tag field names found on incoming product data
# - verify against the ingestion code.
tag_tables_v1 = {
"_keywords": "product_keywords_tag",
"additives_tags": "product_additives_tag",
"allergens_tags": "product_allergens_tag",
"amino_acids_tags": "product_amino_acids_tag",
"brands_tags": "product_brands_tag",
# NOTE(review): "properites" in the table name is spelled differently from
# the key; presumably it matches the already-created table - confirm before
# changing it.
"categories_properties_tags": "product_categories_properites_tag",
"categories_tags": "product_categories_tag",
"checkers_tags": "product_checkers_tag",
"cities_tags": "product_cities_tag",
"codes_tags": "product_codes_tag",
"correctors_tags": "product_correctors_tag",
COUNTRIES_TAG: "product_countries_tag",
"data_quality_bugs_tags": "product_data_quality_bugs_tag",
"data_quality_errors_tags": "product_data_quality_errors_tag",
"data_quality_tags": "product_data_quality_tag",
"data_quality_warnings_tags": "product_data_quality_warnings_tag",
"data_sources_tags": "product_data_sources_tag",
"debug_tags": "product_debug_tag",
"ecoscore_tags": "product_ecoscore_tag",
"editors_tags": "product_editors_tag",
"emb_codes_tags": "product_emb_codes_tag",
"entry_dates_tags": "product_entry_dates_tag",
"food_groups_tags": "product_food_groups_tag",
"informers_tags": "product_informers_tag",
"ingredients_analysis_tags": "product_ingredients_analysis_tag",
"ingredients_from_palm_oil_tags": "product_ingredients_from_palm_oil_tag",
"ingredients_n_tags": "product_ingredients_ntag",
"ingredients_original_tags": "product_ingredients_original_tag",
"ingredients_tags": "product_ingredients_tag",
"ingredients_that_may_be_from_palm_oil_tags": "product_ingredients_that_may_be_from_palm_oil_tag",
"labels_tags": "product_labels_tag",
"languages_tags": "product_languages_tag",
"last_check_dates_tags": "product_last_check_dates_tag",
"last_edit_dates_tags": "product_last_edit_dates_tag",
"last_image_dates_tags": "product_latest_image_dates_tag",
"manufacturing_places_tags": "product_manufacturing_places_tag",
"minerals_tags": "product_minerals_tag",
"misc_tags": "product_misc_tag",
"nova_groups_tags": "product_nova_groups_tag",
"nucleotides_tags": "product_nucleotides_tag",
"nutrient_levels_tags": "product_nutrient_levels_tag",
"nutriscore_2021_tags": "product_nutriscore2021tag",
"nutriscore_2023_tags": "product_nutriscore2023tag",
"nutriscore_tags": "product_nutriscore_tag",
"nutrition_grades_tags": "product_nutrition_grades_tag",
"origins_tags": "product_origins_tag",
"other_nutritional_substances_tags": "product_other_nutritional_substances_tag",
"packaging_materials_tags": "product_packaging_materials_tag",
"packaging_recycling_tags": "product_packaging_recycling_tag",
"packaging_shapes_tags": "product_packaging_shapes_tag",
"packaging_tags": "product_packaging_tag",
"periods_after_opening_tags": "product_periods_after_opening_tag",
"photographers_tags": "product_photographers_tag",
"pnns_groups_1_tags": "product_pnns_groups1tag",
"pnns_groups_2_tags": "product_pnns_groups2tag",
"popularity_tags": "product_popularity_tag",
"purchase_places_tags": "product_purchase_places_tag",
"states_tags": "product_states_tag",
"stores_tags": "product_stores_tag",
"teams_tags": "product_teams_tag",
"traces_tags": "product_traces_tag",
"unknown_nutrients_tags": "product_unknown_nutrients_tag",
"vitamins_tags": "product_vitamins_tag",
"weighers_tags": "product_weighers_tag",
}

# Append additional tag tables to this list when we introduce them
# and then add a migration to create the new tables
# Note: TAG_TABLES is bound to the very same dict object as tag_tables_v1
# (no copy is made), so mutating one is visible through the other.
TAG_TABLES = tag_tables_v1
151 changes: 151 additions & 0 deletions scripts/export_db_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python3
import argparse
import datetime as dt
import sys
from textwrap import dedent

sys.path.append(".")
from query.tables import product_tags_list

# Help text of the command-line tool: get_parser() passes this string to
# argparse as the parser description.
DESCRIPTION = """
Generate a script to export some data from openfoodfacts-query database,
starting from the event table.

It is intended to get data for developers.

It can be piped into the docker psql, this will generates exports in a folder,
that you can then either
get from a shared folder (typically /opt/data mapped to ./data),
or `docker cp -a container_name:/path/to/folder local/path`

This scripts currently only export part of the tables.
"""
# TODO: add product_country, product_scans_by_country, country, contributor


def _psql_quote(value):
    """Return ``value`` as a single-quoted argument for a psql meta-command.

    Inside single quotes psql treats a backslash as an escape character and
    expects a literal quote to be doubled, so both are escaped here.

    Raises:
        ValueError: if the value spans several lines (a psql meta-command
            ends at the end of its line).
    """
    text = str(value)
    if "\n" in text or "\r" in text:
        raise ValueError(f"value must fit on a single line: {text!r}")
    escaped = text.replace("\\", "\\\\").replace("'", "''")
    return f"'{escaped}'"


def generate_export_script(from_date, to_date, export_dir):
    """Build a psql script exporting recently updated data to CSV files.

    The script creates ``export_dir``, stores the three arguments in psql
    variables and then issues one ``COPY ... TO`` per exported table:
    ``product_update_event``, ``product_update``, ``product`` and every table
    listed in ``product_tags_list.TAG_TABLES``. Rows are selected through the
    update events whose ``received_at`` lies between ``from_date`` and
    ``to_date``.

    Args:
        from_date: start of the time window, as text understood by PostgreSQL.
        to_date: end of the time window, as text understood by PostgreSQL.
        export_dir: directory where the CSV files are written; it is created
            and made writable by the generated script itself.

    Returns:
        list[str]: the script fragments, in execution order.

    Raises:
        ValueError: if one of the arguments contains a line break.
    """
    # local import: shlex is only needed to build the shell lines below
    import shlex

    outputs = []

    def cmd(sql):
        outputs.append(dedent(sql))

    # Escape the caller-provided values once. The SQL statements below only
    # reference them through psql variables (:'name'), which psql quotes as
    # SQL literals itself; the \set and \! lines are the only places where
    # the raw text is interpolated.
    psql_dir = _psql_quote(export_dir)
    psql_from = _psql_quote(from_date)
    psql_to = _psql_quote(to_date)
    # the remainder of a \! line is handed verbatim to the shell
    shell_dir = shlex.quote(str(export_dir))

    cmd(
        f"""
        \\! mkdir -p {shell_dir}
        -- enable writing by postgres process
        \\! chmod a+rwX {shell_dir}
        \\set export_dir {psql_dir}
        \\set from_date {psql_from}
        \\set to_date {psql_to}
        """
    )

    cmd(
        """
        \\set export_path :export_dir/product_update_event.csv

        COPY (
            select * from product_update_event as e
            where
                e.received_at::timestamp between :'from_date' and :'to_date'
        )
        TO :'export_path' DELIMITER ',' CSV HEADER;
        """
    )

    cmd(
        """
        \\set export_path :export_dir/product_update.csv

        COPY (
            select p.* from product_update as p
            left join product_update_event as e
                on p.event_id = e.id
            where
                e.received_at::timestamp between :'from_date' and :'to_date'
        )
        TO :'export_path' DELIMITER ',' CSV HEADER;
        """
    )
    cmd(
        """
        \\set export_path :export_dir/product.csv
        COPY (
            select * from product
            where id in
            (
                select p.product_id from product_update as p
                left join product_update_event as e
                    on p.event_id = e.id
                where e.received_at::timestamp between :'from_date' and :'to_date'
            )
        )
        TO :'export_path' DELIMITER ',' CSV HEADER;
        """
    )
    # one export per tag table; the names come from the TAG_TABLES constant,
    # not from user input, so they are interpolated as is
    for table_name in product_tags_list.TAG_TABLES.values():
        cmd(
            f"""
            \\set table_name {table_name}
            \\set export_path :export_dir/:table_name.csv

            COPY (
                select * from :table_name
                where product_id in
                (
                    select p.product_id from product_update as p
                    left join product_update_event as e
                        on p.event_id = e.id
                    where e.received_at::timestamp between :'from_date' and :'to_date'
                )
            )
            TO :'export_path' DELIMITER ',' CSV HEADER;
            """
        )
    return outputs


def get_parser():
    """Build the command-line parser of the export script.

    The defaults are computed from ``datetime.now()`` at the time the parser
    is built: the export window covers the last 20 minutes and the
    destination is a timestamped sub-folder of ``/opt/data/exports/``.

    Returns:
        argparse.ArgumentParser: parser with two optional positional
        arguments, ``from_date`` and ``to_date``, and a ``--dest`` option.
    """
    date_format = "%Y-%m-%d %H:%M:%S"
    now = dt.datetime.now()
    default_start = (now - dt.timedelta(minutes=20)).strftime(date_format)
    default_end = now.strftime(date_format)
    default_folder = "/opt/data/exports/" + now.strftime("%Y-%m-%d_%H:%M:%S")

    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument(
        "from_date",
        default=default_start,
        type=str,
        nargs="?",
        help=(
            "Start date in iso format, e.g. 2025-11-21 11:00:00\n"
            "If empty, it's now - 20 minutes"
        ),
    )
    parser.add_argument(
        "to_date",
        default=default_end,
        type=str,
        nargs="?",
        help=(
            "End date in iso format, e.g. 2025-11-21 11:20:00\n"
            "If empty, it's now"
        ),
    )
    parser.add_argument(
        "--dest",
        default=default_folder,
        type=str,
        help=(
            # keep the trailing space: the two literals are concatenated
            "Target directory, it will be created if it does not exist, "
            f"defaults to {default_folder} (according to current time)"
        ),
    )
    return parser


if __name__ == "__main__":
    # Parse the command line, build the psql script and write it to stdout
    # so that it can be piped into psql.
    cli_args = get_parser().parse_args()
    script_parts = generate_export_script(
        cli_args.from_date,
        cli_args.to_date,
        cli_args.dest,
    )
    script_text = "\n".join(script_parts)
    print(script_text)
Loading