Update csv output for fada taxa list (#5023)

dimasciput · web-flow · commit f79397416870 · 2026-01-14T14:10:48.000Z
diff --git a/bims/fada/__init__.py b/bims/fada/__init__.py
@@ -0,0 +1 @@
+# FADA-specific functionality
diff --git a/bims/fada/taxa_list.py b/bims/fada/taxa_list.py
@@ -0,0 +1,139 @@
+# coding=utf-8
+"""
+FADA-specific taxa list export configuration and utilities.
+"""
+from bims.scripts.species_keys import BIOGRAPHIC_DISTRIBUTIONS
+
+# FADA-only export columns (these names double as additional_data keys)
+FADA_ADDITIONAL_KEYS = [
+    'Taxonomic Comments',
+    'Taxonomic References',
+    'Biogeographic Comments',
+    'Biogeographic References',
+    'Environmental Comments',
+    'Environmental References',
+    'Conservation Comments',
+    'Conservation References',
+]
+
+# Columns to exclude for FADA exports (raw keys and their display titles)
+FADA_EXCLUDED_COLUMNS = [
+    'variety', 'Variety',
+    'origin', 'Origin',
+    'endemism', 'Endemism',
+    'invasion', 'Invasion',
+    'conservation_status_global', 'Conservation status global',
+    'conservation_status_national', 'Conservation status national',
+    'gbif_coordinate_uncertainty_m', 'Gbif coordinate uncertainty m',
+    'gbif_coordinate_precision', 'Gbif coordinate precision',
+    'cites_listing', 'Cites listing', 'CITES listing',
+    # The combined name-and-authority column is excluded (author is separate)
+    'scientific_name_and_authority', 'Scientific name and authority',
+]
+
+# Biogeographic distribution column names in sorted (alphabetical) order
+FADA_BIOGRAPHIC_ORDER = sorted(BIOGRAPHIC_DISTRIBUTIONS)
+
+
+def get_environmental_tags_order():
+    """
+    Return environmental tag names ordered by TagGroup order, then tag name.
+    A name that occurs in several groups keeps its first position.
+    """
+    from bims.models import TagGroup
+
+    ordered_names = {}  # dict keys: insertion-ordered and de-duplicated
+    # No prefetch here: order_by() below runs its own query for each group.
+    for tag_group in TagGroup.objects.order_by('order'):
+        names = tag_group.tags.order_by('name').values_list('name', flat=True)
+        ordered_names.update(dict.fromkeys(names))
+    return list(ordered_names)
+
+
+def reorder_headers_for_fada(headers):
+    """
+    Filter and reorder raw taxa-list headers for the FADA export.
+
+    Columns listed in FADA_EXCLUDED_COLUMNS are dropped. The rest are laid
+    out as: base columns in their incoming order (after the moves below),
+    taxonomic notes right after 'accepted_taxon', biogeographic distribution
+    columns plus their notes right after 'gbif_link', environmental tag
+    columns (TagGroup order) plus their notes, then the conservation notes.
+    If an anchor column is absent, the columns that would follow it are
+    appended after the base columns instead of being lost or duplicated.
+
+    :param headers: iterable of raw header names.
+    :return: a new list of header names; the input is left untouched.
+    """
+    excluded = set(FADA_EXCLUDED_COLUMNS)
+    headers = [h for h in headers if h not in excluded]
+    present = set(headers)
+
+    # Position of every environmental tag, as defined by TagGroup ordering.
+    env_rank = {
+        tag: idx for idx, tag in enumerate(get_environmental_tags_order())
+    }
+    biographic = set(FADA_BIOGRAPHIC_ORDER)
+    additional = set(FADA_ADDITIONAL_KEYS)
+
+    def _requested(*names):
+        """Return the given note columns that are present, order kept."""
+        return [name for name in names if name in present]
+
+    # References come before comments, except for the conservation pair.
+    taxonomic_notes = _requested(
+        'Taxonomic References', 'Taxonomic Comments')
+    biogeographic_notes = _requested(
+        'Biogeographic References', 'Biogeographic Comments')
+    environmental_notes = _requested(
+        'Environmental References', 'Environmental Comments')
+    conservation_notes = _requested(
+        'Conservation Comments', 'Conservation References')
+
+    base_headers, biographic_headers, environmental_headers = [], [], []
+    for h in headers:
+        if h in biographic:
+            biographic_headers.append(h)
+        elif h in env_rank:
+            environmental_headers.append(h)
+        elif h not in additional:
+            # (FADA note columns fall through; they are placed further down.)
+            base_headers.append(h)
+
+    def _move(name, anchor, offset):
+        """Put `name` before (offset 0) or after (offset 1) `anchor`."""
+        if name in base_headers and anchor in base_headers:
+            # Remove first so the anchor index is not shifted by the removal.
+            base_headers.remove(name)
+            base_headers.insert(base_headers.index(anchor) + offset, name)
+
+    _move('fada_id', 'taxon_rank', 0)
+    _move('species_group', 'species', 0)
+    _move('author', 'taxon', 1)
+    _move('accepted_taxon', 'taxonomic_status', 1)
+
+    biographic_headers.sort()
+    environmental_headers.sort(key=env_rank.__getitem__)
+    biogeographic_block = biographic_headers + biogeographic_notes
+
+    result = []
+    taxonomic_placed = biogeographic_placed = False
+    for h in base_headers:
+        result.append(h)
+        if h == 'accepted_taxon' and not taxonomic_placed:
+            result.extend(taxonomic_notes)
+            taxonomic_placed = True
+        elif h == 'gbif_link' and not biogeographic_placed:
+            result.extend(biogeographic_block)
+            biogeographic_placed = True
+
+    # Fallbacks keyed on what was actually placed, not on another column.
+    if not taxonomic_placed:
+        result.extend(taxonomic_notes)
+    if not biogeographic_placed:
+        result.extend(biogeographic_block)
+    result.extend(environmental_headers)
+    result.extend(environmental_notes)
+    result.extend(conservation_notes)
+
+    return result
diff --git a/bims/tasks/download_taxa_list.py b/bims/tasks/download_taxa_list.py
@@ -13,11 +13,15 @@
 from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
 from reportlab.pdfbase import pdfmetrics
 from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase.pdfmetrics import registerFontFamily
 
 from bims.scripts.species_keys import (
     ACCEPTED_TAXON, TAXON_RANK,
     COMMON_NAME, CLASS, SUBSPECIES,
-    CITES_LISTING, FADA_ID, ON_GBIF, GBIF_LINK
+    CITES_LISTING, FADA_ID, ON_GBIF, GBIF_LINK,
+    SPECIES_GROUP, SUBGENUS, SUBTRIBE, SUBFAMILY,
+    SUBORDER, SUBCLASS, SUBPHYLUM, SPECIES, GENUS,
+    TRIBE, FAMILY, ORDER, PHYLUM, KINGDOM, AUTHORS
 )
 from bims.utils.domain import get_current_domain
 
@@ -41,22 +45,14 @@ def process_download_csv_taxa_list(request, csv_file_path, filename, user_id, do
     from bims.models.taxon_group import TaxonGroup
     from bims.models import TaxonGroupCitation
     from bims.templatetags import is_fada_site
+    from bims.fada.taxa_list import (
+        FADA_ADDITIONAL_KEYS,
+        reorder_headers_for_fada,
+    )
 
     is_fada = is_fada_site()
     sanparks_project = is_sanparks_project()
 
-    # FADA-only additional_data keys (column names)
-    FADA_ADDITIONAL_KEYS = [
-        'Taxonomic Comments',
-        'Taxonomic References',
-        'Biogeographic Comments',
-        'Biogeographic References',
-        'Environmental Comments',
-        'Environmental References',
-        'Conservation Comments',
-        'Conservation References',
-    ]
-
     def _from_additional_data(instance, k):
         data = getattr(instance, 'additional_data', None)
         if not data:
@@ -120,25 +116,45 @@ def __init__(self, get_data, user=None):
     additional_attributes_titles = set()
 
     def update_headers(_headers):
+        header_map = {
+            'class_name': CLASS,
+            'taxon_rank': TAXON_RANK,
+            'common_name': COMMON_NAME,
+            'accepted_taxon': ACCEPTED_TAXON,
+            'fada_id': FADA_ID,
+            'species_group': SPECIES_GROUP,
+            'subgenus': SUBGENUS,
+            'subtribe': SUBTRIBE,
+            'subfamily': SUBFAMILY,
+            'suborder': SUBORDER,
+            'subclass': SUBCLASS,
+            'subphylum': SUBPHYLUM,
+            'subspecies': SUBSPECIES,
+            'species': SPECIES,
+            'genus': GENUS,
+            'tribe': TRIBE,
+            'family': FAMILY,
+            'order': ORDER,
+            'phylum': PHYLUM,
+            'kingdom': KINGDOM,
+            'cites_listing': CITES_LISTING,
+            'author': AUTHORS,
+            'taxonomic_status': 'Taxonomic Status',
+        }
+
         _updated_headers = []
         for header in _headers:
             if header in FADA_ADDITIONAL_KEYS:
                 _updated_headers.append(header)
                 continue
             original = header
-            if header == 'class_name':
-                header = CLASS
-            elif header == 'taxon_rank':
-                header = TAXON_RANK
-            elif header == 'common_name':
-                header = COMMON_NAME
-            elif header == 'accepted_taxon':
-                header = ACCEPTED_TAXON
-            elif header == 'fada_id':
-                header = FADA_ID
+
+            if header.lower() in header_map:
+                header = header_map[header.lower()]
                 _updated_headers.append(header)
                 continue
-            elif header.lower().strip() in ['on_gbif', 'on gbif']:
+
+            if header.lower().strip() in ['on_gbif', 'on gbif']:
                 header = ON_GBIF
                 _updated_headers.append(header)
                 continue
@@ -159,10 +175,6 @@ def update_headers(_headers):
                 and original not in additional_attributes_titles
             ):
                 header = header.replace('_', ' ').capitalize()
-            if header == 'Subspecies':
-                header = SUBSPECIES
-            if header.lower().strip() == 'cites_listing':
-                header = CITES_LISTING
 
             _updated_headers.append(header)
         return _updated_headers
@@ -189,6 +201,7 @@ def update_headers(_headers):
         for k in FADA_ADDITIONAL_KEYS:
             if k not in raw_headers:
                 raw_headers.append(k)
+        raw_headers = reorder_headers_for_fada(raw_headers)
 
     updated_headers = update_headers(raw_headers)
 
@@ -296,8 +309,6 @@ def get_checklist_paragraphs(taxon_group, taxonomies):
         pdfmetrics.registerFont(TTFont('Garamond-Italic', os.path.join(font_dir, 'EBGaramond-Italic.ttf')))
         pdfmetrics.registerFont(TTFont('Garamond-BoldItalic', os.path.join(font_dir, 'EBGaramond-BoldItalic.ttf')))
 
-        # Register font family so <i> and <b> tags work in Paragraphs
-        from reportlab.pdfbase.pdfmetrics import registerFontFamily
         registerFontFamily(
             'Garamond',
             normal='Garamond',
@@ -401,15 +412,15 @@ def get_checklist_paragraphs(taxon_group, taxonomies):
             genus_line = g_obj.canonical_name
             genus_author = ''
             if g_obj.author and g_obj.author not in genus_line:
-                genus_author += f", {g_obj.author}"
+                genus_author += f" {g_obj.author}"
 
             paragraphs.append(Paragraph(f"<i>{genus_line}</i>{genus_author}", genus_style))
             paragraphs.append(Spacer(1, 10))
 
             for s_obj in info['species']:
                 sp_line = f'<i>{s_obj.canonical_name}</i>'
                 if s_obj.author:
-                    sp_line += f", {s_obj.author}"
+                    sp_line += f" {s_obj.author}"
                 if s_obj.origin:
                     sp_line += f" : {s_obj.origin.upper()}"
                 if "type species" in (s_obj.additional_data or {}):