Skip to content

Commit 1ee7c23

Browse files
authored
Add gbif dataset filter (#5030)
* Add gbif dataset filter * Cache datasetkeys
1 parent ddd8539 commit 1ee7c23

File tree

11 files changed

+446
-7
lines changed

11 files changed

+446
-7
lines changed

bims/api_urls.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
TaxonTagAutocompleteAPIView, AddTagAPIView, TaxonProposalDetail, IUCNStatusFetchView, TaxonTreeJsonView,
4141
HarvestIUCNStatus, ApproveTaxonGroupProposalsView, ClearTaxaNotAssociatedInTaxonGroup
4242
)
43+
from bims.api_views.dataset import DatasetAutocompleteAPIView
4344
from bims.api_views.cluster import ClusterList
4445
from bims.api_views.collection import (
4546
CollectionDownloader
@@ -380,6 +381,9 @@
380381
re_path(r'^taxon-tag-autocomplete/$',
381382
TaxonTagAutocompleteAPIView.as_view(),
382383
name='taxon-tag-autocomplete'),
384+
re_path(r'^dataset-autocomplete/$',
385+
DatasetAutocompleteAPIView.as_view(),
386+
name='dataset-autocomplete'),
383387
path('taxonomy/<int:pk>/add-tag/',
384388
AddTagAPIView.as_view(),
385389
name='add-tag-taxon'),

bims/api_views/dataset.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from rest_framework.views import APIView
2+
from rest_framework.response import Response
3+
from django.core.cache import cache
4+
from bims.models.dataset import Dataset
5+
from bims.models.biological_collection_record import BiologicalCollectionRecord
6+
from bims.serializers.dataset_serializer import DatasetSerializer
7+
8+
9+
# Cache entry holding the distinct dataset keys referenced by GBIF records.
GBIF_DATASET_KEYS_CACHE_KEY = 'gbif_dataset_keys'
# Lifetime of that cache entry, in seconds (24 hours).
GBIF_DATASET_KEYS_CACHE_TIMEOUT = 24 * 60 * 60
12+
13+
14+
def clear_dataset_cache():
    """Drop the cached list of GBIF dataset keys.

    Deletes the cache entry stored under ``GBIF_DATASET_KEYS_CACHE_KEY``.
    Intended to be called once new GBIF data has been imported, so that
    stale keys are not served from the cache.
    """
    cache.delete(GBIF_DATASET_KEYS_CACHE_KEY)
20+
21+
22+
class DatasetAutocompleteAPIView(APIView):
    """
    Autocomplete API for GBIF datasets.

    Only datasets whose ``uuid`` matches the ``dataset_key`` of at least one
    collection record with ``source_collection='gbif'`` are returned.

    Query params:
    - q: search term for dataset name (at least 2 characters; at most
      10 matches are returned)
    - ids: comma-separated database IDs for bootstrapping; when present
      it takes precedence over ``q``

    Responses:
    - 200 with a list of serialized datasets (possibly empty)
    - 400 when ``ids`` contains a value that is not an integer
    """

    @staticmethod
    def _gbif_dataset_keys():
        """Return the distinct non-empty GBIF dataset keys, using the cache."""
        dataset_keys = cache.get(GBIF_DATASET_KEYS_CACHE_KEY)
        # An empty list is a valid cached value, so only None means "miss".
        if dataset_keys is None:
            key_values = BiologicalCollectionRecord.objects.filter(
                source_collection='gbif'
            ).exclude(
                dataset_key__isnull=True
            ).values_list('dataset_key', flat=True).distinct()

            # Materialize to a plain list (dropping empty values) before
            # storing it in the cache.
            dataset_keys = list(filter(None, set(key_values)))
            cache.set(
                GBIF_DATASET_KEYS_CACHE_KEY,
                dataset_keys,
                GBIF_DATASET_KEYS_CACHE_TIMEOUT
            )
        return dataset_keys

    def get(self, request, format=None):
        query = request.query_params.get('q', '')
        ids_param = request.query_params.get('ids', '')

        # Validate the request before touching the cache or the database.
        ids_list = None
        if ids_param:
            try:
                ids_list = [
                    int(token.strip())
                    for token in ids_param.split(',')
                    if token.strip()
                ]
            except ValueError:
                # Malformed client input is a bad request, not a server error.
                return Response(
                    {'error': 'ids must be a comma-separated list of integers'},
                    status=400
                )
        elif len(query) < 2:
            return Response([])

        # Base queryset: only datasets with GBIF records
        base_qs = Dataset.objects.filter(
            uuid__in=self._gbif_dataset_keys()
        )

        if ids_list is not None:
            # Bootstrap mode: fetch by database IDs
            datasets = base_qs.filter(id__in=ids_list)
        else:
            # Search mode: filter by name
            datasets = base_qs.filter(name__icontains=query).distinct()[:10]

        serializer = DatasetSerializer(datasets, many=True)
        return Response(serializer.data)

bims/api_views/search.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,30 @@ def taxon_tags(self):
366366

367367
return [t.strip() for t in raw_tags.split(',') if t.strip()]
368368

369+
@property
def dataset_keys(self):
    """
    Dataset UUIDs used to restrict the search to specific datasets.

    The ``datasetKeys`` request value is read via ``parse_request_json``
    and treated as a collection of ``Dataset`` database IDs; the ``uuid``
    of every matching ``Dataset`` row is returned. An empty list is
    returned when the parameter is missing/empty or when converting the
    IDs raises ``ValueError`` or ``TypeError``.
    """
    from bims.models.dataset import Dataset

    raw_ids = self.parse_request_json('datasetKeys')
    if not raw_ids:
        return []

    try:
        # The request carries database IDs; look up their UUIDs.
        primary_keys = list(map(int, raw_ids))
        matching = Dataset.objects.filter(id__in=primary_keys)
        return list(matching.values_list('uuid', flat=True))
    except (ValueError, TypeError):
        return []
392+
369393
@property
370394
def polygon(self):
371395
try:
@@ -669,6 +693,13 @@ def process_search(self):
669693
source_collection_filters.append(source_collection)
670694
if source_collection_filters:
671695
filters['source_collection__in'] = source_collection_filters
696+
if self.dataset_keys:
697+
filters['dataset_key__in'] = self.dataset_keys
698+
# Ensure GBIF is included when datasets are specified
699+
if 'source_collection__in' not in filters:
700+
filters['source_collection__in'] = ['gbif']
701+
elif 'gbif' not in filters['source_collection__in']:
702+
filters['source_collection__in'].append('gbif')
672703
if self.endemic:
673704
endemism_list = []
674705
for endemic in self.endemic:

bims/scripts/import_gbif_occurrences.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,5 +939,9 @@ def fetch_and_process_gbif_data(country_codes, geom_str=''):
939939
except Exception as e:
940940
log_to_file_or_logger(log_file_path, message=str(e))
941941
return str(e)
942+
finally:
943+
# Clear the dataset keys cache after GBIF import
944+
from bims.api_views.dataset import clear_dataset_cache
945+
clear_dataset_cache()
942946

943947
return message
bims/serializers/dataset_serializer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from rest_framework import serializers
2+
from bims.models.dataset import Dataset
3+
4+
5+
class DatasetSerializer(serializers.ModelSerializer):
    """Serialize a ``Dataset`` as its ``id``, ``name`` and ``abbreviation``."""

    class Meta:
        model = Dataset
        fields = [
            'id',
            'name',
            'abbreviation',
        ]

bims/static/js/router.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define(['backbone', 'views/olmap', 'utils/events_connector', 'shared'], function
1717
'taxon', 'months', 'siteId', 'search', 'collector',
1818
'category', 'yearFrom', 'yearTo', 'userBoundary', 'referenceCategory',
1919
'boundary', 'reference', 'endemic', 'invasions', 'conservationStatus', 'spatialFilter',
20-
'taxon', 'validated', 'modules', 'sourceCollection', 'ecologicalCategory',
20+
'taxon', 'validated', 'modules', 'sourceCollection', 'datasetKeys', 'ecologicalCategory',
2121
'module', 'tags', 'rank', 'orderBy', 'siteIdOpen', 'polygon',
2222
'dst', 'ecosystemType'
2323
];

bims/static/js/shared.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define(['backbone', 'underscore', 'utils/storage', 'utils/color', 'utils/url', '
1212
"&reference=<%= reference %>&endemic=<%= endemic %>&invasions=<%= invasions %>&conservationStatus=<%= conservationStatus %>" +
1313
"&modules=<%= modules %>&validated=<%= validated %>&sourceCollection=<%= sourceCollection %>" +
1414
"&module=<%= module %>&ecologicalCategory=<%= ecologicalCategory %>&rank=<%= rank %>"+
15-
"&tags=<%= tags %>" +
15+
"&tags=<%= tags %>&datasetKeys=<%= datasetKeys %>" +
1616
"&siteIdOpen=<%= siteIdOpen %>&orderBy=<%= orderBy %>&polygon=<%= polygon %>&dst=<%= dst %>&ecosystemType=<%= ecosystemType %>",
1717
LocationSiteDetailXHRRequest: null,
1818
WetlandDashboardXHRRequest: null,

0 commit comments

Comments
 (0)