Skip to content

Commit 96ad345

Browse files
babo989 and sonnyp authored
segmenter: standardize EcoTaxa naming conventions (#946)
Fixes fairscope/PlanktoScope3#270.

Updates naming in the segmenter to align with EcoTaxa requirements and improve metadata consistency.

---------

Co-authored-by: Sonny Piers <sonny@fairscope.com>
1 parent 28fceb5 commit 96ad345

3 files changed

Lines changed: 12 additions & 8 deletions

File tree

lib/db.js

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ async function getSegmentationFromPath(path) {
190190
continue
191191
}
192192

193-
if (extension === ".tsv" && (file.startsWith("Ecotaxa_") || file.startsWith("ecotaxa_"))) {
193+
if (extension === ".tsv" && file.toLowerCase().startsWith("ecotaxa_")) {
194194
tsv_path = join(path, file)
195195
}
196196
}
@@ -200,9 +200,9 @@ async function getSegmentationFromPath(path) {
200200
if (!tsv_row) return null
201201

202202
const project_name = tsv_row.sample_project
203+
// https://github.com/fairscope/PlanktoScope/pull/946
203204
const sample_id =
204-
tsv_row.sample_id.split(tsv_row.sample_project + "_")[1] ||
205-
tsv_row.sample_id
205+
tsv_row.sample_id.split(sample_id + "_")[1] || tsv_row.sample_id
206206
const acquisition_id =
207207
tsv_row.acq_id.split(sample_id + "_")[1] || tsv_row.acq_id
208208

segmenter/planktoscope/segmenter/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -870,7 +870,7 @@ def segment_path(self, path, ecotaxa_export):
870870
self.__archive_fn = os.path.join(
871871
self.__ecotaxa_path,
872872
# TODO #102 sanitize the filename to remove potential problems with spaces and special characters
873-
f"Ecotaxa_{project}_{acquisition}.zip",
873+
f"Ecotaxa_{sample}_{acquisition}.zip",
874874
)
875875

876876
self.__working_path = path

segmenter/planktoscope/segmenter/ecotaxa.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,13 @@ def ecotaxa_export(archive_filepath, metadata, image_base_path, keep_files=False
237237
logger.error("No objects metadata recorded, cannot continue the export")
238238
return 0
239239

240+
# Concatenated sample+acq id for both TSV columns and the filename.
241+
sample_id = metadata.get("sample_id", "unknown_sample").replace(" ", "_")
242+
acquisition_id = metadata.get("acq_id", "unknown_acq").replace(" ", "_")
243+
combined_id = f"{sample_id}_{acquisition_id}"
244+
metadata["sample_id"] = combined_id
245+
metadata["acq_id"] = combined_id
246+
240247
# sometimes the camera resolution is not exported as string
241248
if not isinstance(metadata["acq_camera_resolution"], str):
242249
metadata["acq_camera_resolution"] = (
@@ -269,10 +276,7 @@ def ecotaxa_export(archive_filepath, metadata, image_base_path, keep_files=False
269276
list(zip(tsv_content.columns, tsv_type_header))
270277
)
271278

272-
# create the filename with project name and acquisition ID
273-
project = metadata.get("sample_project", "unknown_project").replace(" ", "_")
274-
acquisition_id = metadata.get("acq_id", "unknown_acq").replace(" ", "_")
275-
tsv_filename = f"Ecotaxa_{project}_{acquisition_id}.tsv"
279+
tsv_filename = f"Ecotaxa_{sample_id}_{acquisition_id}.tsv"
276280

277281
# add the tsv to the archive
278282
archive.writestr(

0 commit comments

Comments
 (0)