Skip to content

Commit ef9743f

Browse files
authored
Merge pull request #36 from allenai/josh/claude/buoy-voltage-name-pattern
Add bare trailing-voltage pattern for buoy/gear name detection
2 parents 2156729 + 081f765 commit ef9743f

2 files changed

Lines changed: 41 additions & 0 deletions

File tree

ais/src/atlantes/machine_annotation/data_annotate_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@ def get_ais_vessel_category(activity_descs: Optional[list[str]]) -> list[int]:
101101
# reporting convention in gear names (~18% of GEAR records in VHS). The decimal
102102
# voltage format (e.g. [7.7V]) adds ~1,355 incremental catches at 45% GEAR
103103
# and only 1% FISHING.
104+
# - Bare trailing voltage: "\d\s+\d+V\b" (e.g. "368000010 9V"). Whole-number
105+
# volts ("9V", "12V") reported as a separate token after an ID/digit block,
106+
# with no digit after the "V" (so not caught by "\d+V\d+"). The leading
107+
# "\d\s+" prefix requires a preceding digit plus whitespace, which keeps the standalone
108+
# "6V"/"7V" garbled-AIS case excluded (see NOT-included note below).
104109
# - Double-dash separators: "\d+--\d+" (04001--2, 17002--41).
105110
# Gear IDs frequently use double-dashes between numeric fields. Validated
106111
# against VHS: 1,135 names not already caught by other patterns, of which
@@ -128,6 +133,7 @@ def get_ais_vessel_category(activity_descs: Optional[list[str]]) -> list[int]:
128133
r"\d+%",
129134
r"\d+V\d+",
130135
r"\d+\.\d+V",
136+
r"\d\s+\d+V\b",
131137
r"\d+--\d+",
132138
r"Net fish",
133139
r"NetFish",

ais/tests/unit/inference/atlas_entity/test_unit_entity_postprocessor.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,41 @@ def test_postprocess_numeric_gear_id_classified_as_buoy(
716716
assert output.entity_class == "buoy"
717717
assert output.entity_classification_details.postprocess_rule_applied is True
718718

719+
@pytest.mark.parametrize(
720+
"entity_name",
721+
[
722+
"368000010 9V",
723+
"368000002 9V",
724+
"BUOY 992-536 12V",
725+
],
726+
)
727+
def test_postprocess_bare_trailing_voltage_classified_as_buoy(
728+
self,
729+
entity_name: str,
730+
entity_postprocessor_class: AtlasEntityPostProcessor,
731+
) -> None:
732+
"""Test that names with a bare trailing voltage token (e.g. "9V", no decimal
733+
digit) following a digit block are classified as buoys, even for USA MMSIs."""
734+
input_data = EntityPostprocessorInput(
735+
predicted_class=AtlasEntityLabelsTrainingWithUnknown.VESSEL,
736+
entity_classification_details=EntityPostprocessorInputDetails(
737+
model="test", confidence=0.9, outputs=[0.9, 0.1]
738+
),
739+
metadata=EntityMetadata(
740+
binned_ship_type=0,
741+
ais_type=9999,
742+
mmsi="368000010",
743+
entity_name=entity_name,
744+
track_length=800,
745+
file_location=None,
746+
trackId="A:368000010",
747+
flag_code="USA",
748+
),
749+
)
750+
output = entity_postprocessor_class.postprocess(input_data)
751+
assert output.entity_class == "buoy"
752+
assert output.entity_classification_details.postprocess_rule_applied is True
753+
719754
def test_postprocess_raises_error_for_known_binned_ship_type_and_buoy_name(
720755
self, entity_postprocessor_class: AtlasEntityPostProcessor
721756
) -> None:

0 commit comments

Comments
 (0)