@@ -173,6 +173,16 @@ def size_mb(self) -> Optional[int]:
173173 "Protein" : ["GENE_OR_GENE_PRODUCT" , "PROTEIN" ],
174174 "Pathology" : ["DISEASE" , "PATHOLOGY" ],
175175 "Hematology" : ["CANCER" , "DISEASE" ],
176+ # Forward metadata for future Cardiology models; no Cardiology model is
177+ # registered today (see issue #317).
178+ "Cardiology" : [
179+ "CARDIAC_FINDING" ,
180+ "ECG_FINDING" ,
181+ "EJECTION_FRACTION" ,
182+ "CARDIAC_PROCEDURE" ,
183+ "CARDIAC_DEVICE" ,
184+ "ANATOMY" ,
185+ ],
176186 "Privacy" : _PII_ENTITY_TYPES ,
177187}
178188
@@ -617,53 +627,74 @@ def get_all_models() -> Dict[str, ModelInfo]:
617627 return OPENMED_MODELS .copy ()
618628
619629
# Keyword routing table: each key is a regex alternation that is searched
# against the lower-cased input text; the value is the ``(category, reason)``
# pair reported when the pattern matches. Entries are evaluated in insertion
# order, so that is also the order in which matches are reported.
#
# Most alternatives are deliberate substring stems ("oncolog", "anonymiz",
# "cardiolog") and must stay unanchored. A handful of short tokens are
# anchored because, as bare substrings, they fire inside unrelated clinical
# words and route text to the wrong category:
#   phi   -> neutrophil, morphine, hypertrophic
#   mg    -> img                    (still matches "50mg", "50 mg", "mg/kg")
#   pill  -> capillary, papillary
#   gene  -> general, degenerative  (still matches "oncogene", "genetic")
#   stent -> persistent, consistent (still matches "stents", "stenting")
#   liver -> delivery
#   organ -> organism, organic
# Keys that contain regex escapes are raw strings so ``\b`` is a word
# boundary rather than a backspace character.
_CATEGORY_KEYWORDS: Dict[str, Tuple[str, str]] = {
    r"pii|deidentif|hipaa|\bphi\b|protected health|patient name|ssn|medical record|privacy|anonymiz": (
        "Privacy",
        "Contains PII/de-identification terms",
    ),
    "cancer|tumor|oncolog|malign|chemotherapy|radiation": (
        "Oncology",
        "Contains cancer/oncology terms",
    ),
    # ``(?<![a-z])`` instead of ``\b`` on the left of "mg" so that a dose
    # written without a space ("50mg") still matches.
    r"drug|medication|pharma|dose|(?<![a-z])mg\b|\bpills?\b|tablet|cisplatin": (
        "Pharmaceutical",
        "Contains pharmaceutical terms",
    ),
    # "genes?\b" is only right-anchored so compounds such as "oncogene" keep
    # matching; "genetic" is listed explicitly because it no longer matches
    # through the bare "gene" substring.
    r"genes?\b|genetic|dna|protein|mutation|chromosome": (
        "Genomics",
        "Contains genomic/genetic terms",
    ),
    r"ecg|ekg|ejection fraction|arrhythmia|\bstent|pacemaker|murmur|st elevation|echocardiogram|cardiac|cardiolog": (
        "Cardiology",
        "Contains cardiology terms",
    ),
    r"heart|lung|brain|\bliver|kidney|\borgans?\b": (
        "Anatomy",
        "Contains anatomical terms",
    ),
    "bacteria|virus|organism|species": (
        "Species",
        "Contains organism/species terms",
    ),
    "disease|condition|disorder|syndrome": (
        "Disease",
        "Contains disease/condition terms",
    ),
    "pathology|histology|biopsy": (
        "Pathology",
        "Contains pathological terms",
    ),
    "blood|lymph|leukemia|lymphoma": (
        "Hematology",
        "Contains hematological terms",
    ),
}
672+
673+
def _match_categories(text: str) -> List[Tuple[str, str]]:
    """Collect the categories whose keyword pattern occurs in ``text``.

    Serves as the routing step for :func:`get_model_suggestions`. The input
    is lower-cased once and every pattern in ``_CATEGORY_KEYWORDS`` is
    searched against it; each hit contributes its ``(category, reason)``
    pair, in the table's iteration order.

    Matching is purely keyword-based: a category is reported even when no
    model is registered for it (``Cardiology`` currently has keyword routing
    but no registered model).
    """
    haystack = text.lower()
    hits: List[Tuple[str, str]] = []
    for keyword_pattern, (label, why) in _CATEGORY_KEYWORDS.items():
        if re.search(keyword_pattern, haystack):
            hits.append((label, why))
    return hits
689+
690+
620691def get_model_suggestions (text : str ) -> List [Tuple [str , ModelInfo , str ]]:
621692 """Suggest appropriate models based on text content."""
622- text_lower = text .lower ()
623693 suggestions : List [Tuple [str , ModelInfo , str ]] = []
624- keywords = {
625- "pii|deidentif|hipaa|phi|protected health|patient name|ssn|medical record|privacy|anonymiz" : (
626- "Privacy" ,
627- "Contains PII/de-identification terms" ,
628- ),
629- "cancer|tumor|oncolog|malign|chemotherapy|radiation" : (
630- "Oncology" ,
631- "Contains cancer/oncology terms" ,
632- ),
633- "drug|medication|pharma|dose|mg|pill|tablet|cisplatin" : (
634- "Pharmaceutical" ,
635- "Contains pharmaceutical terms" ,
636- ),
637- "gene|dna|protein|mutation|chromosome" : (
638- "Genomics" ,
639- "Contains genomic/genetic terms" ,
640- ),
641- "heart|lung|brain|liver|kidney|organ" : (
642- "Anatomy" ,
643- "Contains anatomical terms" ,
644- ),
645- "bacteria|virus|organism|species" : (
646- "Species" ,
647- "Contains organism/species terms" ,
648- ),
649- "disease|condition|disorder|syndrome" : (
650- "Disease" ,
651- "Contains disease/condition terms" ,
652- ),
653- "pathology|histology|biopsy" : (
654- "Pathology" ,
655- "Contains pathological terms" ,
656- ),
657- "blood|lymph|leukemia|lymphoma" : (
658- "Hematology" ,
659- "Contains hematological terms" ,
660- ),
661- }
662694
663- for pattern , (category , reason ) in keywords .items ():
664- if re .search (pattern , text_lower ):
665- for key in CATEGORIES .get (category , [])[:3 ]:
666- suggestions .append ((key , OPENMED_MODELS [key ], reason ))
695+ for category , reason in _match_categories (text ):
696+ for key in CATEGORIES .get (category , [])[:3 ]:
697+ suggestions .append ((key , OPENMED_MODELS [key ], reason ))
667698
668699 if not suggestions :
669700 for key in SIZE_RECOMMENDATIONS .get ("balanced" , [])[:3 ]:
0 commit comments