Skip to content
This repository was archived by the owner on Jul 28, 2025. It is now read-only.

Commit 42f334c

Browse files
committed
CU-8697x7y9x: Fix issue with transformers 4.47+ affecting DeID
1 parent e0ae274 commit 42f334c

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

medcat/ner/transformers_ner.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,22 @@ def create_eval_pipeline(self):
8989
self.ner_pipe.tokenizer._in_target_context_manager = False
9090
if not hasattr(self.ner_pipe.tokenizer, 'split_special_tokens'):
9191
# NOTE: this will fix the DeID model(s) created with transformers before 4.42
92-
# and allow them to run with later transforemrs
92+
# and allow them to run with later transformers
9393
self.ner_pipe.tokenizer.split_special_tokens = False
94+
if not hasattr(self.ner_pipe.tokenizer, 'pad_token') and hasattr(self.ner_pipe.tokenizer, '_pad_token'):
95+
# NOTE: This will fix the DeID model(s) created with transformers before 4.47
96+
# and allow them to run with later transformers versions
97+
# In 4.47 the special tokens started to be used differently, yet our saved model
98+
# is not aware of that. So we need to explicitly fix that.
99+
special_tokens_map = self.ner_pipe.tokenizer.__dict__.get('_special_tokens_map', {})
100+
for name in self.ner_pipe.tokenizer.SPECIAL_TOKENS_ATTRIBUTES:
101+
# previously saved in (e.g) _pad_token
102+
prev_val = getattr(self.ner_pipe.tokenizer, f"_{name}")
103+
# now saved in the special tokens map by its name
104+
special_tokens_map[name] = prev_val
105+
# the map is saved in __dict__ explicitly, and it is later used in __getattr__ of the base class.
106+
self.ner_pipe.tokenizer.__dict__['_special_tokens_map'] = special_tokens_map
107+
94108
self.ner_pipe.device = self.model.device
95109
self._consecutive_identical_failures = 0
96110
self._last_exception: Optional[Tuple[str, Type[Exception]]] = None

0 commit comments

Comments
 (0)