12 changes: 12 additions & 0 deletions spacy/tests/tokenizer/test_tokenizer.py
@@ -5,6 +5,7 @@

 from spacy.lang.de import German
 from spacy.lang.en import English
+from spacy.strings import hash_string
 from spacy.symbols import ORTH
 from spacy.tokenizer import Tokenizer
 from spacy.tokens import Doc
@@ -555,3 +556,14 @@ def test_tokenizer_initial_special_case_explain(en_vocab):
     tokens = [t.text for t in tokenizer("id")]
     explain_tokens = [t[1] for t in tokenizer.explain("id")]
     assert tokens == explain_tokens
+
+
+@pytest.mark.issue(13950)
+def test_issue13950(en_tokenizer):
+    # Special contraction occurs before regular words
+    en_tokenizer("I can't believe you have done this")
+
+    # "believe" and "this" appear after the special case "can't".
+    # They should still be cached.
+    assert hash_string("believe") in en_tokenizer._cache
+    assert hash_string("this") in en_tokenizer._cache
2 changes: 1 addition & 1 deletion spacy/tokenizer.pxd
@@ -12,7 +12,7 @@ from .vocab cimport LexemesOrTokens, Vocab, _Cached

 cdef class Tokenizer:
     cdef Pool mem
-    cdef PreshMap _cache
+    cdef readonly PreshMap _cache # readonly so tests can check state
Author:
I don't love exposing private state so that tests can read it, but couldn't think of an easy alternative for testing the behavior I wanted to test. Open to suggestions!

     cdef PreshMap _specials
     cdef readonly Vocab vocab

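For context on the `readonly` change above: in Cython, `cdef readonly` generates a Python-level getter without a setter, so the new test can inspect `_cache` while Python callers still cannot rebind it. A standalone sketch of the semantics, using a hypothetical module rather than spaCy code:

```cython
# demo.pyx -- illustration of `cdef readonly` (hypothetical example)
cdef class Demo:
    cdef readonly int value   # readable from Python, not assignable there

    def __init__(self, int value):
        self.value = value    # assignment inside the cdef class is allowed

# After compiling, from Python:
#   d = Demo(42)
#   d.value      # -> 42
#   d.value = 7  # -> AttributeError: attribute is not writable
```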
2 changes: 2 additions & 0 deletions spacy/tokenizer.pyx
@@ -192,6 +192,7 @@ cdef class Tokenizer:
                     # we don't have to create the slice when we hit the cache.
                     span = string[start:i]
                     key = hash_string(span)
+                    has_special = 0
                     if not self._try_specials_and_cache(key, doc, &has_special, with_special_cases):
                         self._tokenize(doc, span, key, &has_special, with_special_cases)
                 if uc == ' ':
@@ -204,6 +205,7 @@
         if start < i:
             span = string[start:]
             key = hash_string(span)
+            has_special = 0
             if not self._try_specials_and_cache(key, doc, &has_special, with_special_cases):
                 self._tokenize(doc, span, key, &has_special, with_special_cases)
         doc.c[doc.length - 1].spacy = string[-1] == " " and not in_ws
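Why the two `has_special = 0` resets matter: `has_special` is an output flag filled in by `_try_specials_and_cache` and `_tokenize`, and spans that involved a special case are deliberately kept out of `_cache`. Without a per-span reset, the first special case ("can't") left the flag set, so every later span was also treated as special and never cached. A pure-Python sketch of the stale-flag pattern; the names and data structures are illustrative, not spaCy's actual internals:

```python
# Special cases live in their own table and are kept out of the regular cache.
SPECIALS = {"can't": ["ca", "n't"]}
cache = {}

def tokenize(text):
    has_special = False  # mirrors the single per-call declaration
    for span in text.split():
        # The fix: clear the flag before each span. Without this line, the
        # value carried over, so once "can't" set it to True, no later word
        # was ever cached.
        has_special = False
        if span in SPECIALS:
            tokens = SPECIALS[span]
            has_special = True
        else:
            tokens = [span]
        if not has_special:  # spans that hit a special case are not cached
            cache[hash(span)] = tokens

tokenize("I can't believe you have done this")
assert hash("believe") in cache and hash("this") in cache
```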