Update llama_cache.py: use typing.Tuple/Optional/Sequence instead of built-in generics and `X | None` in annotations

Smartappli · web-flow · commit a35e3f2f4715 · 2024-08-15T08:04:38.000+02:00
diff --git a/llama_cpp/llama_cache.py b/llama_cpp/llama_cache.py
@@ -1,9 +1,9 @@
 import sys
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from collections.abc import Sequence
 from typing import (
     Optional,
+    Sequence,
     Tuple,
 )
 
@@ -27,8 +27,8 @@ def cache_size(self) -> int:
 
     def _find_longest_prefix_key(
         self,
-        key: tuple[int, ...],
-    ) -> tuple[int, ...] | None:
+        key: Tuple[int, ...],
+    ) -> Optional[Tuple[int, ...]]:
         pass
 
     @abstractmethod
@@ -52,7 +52,7 @@ class LlamaRAMCache(BaseLlamaCache):
     def __init__(self, capacity_bytes: int = (2 << 30)):
         super().__init__(capacity_bytes)
         self.capacity_bytes = capacity_bytes
-        self.cache_state: OrderedDict[tuple[int, ...], llama_cpp.llama.LlamaState] = (
+        self.cache_state: OrderedDict[Tuple[int, ...], llama_cpp.llama.LlamaState] = (
             OrderedDict()
         )
 
@@ -62,8 +62,8 @@ def cache_size(self):
 
     def _find_longest_prefix_key(
         self,
-        key: tuple[int, ...],
-    ) -> tuple[int, ...] | None:
+        key: Tuple[int, ...],
+    ) -> Optional[Tuple[int, ...]]:
         min_len = 0
         min_key = None
         keys = (
@@ -116,10 +116,10 @@ def cache_size(self):
 
     def _find_longest_prefix_key(
         self,
-        key: tuple[int, ...],
-    ) -> tuple[int, ...] | None:
+        key: Tuple[int, ...],
+    ) -> Optional[Tuple[int, ...]]:
         min_len = 0
-        min_key: tuple[int, ...] | None = None
+        min_key: Optional[Tuple[int, ...]] = None
         for k in self.cache.iterkeys():  # type: ignore
             prefix_len = llama_cpp.llama.Llama.longest_token_prefix(k, key)
             if prefix_len > min_len: