1
1
import sys
2
2
from abc import ABC , abstractmethod
3
3
from collections import OrderedDict
4
- from collections .abc import Sequence
5
4
from typing import (
6
5
Optional ,
6
+ Sequence ,
7
7
Tuple ,
8
8
)
9
9
@@ -27,8 +27,8 @@ def cache_size(self) -> int:
27
27
28
28
def _find_longest_prefix_key (
29
29
self ,
30
- key : tuple [int , ...],
31
- ) -> tuple [ int , ...] | None :
30
+ key : Tuple [int , ...],
31
+ ) -> Optional [ Tuple [ int , ...]] :
32
32
pass
33
33
34
34
@abstractmethod
@@ -52,7 +52,7 @@ class LlamaRAMCache(BaseLlamaCache):
52
52
def __init__ (self , capacity_bytes : int = (2 << 30 )):
53
53
super ().__init__ (capacity_bytes )
54
54
self .capacity_bytes = capacity_bytes
55
- self .cache_state : OrderedDict [tuple [int , ...], llama_cpp .llama .LlamaState ] = (
55
+ self .cache_state : OrderedDict [Tuple [int , ...], llama_cpp .llama .LlamaState ] = (
56
56
OrderedDict ()
57
57
)
58
58
@@ -62,8 +62,8 @@ def cache_size(self):
62
62
63
63
def _find_longest_prefix_key (
64
64
self ,
65
- key : tuple [int , ...],
66
- ) -> tuple [ int , ...] | None :
65
+ key : Tuple [int , ...],
66
+ ) -> Optional [ Tuple [ int , ...]] :
67
67
min_len = 0
68
68
min_key = None
69
69
keys = (
@@ -116,10 +116,10 @@ def cache_size(self):
116
116
117
117
def _find_longest_prefix_key (
118
118
self ,
119
- key : tuple [int , ...],
120
- ) -> tuple [ int , ...] | None :
119
+ key : Tuple [int , ...],
120
+ ) -> Optional [ Tuple [ int , ...]] :
121
121
min_len = 0
122
- min_key : tuple [ int , ...] | None = None
122
+ min_key : Optional [ Tuple [ int , ...]] = None
123
123
for k in self .cache .iterkeys (): # type: ignore
124
124
prefix_len = llama_cpp .llama .Llama .longest_token_prefix (k , key )
125
125
if prefix_len > min_len :
0 commit comments