
Commit e831e3b

Fixed new tokenizer truncation. Read files truncation.
1 parent 5a1ad7d commit e831e3b

File tree: 1 file changed (+13 -35 lines)

src/wcgw/client/tools.py

Lines changed: 13 additions & 35 deletions
@@ -361,29 +361,11 @@ def get_status() -> str:
 T = TypeVar("T")
 
 
-def save_out_of_context(
-    tokens: list[T],
-    max_tokens: int,
-    suffix: str,
-    tokens_converted: Callable[[list[T]], str],
-) -> tuple[str, list[Path]]:
-    file_contents = list[str]()
-    for i in range(0, len(tokens), max_tokens):
-        file_contents.append(tokens_converted(tokens[i : i + max_tokens]))
-
-    if len(file_contents) == 1:
-        return file_contents[0], []
-
-    rest_paths = list[Path]()
-    for i, content in enumerate(file_contents):
-        if i == 0:
-            continue
-        file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
-        with open(file_path, "w") as f:
-            f.write(content)
-        rest_paths.append(Path(file_path))
-
-    return file_contents[0], rest_paths
+def save_out_of_context(content: str, suffix: str) -> str:
+    file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
+    with open(file_path, "w") as f:
+        f.write(content)
+    return file_path
 
 
 def rstrip(lines: list[str]) -> str:
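The new helper no longer splits overflow into multiple chunks: it writes one string to one temporary file and returns the path. A minimal, self-contained sketch of the new helper plus a hypothetical call (the sample content and suffix are illustrative only):

    from tempfile import NamedTemporaryFile

    def save_out_of_context(content: str, suffix: str) -> str:
        # delete=False keeps the file on disk after the handle is gone,
        # so the returned path stays readable for later tool calls.
        file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
        with open(file_path, "w") as f:
            f.write(content)
        return file_path

    overflow_path = save_out_of_context("def tail(): ...\n", ".py")
    print(overflow_path)  # e.g. /tmp/tmpab12cd34.py

Note that delete=False is what makes the path survive: with the default delete=True, the file would vanish as soon as the NamedTemporaryFile object is closed.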
@@ -565,7 +547,7 @@ def execute_bash(
 
     if max_tokens and len(tokens) >= max_tokens:
         incremental_text = "(...truncated)\n" + enc.decode(
-            tokens[-(max_tokens - 1) :]
+            tokens.ids[-(max_tokens - 1) :]
         )
 
     if is_interrupt:
@@ -591,7 +573,7 @@ def execute_bash(
 
     tokens = enc.encode(output)
     if max_tokens and len(tokens) >= max_tokens:
-        output = "(...truncated)\n" + enc.decode(tokens[-(max_tokens - 1) :])
+        output = "(...truncated)\n" + enc.decode(tokens.ids[-(max_tokens - 1) :])
 
     try:
         exit_status = get_status()
@@ -657,7 +639,7 @@ def truncate_if_over(content: str, max_tokens: Optional[int]) -> str:
     n_tokens = len(tokens)
     if n_tokens > max_tokens:
         content = (
-            default_enc.decode(tokens[: max(0, max_tokens - 100)])
+            default_enc.decode(tokens.ids[: max(0, max_tokens - 100)])
             + "\n(...truncated)"
         )
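The three one-line hunks above are the "new tokenizer" fix named in the commit message: enc.encode(...) now returns an encoding object rather than a plain list of ids, so slicing has to go through its .ids attribute, while len(...) still works on the object itself (which is why the length checks are unchanged). This is consistent with the API of Hugging Face's tokenizers library, where Tokenizer.encode returns an Encoding. A hedged sketch of the tail-truncation pattern under that assumption (the model id is illustrative):

    from tokenizers import Tokenizer

    enc = Tokenizer.from_pretrained("gpt2")  # assumed model id, for illustration

    def truncate_tail(output: str, max_tokens: int) -> str:
        tokens = enc.encode(output)  # an Encoding object, not a list of ids
        if max_tokens and len(tokens) >= max_tokens:
            # Keep only the last max_tokens - 1 ids; slicing goes through .ids.
            return "(...truncated)\n" + enc.decode(tokens.ids[-(max_tokens - 1) :])
        return output

The truncate_if_over hunk is the head-keeping variant of the same fix; its max_tokens - 100 bound presumably reserves headroom for the appended "(...truncated)" notice.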

@@ -1403,14 +1385,10 @@ def read_file(file_path: str, max_tokens: Optional[int]) -> tuple[str, bool, int
     tokens = default_enc.encode(content)
     tokens_counts = len(tokens)
     if len(tokens) > max_tokens:
-        content, rest = save_out_of_context(
-            tokens,
-            max(0, max_tokens - 100),
-            Path(file_path).suffix,
-            default_enc.decode,
+        content = default_enc.decode(tokens.ids[:max_tokens])
+        rest = save_out_of_context(
+            default_enc.decode(tokens.ids[max_tokens:]), Path(file_path).suffix
         )
-        if rest:
-            rest_ = "\n".join(map(str, rest))
-            content += f"\n(...truncated)\n---\nI've split the rest of the file into multiple files. Here are the remaining splits, please read them:\n{rest_}"
-            truncated = True
+        content += f"\n(...truncated)\n---\nI've saved the continuation in a new file. Please read: `{rest}`"
+        truncated = True
     return content, truncated, tokens_counts
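Taken together, read_file now decodes the first max_tokens ids inline and spills everything past them into a single continuation file via the simplified save_out_of_context. A self-contained sketch of the new flow, again assuming a tokenizers-style default_enc (the wrapper name truncate_for_read and the model id are illustrative, not from the source):

    from pathlib import Path
    from tempfile import NamedTemporaryFile
    from tokenizers import Tokenizer

    default_enc = Tokenizer.from_pretrained("gpt2")  # assumed model id

    def save_out_of_context(content: str, suffix: str) -> str:
        # Same helper as in the diff above.
        file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
        with open(file_path, "w") as f:
            f.write(content)
        return file_path

    def truncate_for_read(file_path: str, content: str, max_tokens: int) -> tuple[str, bool]:
        tokens = default_enc.encode(content)
        if len(tokens) <= max_tokens:
            return content, False
        # Head stays inline; the tail is decoded and spilled to one temp file.
        head = default_enc.decode(tokens.ids[:max_tokens])
        rest = save_out_of_context(
            default_enc.decode(tokens.ids[max_tokens:]), Path(file_path).suffix
        )
        return (
            head
            + f"\n(...truncated)\n---\nI've saved the continuation in a new file. Please read: `{rest}`",
            True,
        )

One behavioral change worth noting: the old helper emitted one split file per max_tokens chunk, so a very long file could require many follow-up reads, whereas the new code always produces exactly one continuation file regardless of overflow size.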
