Skip to content

Commit d74ee7d

Browse files
committed
Fix CI: type errors, Windows test paths, drop upstream-only tests
Type Check (ty) fixes in new/overwritten TACO files:
- tmux_session.py: widen logging_path to PurePath and allow None asciinema paths (EnvironmentPaths.agent_dir is PurePosixPath; TACO can run without asciinema recording); fix get_incremental_output's last_command default (str | None = None); guard the AsciinemaHandler call with a not-None check on the local recording path.
- terminus_2.py: annotate _run_subagent's message_history as list[dict[str, Any] | Message] to match LiteLLM.call; drop an obsolete # type: ignore comment.
- output_filter.py: use walrus-assigned re.search so match.group is only called after a non-None check.
- module/client.py: suppress openai SDK overload noise with a native # ty: ignore[no-matching-overload] on the 3 create() calls.

Test fixes / cleanups:
- Fix Path(__file__).resolve().parents[N] in the 4 new compression tests (tests live at tests/unit/agents/terminus_2/, so the repo root is parents[4], not parents[3]).
- Update the BibTeX citation in both READMEs to the official author list / @misc + eprint form.
- Drop upstream-only tests that exercise agent features TACO does not ship (skills section, tmux send-keys chunking, _init_llm patching): the 3 TestTerminus2* classes in test_agent_skills.py, plus test_tmux_session.py and test_terminus_2_no_retry_on_cancelled.py.

Made-with: Cursor
1 parent cf60668 commit d74ee7d

13 files changed

Lines changed: 38 additions & 469 deletions

README.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,13 @@ All flags are passed on the CLI as `--ak <name>=<value>`.
8888
## Citation
8989

9090
```bibtex
91-
@article{taco2026,
92-
title = {TACO: A Self-Evolving Framework for Efficient Terminal Agents
93-
via Observational Context Compression},
94-
author = {TACO Team},
95-
journal = {arXiv preprint arXiv:2604.19572},
96-
year = {2026}
91+
@misc{ren2026selfevolvingframeworkefficientterminal,
92+
title={A Self-Evolving Framework for Efficient Terminal Agents via Observational Context Compression},
93+
author={Jincheng Ren and Siwei Wu and Yizhi Li and Kang Zhu and Shu Xu and Boyu Feng and Ruibin Yuan and Wei Zhang and Riza Batista-Navarro and Jian Yang and Chenghua Lin},
94+
year={2026},
95+
eprint={2604.19572},
96+
archivePrefix={arXiv},
97+
primaryClass={cs.CL},
98+
url={https://arxiv.org/abs/2604.19572},
9799
}
98100
```

src/harbor/agents/terminus_2/README.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,13 @@ All flags are passed on the CLI as `--ak <name>=<value>`.
8888
## Citation
8989

9090
```bibtex
91-
@article{taco2026,
92-
title = {TACO: A Self-Evolving Framework for Efficient Terminal Agents
93-
via Observational Context Compression},
94-
author = {TACO Team},
95-
journal = {arXiv preprint arXiv:2604.19572},
96-
year = {2026}
91+
@misc{ren2026selfevolvingframeworkefficientterminal,
92+
title={A Self-Evolving Framework for Efficient Terminal Agents via Observational Context Compression},
93+
author={Jincheng Ren and Siwei Wu and Yizhi Li and Kang Zhu and Shu Xu and Boyu Feng and Ruibin Yuan and Wei Zhang and Riza Batista-Navarro and Jian Yang and Chenghua Lin},
94+
year={2026},
95+
eprint={2604.19572},
96+
archivePrefix={arXiv},
97+
primaryClass={cs.CL},
98+
url={https://arxiv.org/abs/2604.19572},
9799
}
98100
```

src/harbor/agents/terminus_2/module/client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def chat(
294294
if key not in ['messages', 'reasoning_effort']:
295295
completions_params[key] = value
296296

297-
response = self.client.completions.create(**completions_params)
297+
response = self.client.completions.create(**completions_params) # ty: ignore[no-matching-overload]
298298

299299
# Extract response content
300300
if response.choices and len(response.choices) > 0:
@@ -305,7 +305,7 @@ def chat(
305305
raise LLMClientError("Invalid response format: no choices in response")
306306
else:
307307
# /v1/chat/completions endpoint (server applies chat template).
308-
response = self.client.chat.completions.create(**request_params)
308+
response = self.client.chat.completions.create(**request_params) # ty: ignore[no-matching-overload]
309309

310310
# Extract response content
311311
if response.choices and len(response.choices) > 0:
@@ -368,7 +368,7 @@ def chat_with_usage(
368368
request_params.update(kwargs)
369369

370370
try:
371-
response = self.client.chat.completions.create(**request_params)
371+
response = self.client.chat.completions.create(**request_params) # ty: ignore[no-matching-overload]
372372

373373
# Extract content
374374
content = ""

src/harbor/agents/terminus_2/output_filter.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -362,11 +362,9 @@ def _extract_progress(
362362
extracted.append("Reading package lists")
363363
elif "Building dependency tree" in tail:
364364
extracted.append("Building dependency tree")
365-
elif re.search(r"Setting up\s+(\S+)", tail):
366-
match = re.search(r"Setting up\s+(\S+)", tail)
365+
elif (match := re.search(r"Setting up\s+(\S+)", tail)) is not None:
367366
extracted.append(f"Setting up {match.group(1)}")
368-
elif re.search(r"Unpacking\s+(\S+)", tail):
369-
match = re.search(r"Unpacking\s+(\S+)", tail)
367+
elif (match := re.search(r"Unpacking\s+(\S+)", tail)) is not None:
370368
extracted.append(f"Unpacking {match.group(1)}")
371369

372370
if extracted:

src/harbor/agents/terminus_2/terminus_2.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
from dataclasses import dataclass
77
from datetime import datetime, timezone
88
from pathlib import Path
9-
from typing import Literal
9+
from typing import Any, Literal
10+
11+
from litellm import Message
1012

1113
from tenacity import retry, stop_after_attempt
1214

@@ -896,7 +898,7 @@ def _track_api_request_time(self, start_time: float) -> None:
896898
async def _run_subagent(
897899
self,
898900
prompt: str,
899-
message_history: list[dict],
901+
message_history: list[dict[str, Any] | Message],
900902
steps: list[Step],
901903
session_id: str,
902904
agent_name: str,
@@ -1330,7 +1332,7 @@ async def _query_llm(
13301332

13311333
if hasattr(self._parser, "salvage_truncated_response"):
13321334
salvaged_response, has_multiple_blocks = (
1333-
self._parser.salvage_truncated_response(truncated_response) # type: ignore
1335+
self._parser.salvage_truncated_response(truncated_response)
13341336
)
13351337

13361338
if salvaged_response:

src/harbor/agents/terminus_2/tmux_session.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import re
33
import shlex
44
import time
5-
from pathlib import Path
5+
from pathlib import Path, PurePath
66

77
from harbor.agents.terminus_2.asciinema_handler import AsciinemaHandler
88
from harbor.environments.base import BaseEnvironment
@@ -25,9 +25,9 @@ def __init__(
2525
self,
2626
session_name: str,
2727
environment: BaseEnvironment,
28-
logging_path: Path,
29-
local_asciinema_recording_path: Path,
30-
remote_asciinema_recording_path: Path,
28+
logging_path: PurePath,
29+
local_asciinema_recording_path: Path | None,
30+
remote_asciinema_recording_path: PurePath | None,
3131
pane_width: int = 160,
3232
pane_height: int = 40,
3333
):
@@ -392,8 +392,9 @@ async def stop(self) -> None:
392392

393393
await asyncio.sleep(0.5)
394394

395-
# Merge markers into the recording
396-
if self._markers:
395+
# Merge markers into the recording. remote and local recording paths
396+
# are always set or unset as a pair, so the local path is non-None here.
397+
if self._markers and self._local_asciinema_recording_path is not None:
397398
self._logger.debug(
398399
f"Merging {len(self._markers)} markers into recording"
399400
)
@@ -571,7 +572,7 @@ async def _find_new_content(self, current_buffer: str) -> str | None:
571572
return current_buffer[idx:]
572573
return None
573574

574-
async def get_incremental_output(self, last_command: str = None) -> str:
575+
async def get_incremental_output(self, last_command: str | None = None) -> str:
575576
"""
576577
Get either new terminal output since last call, or current screen if
577578
unable to determine.

tests/integration/test_terminus_2_no_retry_on_cancelled.py

Lines changed: 0 additions & 107 deletions
This file was deleted.

tests/unit/agents/installed/test_agent_skills.py

Lines changed: 0 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -303,135 +303,3 @@ async def _exec(command, timeout_sec=None):
303303

304304
env.exec = AsyncMock(side_effect=_exec)
305305
return env
306-
307-
308-
class TestTerminus2Skills:
309-
"""Test _build_skills_section() for Terminus2."""
310-
311-
def _make_agent(self, temp_dir, skills_dir=None):
312-
from harbor.agents.terminus_2.terminus_2 import Terminus2
313-
314-
return Terminus2(
315-
logs_dir=temp_dir,
316-
model_name="anthropic/claude-sonnet-4-5",
317-
skills_dir=skills_dir,
318-
)
319-
320-
async def test_no_skills_dir_returns_none(self, temp_dir):
321-
agent = self._make_agent(temp_dir)
322-
env = _make_mock_environment()
323-
assert await agent._build_skills_section(env) is None
324-
325-
async def test_nonexistent_skills_dir_returns_none(self, temp_dir):
326-
agent = self._make_agent(temp_dir, skills_dir="/nonexistent/path")
327-
env = _make_mock_environment(is_dir=False)
328-
assert await agent._build_skills_section(env) is None
329-
330-
async def test_empty_skills_dir_returns_none(self, temp_dir):
331-
agent = self._make_agent(temp_dir, skills_dir="/skills")
332-
env = _make_mock_environment(skills=None)
333-
assert await agent._build_skills_section(env) is None
334-
335-
async def test_skills_dir_with_valid_skill(self, temp_dir):
336-
content = SKILL_FRONTMATTER.format(
337-
name="greet", description="Say hello to the user.", body="Do it."
338-
)
339-
agent = self._make_agent(temp_dir, skills_dir="/skills")
340-
env = _make_mock_environment(skills={"greet": content})
341-
result = await agent._build_skills_section(env)
342-
assert result is not None
343-
assert "<available_skills>" in result
344-
assert "<name>greet</name>" in result
345-
assert "<description>Say hello to the user.</description>" in result
346-
assert "<location>/skills/greet/SKILL.md</location>" in result
347-
348-
async def test_multiple_skills_sorted(self, temp_dir):
349-
skills = {
350-
"zeta": SKILL_FRONTMATTER.format(
351-
name="zeta", description="Zeta skill.", body=""
352-
),
353-
"alpha": SKILL_FRONTMATTER.format(
354-
name="alpha", description="Alpha skill.", body=""
355-
),
356-
"mid": SKILL_FRONTMATTER.format(
357-
name="mid", description="Mid skill.", body=""
358-
),
359-
}
360-
agent = self._make_agent(temp_dir, skills_dir="/skills")
361-
env = _make_mock_environment(skills=skills)
362-
result = await agent._build_skills_section(env)
363-
assert result is not None
364-
alpha_pos = result.index("<name>alpha</name>")
365-
mid_pos = result.index("<name>mid</name>")
366-
zeta_pos = result.index("<name>zeta</name>")
367-
assert alpha_pos < mid_pos < zeta_pos
368-
369-
async def test_skips_invalid_frontmatter(self, temp_dir):
370-
"""SKILL.md without valid YAML frontmatter is ignored."""
371-
agent = self._make_agent(temp_dir, skills_dir="/skills")
372-
env = _make_mock_environment(skills={"bad-skill": "No frontmatter here."})
373-
assert await agent._build_skills_section(env) is None
374-
375-
376-
class TestTerminus2ParseSkillFrontmatter:
377-
"""Test _parse_skill_frontmatter() directly."""
378-
379-
def test_valid_frontmatter(self):
380-
from harbor.agents.terminus_2.terminus_2 import Terminus2
381-
382-
content = "---\nname: my-skill\ndescription: Does things.\n---\nBody.\n"
383-
result = Terminus2._parse_skill_frontmatter(content)
384-
assert result == {"name": "my-skill", "description": "Does things."}
385-
386-
def test_missing_name(self):
387-
from harbor.agents.terminus_2.terminus_2 import Terminus2
388-
389-
content = "---\ndescription: No name field.\n---\nBody.\n"
390-
assert Terminus2._parse_skill_frontmatter(content) is None
391-
392-
def test_no_frontmatter_delimiter(self):
393-
from harbor.agents.terminus_2.terminus_2 import Terminus2
394-
395-
assert (
396-
Terminus2._parse_skill_frontmatter("Just markdown, no frontmatter.") is None
397-
)
398-
399-
def test_frontmatter_with_dashes_in_yaml_value(self):
400-
"""Ensure --- inside a YAML value does not break frontmatter parsing."""
401-
from harbor.agents.terminus_2.terminus_2 import Terminus2
402-
403-
content = '---\nname: my-skill\ndescription: "Use --- to separate sections"\n---\nBody.\n'
404-
result = Terminus2._parse_skill_frontmatter(content)
405-
assert result is not None
406-
assert result["name"] == "my-skill"
407-
assert result["description"] == "Use --- to separate sections"
408-
409-
410-
class TestTerminus2SkillsXmlEscaping:
411-
"""Test that XML special characters are properly escaped in skills output."""
412-
413-
def _make_agent(self, temp_dir, skills_dir=None):
414-
from harbor.agents.terminus_2.terminus_2 import Terminus2
415-
416-
return Terminus2(
417-
logs_dir=temp_dir,
418-
model_name="anthropic/claude-sonnet-4-5",
419-
skills_dir=skills_dir,
420-
)
421-
422-
async def test_xml_special_chars_escaped(self, temp_dir):
423-
"""Skill name/description with <, >, & must be escaped in XML output."""
424-
content = SKILL_FRONTMATTER.format(
425-
name="A<B>&C",
426-
description='Use <tag> & "quotes"',
427-
body="Body.",
428-
)
429-
agent = self._make_agent(temp_dir, skills_dir="/skills")
430-
env = _make_mock_environment(skills={"special": content})
431-
result = await agent._build_skills_section(env)
432-
assert result is not None
433-
assert "<available_skills>" in result
434-
# Raw < > & must not appear unescaped inside text content
435-
assert "A&lt;B&gt;&amp;C" in result
436-
assert "&lt;tag&gt;" in result
437-
assert "&amp;" in result

0 commit comments

Comments
 (0)