-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerator.py
More file actions
94 lines (79 loc) · 3.91 KB
/
Copy pathgenerator.py
File metadata and controls
94 lines (79 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import asyncio
from collections.abc import AsyncIterator
import anthropic
from anthropic import APIStatusError
from config import settings
from prompts import build_system_blocks, get_user_prompt
# Transient failures that are worth retrying with backoff:
#   * HTTP 429 — rate-limited
#   * HTTP 529 — overloaded
# Overload errors can also arrive mid-stream inside a 200 OK SSE response. In
# that case the SDK exception reports status_code=200 while the body carries
# `error.type == "overloaded_error"`, which is why both the status code and
# the body's error type are checked.
RETRYABLE_STATUSES = frozenset((429, 529))
RETRYABLE_ERROR_TYPES = frozenset(("overloaded_error", "rate_limit_error"))
# Seconds to wait before retries 1, 2 and 3 — a total budget of roughly 11s.
RETRY_DELAYS = (1, 3, 7)
def _error_type(e: APIStatusError) -> str | None:
    """Return the ``error.type`` string from an API error body, if present.

    The body is expected to look like ``{"error": {"type": "..."}}``. Any
    other shape (no body, a non-dict body, a non-dict ``error`` entry, or a
    missing / non-string ``type``) yields ``None`` instead of raising, so the
    result is always safe to compare or to test for set membership.

    Args:
        e: The SDK exception whose ``body`` attribute is inspected.

    Returns:
        The error type string, or ``None`` when it cannot be determined.
    """
    # Explicit shape checks instead of a blanket ``except Exception``: the
    # outcome for malformed bodies is the same (None), but unrelated bugs are
    # no longer silently swallowed and the return value is always str | None.
    body = getattr(e, "body", None)
    if not isinstance(body, dict):
        return None
    error = body.get("error")
    if not isinstance(error, dict):
        return None
    kind = error.get("type")
    return kind if isinstance(kind, str) else None
def _is_retryable(e: APIStatusError) -> bool:
    """Report whether *e* is a transient failure worth retrying.

    An error qualifies when its HTTP status is listed in
    ``RETRYABLE_STATUSES`` or, failing that, when the error type found in its
    body is listed in ``RETRYABLE_ERROR_TYPES``.
    """
    if e.status_code in RETRYABLE_STATUSES:
        return True
    # The status alone is not conclusive, so fall back to the body's type.
    return _error_type(e) in RETRYABLE_ERROR_TYPES
def _user_facing(e: APIStatusError) -> str:
    """Build the short message shown to the user for an API failure.

    Overload (status 529 or ``overloaded_error``) and rate-limit (status 429
    or ``rate_limit_error``) failures get a fixed "try again" message, with
    overload taking precedence. Anything else is reported with its status
    code and the first 160 characters of the error message.
    """
    kind = _error_type(e)
    status = e.status_code
    overloaded = status == 529 or kind == "overloaded_error"
    rate_limited = status == 429 or kind == "rate_limit_error"
    if overloaded:
        return "Anthropic temporarily overloaded — try again in a few seconds."
    if rate_limited:
        return "Anthropic rate limit hit — try again in a few seconds."
    detail = str(e.message)[:160]
    return f"API error ({status}): {detail}"
async def stream_repurposed_content(platform: str, content: str) -> AsyncIterator[str]:
    """Stream repurposed content from Claude, yielding text chunks.

    Uses prompt caching on the source-content prefix — repeat calls with the
    same content (different platforms) hit cache for ~5 minutes. Cache usage
    is printed to stdout after each stream so you can verify hits.

    Retries on transient API errors (429 rate-limit, 529 overloaded) up to
    ``len(RETRY_DELAYS)`` times, waiting ``RETRY_DELAYS[i]`` seconds before
    retry ``i + 1``. Only retries if no tokens have been yielded yet —
    otherwise we'd duplicate output on the second attempt.

    Args:
        platform: Target platform; passed to ``get_user_prompt`` and used as
            the prefix of the log lines printed to stdout.
        content: Source content; passed to ``build_system_blocks``.

    Yields:
        Text chunks in the order the stream produces them.

    Raises:
        RuntimeError: With a user-facing message (chained from the original
            ``APIStatusError``) when the error is not retryable, when output
            has already started, or when the retries are exhausted.
    """
    # The client owns an HTTP connection pool. Entering it as a context
    # manager guarantees the pool is closed when the generator completes,
    # raises, or is closed early by the consumer, instead of waiting for
    # garbage collection.
    async with anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key) as client:
        attempt = 0
        while True:
            # Reset per attempt: a retry is only safe while nothing has been
            # handed to the caller during the current attempt.
            yielded_any = False
            try:
                async with client.messages.stream(
                    model=settings.model,
                    max_tokens=settings.max_tokens,
                    system=build_system_blocks(content),
                    messages=[{"role": "user", "content": get_user_prompt(platform)}],
                ) as stream:
                    async for text in stream.text_stream:
                        yielded_any = True
                        yield text
                    # Cache-stats logging is best-effort. Once the text stream
                    # has closed cleanly the user already has their content; a
                    # failure here shouldn't bubble up as an error event.
                    try:
                        final = await stream.get_final_message()
                        u = final.usage
                        print(
                            f"[{platform}] input={u.input_tokens} output={u.output_tokens} "
                            f"cache_read={u.cache_read_input_tokens} "
                            f"cache_write={u.cache_creation_input_tokens}",
                            flush=True,
                        )
                    except Exception as stats_err:
                        print(f"[{platform}] stats lookup failed: {stats_err}", flush=True)
                return
            except APIStatusError as e:
                can_retry = (
                    _is_retryable(e) and not yielded_any and attempt < len(RETRY_DELAYS)
                )
                if not can_retry:
                    # Surface a short, user-readable message; keep the SDK
                    # exception attached as the cause for debugging.
                    raise RuntimeError(_user_facing(e)) from e
                wait = RETRY_DELAYS[attempt]
                attempt += 1
                label = _error_type(e) or f"status {e.status_code}"
                print(
                    f"[{platform}] {label}; retry {attempt}/{len(RETRY_DELAYS)} in {wait}s",
                    flush=True,
                )
                await asyncio.sleep(wait)