-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
171 lines (142 loc) · 5.16 KB
/
utils.py
File metadata and controls
171 lines (142 loc) · 5.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
from ai_agent.retriever.software_doc import SoftwareDoc
from typing import Optional, List, Any
import re
import os
# Constants for affirmative detection
_MULTI_WORD_AFFIRMATIVES = ["go ahead", "do it", "run it", "sounds good", "looks good"]
_SINGLE_WORD_AFFIRMATIVES = [
"yes",
"y",
"yeah",
"yep",
"yup",
"sure",
"ok",
"okay",
"fine",
"alright",
"right",
"correct",
"affirmative",
]
_EMOJI_AFFIRMATIVES = ["👍", "✅", "✓"]
_NEGATION_WORDS = {"no", "not", "don't", "dont", "never", "nothing"}
# Thresholds for context-based detection
_PHRASE_LENGTH_MULTIPLIER = 3 # Max text length = phrase length * multiplier
_SHORT_MESSAGE_THRESHOLD = 30 # Character threshold for "short message"
# Pre-compile regex pattern for negation detection
_NEGATION_PATTERN = re.compile(
r"\b(" + "|".join(re.escape(w) for w in _NEGATION_WORDS) + r")\b"
)
def _best_runnable_link(doc: SoftwareDoc) -> Optional[str]:
"""Return the most user-friendly runnable link.
Preference order:
1. Hugging Face Space (hf.space or huggingface.co/spaces)
2. Other interactive demo hosts (gradio.live, replicate.run, etc.)
3. Executable notebook links (.ipynb, colab)
4. Fallback to first runnable example / notebook URL (GitHub last)
Explicit `priority` values in catalog still respected (lower is better), but
host preference can override large default values.
"""
def base_priority(item) -> float:
if isinstance(item, dict) and "priority" in item:
try:
return float(item["priority"])
except Exception:
pass
return 100.0 # neutral base
def extract_url(item) -> Optional[str]:
url = item.get("url")
if isinstance(url, list) and url:
return url[0].strip()
elif isinstance(url, str):
return url.strip()
return None
def host_bonus(u: str) -> float:
lu = u.lower()
if "huggingface.co/spaces" in lu or lu.startswith("https://hf.space"):
return -60.0
if "gradio.live" in lu:
return -40.0
if "replicate.run" in lu or "replicate.com" in lu:
return -30.0
if lu.endswith(".ipynb") or "colab.research.google.com" in lu:
return -10.0
if "github.com" in lu:
return +10.0 # de-prioritize plain GitHub vs real demos
return 0.0
collected = []
for items in (
getattr(doc, "runnable_example", None) or [],
getattr(doc, "has_executable_notebook", None) or [],
):
for it in items:
url = extract_url(it)
if not url:
continue
pr = base_priority(it) + host_bonus(url)
collected.append((pr, url))
if not collected:
return None
collected.sort(key=lambda x: x[0])
return collected[0][1]
def _coerce_files_to_paths(files: List[Any]) -> List[str]:
"""Convert Gradio file objects to paths."""
if not files:
return []
paths = []
for f in files:
if isinstance(f, str):
paths.append(f)
elif isinstance(f, dict):
p = f.get("name") or f.get("path")
if p:
paths.append(p)
elif hasattr(f, "name"):
paths.append(f.name)
# De-duplicate
seen = set()
deduped = []
for p in paths:
if p not in seen:
seen.add(p)
deduped.append(p)
return deduped
def _is_affirmative(text: str) -> bool:
    """Return True if a user message reads as agreement (yes, ok, sure, ...).

    Checks are applied in this order:
      1. Empty / whitespace-only (or non-string) input is not affirmative.
      2. Any emoji from ``_EMOJI_AFFIRMATIVES`` anywhere in the message is
         affirmative.
      3. Any whole-word match of ``_NEGATION_PATTERN`` makes the message
         non-affirmative.
      4. A phrase from ``_MULTI_WORD_AFFIRMATIVES`` counts when it appears on
         word boundaries and the message is at most
         ``_PHRASE_LENGTH_MULTIPLIER`` times as long as the phrase.
      5. A word from ``_SINGLE_WORD_AFFIRMATIVES`` counts when it appears on
         word boundaries and the message is at most
         ``_SHORT_MESSAGE_THRESHOLD`` characters long.

    Args:
        text: Raw user message.

    Returns:
        True when the message is classified as affirmative, else False.
    """
    if not isinstance(text, str):
        return False

    # Fold typographic apostrophes to ASCII so that "don’t" (common on mobile
    # keyboards) is recognised by the negation pattern like "don't".
    text_lower = (
        text.lower().strip().replace("\u2019", "'").replace("\u2018", "'")
    )
    if not text_lower:
        return False

    # Emoji are checked against the original text and win over everything.
    if any(emoji in text for emoji in _EMOJI_AFFIRMATIVES):
        return True

    # A message containing a negation word can never equal a single
    # affirmative word (stripping trailing punctuation does not remove the
    # negation word itself), so negation always means "not affirmative".
    if _NEGATION_PATTERN.search(text_lower) is not None:
        return False

    message_length = len(text_lower)

    # Multi-word phrases: reject when the text is much longer than the phrase.
    for phrase in _MULTI_WORD_AFFIRMATIVES:
        if message_length > len(phrase) * _PHRASE_LENGTH_MULTIPLIER:
            continue
        if re.search(r"\b" + re.escape(phrase) + r"\b", text_lower):
            return True

    # Single words: the length limit does not depend on the word, so test it
    # once instead of on every iteration.
    if message_length > _SHORT_MESSAGE_THRESHOLD:
        return False
    return any(
        re.search(r"\b" + re.escape(word) + r"\b", text_lower)
        for word in _SINGLE_WORD_AFFIRMATIVES
    )
def _env_flag(name: str, default: bool = False) -> bool:
"""Parse boolean env vars robustly."""
raw = os.getenv(name)
if raw is None:
return default
val = raw.split("#", 1)[0].strip().lower()
if not val:
return default
return val in {"1", "true", "yes", "on"}