ai-agent/src/ai_agent/utils/utils.py at 34ee2f410477b85c5a5935daa1d750af6518cb4b · Imaging-Plaza/ai-agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
from ai_agent.retriever.software_doc import SoftwareDoc
from typing import Optional, List, Any
import re
import os

# Vocabulary used by the affirmative-reply detector.
# Multi-word confirmations, matched as whole phrases.
_MULTI_WORD_AFFIRMATIVES = [
    "go ahead",
    "do it",
    "run it",
    "sounds good",
    "looks good",
]

# Single-word confirmations, matched on word boundaries.
_SINGLE_WORD_AFFIRMATIVES = [
    "yes", "y", "yeah", "yep", "yup",
    "sure", "ok", "okay", "fine", "alright",
    "right", "correct", "affirmative",
]

# Emoji that count as a confirmation wherever they appear in the message.
_EMOJI_AFFIRMATIVES = ["👍", "✅", "✓"]

# Whole words that signal a negated (non-confirming) message.
_NEGATION_WORDS = {"no", "not", "don't", "dont", "never", "nothing"}

# Length limits that keep long messages from matching by accident.
# A multi-word phrase only counts when the message is at most this many
# times the phrase's own length.
_PHRASE_LENGTH_MULTIPLIER = 3
# A single affirmative word only counts in messages up to this many characters.
_SHORT_MESSAGE_THRESHOLD = 30

# Compiled once at import time: any negation word, bounded by \b on both sides.
_NEGATION_PATTERN = re.compile(
    r"\b({})\b".format("|".join(map(re.escape, _NEGATION_WORDS)))
)


def _best_runnable_link(doc: SoftwareDoc) -> Optional[str]:
    """Return the most user-friendly runnable link for ``doc``.

    Candidates are read from ``doc.runnable_example`` and then
    ``doc.has_executable_notebook``; missing, empty or non-iterable attributes
    are skipped. Each entry may be a dict with a ``url`` (a string, or a
    list/tuple whose first element is used) and an optional numeric
    ``priority``, or a bare URL string. Entries of any other shape are ignored.

    Every candidate is scored (lower is better) as its explicit ``priority``
    (100 when absent or malformed) plus a host adjustment:

      1. Hugging Face Space (``*.hf.space`` or ``huggingface.co/spaces``): -60
      2. Other interactive demo hosts: ``gradio.live`` -40,
         ``replicate.run`` / ``replicate.com`` -30
      3. Executable notebooks (``.ipynb`` path or Colab): -10
      4. Plain ``github.com`` links: +10 (de-prioritised vs. real demos)

    Explicit ``priority`` values are therefore still respected, but the host
    preference can override the large default value. Ties keep the order in
    which the candidates were encountered.

    Returns:
        The URL with the lowest score, or ``None`` if no usable URL is found.
    """
    default_priority = 100.0  # neutral base for entries without a usable priority

    def as_entries(value) -> list:
        """Normalise an attribute value to a list of catalog entries."""
        if not value:
            return []
        if isinstance(value, (str, dict)):
            # A lone entry: iterating it would yield characters or keys.
            return [value]
        try:
            return list(value)
        except TypeError:
            # Not iterable (e.g. a boolean flag): nothing to collect.
            return []

    def base_priority(item) -> float:
        if isinstance(item, dict) and "priority" in item:
            try:
                return float(item["priority"])
            except (TypeError, ValueError, OverflowError):
                pass  # malformed priority: fall back to the neutral base
        return default_priority

    def extract_url(item) -> Optional[str]:
        if isinstance(item, str):
            url = item
        elif isinstance(item, dict):
            url = item.get("url")
        else:
            return None
        if isinstance(url, (list, tuple)):
            url = url[0] if url else None
        if isinstance(url, str):
            return url.strip() or None
        return None

    def host_bonus(u: str) -> float:
        lu = u.lower()
        # Drop query string and fragment so "nb.ipynb?raw=1" still counts.
        path = lu.split("?", 1)[0].split("#", 1)[0]
        # Spaces are served from "<owner>-<space>.hf.space", so a prefix test
        # against "https://hf.space" alone would miss them.
        if "huggingface.co/spaces" in lu or ".hf.space" in lu or "//hf.space" in lu:
            return -60.0
        if "gradio.live" in lu:
            return -40.0
        if "replicate.run" in lu or "replicate.com" in lu:
            return -30.0
        if path.endswith(".ipynb") or "colab.research.google.com" in lu:
            return -10.0
        if "github.com" in lu:
            return +10.0  # de-prioritize plain GitHub vs real demos
        return 0.0

    best_url = None
    best_score = None
    for source in ("runnable_example", "has_executable_notebook"):
        for entry in as_entries(getattr(doc, source, None)):
            url = extract_url(entry)
            if not url:
                continue
            score = base_priority(entry) + host_bonus(url)
            # Strict "<" keeps the first candidate on ties.
            if best_score is None or score < best_score:
                best_score, best_url = score, url
    return best_url


def _coerce_files_to_paths(files: List[Any]) -> List[str]:
    """Convert Gradio file objects to a de-duplicated list of path strings.

    Each item may be:
      * a path string,
      * an ``os.PathLike`` (e.g. ``pathlib.Path``), converted with
        ``os.fspath`` so the full path is kept (``Path.name`` would only be
        the basename),
      * a dict carrying the path under ``"name"`` or ``"path"``,
      * any other object exposing the path as a ``name`` or ``path`` attribute.

    Items that yield no non-empty string path are skipped. A single item
    passed instead of a list is treated as a one-element list.

    Args:
        files: The uploaded files, or ``None`` / empty for no files.

    Returns:
        Path strings in first-seen order, without duplicates.
    """
    if not files:
        return []

    # A lone upload would otherwise be iterated character by character
    # (str), key by key (dict) or line by line (file-like object).
    if isinstance(files, (str, dict, os.PathLike)) or hasattr(files, "name"):
        files = [files]

    paths = []
    for f in files:
        if isinstance(f, (str, os.PathLike)):
            candidate = f
        elif isinstance(f, dict):
            candidate = f.get("name") or f.get("path")
        else:
            candidate = getattr(f, "name", None) or getattr(f, "path", None)

        if isinstance(candidate, os.PathLike):
            candidate = os.fspath(candidate)
        # Skip None, empty strings and non-string names (e.g. an int fd).
        if isinstance(candidate, str) and candidate:
            paths.append(candidate)

    # dict preserves insertion order, so this de-duplicates stably.
    return list(dict.fromkeys(paths))


def _is_affirmative(text: str) -> bool:
    """Check if user message is affirmative (yes, ok, sure, etc.).

    Uses word boundary matching and context checking to avoid false positives.

    Decision order:
      1. Non-string, empty or whitespace-only input is not affirmative.
      2. Any emoji from ``_EMOJI_AFFIRMATIVES`` makes the message affirmative.
      3. Any whole-word match of ``_NEGATION_PATTERN`` makes it non-affirmative.
      4. A phrase from ``_MULTI_WORD_AFFIRMATIVES`` counts when the message is
         at most ``_PHRASE_LENGTH_MULTIPLIER`` times as long as the phrase.
      5. A word from ``_SINGLE_WORD_AFFIRMATIVES`` counts when the message is
         at most ``_SHORT_MESSAGE_THRESHOLD`` characters long.

    Args:
        text: The raw user message.

    Returns:
        ``True`` if the message reads as a confirmation, ``False`` otherwise.
    """
    if not isinstance(text, str):
        return False

    # Keyboards and chat clients often emit typographic apostrophes; fold them
    # to "'" so "don’t do it" is seen as a negation instead of matching
    # "do it". The mapping is one-to-one, so the length checks are unaffected.
    text_lower = text.lower().strip().translate(
        str.maketrans("\u2018\u2019\u02bc", "'''")
    )

    if not text_lower:
        return False

    # Check emojis (against the raw text)
    if any(emoji in text for emoji in _EMOJI_AFFIRMATIVES):
        return True

    # A message that contains a negation word can never consist solely of an
    # affirmative word, so a negated message is simply rejected.
    if _NEGATION_PATTERN.search(text_lower) is not None:
        return False

    def has_term(term: str) -> bool:
        """Whole-word / whole-phrase containment test."""
        return re.search(r"\b" + re.escape(term) + r"\b", text_lower) is not None

    # Check multi-word phrases (reject if text is much longer than phrase)
    if any(
        len(text_lower) <= len(phrase) * _PHRASE_LENGTH_MULTIPLIER
        and has_term(phrase)
        for phrase in _MULTI_WORD_AFFIRMATIVES
    ):
        return True

    # Check single words (reject if message is long)
    return len(text_lower) <= _SHORT_MESSAGE_THRESHOLD and any(
        has_term(word) for word in _SINGLE_WORD_AFFIRMATIVES
    )

def _env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable ``name`` as a boolean switch.

    Anything after the first ``#`` is discarded as an inline comment, and the
    remainder is trimmed and compared case-insensitively.

    Args:
        name: Environment variable to read.
        default: Result when the variable is unset or holds no value once the
            comment and surrounding whitespace are removed.

    Returns:
        ``True`` for ``1``, ``true``, ``yes`` or ``on``; ``False`` for any
        other non-empty value; ``default`` otherwise.
    """
    truthy = ("1", "true", "yes", "on")

    setting = os.getenv(name)
    if setting is not None:
        # Keep only the text in front of an inline comment.
        token, _, _ = setting.partition("#")
        token = token.strip().lower()
        if token:
            return token in truthy
    return default