# conjure/utils.py at main · suvadityamuk/conjure · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""
Conjure Utils - API wrappers for Gemini and Meshy
"""

import base64
import os
import time

import requests


def get_client(api_key):
    """Build and return a Gemini API client authenticated with ``api_key``.

    The ``google.genai`` import happens at call time rather than at module
    import time.
    """
    from google import genai as gemini_sdk

    gemini_client = gemini_sdk.Client(api_key=api_key)
    return gemini_client


def refine_prompt(api_key, prompt):
    """Use Gemini to refine a prompt for 3D model generation.

    The object description is wrapped in instructions asking for a frontal,
    white-background product shot, and the model's reply is returned.

    Args:
        api_key: Gemini API key, forwarded to ``get_client``.
        prompt: Description of the object to visualise.

    Returns:
        The refined prompt text with leading/trailing whitespace removed.

    Raises:
        RuntimeError: If the model response carries no text.
    """
    client = get_client(api_key)

    # Adjacent string literals are joined verbatim, so every sentence except
    # the last needs its own trailing space to keep the sentences separated.
    instruction = (
        "Refine this prompt for generating a high-quality 3D model reference image. "
        "Only generate a frontal view of the object. "
        "Other views will be generated later again using this as the original "
        "reference image, so this must be a perfect visualization of the original "
        "objective. "
        "Adhere to the use of a white background, product shot setting. "
        f"Object: {prompt}. Return ONLY the refined prompt, no markdown."
    )

    response = client.models.generate_content(
        model="gemini-3-pro-preview", contents=instruction
    )

    refined = response.text
    if refined is None:
        # Fail with a clear message instead of an AttributeError on ``.strip()``.
        raise RuntimeError("No text in response")
    return refined.strip()


def generate_image(api_key, prompt, output_path, input_image_path=None):
    """
    Generate an image using Gemini and save it to ``output_path``.

    If input_image_path is provided, that image is sent alongside the prompt
    as a reference for the generation.

    Args:
        api_key: Gemini API key, forwarded to ``get_client``.
        prompt: Text prompt describing the image to generate.
        output_path: Destination path the generated image is saved to.
        input_image_path: Optional path to a reference image.

    Returns:
        ``output_path``, once the first image part of the response is saved.

    Raises:
        Exception: If the response contains no image part.
    """
    from google.genai import types
    from PIL import Image

    client = get_client(api_key)

    config = types.GenerateContentConfig(
        response_modalities=["Image"],
        image_config=types.ImageConfig(aspect_ratio="1:1"),
    )

    def _request(contents):
        # Single place for the model call so both branches share its settings.
        return client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=contents,
            config=config,
        )

    if input_image_path:
        # Image.open keeps the underlying file open; the context manager
        # releases the handle as soon as the request has been made.
        with Image.open(input_image_path) as ref_image:
            response = _request([prompt, ref_image])
    else:
        response = _request(prompt)

    # Treat a missing (None) parts list the same as "no image parts".
    for part in response.parts or []:
        if getattr(part, "inline_data", None):
            img = part.as_image()
            img.save(output_path)
            return output_path

    raise Exception("No image in response")


def _image_to_data_uri(path):
    """Read the image at ``path`` and return it as a base64 ``data:`` URI."""
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    ext = os.path.splitext(path)[1].lower()
    # Anything that is not a .png is labelled as JPEG.
    mime = "image/png" if ext == ".png" else "image/jpeg"
    return f"data:{mime};base64,{encoded}"


def generate_3d_meshy(api_key, image_paths, timeout=(10, 60)):
    """
    Generate 3D model from images using Meshy Multi-Image API.

    Creates a multi-image-to-3d task, then polls it until it reaches a
    terminal status or the polling budget (120 attempts) is exhausted.

    Args:
        api_key: Meshy API key, sent as a Bearer token.
        image_paths: A single path (``str`` or ``os.PathLike``) or an iterable
            of paths to the input images.
        timeout: ``(connect, read)`` timeout in seconds applied to every HTTP
            request, so a stalled connection cannot block forever.

    Returns:
        The URL of the generated GLB model (``model_urls["glb"]``).

    Raises:
        ValueError: If no image path is given.
        Exception: If task creation is rejected, the task ends in a failed,
            expired or canceled state, or polling runs out of attempts.
    """
    # A lone path (string or path object) is treated as a one-element list.
    if isinstance(image_paths, (str, os.PathLike)):
        image_paths = [image_paths]
    image_paths = list(image_paths)
    if not image_paths:
        # Fail before any network traffic rather than on an API error reply.
        raise ValueError("image_paths must contain at least one image path")

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    # Convert images to data URIs
    image_urls = [_image_to_data_uri(path) for path in image_paths]

    payload = {
        "image_urls": image_urls,
        "ai_model": "meshy-5",
        "topology": "triangle",
        "target_polycount": 75000,
        "should_remesh": True,
        "should_texture": False,
    }

    # Use Session to reuse TCP connection (SSL handshake optimization)
    with requests.Session() as session:
        session.headers.update(headers)

        # Create task - API returns 202 Accepted on success
        resp = session.post(
            "https://api.meshy.ai/openapi/v1/multi-image-to-3d",
            json=payload,
            timeout=timeout,
        )

        # 200 or 202 are both success
        if resp.status_code not in (200, 202):
            raise Exception(f"Meshy API error {resp.status_code}: {resp.text}")

        task_id = resp.json()["result"]
        status_url = f"https://api.meshy.ai/openapi/v1/multi-image-to-3d/{task_id}"

        # Adaptive polling: Check frequently at first (2s), then back off to 5s
        # This reduces waiting time for fast jobs without spamming the API for slow ones.
        intervals = [2, 2, 2, 5]
        default_interval = 5

        # Poll until complete
        for i in range(120):
            wait_time = intervals[i] if i < len(intervals) else default_interval
            time.sleep(wait_time)

            try:
                status_resp = session.get(status_url, timeout=timeout)
            except requests.exceptions.Timeout:
                # A slow poll is handled like a non-200 reply: try again later.
                continue
            if status_resp.status_code != 200:
                continue

            data = status_resp.json()
            status = data["status"]
            if status == "SUCCEEDED":
                return data["model_urls"]["glb"]
            if status in ("FAILED", "EXPIRED", "CANCELED"):
                # ``task_error`` may be absent, null, or carry an empty message.
                task_error = data.get("task_error") or {}
                msg = task_error.get("message") or "Unknown error"
                raise Exception(f"Meshy failed: {msg}")

    raise Exception("Meshy timed out")


def download_file(url, output_path, timeout=(10, 60)):
    """Download a file from URL and write it to ``output_path``.

    Args:
        url: Address of the file to fetch.
        output_path: Local path the response body is written to.
        timeout: ``(connect, read)`` timeout in seconds for the request, so a
            stalled server cannot block the caller forever.

    Returns:
        ``output_path``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Stream the body in chunks to keep memory usage low for large files. The
    # context manager closes the response even if writing fails partway.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    return output_path