|
4 | 4 |
|
5 | 5 | import modal |
6 | 6 | import os |
| 7 | +from typing import Optional |
7 | 8 |
|
8 | 9 | # Define the Modal app. The name given here is the same string that invoke_conversion passes to modal.Cls.from_name to look up the deployed service. |
9 | 10 | app = modal.App("datalab-marker-modal-demo") |
@@ -119,7 +120,7 @@ def load_models(self): |
119 | 120 | self.models = None |
120 | 121 |
|
121 | 122 | @modal.asgi_app() |
122 | | - def fastapi_app(self): |
| 123 | + def marker_api(self): |
123 | 124 | import traceback |
124 | 125 | import io |
125 | 126 | import base64 |
@@ -296,3 +297,101 @@ async def convert_document( |
296 | 297 | ) |
297 | 298 |
|
298 | 299 | return web_app |
| 300 | + |
| 301 | + |
| 302 | +# |
| 303 | +# This does not get deployed. It's a useful entrypoint from your local CLI |
| 304 | +# that you can use to test your deployment. It'll store the |
| 305 | +# API response in a new file on your machine. |
| 306 | +# |
@app.local_entrypoint()
async def invoke_conversion(
    pdf_file: Optional[str] = None,
    output_format: str = "markdown",
    env: str = 'main'
):
    """
    Local entrypoint to test your deployed Marker endpoint in Modal.

    Looks up the web URL of the deployed service, queries ``/health``,
    uploads the PDF to ``/convert`` and writes the JSON response to
    ``<pdf stem>_response.json`` (a relative path, so it lands in the current
    working directory). Every problem is reported with ``print`` followed by
    an early return; the function does not raise on purpose.

    Usage:
        modal run marker_modal_deployment.py::invoke_conversion --pdf-file /path/to/file.pdf --output-format markdown

    Args:
        pdf_file: Path to the PDF to upload. When empty or missing on disk,
            a hint is printed and nothing is sent.
        output_format: Value sent as the ``output_format`` form field of the
            ``/convert`` request.
        env: Modal environment name used when looking up the deployed class.
    """
    import requests
    import json
    from pathlib import Path

    # requests applies no timeout unless told to, so an unresponsive service
    # would hang this CLI forever. Values are (connect, read) seconds.
    health_timeout = (10, 150)
    convert_timeout = (10, 600)

    if not pdf_file:
        print("No PDF file specified. Use --pdf-file /path/to/your.pdf")
        return

    pdf_path = Path(pdf_file)
    # is_file() also rejects directories, which exists() would let through
    # only to fail later at open().
    if not pdf_path.is_file():
        print(f"File not found: {pdf_file}")
        return

    #
    # Get the web URL for our deployed service
    #
    try:
        service = modal.Cls.from_name(
            "datalab-marker-modal-demo",
            "MarkerModalDemoService",
            environment_name=env
        )
        web_url = service().marker_api.get_web_url()
    except Exception as e:
        # Broad on purpose: this is the CLI boundary and any lookup failure
        # gets the same "deploy first" hint.
        print(f"Error getting web URL: {e}")
        print("Make sure you've deployed the service first with: modal deploy marker_modal_deployment.py")
        return

    # Guard against a missing URL so we never build "None/health" below.
    if not web_url:
        print("Error getting web URL: the deployed service did not report one")
        print("Make sure you've deployed the service first with: modal deploy marker_modal_deployment.py")
        return

    # Avoid a double slash when the endpoint paths are appended.
    web_url = web_url.rstrip("/")
    print(f"Found deployed service at: {web_url}")

    print(f"Testing conversion of: {pdf_path.name}")
    print(f"Output format: {output_format}")

    #
    # Test health endpoint first (best effort: a failure here is reported
    # but does not stop the conversion attempt)
    #
    try:
        health_response = requests.get(f"{web_url}/health", timeout=health_timeout)
        health_response.raise_for_status()
        health_data = health_response.json()
        models_loaded = health_data.get('models_loaded')
        print(f"Service health: {health_data.get('status')}")
        print(f"Models loaded: {models_loaded} ({health_data.get('model_count')} models)")

        if not models_loaded:
            print("Warning: Models not loaded yet. First request may be slow.")

    except Exception as e:
        print(f"Health check failed: {e}")

    #
    # Make conversion request
    #
    try:
        # Keep the upload handle open only for the duration of the POST.
        with open(pdf_path, 'rb') as pdf_handle:
            files = {'file': (pdf_path.name, pdf_handle, 'application/pdf')}
            data = {'output_format': output_format}

            print(f"Sending request to {web_url}/convert...")
            response = requests.post(
                f"{web_url}/convert",
                files=files,
                data=data,
                timeout=convert_timeout,
            )

        if response.status_code != 200:
            print(f"❌ Conversion failed: {response.status_code}")
            print(f"Error: {response.text}")
            return

        result = response.json()
        print("✅ Conversion successful!")
        # .get() so that a response lacking one summary field is still saved.
        print(f"Filename: {result.get('filename')}")
        print(f"Format: {result.get('output_format')}")
        print(f"Pages: {result.get('page_count')}")

        output_file = f"{pdf_path.stem}_response.json"
        with open(output_file, 'w', encoding='utf-8') as out_handle:
            json.dump(result, out_handle, indent=2, ensure_ascii=False)
        print(f"Full API response saved to: {output_file}")

        images = result.get('images')
        if images:
            print(f"Images extracted: {len(images)}")

    except Exception as e:
        # CLI boundary: report network, JSON and file errors alike.
        print(f"Request failed: {e}")
0 commit comments