-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
63 lines (52 loc) · 1.75 KB
/
main.py
File metadata and controls
63 lines (52 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, HttpUrl
from typing import Union, Dict, Any, List
from crawler import WebCrawler
import os
# Configuration
# TCP port the uvicorn server binds to (used by the __main__ guard).
PORT = 8001
# FastAPI application instance; the metadata below feeds the auto-generated
# OpenAPI/Swagger documentation.
app = FastAPI(
    title="Web Crawler API",
    description="API for crawling websites and sitemaps using Crawl4AI",
    version="1.0.0"
)
class CrawlRequest(BaseModel):
    """Request body for the POST /crawl endpoint."""

    # Target URL; pydantic's HttpUrl validates it is a well-formed HTTP(S) URL.
    url: HttpUrl
    # When True, `url` is treated as a sitemap whose entries are all crawled.
    is_sitemap: bool = False
class CrawlResponse(BaseModel):
    """Response body for the POST /crawl endpoint."""

    # The URL that was crawled (echoed back as a plain string).
    url: str
    # One result dict for a single-URL crawl, or a list of dicts for a sitemap.
    results: Union[Dict[str, Any], List[Dict[str, Any]]]
    # Matching output file path(s): one path for a single crawl, a list for a sitemap.
    output_paths: Union[str, List[str]]
# Use a relative path for the output directory
# Single module-level crawler instance shared by all request handlers.
crawler = WebCrawler(output_dir="crawl_results")
@app.post("/crawl", response_model=CrawlResponse)
async def crawl_url(request: CrawlRequest):
    """
    Crawl a URL or sitemap.

    - If is_sitemap is False, crawls the single URL
    - If is_sitemap is True, extracts URLs from the sitemap and crawls them in parallel

    Returns a CrawlResponse holding the crawler's result dict(s) and the
    output file path(s) it reports. Any crawler failure surfaces as HTTP 500
    with the exception message in the detail field.
    """
    try:
        if request.is_sitemap:
            # Sitemap mode: the crawler returns a list of per-URL result dicts,
            # each carrying its own "output_path".
            results = await crawler.crawl_sitemap(str(request.url))
            return CrawlResponse(
                url=str(request.url),
                results=results,
                output_paths=[r["output_path"] for r in results],
            )
        # Single-URL mode: the crawler returns one result dict.
        result = await crawler.crawl_url(str(request.url))
        return CrawlResponse(
            url=str(request.url),
            results=result,
            output_paths=result["output_path"],
        )
    except Exception as e:
        # Chain the original exception (`from e`) so server logs keep the
        # root-cause traceback instead of silently discarding it.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/health")
async def health_check():
    """Liveness probe: always reports the service as healthy."""
    payload = {"status": "healthy"}
    return payload
# Script entry point: start uvicorn on all interfaces with auto-reload
# enabled (development-friendly; reload=True is not intended for production).
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=PORT, reload=True)