-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
103 lines (81 loc) · 2.84 KB
/
main.py
File metadata and controls
103 lines (81 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python3
"""
ICU-scraper main entry point.
This script demonstrates how to use the restructured ICU-scraper project
with its modular architecture.
"""
import sys
import os
# Add current directory to path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from scrapers.avito_scraper import scrape_avito
from scrapers.general_scraper import GeneralScraper
from utils.export import export_data
def demo_avito_scraper():
    """Run a small Avito scrape and pass the results to the exporter.

    Calls ``scrape_avito`` for two pages of a Casablanca apartment listing
    URL with a restricted set of fields, forwards the returned results and
    fields to ``export_data`` using the "xlsx" format, and prints the number
    of items that came back.
    """
    print("🔍 Demo: Avito Scraper")
    print("=" * 50)

    # Listing to scrape: apartments in Casablanca.
    listing_url = "https://www.avito.ma/fr/maroc/appartement_casablanca"
    # One value feeds both the scrape and the export call so they stay in sync.
    page_count = 2
    wanted_fields = {"title", "price", "location", "link"}

    items, used_fields = scrape_avito(
        listing_url,
        pages=page_count,
        delay=1,
        fields=wanted_fields,
    )

    # Hand the scraped rows to the exporter.
    export_data(
        items,
        list(used_fields),
        listing_url,
        page_count,
        "xlsx",
        site_name="avito_ma",
    )

    print(f"✅ Scraped {len(items)} items from Avito")
    print()
def demo_general_scraper():
    """Construct a ``GeneralScraper`` from a sample configuration.

    The configuration describes a placeholder e-commerce site
    (``https://example.com``). This function only instantiates the scraper
    and prints an informational note; it does not call any method on the
    resulting object.
    """
    print("🔍 Demo: General Scraper")
    print("=" * 50)

    # Per-field extraction rules for the placeholder site.
    title_rule = {"selector": "h2.product-title", "default": "N/A"}
    price_rule = {"selector": "span.price", "default": "N/A"}
    link_rule = {
        "selector": "a.product-link",
        "type": "attribute",
        "attribute": "href",
        "default": "N/A",
    }

    # Pagination is driven by a "page" parameter.
    paging = {"type": "parameter", "parameter": "page"}

    config = {
        "site_name": "example_site",
        "base_url": "https://example.com",
        "headers": {"User-Agent": "Mozilla/5.0"},
        "container_selector": "div.product-item",
        "pagination": paging,
        "fields": {
            "title": title_rule,
            "price": price_rule,
            "link": link_rule,
        },
    }

    # Instantiate only; the example URL is a placeholder, so nothing further
    # is done with the scraper here.
    scraper = GeneralScraper(config)

    print("ℹ️ General scraper configured (example only)")
    print(" To use with a real site, provide valid URL and configuration")
    print()
def main():
    """Run both demos in order, then print usage pointers."""
    print("🚀 ICU-scraper Demo")
    print("=" * 60)
    print()

    # Avito demo first, then the general-scraper demo.
    demo_avito_scraper()
    demo_general_scraper()

    usage_lines = (
        "📚 Usage Examples:",
        "- Avito scraper: python -m scrapers.avito_scraper",
        "- 1moment scraper: python -m scrapers.1moment_scraper",
        "- General scraper: Use GeneralScraper class with custom config",
    )
    for usage_line in usage_lines:
        print(usage_line)

    print()
    print("📖 See README.md for detailed documentation")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()