-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
113 lines (107 loc) · 3.75 KB
/
docker-compose.yml
File metadata and controls
113 lines (107 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# =============================================================================
# NScrapy Docker Compose - Three Deployment Modes
# =============================================================================
# Mode 1 - local-only:
# Redis disabled. Single spider runs locally without Redis.
# Usage: docker compose up -d spider
# NOTE(review): "spider" is assigned to the local-redis profile and declares
# depends_on: redis, so explicitly targeting it also enables that profile
# and starts the local Redis container — confirm this matches the intended
# "Redis disabled" behavior for this mode.
#
# Mode 2 - local-redis:
# Uses a local Redis container for distributed scraping.
# All services on the same Docker network.
# Usage: docker compose --profile local-redis up -d
#
# Mode 3 - managed-redis:
# Uses an external Redis (e.g., Azure Redis Cache, ElastiCache).
# Configure REDIS_HOST, REDIS_PASSWORD, REDIS_PORT, REDIS_USESSL via .env.
# Usage: docker compose up -d
# NOTE(review): with every service gated behind the local-redis profile,
# a bare "docker compose up -d" starts no services; either name the
# services explicitly or adjust the profile assignments for this mode.
# =============================================================================
services:
  # ---------------------------------------------------------------------------
  # Spider Service
  # - Runs the NScrapy CLI spider node
  # - Command format: nscrapy run <spider-name> [options]
  # - Environment variables override CLI args (lowest priority)
  # - Scale with: docker-compose up -d --scale spider=3
  # ---------------------------------------------------------------------------
  spider:
    build:
      context: .
      dockerfile: Dockerfile.Spider
    # No container_name: a fixed name conflicts with "--scale spider=3"
    # (every replica would claim the same name); Compose generates unique
    # per-replica names when this key is omitted.
    command: run ${SPIDER_NAME:-MySpider} --role spider --distributed
    environment:
      # Redis configuration (defaults target the local "redis" service below)
      - REDIS_HOST=${REDIS_HOST:-redis}
      - REDIS_PORT=${REDIS_PORT:-6379}
      - REDIS_PASSWORD=${REDIS_PASSWORD:-}
      - REDIS_USESSL=${REDIS_USESSL:-false}
      # Spider settings
      - NSCRAPY_CONCURRENCY=${CONCURRENCY:-5}
      - NSCRAPY_RECEIVER_QUEUE=${RECEIVER_QUEUE:-nscrapy:requests}
      - NSCRAPY_RESPONSE_QUEUE=${RESPONSE_QUEUE:-nscrapy:responses}
      # CLI will set these automatically when --distributed is used
      - REDIS_ENABLED=true
    volumes:
      # Mount config file (optional, CLI args take priority)
      - ./appsettings.spider.json:/app/appsettings.json:ro
    depends_on:
      - redis
    networks:
      - nscrapy-net
    restart: unless-stopped
    profiles:
      - local-redis
# ---------------------------------------------------------------------------
# Downloader Service
# - Runs the NScrapy CLI downloader node
# - Command format: nscrapy run --role downloader
# - Scale with: docker-compose up -d --scale downloader=4
# ---------------------------------------------------------------------------
downloader:
build:
context: .
dockerfile: Dockerfile.Downloader
container_name: nscrapy-downloader
command: run --role downloader
environment:
# Redis configuration
- REDIS_HOST=${REDIS_HOST:-redis}
- REDIS_PORT=${REDIS_PORT:-6379}
- REDIS_PASSWORD=${REDIS_PASSWORD:-}
- REDIS_USESSL=${REDIS_USESSL:-false}
# CLI settings
- NSCRAPY_RECEIVER_QUEUE=${RECEIVER_QUEUE:-nscrapy:requests}
- NSCRAPY_RESPONSE_QUEUE=${RESPONSE_QUEUE:-nscrapy:responses}
- REDIS_ENABLED=true
volumes:
- ./appsettings.downloader.json:/app/appsettings.json:ro
depends_on:
- redis
networks:
- nscrapy-net
restart: unless-stopped
profiles:
- local-redis
deploy:
replicas: 1
# ---------------------------------------------------------------------------
# Local Redis (only used in local-redis mode)
# ---------------------------------------------------------------------------
redis:
image: redis:7-alpine
container_name: nscrapy-redis
ports:
- "6379:6379"
volumes:
- redis-data:/data
command: redis-server --appendonly yes
networks:
- nscrapy-net
restart: unless-stopped
profiles:
- local-redis
# Shared bridge network so all services resolve each other by service name
networks:
  nscrapy-net:
    driver: bridge
# Named volume for Redis persistence (see the redis service above)
volumes:
  redis-data: