Skip to content

Commit f53d6b7

Browse files
zheng elendilzheng elendil
authored andcommitted
feat(cli): add nscrapy CLI tool with new and run commands
- Add NScrapy.Cli project using Spectre.Console.Cli - Implement 'nscrapy new' command for generating spider projects from templates - Support basic and distributed spider templates - Configurable output directory and force overwrite - Implement 'nscrapy run' command for spider execution - Support single, spider, and downloader roles - Distributed mode with Redis configuration via CLI args - Configuration priority: CLI args > ENV > JSON - Update Dockerfiles to package CLI instead of raw DLLs - Update docker-compose.yml to use CLI commands
1 parent 9ed01f4 commit f53d6b7

File tree

11 files changed

+840
-73
lines changed

11 files changed

+840
-73
lines changed

Dockerfile.Downloader

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
# =============================================================================
2-
# Dockerfile.Downloader - Multi-stage build for NScrapy DownloaderShell
2+
# Dockerfile.Downloader - Multi-stage build for NScrapy CLI (Downloader mode)
33
# =============================================================================
4-
# Build stage: compile NScrapy.DownloaderShell (Executable)
4+
# Build stage: compile NScrapy.Cli with all dependencies
55
# Runtime stage: run with .NET 10.0 runtime
66
#
77
# Usage:
88
# docker build -f Dockerfile.Downloader -t nscrapy-downloader .
9-
# docker run nscrapy-downloader
9+
# docker run nscrapy-downloader run --role downloader
1010
#
11-
# Environment variables (override appsettings.json):
12-
# REDIS_ENABLED - Enable Redis ("true" / "false")
11+
# Environment variables (override JSON config; overridden by CLI args):
1312
# REDIS_HOST - Redis host address
1413
# REDIS_PORT - Redis port (default: 6379)
1514
# REDIS_PASSWORD - Redis password
1615
# REDIS_USESSL - Use SSL for Redis ("true" / "false")
16+
# NSCRAPY_REDIS_HOST - Same as REDIS_HOST (preferred)
17+
# NSCRAPY_REDIS_PORT - Same as REDIS_PORT (preferred)
1718
# =============================================================================
1819

1920
# ---- Build Stage ----
@@ -22,40 +23,45 @@ WORKDIR /src
2223

2324
# Copy solution and project files (layer caching optimisation)
2425
COPY NScrapy.sln ./
25-
COPY NScrapy.DownloaderShell/NScrapy.DownloaderShell.csproj NScrapy.DownloaderShell/
26+
COPY NScrapy.Cli/NScrapy.Cli.csproj NScrapy.Cli/
27+
COPY NScrapy.Core/NScrapy.Core.csproj NScrapy.Core/
2628
COPY NScrapy.Downloader/NScrapy.Downloader.csproj NScrapy.Downloader/
2729
COPY NScrapy.Infra/NScrapy.Infra.csproj NScrapy.Infra/
2830
COPY NScrapy.Scheduler/NScrapy.Scheduler.csproj NScrapy.Scheduler/
31+
COPY NScrapy.Engine/NScrapy.Engine.csproj NScrapy.Engine/
32+
COPY NScrapy.Spider/NScrapy.Spider.csproj NScrapy.Spider/
2933

3034
# Restore NuGet packages
31-
RUN dotnet restore NScrapy.DownloaderShell/NScrapy.DownloaderShell.csproj
35+
RUN dotnet restore NScrapy.Cli/NScrapy.Cli.csproj
3236

3337
# Copy source code
34-
COPY NScrapy.DownloaderShell/ NScrapy.DownloaderShell/
35-
COPY NScrapy.Downloader/ NScrapy.Downloader/
36-
COPY NScrapy.Infra/ NScrapy.Infra/
37-
COPY NScrapy.Scheduler/ NScrapy.Scheduler/
38+
COPY NScrapy.Cli/ NScrapy.Cli/
39+
COPY NScrapy.Core/ NScrapy.Core/
40+
COPY NScrapy.Downloader/ NScrapy.Downloader/
41+
COPY NScrapy.Infra/ NScrapy.Infra/
42+
COPY NScrapy.Scheduler/ NScrapy.Scheduler/
43+
COPY NScrapy.Engine/ NScrapy.Engine/
44+
COPY NScrapy.Spider/ NScrapy.Spider/
3845

39-
# Build and publish
40-
WORKDIR /src/NScrapy.DownloaderShell
41-
RUN dotnet build -c Release --no-restore -o /app/build
42-
RUN dotnet publish -c Release --no-build -o /app/publish
46+
# Build and publish CLI
47+
WORKDIR /src/NScrapy.Cli
48+
RUN dotnet publish -c Release --no-restore -o /app/publish -p:PublishSingleFile=false
4349

4450
# ---- Runtime Stage ----
45-
FROM mcr.microsoft.com/dotnet/runtime:10.0 AS runtime
51+
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime
4652
WORKDIR /app
4753

4854
# Copy published output
4955
COPY --from=build /app/publish .
5056

5157
# Copy log4net config
52-
COPY NScrapy.DownloaderShell/log4net.config ./
58+
# COPY is a Dockerfile instruction, not a shell command: "2>/dev/null || true"
# would be parsed as additional paths instead of making the copy optional.
# The file must therefore exist in the build context.
COPY NScrapy.Infra/log4net.config ./
5359

5460
# Environment defaults
5561
ENV DOTNET_RUNNING_IN_CONTAINER=true
56-
ENV REDIS_ENABLED=true
5762
ENV REDIS_HOST=redis
5863
ENV REDIS_PORT=6379
5964

60-
# Entry point: run the DownloaderShell executable
61-
ENTRYPOINT ["dotnet", "NScrapy.DownloaderShell.dll"]
65+
# Entry point: run the CLI in downloader mode
66+
# Usage: docker run nscrapy-downloader run --role downloader
67+
ENTRYPOINT ["nscrapy"]

Dockerfile.Spider

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
# =============================================================================
2-
# Dockerfile.Spider - Multi-stage build for NScrapy Spider
2+
# Dockerfile.Spider - Multi-stage build for NScrapy CLI
33
# =============================================================================
4-
# Build stage: compile the Spider / Shell library
4+
# Build stage: compile NScrapy.Cli and all dependencies
55
# Runtime stage: run with .NET 10.0 runtime
66
#
77
# Usage:
88
# docker build -f Dockerfile.Spider -t nscrapy-spider .
9-
# docker run nscrapy-spider
9+
# docker run nscrapy-spider run MySpider --role spider --distributed
1010
#
11-
# Environment variables (override appsettings.json):
12-
# REDIS_ENABLED - Enable Redis scheduler ("true" / "false")
11+
# Environment variables (override JSON config; overridden by CLI args):
1312
# REDIS_HOST - Redis host address
1413
# REDIS_PORT - Redis port (default: 6379)
1514
# REDIS_PASSWORD - Redis password
1615
# REDIS_USESSL - Use SSL for Redis ("true" / "false")
17-
# Scheduler__SchedulerType - Scheduler type (InMemoryScheduler / RedisScheduler)
16+
# NSCRAPY_REDIS_HOST - Same as REDIS_HOST (preferred)
17+
# NSCRAPY_REDIS_PORT - Same as REDIS_PORT (preferred)
1818
# =============================================================================
1919

2020
# ---- Build Stage ----
@@ -23,40 +23,33 @@ WORKDIR /src
2323

2424
# Copy solution and project files first (layer caching optimisation)
2525
COPY NScrapy.sln ./
26-
COPY NScrapy/NScrapy.Shell.csproj NScrapy/
27-
COPY NScrapy/NScrapy.csproj NScrapy/ 2>/dev/null || true
26+
COPY NScrapy.Cli/NScrapy.Cli.csproj NScrapy.Cli/
27+
COPY NScrapy.Core/NScrapy.Core.csproj NScrapy.Core/
2828
COPY NScrapy.Engine/NScrapy.Engine.csproj NScrapy.Engine/
2929
COPY NScrapy.Infra/NScrapy.Infra.csproj NScrapy.Infra/
3030
COPY NScrapy.Scheduler/NScrapy.Scheduler.csproj NScrapy.Scheduler/
3131
COPY NScrapy.Spider/NScrapy.Spider.csproj NScrapy.Spider/
32-
COPY NScrapy.Project/NScrapy.Project.csproj NScrapy.Project/
3332
COPY NScrapy.Downloader/NScrapy.Downloader.csproj NScrapy.Downloader/
34-
COPY NScrapy.DownloaderShell/NScrapy.DownloaderShell.csproj NScrapy.DownloaderShell/
33+
COPY NScrapy.Spider/NScrapy.Spider.csproj NScrapy.Spider/
3534

3635
# Restore all NuGet packages
37-
RUN dotnet restore NScrapy/NScrapy.Shell.csproj
38-
RUN dotnet restore NScrapy.Project/NScrapy.Project.csproj
36+
RUN dotnet restore NScrapy.Cli/NScrapy.Cli.csproj
3937

4038
# Copy source code
41-
COPY NScrapy/ NScrapy/
42-
COPY NScrapy.Engine/ NScrapy.Engine/
43-
COPY NScrapy.Infra/ NScrapy.Infra/
44-
COPY NScrapy.Scheduler/ NScrapy.Scheduler/
45-
COPY NScrapy.Spider/ NScrapy.Spider/
46-
COPY NScrapy.Project/ NScrapy.Project/
47-
COPY NScrapy.Downloader/ NScrapy.Downloader/
48-
COPY NScrapy.DownloaderShell/ NScrapy.DownloaderShell/
39+
COPY NScrapy.Cli/ NScrapy.Cli/
40+
COPY NScrapy.Core/ NScrapy.Core/
41+
COPY NScrapy.Engine/ NScrapy.Engine/
42+
COPY NScrapy.Infra/ NScrapy.Infra/
43+
COPY NScrapy.Scheduler/ NScrapy.Scheduler/
44+
COPY NScrapy.Spider/ NScrapy.Spider/
45+
COPY NScrapy.Downloader/ NScrapy.Downloader/
4946

50-
# Build NScrapy.Project (contains the Main entry point that uses NScrapy.Shell)
51-
WORKDIR /src/NScrapy.Project
52-
RUN dotnet build -c Release --no-restore -o /app/build
53-
54-
# Publish the Spider project
55-
WORKDIR /src/NScrapy.Project
56-
RUN dotnet publish -c Release --no-build -o /app/publish /p:UseAppHost=false
47+
# Build and publish CLI
48+
WORKDIR /src/NScrapy.Cli
49+
RUN dotnet publish -c Release --no-restore -o /app/publish -p:PublishSingleFile=false
5750

5851
# ---- Runtime Stage ----
59-
FROM mcr.microsoft.com/dotnet/runtime:10.0 AS runtime
52+
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime
6053
WORKDIR /app
6154

6255
# Copy published output from build stage
@@ -68,7 +61,7 @@ COPY NScrapy.Infra/log4net.config ./ 2>/dev/null || true
6861
# Environment defaults
6962
ENV DOTNET_RUNNING_IN_CONTAINER=true
7063
ENV ASPNETCORE_URLS=http://+:80
71-
ENV REDIS_ENABLED=false
7264

73-
# Entry point: run the Spider (NScrapy.Project contains Main)
74-
ENTRYPOINT ["dotnet", "NScrapy.Project.dll"]
65+
# Entry point: run the CLI
66+
# Usage: docker run nscrapy-spider run MySpider --role spider --distributed
67+
ENTRYPOINT ["nscrapy"]

NScrapy.Cli/Commands/NewCommand.cs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
using System;
2+
using System.ComponentModel;
3+
using System.IO;
4+
using Spectre.Console.Cli;
5+
using NScrapy.Cli.Services;
6+
7+
namespace NScrapy.Cli.Commands;
8+
9+
/// <summary>
/// Command-line settings bound for <see cref="NewCommand"/>: the spider name
/// plus the template type, output directory and overwrite flag.
/// </summary>
public class NewSettings : CommandSettings
{
    /// <summary>Name of the spider to create (first positional argument).</summary>
    [CommandArgument(0, "<SpiderName>"), Description("Name of the spider to create")]
    public string SpiderName { get; set; } = string.Empty;

    /// <summary>Template to generate: <c>basic</c> (default) or <c>distributed</c>.</summary>
    [CommandOption("-t|--type"), Description("Type of spider template (basic or distributed)"), DefaultValue("basic")]
    public string Type { get; set; } = "basic";

    /// <summary>Directory the project is written to; defaults to the current directory.</summary>
    [CommandOption("-o|--output"), Description("Output directory for the spider project"), DefaultValue(".")]
    public string Output { get; set; } = ".";

    /// <summary>When set, existing files may be overwritten.</summary>
    [CommandOption("--force"), Description("Overwrite existing files"), DefaultValue(false)]
    public bool Force { get; set; }
}
30+
31+
/// <summary>
/// Generates a new spider project from a template via <see cref="TemplateService"/>.
/// </summary>
public class NewCommand : Command<NewSettings>
{
    /// <summary>
    /// Validates the settings and creates the spider project.
    /// </summary>
    /// <param name="context">Command context supplied by Spectre.Console.Cli (unused).</param>
    /// <param name="settings">Parsed command-line settings.</param>
    /// <param name="cancellationToken">Cancellation token supplied by the host (unused).</param>
    /// <returns>
    /// 0 on success; 1 when the spider name or template type is invalid, or
    /// when project generation throws.
    /// </returns>
    public override int Execute(CommandContext context, NewSettings settings, System.Threading.CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(settings.SpiderName))
        {
            Console.WriteLine("Error: Spider name is required.");
            return 1;
        }

        if (!IsValidSpiderName(settings.SpiderName))
        {
            Console.WriteLine("Error: Invalid spider name. Use PascalCase (e.g., MySpider).");
            return 1;
        }

        // Reject unknown template types instead of silently generating the
        // basic template, which would hide typos such as "--type distibuted".
        bool isDistributed;
        switch (settings.Type?.Trim().ToLowerInvariant())
        {
            case "basic":
                isDistributed = false;
                break;
            case "distributed":
                isDistributed = true;
                break;
            default:
                Console.WriteLine($"Error: Unknown template type '{settings.Type}'. Use 'basic' or 'distributed'.");
                return 1;
        }

        var templateName = isDistributed ? "distributed" : "basic";
        Console.WriteLine($"Creating {templateName} spider: {settings.SpiderName}");

        try
        {
            var outputPath = Path.GetFullPath(settings.Output);

            var templateService = new TemplateService();
            templateService.CreateSpiderProject(
                settings.SpiderName,
                outputPath,
                isDistributed,
                settings.Force
            );
        }
        catch (Exception ex)
        {
            // Same reporting convention as RunCommand: short message, exit code 1.
            Console.WriteLine($"Error: {ex.Message}");
            return 1;
        }

        return 0;
    }

    /// <summary>
    /// Checks that <paramref name="name"/> is non-blank, does not start with a
    /// digit, and contains only letters, digits or underscores.
    /// </summary>
    /// <param name="name">Candidate spider name.</param>
    /// <returns><c>true</c> when the name passes all checks.</returns>
    private static bool IsValidSpiderName(string name)
    {
        if (string.IsNullOrWhiteSpace(name))
        {
            return false;
        }

        if (char.IsDigit(name[0]))
        {
            return false;
        }

        foreach (var c in name)
        {
            if (!char.IsLetterOrDigit(c) && c != '_')
            {
                return false;
            }
        }

        return true;
    }
}

NScrapy.Cli/Commands/RunCommand.cs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
using System;
2+
using System.ComponentModel;
3+
using System.Threading;
4+
using Spectre.Console.Cli;
5+
using NScrapy.Cli.Services;
6+
7+
namespace NScrapy.Cli.Commands;
8+
9+
/// <summary>
/// Command-line settings bound for <see cref="RunCommand"/>: the spider to run,
/// the running role, and optional Redis / throttling / config-file overrides.
/// </summary>
public class RunSettings : CommandSettings
{
    /// <summary>Spider to run; optional positional argument.</summary>
    [CommandArgument(0, "[spider-name]"), Description("Name of the spider to run (required for single and spider modes)")]
    public string? SpiderName { get; set; }

    /// <summary>Running role: <c>single</c> (default), <c>spider</c> or <c>downloader</c>.</summary>
    [CommandOption("--role"), Description("Running role: single, spider, or downloader"), DefaultValue("single")]
    public string Role { get; set; } = "single";

    /// <summary>Enables distributed mode; off by default.</summary>
    [CommandOption("--distributed"), Description("Enable distributed mode"), DefaultValue(false)]
    public bool Distributed { get; set; }

    /// <summary>Redis endpoint in <c>host:port</c> form.</summary>
    [CommandOption("--redis"), Description("Redis endpoint (host:port)")]
    public string? RedisEndpoint { get; set; }

    /// <summary>Redis password.</summary>
    [CommandOption("--redis-password"), Description("Redis password")]
    public string? RedisPassword { get; set; }

    /// <summary>Whether to use SSL for the Redis connection; off by default.</summary>
    [CommandOption("--redis-ssl"), Description("Use SSL for Redis connection"), DefaultValue(false)]
    public bool RedisSsl { get; set; }

    /// <summary>Redis queue name for requests.</summary>
    [CommandOption("--receiver-queue"), Description("Redis queue name for requests")]
    public string? ReceiverQueue { get; set; }

    /// <summary>Redis queue name for responses.</summary>
    [CommandOption("--response-queue"), Description("Redis queue name for responses")]
    public string? ResponseQueue { get; set; }

    /// <summary>Number of concurrent requests; unset when not supplied.</summary>
    [CommandOption("--concurrency"), Description("Number of concurrent requests")]
    public int? Concurrency { get; set; }

    /// <summary>Delay between requests in milliseconds; unset when not supplied.</summary>
    [CommandOption("--delay"), Description("Delay between requests in milliseconds")]
    public int? DelayMs { get; set; }

    /// <summary>Path to a configuration file.</summary>
    [CommandOption("-c|--config"), Description("Path to configuration file")]
    public string? ConfigFile { get; set; }
}
58+
59+
/// <summary>
/// Runs a spider (or a downloader) by building a <see cref="RunConfiguration"/>
/// from the command-line settings and handing it to <see cref="RunnerService"/>.
/// </summary>
public class RunCommand : Command<RunSettings>
{
    /// <summary>
    /// Validates the settings, prints a short banner and starts the runner.
    /// </summary>
    /// <param name="context">Command context supplied by Spectre.Console.Cli (unused).</param>
    /// <param name="settings">Parsed command-line settings.</param>
    /// <param name="cancellationToken">Cancellation token supplied by the host (unused).</param>
    /// <returns>
    /// 0 on success; 1 when the role is unknown, a required spider name is
    /// missing, or the runner throws.
    /// </returns>
    public override int Execute(CommandContext context, RunSettings settings, CancellationToken cancellationToken)
    {
        // Reject unknown roles instead of silently running as "single", which
        // would hide typos such as "--role downlaoder".
        if (!TryParseRole(settings.Role, out var role))
        {
            Console.WriteLine($"Error: Unknown role '{settings.Role}'. Use 'single', 'spider', or 'downloader'.");
            return 1;
        }

        // Only the downloader role can run without a spider name.
        var needsSpider = role == RunRole.Single || role == RunRole.Spider;
        if (needsSpider && string.IsNullOrWhiteSpace(settings.SpiderName))
        {
            Console.WriteLine("Error: Spider name is required for single and spider modes.");
            return 1;
        }

        Console.WriteLine("NScrapy CLI v1.0.0");
        Console.WriteLine($"Role: {role}");
        Console.WriteLine(settings.Distributed ? "Mode: Distributed" : "Mode: Local");

        if (!string.IsNullOrEmpty(settings.RedisEndpoint))
        {
            Console.WriteLine($"Redis: {settings.RedisEndpoint}");
        }

        Console.WriteLine();

        var config = new RunConfiguration
        {
            Role = role,
            Distributed = settings.Distributed,
            RedisEndpoint = settings.RedisEndpoint,
            RedisPassword = settings.RedisPassword,
            RedisSsl = settings.RedisSsl,
            ReceiverQueue = settings.ReceiverQueue,
            ResponseQueue = settings.ResponseQueue,
            Concurrency = settings.Concurrency,
            DelayMs = settings.DelayMs,
            ConfigFile = settings.ConfigFile
        };

        try
        {
            var runner = new RunnerService(config);
            runner.Run(settings.SpiderName);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
            return 1;
        }

        return 0;
    }

    /// <summary>
    /// Maps the textual <c>--role</c> value (case-insensitive, surrounding
    /// whitespace ignored) to a <see cref="RunRole"/>.
    /// </summary>
    /// <param name="value">Raw role text from the command line.</param>
    /// <param name="role">The parsed role; <see cref="RunRole.Single"/> when parsing fails.</param>
    /// <returns><c>true</c> when <paramref name="value"/> names a known role.</returns>
    private static bool TryParseRole(string? value, out RunRole role)
    {
        switch (value?.Trim().ToLowerInvariant())
        {
            case "single":
                role = RunRole.Single;
                return true;
            case "spider":
                role = RunRole.Spider;
                return true;
            case "downloader":
                role = RunRole.Downloader;
                return true;
            default:
                role = RunRole.Single;
                return false;
        }
    }
}

0 commit comments

Comments
 (0)