-
Notifications
You must be signed in to change notification settings - Fork 12
Add multi-proxy support & granular proxy selection #160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,9 +13,62 @@ type Error struct { | |
| Func string | ||
| } | ||
|
|
||
| // ProxyNetwork identifies which IP network layer (IPv4, IPv6, or both) a proxy can be used for. | ||
| type ProxyNetwork int | ||
|
|
||
| const ( | ||
| // ProxyNetworkUnset is the zero value (iota starts at 0) and must not be used; it exists to force explicit selection. | ||
| ProxyNetworkUnset ProxyNetwork = iota | ||
| // ProxyNetworkAny means the proxy can be used for both IPv4 and IPv6 connections. | ||
| ProxyNetworkAny | ||
| // ProxyNetworkIPv4 means the proxy should only be used for IPv4 connections. | ||
| ProxyNetworkIPv4 | ||
| // ProxyNetworkIPv6 means the proxy should only be used for IPv6 connections. | ||
| ProxyNetworkIPv6 | ||
| ) | ||
|
|
||
| // ProxyType defines the infrastructure type of a proxy. | ||
| type ProxyType int | ||
|
|
||
| const ( | ||
| // ProxyTypeAny is the zero value of ProxyType and means the proxy can be used for any type of request; NOTE(review): unlike the values below it does not name a kind of infrastructure - confirm intended semantics. | ||
| ProxyTypeAny ProxyType = iota | ||
| // ProxyTypeMobile means the proxy uses mobile network infrastructure. | ||
| ProxyTypeMobile | ||
| // ProxyTypeResidential means the proxy uses residential IP addresses. | ||
| ProxyTypeResidential | ||
| // ProxyTypeDatacenter means the proxy uses datacenter infrastructure. | ||
| ProxyTypeDatacenter | ||
| ) | ||
|
Comment on lines
+30
to
+42
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To me, it seems that ProxyTypeAny represents something different from the other ProxyTypes because it describes the traffic's preference, whereas the other types describe the kind of infrastructure the proxy uses. |
||
|
|
||
| // ProxyConfig defines the configuration for a single proxy. | ||
| type ProxyConfig struct { | ||
| // URL is the proxy URL (e.g., "socks5://proxy.example.com:1080"). | ||
| URL string | ||
| // Network specifies if this proxy supports IPv4, IPv6, or both; its zero value is ProxyNetworkUnset, which must not be used, so set it explicitly. | ||
| Network ProxyNetwork | ||
| // Type specifies the infrastructure type (Mobile, Residential, Datacenter, or Any); its zero value is ProxyTypeAny. | ||
| Type ProxyType | ||
| // AllowedDomains is a list of glob patterns for domains this proxy should handle. | ||
| // Examples: "*.example.com", "api.*.org" | ||
| // If empty, the proxy can be used for any domain. | ||
| AllowedDomains []string | ||
| } | ||
|
|
||
| // ProxyStats holds usage counters for a single proxy; every field is an atomic.Int64, so share a ProxyStats by pointer rather than copying it. | ||
| type ProxyStats struct { | ||
| // RequestCount is the total number of requests made through this proxy. | ||
| RequestCount atomic.Int64 | ||
| // ErrorCount is the number of failed requests/connections through this proxy. | ||
| ErrorCount atomic.Int64 | ||
| // LastUsed is when this proxy was last selected, stored as Unix nanoseconds. | ||
| LastUsed atomic.Int64 | ||
| } | ||
|
|
||
| type HTTPClientSettings struct { | ||
| RotatorSettings *RotatorSettings | ||
| Proxy string | ||
| Proxies []ProxyConfig | ||
| AllowDirectFallback bool | ||
| TempDir string | ||
| DiscardHook DiscardHook | ||
| DNSServers []string | ||
|
|
@@ -73,6 +126,9 @@ type CustomHTTPClient struct { | |
| CDXDedupeTotal *atomic.Int64 | ||
| DoppelgangerDedupeTotal *atomic.Int64 | ||
| LocalDedupeTotal *atomic.Int64 | ||
|
|
||
| // ProxyStats holds per-proxy statistics, keyed by proxy URL | ||
| ProxyStats map[string]*ProxyStats | ||
| } | ||
|
|
||
| func (c *CustomHTTPClient) Close() error { | ||
|
|
@@ -103,6 +159,11 @@ func (c *CustomHTTPClient) Close() error { | |
| return nil | ||
| } | ||
|
|
||
| // GetProxyStats returns a shallow copy of the per-proxy statistics map, keyed by proxy URL. | ||
| // The returned map is a fresh map, so adding or deleting entries in it does not alter the client's own map. | ||
| // The *ProxyStats values are shared with the client (they hold atomic counters and are not copied), so they reflect live counts. | ||
| // A nil ProxyStats map yields a nil result, matching the previous behavior for an unconfigured client. | ||
| func (c *CustomHTTPClient) GetProxyStats() map[string]*ProxyStats { | ||
| 	if c.ProxyStats == nil { | ||
| 		return nil | ||
| 	} | ||
| 	snapshot := make(map[string]*ProxyStats, len(c.ProxyStats)) | ||
| 	for proxyURL, stats := range c.ProxyStats { | ||
| 		snapshot[proxyURL] = stats | ||
| 	} | ||
| 	return snapshot | ||
| } | ||
|
|
||
| func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient *CustomHTTPClient, err error) { | ||
| httpClient = new(CustomHTTPClient) | ||
|
|
||
|
|
@@ -216,7 +277,7 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient | |
| httpClient.ConnReadDeadline = HTTPClientSettings.ConnReadDeadline | ||
|
|
||
| // Configure custom dialer / transport | ||
| customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxy, HTTPClientSettings.DialTimeout, HTTPClientSettings.DNSRecordsTTL, HTTPClientSettings.DNSResolutionTimeout, HTTPClientSettings.DNSCacheSize, HTTPClientSettings.DNSServers, HTTPClientSettings.DNSConcurrency, HTTPClientSettings.DisableIPv4, HTTPClientSettings.DisableIPv6) | ||
| customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxies, HTTPClientSettings.AllowDirectFallback, HTTPClientSettings.DialTimeout, HTTPClientSettings.DNSRecordsTTL, HTTPClientSettings.DNSResolutionTimeout, HTTPClientSettings.DNSCacheSize, HTTPClientSettings.DNSServers, HTTPClientSettings.DNSConcurrency, HTTPClientSettings.DisableIPv4, HTTPClientSettings.DisableIPv6) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Love to see the golang-style enum pattern being used here!