Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,62 @@ type Error struct {
Func string
}

// ProxyNetwork identifies which IP family (IPv4, IPv6, or both) a proxy may
// be used for.
type ProxyNetwork int

// Values of ProxyNetwork. They are numbered from zero in declaration order,
// and the zero value is reserved as "unset" so that a network has to be
// chosen explicitly.
const (
	// ProxyNetworkUnset is the zero value and must not be used; it exists to
	// force an explicit selection.
	ProxyNetworkUnset = ProxyNetwork(iota)
	// ProxyNetworkAny marks a proxy as usable for both IPv4 and IPv6
	// connections.
	ProxyNetworkAny
	// ProxyNetworkIPv4 marks a proxy as usable for IPv4 connections only.
	ProxyNetworkIPv4
	// ProxyNetworkIPv6 marks a proxy as usable for IPv6 connections only.
	ProxyNetworkIPv6
)
Comment on lines +16 to +28
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love to see the golang-style enum pattern being used here!


// ProxyType identifies the kind of infrastructure a proxy runs on.
type ProxyType int

// Values of ProxyType, numbered from zero in declaration order. Note that the
// zero value of ProxyType is ProxyTypeAny, so a ProxyType left unset is
// indistinguishable from one explicitly set to ProxyTypeAny.
const (
	// ProxyTypeAny means the proxy can be used for any type of request.
	ProxyTypeAny = ProxyType(iota)
	// ProxyTypeMobile means the proxy uses mobile network infrastructure.
	ProxyTypeMobile
	// ProxyTypeResidential means the proxy uses residential IP addresses.
	ProxyTypeResidential
	// ProxyTypeDatacenter means the proxy uses datacenter infrastructure.
	ProxyTypeDatacenter
)
Comment on lines +30 to +42
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me, it seems that ProxyTypeAny represents something different from the other ProxyTypes: it describes the traffic's preference, whereas the other types describe the kind of infrastructure the proxy uses.
I think part of my confusion is this: I understand that you might want to send a URL to a proxy with residential IPs, or to a proxy with datacenter IPs, but ProxyTypeAny isn't a kind of proxy. It is actually a decision that you don't care which proxy type the data is sent to.
So I'm confused: under what condition would you define a proxy as ProxyTypeAny?


// ProxyConfig defines the configuration for a single proxy.
//
// One ProxyConfig describes one proxy endpoint together with the constraints
// (IP family, infrastructure type, domain patterns) under which it may be used.
// HTTPClientSettings.Proxies carries a slice of these.
type ProxyConfig struct {
// URL is the proxy URL (e.g., "socks5://proxy.example.com:1080").
URL string
// Network specifies if this proxy supports IPv4, IPv6, or both.
// The zero value is ProxyNetworkUnset, which is documented as "must not be
// used", so this field has to be set explicitly.
Network ProxyNetwork
// Type specifies the infrastructure type (Mobile, Residential, Datacenter, or Any).
// The zero value is ProxyTypeAny.
Type ProxyType
// AllowedDomains is a list of glob patterns for domains this proxy should handle.
// Examples: "*.example.com", "api.*.org"
// If empty, the proxy can be used for any domain.
// NOTE(review): the exact glob matching rules are implemented outside this
// view - confirm against the matcher before relying on a pattern.
AllowedDomains []string
}

// ProxyStats holds statistics for a single proxy.
//
// Every field is an atomic.Int64, so a ProxyStats value must not be copied
// after first use; share it by pointer instead (CustomHTTPClient stores these
// as map[string]*ProxyStats). Read and update the fields through their atomic
// methods.
type ProxyStats struct {
// RequestCount is the total number of requests made through this proxy.
RequestCount atomic.Int64
// ErrorCount is the number of failed requests/connections through this proxy.
ErrorCount atomic.Int64
// LastUsed is when this proxy was last selected (Unix nanoseconds).
// NOTE(review): the code that writes this value is not visible here -
// presumably time.Now().UnixNano(); confirm at the call site.
LastUsed atomic.Int64
}

type HTTPClientSettings struct {
RotatorSettings *RotatorSettings
Proxy string
Proxies []ProxyConfig
AllowDirectFallback bool
TempDir string
DiscardHook DiscardHook
DNSServers []string
Expand Down Expand Up @@ -73,6 +126,9 @@ type CustomHTTPClient struct {
CDXDedupeTotal *atomic.Int64
DoppelgangerDedupeTotal *atomic.Int64
LocalDedupeTotal *atomic.Int64

// ProxyStats holds per-proxy statistics, keyed by proxy URL
ProxyStats map[string]*ProxyStats
}

func (c *CustomHTTPClient) Close() error {
Expand Down Expand Up @@ -103,6 +159,11 @@ func (c *CustomHTTPClient) Close() error {
return nil
}

// GetProxyStats returns a copy of the per-proxy statistics map, keyed by
// proxy URL.
//
// The copy is shallow: adding or removing entries in the returned map does not
// affect the client's own map. The *ProxyStats values are shared with the
// client rather than duplicated, because ProxyStats holds atomic counters that
// must not be copied; read them through their atomic Load methods to observe
// current values.
//
// A nil map is returned when the client has no statistics map.
//
// NOTE(review): no lock guards c.ProxyStats here; this assumes the map's key
// set is not modified concurrently with this call - confirm against the code
// that populates it.
func (c *CustomHTTPClient) GetProxyStats() map[string]*ProxyStats {
	// Preserve nil-ness so callers that test the result against nil keep working.
	if c.ProxyStats == nil {
		return nil
	}

	snapshot := make(map[string]*ProxyStats, len(c.ProxyStats))
	for proxyURL, stats := range c.ProxyStats {
		snapshot[proxyURL] = stats
	}
	return snapshot
}

func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient *CustomHTTPClient, err error) {
httpClient = new(CustomHTTPClient)

Expand Down Expand Up @@ -216,7 +277,7 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient
httpClient.ConnReadDeadline = HTTPClientSettings.ConnReadDeadline

// Configure custom dialer / transport
customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxy, HTTPClientSettings.DialTimeout, HTTPClientSettings.DNSRecordsTTL, HTTPClientSettings.DNSResolutionTimeout, HTTPClientSettings.DNSCacheSize, HTTPClientSettings.DNSServers, HTTPClientSettings.DNSConcurrency, HTTPClientSettings.DisableIPv4, HTTPClientSettings.DisableIPv6)
customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxies, HTTPClientSettings.AllowDirectFallback, HTTPClientSettings.DialTimeout, HTTPClientSettings.DNSRecordsTTL, HTTPClientSettings.DNSResolutionTimeout, HTTPClientSettings.DNSCacheSize, HTTPClientSettings.DNSServers, HTTPClientSettings.DNSConcurrency, HTTPClientSettings.DisableIPv4, HTTPClientSettings.DisableIPv6)
if err != nil {
return nil, err
}
Expand Down
8 changes: 7 additions & 1 deletion client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,13 @@ func TestHTTPClientWithProxy(t *testing.T) {
// init the HTTP client responsible for recording HTTP(s) requests / responses
httpClient, err := NewWARCWritingHTTPClient(HTTPClientSettings{
RotatorSettings: rotatorSettings,
Proxy: fmt.Sprintf("socks5://%s", proxyAddr)})
Proxies: []ProxyConfig{
{
URL: fmt.Sprintf("socks5://%s", proxyAddr),
Network: ProxyNetworkAny,
Type: ProxyTypeAny,
},
}})
if err != nil {
t.Fatalf("Unable to init WARC writing HTTP client: %s", err)
}
Expand Down
Loading
Loading