Skip to content

Commit 0618488

Browse files
authored
[ENH] Make HTTP client pooling configuration configurable (#63)
* feat: make HTTP client pooling configuration configurable - Add HTTPMaxIdleConns, HTTPMaxIdleConnsPerHost, HTTPIdleTimeout fields to HFConfig - Support environment variables: HF_HTTP_MAX_IDLE_CONNS, HF_HTTP_MAX_IDLE_CONNS_PER_HOST, HF_HTTP_IDLE_TIMEOUT - Priority: config fields override env vars, both override defaults (100/10/90s) - Add comprehensive tests for all configuration scenarios - Maintain backward compatibility with sensible defaults Closes #52 * refactor: improve HTTP pooling configuration implementation - Extract default values as named constants for better maintainability - Update tests to use constants instead of hardcoded values - Add comprehensive documentation in HFConfig struct - Update CLAUDE.md with new environment variables documentation - Improve code clarity and consistency Per code review suggestions in PR #63 * feat: add validation and improve HTTP pooling configuration - Add validation to ensure maxIdleConns >= maxIdleConnsPerHost for logical consistency - Enforce reasonable upper bounds (1000/100) to prevent resource exhaustion - Extract env var parsing logic into reusable helper functions (getEnvInt, getEnvDuration) - Add comprehensive documentation about performance trade-offs and recommendations - Add test coverage for validation logic and edge cases Per code review suggestions in PR #63 * feat: add logging and improve thread-safety documentation - Add debug logging (DEBUG=1) to show actual HTTP pooling configuration values - Log warnings for invalid environment variable formats to help debugging - Document thread-safety: HTTP client initialized once per process via sync.Once - Add clear documentation that config changes after first use won't take effect - Improve user experience with better visibility into configuration Per final code review suggestions in PR #63 * refactor: improve configuration robustness and visibility - Extract magic numbers to named constants (maxAllowedIdleConns=1000, maxAllowedIdleConnsPerHost=100) - Always log configuration warnings (not just in DEBUG mode) to help users debug issues - Log all validation adjustments for better visibility into automatic corrections - Add comprehensive unit tests for getEnvInt and getEnvDuration helper functions - Improve error messages with detailed context about what went wrong - Update test assertions to use named constants Per final code review suggestions in PR #63
1 parent 66f258d commit 0618488

3 files changed

Lines changed: 509 additions & 21 deletions

File tree

CLAUDE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ For detailed cache structure and management, see `docs/CACHE_MANAGEMENT.md`.
176176
- `GITHUB_TOKEN` or `GH_TOKEN`: GitHub authentication for API requests
177177
- `HF_TOKEN`: HuggingFace authentication token for private/gated models
178178
- `HF_HUB_CACHE`: Override HuggingFace cache directory
179+
- `HF_HTTP_MAX_IDLE_CONNS`: Maximum number of idle HTTP connections (default: 100)
180+
- `HF_HTTP_MAX_IDLE_CONNS_PER_HOST`: Maximum idle connections per host (default: 10)
181+
- `HF_HTTP_IDLE_TIMEOUT`: Idle connection timeout duration, e.g., "2m30s" (default: 90s)
179182

180183
## Error Handling
181184

huggingface.go

Lines changed: 149 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/json"
66
"fmt"
77
"io"
8+
"log"
89
"math/rand"
910
"net"
1011
"net/http"
@@ -26,6 +27,16 @@ const (
2627
// HFMaxRetryAfterDelay caps the maximum delay from Retry-After headers
2728
// to prevent excessive waits from misconfigured or malicious servers
2829
HFMaxRetryAfterDelay = 5 * time.Minute
30+
31+
// HTTP connection pooling defaults
32+
defaultMaxIdleConns = 100
33+
defaultMaxIdleConnsPerHost = 10
34+
defaultIdleTimeout = 90 * time.Second
35+
36+
// HTTP connection pooling maximum bounds
37+
// These limits prevent resource exhaustion from misconfiguration
38+
maxAllowedIdleConns = 1000 // Maximum total idle connections across all hosts
39+
maxAllowedIdleConnsPerHost = 100 // Maximum idle connections per individual host
2940
)
3041

3142
var (
@@ -49,22 +60,127 @@ func SetLibraryVersion(version string) {
4960
}
5061
}
5162

52-
// initHFHTTPClient initializes the shared HTTP client with connection pooling
53-
func initHFHTTPClient() {
63+
// getEnvInt retrieves an integer value from environment variable
64+
func getEnvInt(key string, defaultValue int) int {
65+
if envVal := os.Getenv(key); envVal != "" {
66+
if val, err := strconv.Atoi(envVal); err == nil && val > 0 {
67+
return val
68+
} else if err != nil {
69+
// Always log warning for invalid configuration to help users debug
70+
log.Printf("[WARNING] Invalid integer value for %s: '%s' (error: %v), using default: %d\n",
71+
key, envVal, err, defaultValue)
72+
} else if val <= 0 {
73+
// Log warning for non-positive values
74+
log.Printf("[WARNING] Non-positive value for %s: %d, using default: %d\n",
75+
key, val, defaultValue)
76+
}
77+
}
78+
return defaultValue
79+
}
80+
81+
// getEnvDuration retrieves a duration value from environment variable
82+
func getEnvDuration(key string, defaultValue time.Duration) time.Duration {
83+
if envVal := os.Getenv(key); envVal != "" {
84+
if val, err := time.ParseDuration(envVal); err == nil && val > 0 {
85+
return val
86+
} else if err != nil {
87+
// Always log warning for invalid configuration to help users debug
88+
log.Printf("[WARNING] Invalid duration value for %s: '%s' (error: %v), using default: %v\n",
89+
key, envVal, err, defaultValue)
90+
} else if val <= 0 {
91+
// Log warning for non-positive durations
92+
log.Printf("[WARNING] Non-positive duration for %s: %v, using default: %v\n",
93+
key, val, defaultValue)
94+
}
95+
}
96+
return defaultValue
97+
}
98+
99+
// validateHTTPPoolingConfig validates and adjusts HTTP pooling configuration for logical consistency
100+
func validateHTTPPoolingConfig(maxIdleConns, maxIdleConnsPerHost int) (int, int) {
101+
originalMaxIdleConns := maxIdleConns
102+
originalMaxIdleConnsPerHost := maxIdleConnsPerHost
103+
104+
// Ensure maxIdleConns is at least as large as maxIdleConnsPerHost
105+
// This is logical since total idle connections should be >= per-host idle connections
106+
if maxIdleConns < maxIdleConnsPerHost {
107+
maxIdleConns = maxIdleConnsPerHost
108+
// Always log this important logical adjustment
109+
log.Printf("[WARNING] HTTPMaxIdleConns (%d) was less than HTTPMaxIdleConnsPerHost (%d), adjusted to %d for consistency",
110+
originalMaxIdleConns, maxIdleConnsPerHost, maxIdleConns)
111+
}
112+
113+
// Ensure reasonable bounds to prevent resource exhaustion
114+
if maxIdleConns > maxAllowedIdleConns {
115+
maxIdleConns = maxAllowedIdleConns
116+
log.Printf("[WARNING] HTTPMaxIdleConns (%d) exceeds maximum allowed (%d), capped to prevent resource exhaustion",
117+
originalMaxIdleConns, maxAllowedIdleConns)
118+
}
119+
if maxIdleConnsPerHost > maxAllowedIdleConnsPerHost {
120+
maxIdleConnsPerHost = maxAllowedIdleConnsPerHost
121+
log.Printf("[WARNING] HTTPMaxIdleConnsPerHost (%d) exceeds maximum allowed (%d), capped to prevent resource exhaustion",
122+
originalMaxIdleConnsPerHost, maxAllowedIdleConnsPerHost)
123+
}
124+
125+
return maxIdleConns, maxIdleConnsPerHost
126+
}
127+
128+
// initHFHTTPClient initializes the shared HTTP client with connection pooling.
129+
// NOTE: Due to thread-safety via sync.Once, configuration changes after the first
130+
// client initialization will not take effect. The HTTP client is initialized once
131+
// per process lifetime.
132+
func initHFHTTPClient(config *HFConfig) {
54133
hfClientOnce.Do(func() {
134+
// Apply configuration with priority: config fields > env vars > defaults
135+
maxIdleConns := config.HTTPMaxIdleConns
136+
if maxIdleConns == 0 {
137+
maxIdleConns = getEnvInt("HF_HTTP_MAX_IDLE_CONNS", defaultMaxIdleConns)
138+
}
139+
140+
maxIdleConnsPerHost := config.HTTPMaxIdleConnsPerHost
141+
if maxIdleConnsPerHost == 0 {
142+
maxIdleConnsPerHost = getEnvInt("HF_HTTP_MAX_IDLE_CONNS_PER_HOST", defaultMaxIdleConnsPerHost)
143+
}
144+
145+
// Store original values for logging
146+
originalMaxIdleConns := maxIdleConns
147+
originalMaxIdleConnsPerHost := maxIdleConnsPerHost
148+
149+
// Validate and adjust configuration for logical consistency
150+
maxIdleConns, maxIdleConnsPerHost = validateHTTPPoolingConfig(maxIdleConns, maxIdleConnsPerHost)
151+
152+
idleTimeout := config.HTTPIdleTimeout
153+
if idleTimeout == 0 {
154+
idleTimeout = getEnvDuration("HF_HTTP_IDLE_TIMEOUT", defaultIdleTimeout)
155+
}
156+
157+
// Log final configuration in debug mode
158+
if os.Getenv("DEBUG") != "" {
159+
log.Printf("[DEBUG] HTTP Client Configuration:\n")
160+
log.Printf(" MaxIdleConns: %d", maxIdleConns)
161+
if originalMaxIdleConns != maxIdleConns {
162+
log.Printf(" (adjusted from %d for consistency)", originalMaxIdleConns)
163+
}
164+
log.Printf(" MaxIdleConnsPerHost: %d", maxIdleConnsPerHost)
165+
if originalMaxIdleConnsPerHost != maxIdleConnsPerHost {
166+
log.Printf(" (adjusted from %d due to bounds)", originalMaxIdleConnsPerHost)
167+
}
168+
log.Printf(" IdleTimeout: %v", idleTimeout)
169+
}
170+
55171
transport := &http.Transport{
56172
Proxy: http.ProxyFromEnvironment,
57173
DialContext: (&net.Dialer{
58174
Timeout: 30 * time.Second,
59175
KeepAlive: 30 * time.Second,
60176
}).DialContext,
61177
ForceAttemptHTTP2: true,
62-
MaxIdleConns: 100,
63-
MaxIdleConnsPerHost: 10,
64-
// IdleConnTimeout: 90s is suitable for long-running processes that may
178+
MaxIdleConns: maxIdleConns,
179+
MaxIdleConnsPerHost: maxIdleConnsPerHost,
180+
// IdleConnTimeout is suitable for long-running processes that may
65181
// have periods of inactivity between downloads. For short scripts that
66182
// exit quickly, connections will be closed automatically on program exit.
67-
IdleConnTimeout: 90 * time.Second,
183+
IdleConnTimeout: idleTimeout,
68184
TLSHandshakeTimeout: 10 * time.Second,
69185
ExpectContinueTimeout: 1 * time.Second,
70186
}
@@ -78,8 +194,8 @@ func initHFHTTPClient() {
78194
}
79195

80196
// getHFHTTPClient returns the shared HTTP client for HuggingFace downloads
81-
func getHFHTTPClient() *http.Client {
82-
initHFHTTPClient()
197+
func getHFHTTPClient(config *HFConfig) *http.Client {
198+
initHFHTTPClient(config)
83199
return hfHTTPClient
84200
}
85201

@@ -91,6 +207,30 @@ type HFConfig struct {
91207
Timeout time.Duration
92208
MaxRetries int
93209
OfflineMode bool
210+
211+
// HTTP client pooling configuration
212+
// These settings control connection reuse for improved performance.
213+
// Config fields take priority over environment variables.
214+
//
215+
// IMPORTANT: The HTTP client is initialized once per process using sync.Once.
216+
// Changes to these configuration values after the first HuggingFace download
217+
// will NOT take effect. Set these values before any HuggingFace operations.
218+
//
219+
// Performance trade-offs:
220+
// - Higher values: Better connection reuse, reduced latency for subsequent requests, but increased memory usage
221+
// - Lower values: Reduced memory footprint, but more connection establishment overhead
222+
//
223+
// Recommended configurations:
224+
// - High-throughput services: Increase HTTPMaxIdleConnsPerHost (e.g., 20-50) for parallel downloads
225+
// - Resource-constrained environments: Reduce both values (e.g., 50/5) to minimize memory usage
226+
// - Short-lived scripts: Reduce HTTPIdleTimeout (e.g., 10s) to release resources quickly
227+
//
228+
// Note: HTTPMaxIdleConns will be automatically adjusted to be >= HTTPMaxIdleConnsPerHost for logical consistency
229+
//
230+
// Debug mode: Set DEBUG=1 environment variable to see actual configuration values being used
231+
HTTPMaxIdleConns int // Maximum idle connections across all hosts (env: HF_HTTP_MAX_IDLE_CONNS, default: 100, max: 1000)
232+
HTTPMaxIdleConnsPerHost int // Maximum idle connections per host (env: HF_HTTP_MAX_IDLE_CONNS_PER_HOST, default: 10, max: 100)
233+
HTTPIdleTimeout time.Duration // How long to keep idle connections open (env: HF_HTTP_IDLE_TIMEOUT, default: 90s)
94234
}
95235

96236
// FromHuggingFace loads a tokenizer from HuggingFace Hub using the model identifier.
@@ -305,7 +445,7 @@ func downloadWithRetryAndResponse(url string, config *HFConfig) ([]byte, *http.R
305445
}
306446

307447
// Use the shared HTTP client with connection pooling
308-
client := getHFHTTPClient()
448+
client := getHFHTTPClient(config)
309449
resp, err := client.Do(req)
310450
if err != nil {
311451
return nil, nil, errors.Wrap(err, "request failed")

0 commit comments

Comments
 (0)