Skip to content

Commit 04d9a29

Browse files
committed
Better failure logging and TLS failure tests
1 parent 857b805 commit 04d9a29

16 files changed

+288
-182
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,10 @@ For local development and testing without Docker:
157157
Troubleshooting
158158
---------------
159159
- If EPP endpoints are unreachable or not listening on gRPC, you may see an HTTP 500 (Internal Server Error; `BAD_GATEWAY` in earlier versions) when failure mode allow is off. Toggle `*_failure_mode_allow on` to fail-open during testing.
160+
- **Enhanced TLS Error Logging**: The module now provides detailed TLS certificate validation error messages (e.g., "invalid peer certificate: UnknownIssuer") instead of generic transport errors. Check error logs for specific TLS issues like unknown issuers or certificate validation failures.
160161
- Ensure your EPP implementation is configured to return a header mutation for the upstream endpoint. The module will parse response frames and search for `header_mutation` entries.
161162
- BBR processes JSON directly in the module - ensure request bodies contain valid JSON with a "model" field.
162-
- Use `error_log` and debug logging to verify module activation. BBR logs body reading and size limit enforcement; EPP logs gRPC errors. Set `error_log` to `debug` to observe processing details.
163+
- Use `error_log` and debug logging to verify module activation. BBR logs body reading and size limit enforcement; EPP logs gRPC errors with detailed TLS diagnostics. Set `error_log` to `debug` to observe processing details.
163164

164165
Roadmap
165166
-------
@@ -168,6 +169,8 @@ Roadmap
168169
- Validate large body handling and back-pressure for BBR; refine chunked reads/writes and resource usage for very large payloads.
169170
- Connection pooling and caching for improved performance at scale.
170171
- Enhanced TLS configuration options (client certificates, cipher suites, etc.).
172+
- **Completed**: Enhanced TLS error logging with detailed certificate validation messages
173+
- **Completed**: Streamlined test infrastructure with consistent vLLM backend usage
171174

172175
License
173176
-------

src/grpc.rs

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,26 @@ macro_rules! ngx_log_error_http {
2828
};
2929
}
3030

31+
/// Extract detailed error information from transport errors
32+
fn extract_error_details(error: &tonic::transport::Error) -> String {
33+
// Try to get the root cause error
34+
let mut current_error: &dyn std::error::Error = error;
35+
let mut error_chain = Vec::new();
36+
37+
// Walk the error chain to find the root cause
38+
while let Some(source) = current_error.source() {
39+
error_chain.push(source.to_string());
40+
current_error = source;
41+
}
42+
43+
// If we found a chain, use the most specific error
44+
if !error_chain.is_empty() {
45+
error_chain.last().unwrap().clone()
46+
} else {
47+
error.to_string()
48+
}
49+
}
50+
3151
static RUNTIME: OnceLock<tokio::runtime::Runtime> = OnceLock::new();
3252

3353
fn get_runtime() -> &'static tokio::runtime::Runtime {
@@ -441,17 +461,18 @@ pub fn epp_headers_blocking(
441461
let connect_result = tls_result.connect().await;
442462

443463
connect_result.map_err(|e| {
464+
let detailed_error = extract_error_details(&e);
444465
format!(
445-
"connect error (endpoint: {}, domain: {}): {e}",
446-
endpoint_copy, domain
466+
"TLS connection failed (endpoint: {}, domain: {}): {}",
467+
endpoint_copy, domain, detailed_error
447468
)
448469
})?
449470
} else {
450471
// PLAINTEXT MODE: No TLS configuration
451-
channel_builder
452-
.connect()
453-
.await
454-
.map_err(|e| format!("connect error: {e}"))?
472+
channel_builder.connect().await.map_err(|e| {
473+
let detailed_error = extract_error_details(&e);
474+
format!("HTTP connection failed: {}", detailed_error)
475+
})?
455476
};
456477

457478
let mut client = ExternalProcessorClient::new(channel);
@@ -681,17 +702,18 @@ pub fn epp_headers_async<F>(
681702
.map_err(|e| format!("tls config error: {e}"))?;
682703

683704
tls_result.connect().await.map_err(|e| {
705+
let detailed_error = extract_error_details(&e);
684706
format!(
685-
"connect error (endpoint: {}, domain: {}): {e}",
686-
endpoint, domain
707+
"TLS connection failed (endpoint: {}, domain: {}): {}",
708+
endpoint, domain, detailed_error
687709
)
688710
})?
689711
} else {
690712
// No TLS
691-
channel_builder
692-
.connect()
693-
.await
694-
.map_err(|e| format!("connect error: {e}"))?
713+
channel_builder.connect().await.map_err(|e| {
714+
let detailed_error = extract_error_details(&e);
715+
format!("HTTP connection failed: {}", detailed_error)
716+
})?
695717
};
696718

697719
let mut client = ExternalProcessorClient::new(channel);
@@ -874,17 +896,18 @@ pub async fn epp_headers_blocking_internal(
874896
.map_err(|e| format!("tls config error: {e}"))?;
875897

876898
tls_result.connect().await.map_err(|e| {
899+
let detailed_error = extract_error_details(&e);
877900
format!(
878-
"connect error (endpoint: {}, domain: {}): {e}",
879-
endpoint, domain
901+
"TLS connection failed (endpoint: {}, domain: {}): {}",
902+
endpoint, domain, detailed_error
880903
)
881904
})?
882905
} else {
883906
// No TLS
884-
channel_builder
885-
.connect()
886-
.await
887-
.map_err(|e| format!("connect error: {e}"))?
907+
channel_builder.connect().await.map_err(|e| {
908+
let detailed_error = extract_error_details(&e);
909+
format!("HTTP connection failed: {}", detailed_error)
910+
})?
888911
};
889912

890913
let mut client = ExternalProcessorClient::new(channel);

src/lib.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,8 @@ http_request_handler!(inference_access_handler, |request: &mut http::Request| {
688688
// Otherwise continue processing
689689
}
690690
core::Status::NGX_ERROR => {
691-
// Other BBR error - always return 502 Bad Gateway
692-
return http::HTTPStatus::BAD_GATEWAY.into();
691+
// Other BBR error - return 500 Internal Server Error
692+
return http::HTTPStatus::INTERNAL_SERVER_ERROR.into();
693693
}
694694
_ => {
695695
// Continue processing
@@ -710,7 +710,7 @@ http_request_handler!(inference_access_handler, |request: &mut http::Request| {
710710
unsafe {
711711
let msg = b"ngx-inference: EPP module processing failed internally\0";
712712
ngx::ffi::ngx_log_error_core(
713-
ngx::ffi::NGX_LOG_INFO as ngx::ffi::ngx_uint_t,
713+
ngx::ffi::NGX_LOG_ERR as ngx::ffi::ngx_uint_t,
714714
request.as_mut().connection.as_ref().unwrap().log,
715715
0,
716716
cstr_ptr(msg.as_ptr()),
@@ -725,10 +725,10 @@ http_request_handler!(inference_access_handler, |request: &mut http::Request| {
725725
(*(*r_ptr).connection).log,
726726
0,
727727
#[allow(clippy::manual_c_str_literals)] // FFI code
728-
cstr_ptr(b"ngx-inference: Module returning HTTP 502 due to EPP processing failure (fail-closed mode)\0".as_ptr()),
728+
cstr_ptr(b"ngx-inference: Module returning HTTP 500 due to EPP processing failure (fail-closed mode)\0".as_ptr()),
729729
);
730730
}
731-
return http::HTTPStatus::BAD_GATEWAY.into();
731+
return http::HTTPStatus::INTERNAL_SERVER_ERROR.into();
732732
}
733733
}
734734
_ => {

src/modules/epp.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -213,12 +213,7 @@ impl EppProcessor {
213213
}
214214
}
215215
}
216-
Err(err) => {
217-
ngx_log_info_http!(
218-
request,
219-
"ngx-inference: EPP external service error: {}",
220-
err
221-
);
216+
Err(_err) => {
222217
return Err("epp grpc error");
223218
}
224219
}

tests/README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@ This directory contains test scripts and utilities for validating the ngx-infere
99
Main test runner that validates BBR (Body-Based Routing) and EPP (External Processing Pipeline) module configurations. This script supports three testing environments:
1010

1111
**Configuration Test Matrix:**
12-
- **BBR ON + EPP OFF**: Tests model extraction only
13-
- **BBR OFF + EPP ON**: Tests upstream selection only
14-
- **BBR ON + EPP ON**: Tests both modules active
15-
- **BBR OFF + EPP OFF**: Tests no processing (baseline)
12+
- **BBR ON + EPP OFF**: Tests model extraction with vLLM backend
13+
- **BBR OFF + EPP ON**: Tests upstream selection with EPP routing to vLLM
14+
- **BBR ON + EPP ON**: Tests both modules active with vLLM responses
15+
- **BBR OFF + EPP OFF**: Tests direct vLLM routing (no processing)
16+
- **EPP Untrusted TLS**: Tests TLS certificate validation with enhanced error logging
1617

1718
**Execution Modes:**
1819
- **Local Mode**: Uses locally compiled nginx module with local backend services
@@ -39,7 +40,7 @@ DOCKER_ENVIRONMENT=main ./tests/test-config.sh # Docker mode
3940
Main Docker Compose configuration providing the test environment:
4041

4142
- **nginx**: Main web server with ngx-inference module
42-
- **echo-server**: Node.js service for request inspection and header validation
43+
- **echo-server**: Node.js service for request inspection and header validation (Docker/local testing only)
4344
- **mock-extproc**: Mock gRPC External Processing service for EPP module testing
4445

4546
### Kind Testing Environment (`kind-ngf/`)

tests/configs/bbr_off_epp_off.conf

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ server {
44
server_name localhost;
55

66
# Global default upstream for all inference failures
7-
inference_default_upstream "127.0.0.1:8080";
7+
inference_default_upstream "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
88

99
# vLLM Chat Completions API (both BBR and EPP disabled)
1010
location /v1/chat/completions {
11-
set $backend "echo-server.ngx-inference-test.svc.cluster.local:80";
11+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
1212
proxy_set_header Host $host;
1313
proxy_set_header X-Real-IP $remote_addr;
1414
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -17,7 +17,7 @@ server {
1717

1818
# vLLM Models API (both BBR and EPP disabled)
1919
location /v1/models {
20-
set $backend "echo-server.ngx-inference-test.svc.cluster.local:80";
20+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
2121
proxy_set_header Host $host;
2222
proxy_set_header X-Real-IP $remote_addr;
2323
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -27,7 +27,7 @@ server {
2727
# Both BBR and EPP disabled
2828
location /bbr-test {
2929
# BBR disabled - just proxy without BBR processing
30-
set $backend "127.0.0.1:8080";
30+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
3131
proxy_set_header Host $host;
3232
proxy_set_header X-Real-IP $remote_addr;
3333
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -36,7 +36,7 @@ server {
3636

3737
location /epp-test {
3838
# EPP disabled - just proxy without EPP processing
39-
set $backend "127.0.0.1:8080";
39+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
4040
proxy_set_header Host $host;
4141
proxy_set_header X-Real-IP $remote_addr;
4242
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -45,7 +45,7 @@ server {
4545

4646
location /responses {
4747
# Both BBR and EPP disabled
48-
set $backend "127.0.0.1:8080";
48+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
4949
proxy_set_header Host $host;
5050
proxy_set_header X-Real-IP $remote_addr;
5151
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -58,7 +58,7 @@ server {
5858
}
5959

6060
location / {
61-
set $backend "127.0.0.1:8080";
61+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
6262
proxy_set_header Host $host;
6363
proxy_set_header X-Real-IP $remote_addr;
6464
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

tests/configs/bbr_off_epp_on.conf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ server {
44
server_name localhost;
55

66
# Global default upstream for all inference failures
7-
inference_default_upstream "127.0.0.1:8080";
7+
inference_default_upstream "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
88

99
# vLLM Chat Completions API with EPP
1010
location /v1/chat/completions {
@@ -39,7 +39,7 @@ server {
3939
# BBR disabled, EPP enabled
4040
location /bbr-test {
4141
# BBR disabled - just proxy without BBR processing
42-
set $backend "127.0.0.1:8080";
42+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
4343
proxy_set_header Host $host;
4444
proxy_set_header X-Real-IP $remote_addr;
4545
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -81,7 +81,7 @@ server {
8181
}
8282

8383
location / {
84-
set $backend "127.0.0.1:8080";
84+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
8585
proxy_set_header Host $host;
8686
proxy_set_header X-Real-IP $remote_addr;
8787
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
server {
2+
listen 8081;
3+
listen 80;
4+
server_name localhost;
5+
6+
# Global default upstream for all inference failures - use vLLM server for untrusted TLS test
7+
inference_default_upstream "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
8+
9+
# vLLM Chat Completions API with EPP - Untrusted TLS with failure mode allow
10+
location /v1/chat/completions {
11+
inference_epp on;
12+
inference_epp_endpoint "vllm-llama3-8b-instruct-epp:9002";
13+
inference_epp_timeout_ms 5000;
14+
inference_epp_failure_mode_allow on;
15+
inference_epp_tls on;
16+
# Note: No inference_epp_ca_file directive - causes untrusted TLS
17+
inference_epp_header_name "x-gateway-destination-endpoint";
18+
19+
proxy_set_header Host $host;
20+
proxy_set_header X-Real-IP $remote_addr;
21+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
22+
proxy_pass http://$inference_upstream;
23+
}
24+
25+
# vLLM Models API with EPP - Untrusted TLS with failure mode allow
26+
location /v1/models {
27+
inference_epp on;
28+
inference_epp_endpoint "vllm-llama3-8b-instruct-epp:9002";
29+
inference_epp_timeout_ms 5000;
30+
inference_epp_failure_mode_allow on;
31+
inference_epp_tls on;
32+
# Note: No inference_epp_ca_file directive - causes untrusted TLS
33+
inference_epp_header_name "x-gateway-destination-endpoint";
34+
35+
proxy_set_header Host $host;
36+
proxy_set_header X-Real-IP $remote_addr;
37+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
38+
proxy_pass http://$inference_upstream;
39+
}
40+
41+
# EPP test endpoint with untrusted TLS and failure mode allow
42+
location /epp-test {
43+
inference_epp on;
44+
inference_epp_endpoint "vllm-llama3-8b-instruct-epp:9002";
45+
inference_epp_timeout_ms 5000;
46+
inference_epp_failure_mode_allow on;
47+
inference_epp_tls on;
48+
# Note: No inference_epp_ca_file directive - causes untrusted TLS
49+
inference_epp_header_name "x-gateway-destination-endpoint";
50+
51+
proxy_set_header Host $host;
52+
proxy_set_header X-Real-IP $remote_addr;
53+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
54+
proxy_pass http://$inference_upstream;
55+
}
56+
57+
location /responses {
58+
# EPP enabled with untrusted TLS and failure mode allow
59+
inference_epp on;
60+
inference_epp_endpoint "vllm-llama3-8b-instruct-epp:9002";
61+
inference_epp_timeout_ms 5000;
62+
inference_epp_failure_mode_allow on;
63+
inference_epp_tls on;
64+
# Note: No inference_epp_ca_file directive - causes untrusted TLS
65+
inference_epp_header_name "x-gateway-destination-endpoint";
66+
67+
proxy_set_header Host $host;
68+
proxy_set_header X-Real-IP $remote_addr;
69+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
70+
proxy_pass http://$inference_upstream;
71+
}
72+
73+
location /health {
74+
return 200 "OK\n";
75+
add_header Content-Type text/plain;
76+
}
77+
78+
location / {
79+
set $backend "vllm-llama3-8b-instruct.ngx-inference-test.svc.cluster.local:8000";
80+
proxy_set_header Host $host;
81+
proxy_set_header X-Real-IP $remote_addr;
82+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
83+
proxy_pass http://$backend;
84+
}
85+
}

0 commit comments

Comments
 (0)