From 241a9166d79ce1a93d1929d7a4f63d4f0c180ec0 Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Fri, 1 Aug 2025 16:24:43 -0400 Subject: [PATCH 1/9] Adding design doc for remote query capability Signed-off-by: Scott Stults --- docs/feature-design/remote-query.md | 830 ++++++++++++++++++++++++++++ 1 file changed, 830 insertions(+) create mode 100644 docs/feature-design/remote-query.md diff --git a/docs/feature-design/remote-query.md b/docs/feature-design/remote-query.md new file mode 100644 index 00000000..fcfa01db --- /dev/null +++ b/docs/feature-design/remote-query.md @@ -0,0 +1,830 @@ +# Remote Query Feature Design + +> **Target Audience**: Development teams building features and enhancements for the search-relevance plugin. + +## Introduction + +This document outlines the design and implementation of the Remote Query feature for the OpenSearch Search Relevance plugin. The Remote Query feature enables search relevance experiments to execute queries against remote search engines and OpenSearch clusters, allowing for comprehensive cross-platform search evaluation and comparison. + +## Problem Statement + +Search relevance evaluation is critical for maintaining and improving search quality in OpenSearch deployments. Organizations often need to compare search performance across different configurations, algorithms, or entirely different search engines to make informed decisions about their search infrastructure. 
+ +**Key Problems Addressed:** +- **Limited Evaluation Scope**: The current Search Relevance plugin evaluates queries only within a single OpenSearch cluster +- **Multi-vendor Comparison**: Organizations need to compare OpenSearch against other search engines (Solr, Elasticsearch, proprietary solutions) +- **Migration Validation**: Teams migrating to OpenSearch need to validate that search relevance is equivalent to or better than that of their legacy systems +- **A/B Testing Across Environments**: Teams need to compare search results using identical evaluation criteria across different systems +- **Cross-Cluster Analysis**: Large organizations with multiple OpenSearch clusters need unified performance comparison + +**Impact of Not Implementing:** +- Organizations forced to build custom integration solutions for each search engine +- Manual export/import workflows that are error-prone and time-consuming +- Inconsistent evaluation methodologies reducing comparison validity +- Incomplete evaluation coverage when assessing search engine alternatives + +**Primary Users/Stakeholders:** +- Search engineers evaluating different search technologies +- DevOps teams managing search infrastructure migrations +- Product teams conducting A/B tests across search systems +- Organizations with hybrid search architectures + +**Alignment with OpenSearch Goals:** +- Enhances OpenSearch's position as a comprehensive search platform +- Provides tools for objective search engine evaluation +- Supports migration and adoption workflows +- Extends plugin capabilities beyond single-cluster limitations + +## Use Cases + +### Required Use Cases +1. **Multi-vendor Search Engine Evaluation** - Execute identical query sets against OpenSearch and competitor search engines (Solr, Elasticsearch) for objective relevance comparison +2. **Migration Validation** - Compare search results between legacy systems and OpenSearch during migration projects +3. 
**Cross-Cluster Performance Analysis** - Evaluate search performance across multiple OpenSearch clusters with different configurations +4. **A/B Testing Across Environments** - Test new search algorithms against production systems using consistent evaluation criteria + +### Nice-to-Have Use Cases +1. **Hybrid Search Architecture Evaluation** - Unified relevance evaluation across multiple search systems in complex architectures +2. **Vendor Benchmarking** - Periodic evaluation of different search technologies using standardized methodologies +3. **Real-time Performance Monitoring** - Continuous comparison of search quality across systems + +## Requirements + +### Functional Requirements + +1. **Remote Configuration Management** + - Create, update, delete, and retrieve remote search configurations + - Support for HTTP/HTTPS endpoints with authentication + - Configurable query and response templates for different search engines + - Rate limiting and concurrency control per configuration + +2. **Query Execution** + - Execute search queries against remote systems via HTTP/HTTPS + - Template-based query transformation for different search engine formats + - Asynchronous execution with proper timeout handling + - Integration with existing experiment workflows + +3. **Response Processing** + - Transform remote responses to OpenSearch-compatible format + - Template-based response mapping and field extraction + - Error handling for malformed or unexpected responses + +4. **Caching System** + - Intelligent caching of remote search results + - Configurable TTL and cache invalidation + - Query-based cache keys for efficient retrieval + +5. **Error Handling and Monitoring** + - Comprehensive failure tracking and categorization + - Rate limiting and circuit breaker patterns + - Detailed logging and monitoring capabilities + +### Non-Functional Requirements + +1. 
**Performance** + - Support for configurable rate limiting (requests per second) + - Concurrent request limiting to prevent resource exhaustion + - Efficient caching to minimize remote system load + - Asynchronous execution to prevent blocking + +2. **Security** + - Encrypted storage of authentication credentials + - Support for basic authentication + - TLS/SSL support for secure connections + - Integration with OpenSearch security framework + +3. **Reliability** + - Graceful handling of network failures and timeouts + - Retry mechanisms with exponential backoff + - Circuit breaker pattern for failing remote systems + - Comprehensive error logging and recovery + +4. **Scalability** + - Support for multiple concurrent remote configurations + - Efficient connection pooling and reuse + - Configurable resource limits per configuration + +## Out of Scope + +1. **Advanced Authentication Methods** - OAuth, JWT, and certificate-based authentication (future enhancement) +2. **Non-HTTP Protocols** - gRPC, WebSocket, and custom protocol support (future enhancement) +3. **External Cache Systems** - Redis, Memcached integration (future enhancement) +4. **Response Streaming** - Large result set streaming support (future enhancement) +5. **Advanced Template Processing** - Complex transformation logic beyond basic substitution (future enhancement) + +## Current State + +The OpenSearch Search Relevance plugin currently provides: +- Experiment management and execution within single OpenSearch clusters +- Query set management and execution +- Judgment-based evaluation with human relevance assessments +- Automated metrics calculation (NDCG, MAP, MRR, etc.) 
+- Local search configuration management + +**Components that will be impacted:** +- `ExperimentTaskManager` - Enhanced to support remote search execution +- `SearchRelevanceIndices` - New indices for remote configurations, cache, and failures +- Plugin registration - New REST endpoints and transport actions +- Experiment workflow - Integration of remote search results with existing evaluation + +## Solution Overview + +The Remote Query feature extends the Search Relevance plugin with a remote search execution layer that abstracts differences between search engines while maintaining consistent evaluation methodologies. + +**Key Technologies and Dependencies:** +- Java 11 HttpClient for HTTP communication +- OpenSearch XContent framework for JSON processing +- Template-based query and response transformation +- OpenSearch security framework for credential encryption + +**Integration with OpenSearch Core:** +- Utilizes OpenSearch's index management for data storage +- Leverages OpenSearch security for authentication and authorization +- Integrates with OpenSearch's async framework for non-blocking operations + +**Interaction with Existing Search-Relevance Features:** +- Seamless integration with existing experiment workflows +- Reuses judgment sets and evaluation metrics +- Extends search configuration concept to include remote systems +- Compatible with existing query set and result analysis features + +## Solution Design + +### Proposed Solution + +The solution introduces five core components that work together to provide remote search capabilities: + +#### Architecture Overview + +``` +┌──────────────────────────────────────────────────────────────┐ +│ Search Relevance Plugin │ +├──────────────────────────────────────────────────────────────┤ +│ Experiment Management │ Query Sets │ Judgment Management │ +├──────────────────────────────────────────────────────────────┤ +│ Search Execution Layer │ +├──────────────────────────────────────────────────────────────┤ +│ 
Local Search Executor │ Remote Query Executor │ +│ │ ┌──────────────────────────────────┤ +│ │ │ Configuration Management │ +│ │ │ Connection Pooling │ +│ │ │ Rate Limiting │ +│ │ │ Query Template Processing │ +│ │ │ Response Mapping │ +│ │ │ Caching Layer │ +│ │ │ Error Handling & Retry │ +│ │ │ Failure Tracking │ +└────────────────────────┴──┴──────────────────────────────────┘ +``` + +#### Core Components + +**1. RemoteSearchConfiguration** +- Manages connection details and search engine-specific settings +- Stores encrypted credentials and endpoint information +- Configures rate limiting and caching behavior +- Supports flexible query and response templates + +**2. RemoteSearchExecutor** +- Handles HTTP communication with remote search engines +- Implements rate limiting and concurrent request management +- Provides asynchronous execution with timeout handling +- Integrates caching and error handling + +**3. RemoteResponseMapper** +- Transforms remote search responses to standardized formats +- Supports template-based field mapping and extraction +- Handles various response formats and structures + +**4. RemoteSearchCache** +- Provides intelligent caching for remote search results +- Implements TTL-based expiration and manual invalidation +- Optimizes performance and reduces remote system load + +**5. 
RemoteSearchFailure** +- Tracks and categorizes remote search failures +- Supports debugging and monitoring requirements +- Enables circuit breaker and retry logic + +#### Data Models + +**RemoteSearchConfiguration Schema:** +```json +{ + "id": "unique_config_id", + "name": "Human readable name", + "description": "Configuration description", + "connectionUrl": "https://remote.search.engine:9200", + "username": "auth_username", + "password": "encrypted_password", + "queryTemplate": "{ \"query\": { \"match\": { \"content\": \"${queryText}\" } } }", + "responseTemplate": "{ \"hits\": \"${response.hits.hits}\" }", + "maxRequestsPerSecond": 10, + "maxConcurrentRequests": 5, + "cacheDurationMinutes": 60, + "refreshCache": false, + "metadata": { "searchEngine": "elasticsearch", "version": "7.x" }, + "timestamp": "2024-01-01T00:00:00Z" +} +``` + +**Integration with Experiments:** +```json +{ + "experimentId": "cross_platform_comparison", + "searchConfigurations": [ + { + "id": "local_opensearch", + "type": "local", + "name": "Local OpenSearch" + }, + { + "id": "remote_elasticsearch", + "type": "remote", + "name": "Production Elasticsearch", + "remoteConfigId": "prod_es_config" + } + ] +} +``` + +#### Query Execution Flow + +1. **Experiment Initialization**: Load experiment configuration and validate remote connections +2. **Query Processing**: For each query in the query set: + - Execute against local OpenSearch (existing flow) + - Execute against configured remote systems (new flow) + - Apply rate limiting and caching as configured +3. **Response Normalization**: Transform all responses to consistent format using templates +4. **Evaluation**: Apply judgment sets and calculate metrics across all systems +5. 
**Result Aggregation**: Generate comparative analysis and reports + +### Alternative Solutions Considered + +**Alternative 1: External Integration Service** +- **Approach**: Separate microservice handling remote search integration +- **Pros**: Technology flexibility, independent scaling, reduced plugin complexity +- **Cons**: Additional infrastructure, network latency, operational overhead +- **Decision**: Rejected due to operational complexity and performance concerns + +**Alternative 2: Export/Import Workflow** +- **Approach**: Manual export of queries, external execution, result import +- **Pros**: Simple implementation, no network dependencies during evaluation +- **Cons**: Manual process, no real-time capabilities, poor user experience +- **Decision**: Rejected due to poor automation and user experience + +**Alternative 3: Plugin-per-Search-Engine** +- **Approach**: Separate plugins for each supported search engine +- **Pros**: Optimized integration, native feature support +- **Cons**: Maintenance overhead, inconsistent experience, complex management +- **Decision**: Rejected due to maintenance burden and scalability concerns + +### Key Design Decisions + +**1. HTTP/HTTPS-Only Protocol Support** +- **Rationale**: HTTP/HTTPS covers the majority of search engine APIs and reduces complexity +- **Trade-off**: Limited protocol support vs. implementation simplicity +- **Future**: Can be extended to support additional protocols + +**2. Template-Based Transformation** +- **Rationale**: Flexible approach supporting various search engine formats +- **Trade-off**: Limited transformation complexity vs. broad compatibility +- **Impact**: Enables support for diverse search engines with minimal code changes + +**3. Integrated Caching** +- **Rationale**: Reduces load on remote systems and improves performance +- **Trade-off**: Storage requirements vs. performance benefits +- **Impact**: Significant performance improvement for repeated queries + +**4. 
Basic Authentication Only** +- **Rationale**: Covers common authentication scenarios while maintaining security +- **Trade-off**: Limited auth methods vs. implementation complexity +- **Future**: OAuth and certificate-based auth can be added + +## Metrics and Observability + +### New Metrics to be Introduced + +**Remote Search Execution Metrics:** +- `remote_search_requests_total` - Total number of remote search requests +- `remote_search_requests_duration` - Request duration histogram +- `remote_search_failures_total` - Total number of failed requests by error type +- `remote_search_rate_limit_hits_total` - Number of rate limit violations +- `remote_search_cache_hits_total` - Cache hit/miss statistics + +**Configuration Metrics:** +- `remote_search_configurations_total` - Number of active remote configurations +- `remote_search_concurrent_requests` - Current concurrent requests per configuration + +**System Health Metrics:** +- `remote_search_circuit_breaker_state` - Circuit breaker status per configuration +- `remote_search_connection_pool_usage` - HTTP connection pool utilization + +### Search Relevance Specific Metrics + +**Experiment Metrics:** +- Integration with existing experiment result metrics +- Comparative analysis metrics across local and remote systems +- Cross-platform evaluation result tracking + +**Performance Comparison Metrics:** +- Response time comparison between local and remote systems +- Result quality metrics (NDCG, MAP, MRR) across platforms +- Cache effectiveness metrics for remote queries + +### Health and Performance Monitoring + +**Health Checks:** +- Periodic connectivity validation for remote configurations +- Authentication status monitoring +- Circuit breaker state tracking + +**Performance Monitoring:** +- Request latency percentiles (p50, p95, p99) +- Throughput metrics (requests per second) +- Error rate monitoring by configuration and error type + +**Alerting Integration:** +- Integration with OpenSearch alerting for failure 
notifications +- Threshold-based alerts for performance degradation +- Circuit breaker state change notifications + +## Technical Specifications + +### Data Schemas and Index Mappings + +**Remote Search Configuration Index:** +```json +{ + "mappings": { + "properties": { + "id": { "type": "keyword" }, + "name": { "type": "text", "fields": { "keyword": { "type": "keyword" } } }, + "description": { "type": "text" }, + "connectionUrl": { "type": "keyword" }, + "username": { "type": "keyword" }, + "password": { "type": "keyword", "index": false }, + "queryTemplate": { "type": "text", "index": false }, + "responseTemplate": { "type": "text", "index": false }, + "maxRequestsPerSecond": { "type": "integer" }, + "maxConcurrentRequests": { "type": "integer" }, + "cacheDurationMinutes": { "type": "long" }, + "refreshCache": { "type": "boolean" }, + "metadata": { "type": "object", "enabled": false }, + "timestamp": { "type": "date" } + } + } +} +``` + +**Remote Search Cache Index:** +```json +{ + "mappings": { + "properties": { + "id": { "type": "keyword" }, + "configurationId": { "type": "keyword" }, + "queryHash": { "type": "keyword" }, + "queryText": { "type": "text" }, + "response": { "type": "text", "index": false }, + "mappedResponse": { "type": "text", "index": false }, + "timestamp": { "type": "date" }, + "expirationTimestamp": { "type": "date" } + } + } +} +``` + +**Remote Search Failure Index:** +```json +{ + "mappings": { + "properties": { + "id": { "type": "keyword" }, + "configurationId": { "type": "keyword" }, + "experimentId": { "type": "keyword" }, + "query": { "type": "text" }, + "queryText": { "type": "text" }, + "errorType": { "type": "keyword" }, + "errorMessage": { "type": "text" }, + "stackTrace": { "type": "text", "index": false }, + "httpStatusCode": { "type": "integer" }, + "timestamp": { "type": "date" } + } + } +} +``` + +### API Specifications + +**Remote Configuration Management API:** + +```http +POST 
/_plugins/_search_relevance/remote_search_configurations +{ + "name": "Production Elasticsearch", + "description": "Main production ES cluster", + "connectionUrl": "https://prod-es.example.com:9200", + "username": "search_user", + "password": "secure_password", + "queryTemplate": "{ \"query\": { \"multi_match\": { \"query\": \"${queryText}\", \"fields\": [\"title^2\", \"content\"] } } }", + "responseTemplate": "{ \"hits\": \"${response.hits.hits}\", \"total\": \"${response.hits.total.value}\" }", + "maxRequestsPerSecond": 10, + "maxConcurrentRequests": 5, + "cacheDurationMinutes": 60 +} +``` + +```http +GET /_plugins/_search_relevance/remote_search_configurations/{configId} +PUT /_plugins/_search_relevance/remote_search_configurations/{configId} +DELETE /_plugins/_search_relevance/remote_search_configurations/{configId} +``` + +### Integration with Search-Relevance Data Models + +**Enhanced Experiment Configuration:** +- Extended to support remote search configurations alongside local configurations +- Maintains backward compatibility with existing experiment definitions +- Supports mixed local/remote experiment scenarios + +**Search Configuration Extension:** +- Existing SearchConfiguration concept extended to include remote configurations +- Type field distinguishes between "local" and "remote" configurations +- Remote configurations reference RemoteSearchConfiguration entities + +### Class and Sequence Diagrams + +**Remote Search Execution Sequence:** +``` +Client -> ExperimentTaskManager: Execute Experiment +ExperimentTaskManager -> RemoteSearchExecutor: Execute Remote Search +RemoteSearchExecutor -> RemoteSearchCacheDao: Check Cache +RemoteSearchCacheDao -> RemoteSearchExecutor: Cache Result +RemoteSearchExecutor -> HttpClient: HTTP Request (if cache miss) +HttpClient -> RemoteSearchExecutor: HTTP Response +RemoteSearchExecutor -> RemoteResponseMapper: Map Response +RemoteResponseMapper -> RemoteSearchExecutor: Mapped Response +RemoteSearchExecutor -> 
RemoteSearchCacheDao: Store Cache +RemoteSearchExecutor -> ExperimentTaskManager: Search Results +ExperimentTaskManager -> Client: Experiment Results +``` + +## Backward Compatibility + +### Breaking Changes and Migration Strategy + +**No Breaking Changes:** +- All existing APIs remain unchanged +- Existing experiments continue to work without modification +- Current search configurations are fully compatible + +**Additive Changes:** +- New REST endpoints for remote configuration management +- New indices for remote search data storage +- Enhanced experiment configuration schema (backward compatible) + +### Index Mapping Changes + +**New Indices Added:** +- `.opensearch-search-relevance-remote-search-configurations` +- `.opensearch-search-relevance-remote-search-cache` +- `.opensearch-search-relevance-remote-search-failures` + +**Existing Indices:** +- No changes to existing index mappings +- Experiment index may include new optional fields for remote configurations + +### Plugin Upgrade Considerations + +**Upgrade Path:** +1. Install updated plugin version +2. New indices created automatically on first use +3. Existing functionality remains unchanged +4. Remote features available immediately after configuration + +**Rollback Support:** +- Plugin can be downgraded without data loss +- Remote-specific data stored in separate indices +- Existing experiments unaffected by rollback + +## Security Considerations + +### Security Overview + +The Remote Query feature handles sensitive connection information and executes queries against external systems, requiring comprehensive security measures to protect credentials, data, and system integrity. 
+ +**Security Context:** +- Remote search configurations contain authentication credentials +- HTTP requests transmitted to external systems +- Cached responses may contain sensitive search results +- API endpoints require proper authorization + +**Sensitive Data:** +- Remote system authentication credentials (username/password) +- Query content and search results +- Connection URLs and system metadata +- Cached response data + +**Trust Boundaries:** +- OpenSearch cluster (trusted) ↔ Remote search systems (untrusted) +- Plugin components (trusted) ↔ External HTTP endpoints (untrusted) +- User requests (authenticated) ↔ Plugin APIs (trusted) + +### Assets and Resources + +**Protected Assets:** +- Remote search configuration credentials +- Cached search results and query data +- Remote system connection information +- Experiment data and evaluation results + +**System Indices:** +- `.opensearch-search-relevance-remote-search-configurations` - Contains encrypted credentials +- `.opensearch-search-relevance-remote-search-cache` - Contains cached search results +- `.opensearch-search-relevance-remote-search-failures` - Contains error logs and stack traces + +**Access Patterns:** +- Configuration management requires admin-level permissions +- Experiment execution requires search-relevance permissions +- Cache access limited to plugin components +- Failure logs accessible for debugging and monitoring + +### API Security + +**Configuration Management Endpoints:** + +| Endpoint | Method | Mutating | Authorization | Input Validation | +|----------|--------|----------|---------------|------------------| +| `/_plugins/_search_relevance/remote_search_configurations` | POST | Yes | Admin role required | URL validation, credential encryption | +| `/_plugins/_search_relevance/remote_search_configurations/{id}` | GET | No | Read permissions | ID format validation | +| `/_plugins/_search_relevance/remote_search_configurations/{id}` | PUT | Yes | Admin role required | Full input 
validation, credential re-encryption | +| `/_plugins/_search_relevance/remote_search_configurations/{id}` | DELETE | Yes | Admin role required | ID validation, dependency checking | + +**Rate Limiting:** +- API endpoints subject to OpenSearch rate limiting +- Per-configuration rate limiting for remote requests +- Circuit breaker protection against abuse + +### Threat Analysis (STRIDE) + +**Spoofing Threats:** +- **Threat**: Attacker impersonates legitimate remote search system +- **Mitigation**: TLS certificate validation, connection URL validation +- **Threat**: Unauthorized access to configuration APIs +- **Mitigation**: OpenSearch role-based authentication and authorization + +**Tampering Threats:** +- **Threat**: Man-in-the-middle attacks on remote connections +- **Mitigation**: Mandatory HTTPS for remote connections, certificate pinning option +- **Threat**: Malicious modification of cached responses +- **Mitigation**: Cache integrity checks, encrypted storage + +**Repudiation Threats:** +- **Threat**: Denial of remote search activities +- **Mitigation**: Comprehensive audit logging, request/response tracking +- **Threat**: Unauthorized configuration changes +- **Mitigation**: Change logging, user attribution in audit logs + +**Information Disclosure Threats:** +- **Threat**: Credential exposure in logs or error messages +- **Mitigation**: Credential masking in logs, encrypted storage +- **Threat**: Sensitive query content in cache or logs +- **Mitigation**: Configurable logging levels, encrypted cache storage + +**Denial of Service Threats:** +- **Threat**: Resource exhaustion through excessive remote requests +- **Mitigation**: Rate limiting, concurrent request limits, circuit breakers +- **Threat**: Cache storage exhaustion +- **Mitigation**: TTL-based expiration, storage limits, cache cleanup + +**Elevation of Privilege Threats:** +- **Threat**: Plugin vulnerabilities leading to system compromise +- **Mitigation**: Input validation, secure coding 
practices, dependency scanning +- **Threat**: Remote system compromise affecting local system +- **Mitigation**: Network isolation, minimal required permissions + +### Attack Vectors + +**Unauthorized Users (No Cluster Access):** +- **Vector**: Direct API access attempts +- **Mitigation**: OpenSearch authentication required for all endpoints +- **Vector**: Network-level attacks on remote connections +- **Mitigation**: VPC/network security, firewall rules + +**Authorized Users with Limited Permissions:** +- **Vector**: Attempting to access configuration management APIs +- **Mitigation**: Role-based access control, admin-only configuration access +- **Vector**: Attempting to view sensitive configuration data +- **Mitigation**: Credential masking, field-level security + +**Read-Only Users Attempting Modifications:** +- **Vector**: POST/PUT/DELETE requests to configuration APIs +- **Mitigation**: HTTP method validation, permission checking +- **Vector**: Cache manipulation attempts +- **Mitigation**: Internal API access only, no external cache modification + +**Malicious Input Attacks:** +- **Vector**: Query injection (analogous to SQL injection) through values substituted into query templates (e.g., `${queryText}`) +- **Mitigation**: Template validation, escaping of substituted values (the equivalent of parameterized queries) +- **Vector**: Script injection in response templates +- **Mitigation**: Safe template processing, input sanitization +- **Vector**: XXE attacks in XML responses +- **Mitigation**: Secure XML parsing, external entity disabling + +### Security Mitigations + +**Credential Protection:** +- All passwords encrypted at rest using OpenSearch security framework +- Credentials never logged or exposed in error messages +- Secure credential rotation support +- Memory protection for credential handling + +**Input Validation and Sanitization:** +- URL format validation for connection endpoints +- Template syntax validation for query/response templates +- JSON schema validation for all API inputs +- Rate limit parameter bounds checking + +**Authentication and Authorization:** +- Integration 
with OpenSearch security plugin +- Role-based access control for all endpoints +- Admin-level permissions required for configuration management +- Audit logging for all security-relevant operations + +**Encryption Requirements:** +- Mandatory HTTPS for all remote connections +- TLS 1.2+ required for remote communication +- Encrypted storage for cached responses containing sensitive data +- Optional certificate pinning for high-security environments + +**Audit Logging and Monitoring:** +- Comprehensive logging of all remote search activities +- Security event logging (authentication failures, permission denials) +- Performance and error monitoring with alerting +- Configurable log retention and rotation + +### Security Testing Requirements + +**Security-Specific Test Cases:** +- Authentication bypass attempts +- Authorization boundary testing +- Credential encryption/decryption validation +- TLS connection security verification + +**Input Validation Testing:** +- Malformed URL handling +- Invalid template syntax processing +- Boundary value testing for rate limits +- SQL/script injection attempt handling + +**Authorization Boundary Testing:** +- Role-based access control validation +- Cross-tenant access prevention +- API endpoint permission verification +- Resource access control testing + +**Performance Testing for DoS Prevention:** +- Rate limiting effectiveness testing +- Resource exhaustion protection validation +- Circuit breaker functionality verification +- Concurrent request limit enforcement + +## Testing Strategy + +### Unit and Integration Testing + +**Unit Testing Coverage:** +- RemoteSearchConfiguration model validation and serialization +- RemoteSearchExecutor HTTP client functionality and error handling +- RemoteResponseMapper template processing and transformation +- Rate limiting and caching logic validation +- Security credential handling and encryption + +**Integration Testing:** +- End-to-end remote search execution workflows +- Cache 
integration with DAO layer +- Error handling and failure tracking +- Authentication and authorization integration +- Experiment workflow integration with remote configurations + +**Mock Testing:** +- HTTP client mocking for various response scenarios +- Remote system failure simulation +- Network timeout and connectivity testing +- Authentication failure scenarios + +### Performance Testing + +**Load Testing:** +- Concurrent remote search execution under various loads +- Rate limiting effectiveness under high request volumes +- Cache performance with large result sets +- Memory usage and garbage collection impact + +**Stress Testing:** +- System behavior under remote system failures +- Resource exhaustion scenarios +- Network partition and recovery testing +- Circuit breaker activation and recovery + +### Compatibility Testing + +**OpenSearch Version Compatibility:** +- Testing across supported OpenSearch versions (2.x+) +- Plugin upgrade and downgrade scenarios +- Index mapping compatibility validation + +**Search Engine Compatibility:** +- Elasticsearch compatibility testing +- Solr integration validation +- Custom search engine API testing +- Response format variation handling + +**Network Environment Testing:** +- Various network configurations and firewalls +- Proxy and load balancer compatibility +- TLS/SSL configuration variations +- IPv4/IPv6 dual-stack environments + +## Performance and Benchmarking + +### Key Performance Indicators + +**Response Time Metrics:** +- Remote search request latency (p50, p95, p99) +- Cache hit/miss response times +- End-to-end experiment execution time +- Template processing overhead + +**Throughput Metrics:** +- Requests per second per remote configuration +- Concurrent request handling capacity +- Cache storage and retrieval throughput +- Overall experiment processing rate + +**Resource Utilization:** +- Memory usage for caching and connection pooling +- CPU utilization for template processing +- Network bandwidth 
consumption +- Storage requirements for cache and failure data + +### Resource Utilization Targets + +**Memory Usage:** +- Maximum 100MB additional heap usage for remote search components +- Cache size limits configurable per deployment +- Connection pool memory overhead < 10MB per configuration + +**CPU Utilization:** +- Template processing overhead < 5% of total CPU +- HTTP client processing < 10% additional CPU load +- Minimal impact on existing search relevance operations + +**Network Bandwidth:** +- Configurable rate limiting to control bandwidth usage +- Efficient connection reuse to minimize overhead +- Compression support for large responses + +**Storage Requirements:** +- Cache storage configurable with automatic cleanup +- Failure tracking with configurable retention +- Index storage optimization for remote configuration data + +### Benchmark Methodology + +**Test Scenarios:** +1. **Single Remote Configuration**: Baseline performance with one remote system +2. **Multiple Remote Configurations**: Scalability testing with 5-10 remote systems +3. **High Query Volume**: 1000+ queries across multiple remote systems +4. **Cache Effectiveness**: Performance comparison with/without caching +5. 
**Failure Recovery**: Performance during and after remote system failures + +**Test Environment:** +- OpenSearch cluster with 3 nodes (4 CPU, 16GB RAM each) +- Simulated remote search engines with controlled latency +- Network simulation for various connectivity scenarios +- Load generation tools for concurrent request testing + +**Performance Baselines:** +- Existing search relevance experiment execution time +- Local search performance benchmarks +- Memory and CPU usage without remote search features +- Network utilization baselines + +**Success Criteria:** +- < 20% increase in experiment execution time with remote searches +- Cache hit ratio > 80% for repeated queries +- Rate limiting effectiveness > 95% accuracy +- Zero memory leaks during extended testing +- Graceful degradation during remote system failures + +--- + +## Additional Resources + +- [OpenSearch RFC Process](https://github.com/opensearch-project/OpenSearch/blob/main/DEVELOPER_GUIDE.md#submitting-changes) +- [Plugin Development Guide](https://opensearch.org/docs/latest/developers/plugins/) +- [Contributing Guidelines](../CONTRIBUTING.md) +- [Remote Search Querying RFC](../RFC-Remote-Search-Querying.md) +- [Search Relevance Plugin Documentation](https://opensearch.org/docs/latest/search-plugins/search-relevance/) From b2e12a670cc352f399ace41bbcb3a8f86fd6d8da Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Wed, 8 Oct 2025 21:56:05 -0400 Subject: [PATCH 2/9] chore: checkpoint before new task\n\n- Stage and commit working changes prior to starting new task\n- Files:\n * Modified: src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java\n * Added: .java-version\n * Added: src/test/scripts/esci_us_subset.ndjson Signed-off-by: Scott Stults --- .../executors/ExperimentTaskManager.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java 
b/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java index d8a33d6d..9e3eeb39 100644 --- a/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java +++ b/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java @@ -285,6 +285,20 @@ private void executeVariantTaskAsync(VariantTaskParameters params, CompletableFu final String evaluationId = UUID.randomUUID().toString(); SearchRequest searchRequest = buildSearchRequest(params, evaluationId); + // Instrumentation: log final serialized search request body for debugging (e.g., LTR rescore_query) + try { + String source = searchRequest.source() != null ? searchRequest.source().toString() : null; + log.info( + "Experiment search request body (experimentId={}, variantId={}, evaluationId={}): {}", + params.getExperimentId(), + params.getExperimentVariant().getId(), + evaluationId, + source + ); + } catch (Exception e) { + log.warn("Failed to serialize search request body for logging: {}", e.getMessage()); + } + // Convert ActionListener to CompletableFuture CompletableFuture searchFuture = new CompletableFuture<>(); From e68c20a73e7bd80d89c178c09ca57afbb23b465f Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Wed, 8 Oct 2025 22:30:08 -0400 Subject: [PATCH 3/9] fix(ltr): wrap rescore_query with wrapper query during request build to avoid parse errors in Experiment path\n\n- Preprocess rescore_query in SearchRequestBuilder to base64-encode and wrap unregistered queries (e.g., LTR) using wrapper query\n- Applies to both standard and hybrid search request builders\n- Verified with JDK 21: compileJava and test tasks succeeded Signed-off-by: Scott Stults --- .../model/builder/SearchRequestBuilder.java | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java b/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java index 
2739ddb5..bc9df505 100644 --- a/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java +++ b/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java @@ -11,6 +11,8 @@ import static org.opensearch.searchrelevance.experiment.QuerySourceUtil.validateHybridQuery; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.Collections; import java.util.Map; import java.util.Objects; @@ -67,6 +69,49 @@ public static SearchRequest buildSearchRequest(String index, String query, Strin processedQuery ); Map fullQueryMap = parser.map(); + // Preprocess rescore_query to wrap unknown queries (e.g., LTR/sltr) using wrapper query to avoid NamedXContent parsing issues + try { + Object rescore = fullQueryMap.get("rescore"); + if (rescore instanceof Map) { + Map rescoreEntry = (Map) rescore; + Object queryObjInner = rescoreEntry.get("query"); + if (queryObjInner instanceof Map) { + Map queryMap = (Map) queryObjInner; + Object rescoreQuery = queryMap.get("rescore_query"); + if (rescoreQuery != null && (rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { + XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); + tmpBuilder.value(rescoreQuery); + String raw = tmpBuilder.toString(); + String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); + Map wrapper = Map.of("wrapper", Map.of("query", base64)); + queryMap.put("rescore_query", wrapper); + } + } + } else if (rescore instanceof java.util.List) { + for (Object entry : (java.util.List) rescore) { + if (entry instanceof Map) { + Map rescoreEntry = (Map) entry; + Object queryObjInner = rescoreEntry.get("query"); + if (queryObjInner instanceof Map) { + Map queryMap = (Map) queryObjInner; + Object rescoreQuery = queryMap.get("rescore_query"); + if (rescoreQuery != null + && (rescoreQuery instanceof Map) + && !((Map) rescoreQuery).containsKey("wrapper")) { + 
XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); + tmpBuilder.value(rescoreQuery); + String raw = tmpBuilder.toString(); + String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); + Map wrapper = Map.of("wrapper", Map.of("query", base64)); + queryMap.put("rescore_query", wrapper); + } + } + } + } + } + } catch (Exception e) { + log.debug("Skipping rescore_query wrapper preprocessing: {}", e.getMessage()); + } // This implementation handles the 'query' field separately from other fields because: // 1. Custom query types (like hybrid, neural) are not registered in the default QueryBuilders @@ -141,6 +186,49 @@ public static SearchRequest buildRequestForHybridSearch( processedQuery ); Map fullQueryMap = parser.map(); + // Preprocess rescore_query to wrap unknown queries (e.g., LTR/sltr) using wrapper query to avoid NamedXContent parsing issues + try { + Object rescore = fullQueryMap.get("rescore"); + if (rescore instanceof Map) { + Map rescoreEntry = (Map) rescore; + Object queryObjInner = rescoreEntry.get("query"); + if (queryObjInner instanceof Map) { + Map queryMap = (Map) queryObjInner; + Object rescoreQuery = queryMap.get("rescore_query"); + if (rescoreQuery != null && (rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { + XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); + tmpBuilder.value(rescoreQuery); + String raw = tmpBuilder.toString(); + String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); + Map wrapper = Map.of("wrapper", Map.of("query", base64)); + queryMap.put("rescore_query", wrapper); + } + } + } else if (rescore instanceof java.util.List) { + for (Object entry : (java.util.List) rescore) { + if (entry instanceof Map) { + Map rescoreEntry = (Map) entry; + Object queryObjInner = rescoreEntry.get("query"); + if (queryObjInner instanceof Map) { + Map queryMap = (Map) queryObjInner; + Object rescoreQuery = queryMap.get("rescore_query"); + 
if (rescoreQuery != null + && (rescoreQuery instanceof Map) + && !((Map) rescoreQuery).containsKey("wrapper")) { + XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); + tmpBuilder.value(rescoreQuery); + String raw = tmpBuilder.toString(); + String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); + Map wrapper = Map.of("wrapper", Map.of("query", base64)); + queryMap.put("rescore_query", wrapper); + } + } + } + } + } + } catch (Exception e) { + log.debug("Skipping rescore_query wrapper preprocessing: {}", e.getMessage()); + } validateHybridQuery(fullQueryMap); From a7f2c2fc3e02435ec440cc41f4b54b4a576efad2 Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Wed, 8 Oct 2025 22:57:36 -0400 Subject: [PATCH 4/9] tests(search): add contract tests for LTR rescore_query wrapper in SearchRequestBuilder Signed-off-by: Scott Stults --- .../builder/SearchRequestBuilderTests.java | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java b/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java index 7c231cee..5a86d357 100644 --- a/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java +++ b/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java @@ -9,6 +9,8 @@ import static org.opensearch.searchrelevance.common.PluginConstants.WILDCARD_QUERY_TEXT; +import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -17,6 +19,9 @@ import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.test.OpenSearchTestCase; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; + public class SearchRequestBuilderTests extends OpenSearchTestCase { private static final String TEST_INDEX = "test_index"; @@ 
-24,6 +29,54 @@ public class SearchRequestBuilderTests extends OpenSearchTestCase { private static final String TEST_PIPELINE = "test_pipeline"; private static final int TEST_SIZE = 10; + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private static Map parseJsonToMap(String json) throws Exception { + return MAPPER.readValue(json, new TypeReference>() { + }); + } + + @SuppressWarnings("unchecked") + private static String extractWrappedRescoreQueryBase64(Map sourceMap) { + Object rescore = sourceMap.get("rescore"); + if (rescore instanceof Map) { + Map rescoreMap = (Map) rescore; + Object queryObj = rescoreMap.get("query"); + if (queryObj instanceof Map) { + Map queryMap = (Map) queryObj; + Object rescoreQuery = queryMap.get("rescore_query"); + if (rescoreQuery instanceof Map) { + Map rq = (Map) rescoreQuery; + Object wrapperObj = rq.get("wrapper"); + if (wrapperObj instanceof Map) { + return (String) ((Map) wrapperObj).get("query"); + } + } + } + } else if (rescore instanceof List) { + List list = (List) rescore; + if (list.isEmpty() == false) { + Object first = list.get(0); + if (first instanceof Map) { + Map rescoreMap = (Map) first; + Object queryObj = rescoreMap.get("query"); + if (queryObj instanceof Map) { + Map queryMap = (Map) queryObj; + Object rescoreQuery = queryMap.get("rescore_query"); + if (rescoreQuery instanceof Map) { + Map rq = (Map) rescoreQuery; + Object wrapperObj = rq.get("wrapper"); + if (wrapperObj instanceof Map) { + return (String) ((Map) wrapperObj).get("query"); + } + } + } + } + } + } + return null; + } + public void testBuildSearchRequestSimpleQuery() { String simpleQuery = "{\"query\":{\"match\":{\"title\":\"" + WILDCARD_QUERY_TEXT + "\"}}}"; @@ -140,4 +193,97 @@ public void testHybridQuerySearchConfiguration_whenLessThenTwoSubQueries_thenFai ); assertEquals("invalid hybrid query: expected exactly [2] sub-queries but found [1]", exception.getMessage()); } + + // ------------------------- + // Contract tests for 
rescore.rescore_query wrapper preprocessing + // ------------------------- + + public void testBuildSearchRequest_whenRescoreQueryIsObject_thenWrappedWithBase64() throws Exception { + String query = "{" + + "\"query\":{\"match\":{\"title\":\"" + + WILDCARD_QUERY_TEXT + + "\"}}," + + "\"rescore\":{\"query\":{\"rescore_query\":{\"sltr\":{\"model\":\"m1\",\"params\":{\"keywords\":\"abc\"}}}}}" + + "}"; + + SearchRequest searchRequest = SearchRequestBuilder.buildSearchRequest(TEST_INDEX, query, TEST_QUERY_TEXT, null, TEST_SIZE); + assertNotNull(searchRequest); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertNotNull(sourceBuilder); + + Map sourceMap = parseJsonToMap(sourceBuilder.toString()); + String base64 = extractWrappedRescoreQueryBase64(sourceMap); + assertNotNull("rescore.rescore_query should be wrapped with base64 payload", base64); + + String decoded = new String(Base64.getDecoder().decode(base64), StandardCharsets.UTF_8); + Map decodedMap = parseJsonToMap(decoded); + assertTrue("Decoded rescore_query should contain sltr", decodedMap.containsKey("sltr")); + @SuppressWarnings("unchecked") + Map sltr = (Map) decodedMap.get("sltr"); + assertEquals("m1", sltr.get("model")); + } + + public void testBuildSearchRequest_whenRescoreIsArray_thenFirstEntryRescoreQueryWrapped() throws Exception { + String query = "{" + + "\"query\":{\"match\":{\"title\":\"" + + WILDCARD_QUERY_TEXT + + "\"}}," + + "\"rescore\":[{\"query\":{\"rescore_query\":{\"sltr\":{\"model\":\"m2\",\"params\":{\"keywords\":\"xyz\"}}}}}]" + + "}"; + + SearchRequest searchRequest = SearchRequestBuilder.buildSearchRequest(TEST_INDEX, query, TEST_QUERY_TEXT, null, TEST_SIZE); + assertNotNull(searchRequest); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertNotNull(sourceBuilder); + + Map sourceMap = parseJsonToMap(sourceBuilder.toString()); + String base64 = extractWrappedRescoreQueryBase64(sourceMap); + assertNotNull("rescore[0].rescore_query should be wrapped with 
base64 payload", base64); + + String decoded = new String(Base64.getDecoder().decode(base64), StandardCharsets.UTF_8); + Map decodedMap = parseJsonToMap(decoded); + assertTrue(decodedMap.containsKey("sltr")); + @SuppressWarnings("unchecked") + Map sltr = (Map) decodedMap.get("sltr"); + assertEquals("m2", sltr.get("model")); + } + + public void testBuildRequestForHybridSearch_whenRescoreQueryPresent_thenWrappedWithBase64() throws Exception { + // Minimal valid hybrid query (2 sub-queries) plus rescore with sltr + String hybridQuery = "{" + + "\"_source\":{\"exclude\":[\"passage_embedding\"]}," + + "\"query\":{\"hybrid\":{\"queries\":[" + + "{\"match\":{\"name\":\"" + + WILDCARD_QUERY_TEXT + + "\"}}," + + "{\"match\":{\"name\":{\"query\":\"" + + WILDCARD_QUERY_TEXT + + "\"}}}" + + "]}}," + + "\"rescore\":{\"query\":{\"rescore_query\":{\"sltr\":{\"model\":\"m3\",\"params\":{\"keywords\":\"hyb\"}}}}}" + + "}"; + + Map temporarySearchPipeline = Map.of(); // allowed empty + SearchRequest searchRequest = SearchRequestBuilder.buildRequestForHybridSearch( + TEST_INDEX, + hybridQuery, + temporarySearchPipeline, + TEST_QUERY_TEXT, + TEST_SIZE + ); + assertNotNull(searchRequest); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertNotNull(sourceBuilder); + + Map sourceMap = parseJsonToMap(sourceBuilder.toString()); + String base64 = extractWrappedRescoreQueryBase64(sourceMap); + assertNotNull("hybrid path should wrap rescore.rescore_query with base64 payload", base64); + + String decoded = new String(Base64.getDecoder().decode(base64), StandardCharsets.UTF_8); + Map decodedMap = parseJsonToMap(decoded); + assertTrue(decodedMap.containsKey("sltr")); + @SuppressWarnings("unchecked") + Map sltr = (Map) decodedMap.get("sltr"); + assertEquals("m3", sltr.get("model")); + } } From cf49e21d4e5a7a62ffcf1039767d115febb8ed26 Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Thu, 9 Oct 2025 18:31:27 -0400 Subject: [PATCH 5/9] Cleanups Signed-off-by: Scott Stults --- 
.../executors/ExperimentTaskManager.java | 2 +- .../model/builder/SearchRequestBuilder.java | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java b/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java index 9e3eeb39..7df55c56 100644 --- a/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java +++ b/src/main/java/org/opensearch/searchrelevance/executors/ExperimentTaskManager.java @@ -288,7 +288,7 @@ private void executeVariantTaskAsync(VariantTaskParameters params, CompletableFu // Instrumentation: log final serialized search request body for debugging (e.g., LTR rescore_query) try { String source = searchRequest.source() != null ? searchRequest.source().toString() : null; - log.info( + log.debug( "Experiment search request body (experimentId={}, variantId={}, evaluationId={}): {}", params.getExperimentId(), params.getExperimentVariant().getId(), diff --git a/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java b/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java index bc9df505..1099e4a4 100644 --- a/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java +++ b/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java @@ -78,7 +78,7 @@ public static SearchRequest buildSearchRequest(String index, String query, Strin if (queryObjInner instanceof Map) { Map queryMap = (Map) queryObjInner; Object rescoreQuery = queryMap.get("rescore_query"); - if (rescoreQuery != null && (rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { + if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); tmpBuilder.value(rescoreQuery); String raw = tmpBuilder.toString(); @@ -95,9 +95,7 @@ public 
static SearchRequest buildSearchRequest(String index, String query, Strin if (queryObjInner instanceof Map) { Map queryMap = (Map) queryObjInner; Object rescoreQuery = queryMap.get("rescore_query"); - if (rescoreQuery != null - && (rescoreQuery instanceof Map) - && !((Map) rescoreQuery).containsKey("wrapper")) { + if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); tmpBuilder.value(rescoreQuery); String raw = tmpBuilder.toString(); @@ -195,7 +193,7 @@ public static SearchRequest buildRequestForHybridSearch( if (queryObjInner instanceof Map) { Map queryMap = (Map) queryObjInner; Object rescoreQuery = queryMap.get("rescore_query"); - if (rescoreQuery != null && (rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { + if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); tmpBuilder.value(rescoreQuery); String raw = tmpBuilder.toString(); @@ -212,9 +210,7 @@ public static SearchRequest buildRequestForHybridSearch( if (queryObjInner instanceof Map) { Map queryMap = (Map) queryObjInner; Object rescoreQuery = queryMap.get("rescore_query"); - if (rescoreQuery != null - && (rescoreQuery instanceof Map) - && !((Map) rescoreQuery).containsKey("wrapper")) { + if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); tmpBuilder.value(rescoreQuery); String raw = tmpBuilder.toString(); From cd99422c4a86c3a6e83ecf3c28750d2d4e0fcca7 Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Fri, 10 Oct 2025 15:49:44 -0400 Subject: [PATCH 6/9] Delete docs/feature-design/remote-query.md Signed-off-by: Scott Stults --- docs/feature-design/remote-query.md | 830 ---------------------------- 1 file changed, 830 deletions(-) delete mode 100644 docs/feature-design/remote-query.md diff --git 
a/docs/feature-design/remote-query.md b/docs/feature-design/remote-query.md deleted file mode 100644 index fcfa01db..00000000 --- a/docs/feature-design/remote-query.md +++ /dev/null @@ -1,830 +0,0 @@ -# Remote Query Feature Design - -> **Target Audience**: Development teams building features and enhancements for the search-relevance plugin. - -## Introduction - -This document outlines the design and implementation of the Remote Query feature for the OpenSearch Search Relevance plugin. The Remote Query feature enables search relevance experiments to execute queries against remote search engines and OpenSearch clusters, allowing for comprehensive cross-platform search evaluation and comparison. - -## Problem Statement - -Search relevance evaluation is critical for maintaining and improving search quality in OpenSearch deployments. Organizations often need to compare search performance across different configurations, algorithms, or entirely different search engines to make informed decisions about their search infrastructure. 
- -**Key Problems Addressed:** -- **Limited Evaluation Scope**: Current Search Relevance plugin only evaluates within a single OpenSearch cluster -- **Multi-vendor Comparison**: Organizations need to compare OpenSearch against other search engines (Solr, Elasticsearch, proprietary solutions) -- **Migration Validation**: Teams migrating to OpenSearch need to validate equivalent or better search relevance compared to legacy systems -- **A/B Testing Across Environments**: Need to compare search results using identical evaluation criteria across different systems -- **Cross-Cluster Analysis**: Large organizations with multiple OpenSearch clusters need unified performance comparison - -**Impact of Not Implementing:** -- Organizations forced to build custom integration solutions for each search engine -- Manual export/import workflows that are error-prone and time-consuming -- Inconsistent evaluation methodologies reducing comparison validity -- Incomplete evaluation coverage when assessing search engine alternatives - -**Primary Users/Stakeholders:** -- Search engineers evaluating different search technologies -- DevOps teams managing search infrastructure migrations -- Product teams conducting A/B tests across search systems -- Organizations with hybrid search architectures - -**Alignment with OpenSearch Goals:** -- Enhances OpenSearch's position as a comprehensive search platform -- Provides tools for objective search engine evaluation -- Supports migration and adoption workflows -- Extends plugin capabilities beyond single-cluster limitations - -## Use Cases - -### Required Use Cases -1. **Multi-vendor Search Engine Evaluation** - Execute identical query sets against OpenSearch and competitor search engines (Solr, Elasticsearch) for objective relevance comparison -2. **Migration Validation** - Compare search results between legacy systems and OpenSearch during migration projects -3. 
**Cross-Cluster Performance Analysis** - Evaluate search performance across multiple OpenSearch clusters with different configurations -4. **A/B Testing Across Environments** - Test new search algorithms against production systems using consistent evaluation criteria - -### Nice-to-Have Use Cases -1. **Hybrid Search Architecture Evaluation** - Unified relevance evaluation across multiple search systems in complex architectures -2. **Vendor Benchmarking** - Periodic evaluation of different search technologies using standardized methodologies -3. **Real-time Performance Monitoring** - Continuous comparison of search quality across systems - -## Requirements - -### Functional Requirements - -1. **Remote Configuration Management** - - Create, update, delete, and retrieve remote search configurations - - Support for HTTP/HTTPS endpoints with authentication - - Configurable query and response templates for different search engines - - Rate limiting and concurrency control per configuration - -2. **Query Execution** - - Execute search queries against remote systems via HTTP/HTTPS - - Template-based query transformation for different search engine formats - - Asynchronous execution with proper timeout handling - - Integration with existing experiment workflows - -3. **Response Processing** - - Transform remote responses to OpenSearch-compatible format - - Template-based response mapping and field extraction - - Error handling for malformed or unexpected responses - -4. **Caching System** - - Intelligent caching of remote search results - - Configurable TTL and cache invalidation - - Query-based cache keys for efficient retrieval - -5. **Error Handling and Monitoring** - - Comprehensive failure tracking and categorization - - Rate limiting and circuit breaker patterns - - Detailed logging and monitoring capabilities - -### Non-Functional Requirements - -1. 
**Performance** - - Support for configurable rate limiting (requests per second) - - Concurrent request limiting to prevent resource exhaustion - - Efficient caching to minimize remote system load - - Asynchronous execution to prevent blocking - -2. **Security** - - Encrypted storage of authentication credentials - - Support for basic authentication - - TLS/SSL support for secure connections - - Integration with OpenSearch security framework - -3. **Reliability** - - Graceful handling of network failures and timeouts - - Retry mechanisms with exponential backoff - - Circuit breaker pattern for failing remote systems - - Comprehensive error logging and recovery - -4. **Scalability** - - Support for multiple concurrent remote configurations - - Efficient connection pooling and reuse - - Configurable resource limits per configuration - -## Out of Scope - -1. **Advanced Authentication Methods** - OAuth, JWT, and certificate-based authentication (future enhancement) -2. **Non-HTTP Protocols** - gRPC, WebSocket, and custom protocol support (future enhancement) -3. **External Cache Systems** - Redis, Memcached integration (future enhancement) -4. **Response Streaming** - Large result set streaming support (future enhancement) -5. **Advanced Template Processing** - Complex transformation logic beyond basic substitution (future enhancement) - -## Current State - -The OpenSearch Search Relevance plugin currently provides: -- Experiment management and execution within single OpenSearch clusters -- Query set management and execution -- Judgment-based evaluation with human relevance assessments -- Automated metrics calculation (NDCG, MAP, MRR, etc.) 
-- Local search configuration management - -**Components that will be impacted:** -- `ExperimentTaskManager` - Enhanced to support remote search execution -- `SearchRelevanceIndices` - New indices for remote configurations, cache, and failures -- Plugin registration - New REST endpoints and transport actions -- Experiment workflow - Integration of remote search results with existing evaluation - -## Solution Overview - -The Remote Query feature extends the Search Relevance plugin with a remote search execution layer that abstracts differences between search engines while maintaining consistent evaluation methodologies. - -**Key Technologies and Dependencies:** -- Java 11 HttpClient for HTTP communication -- OpenSearch XContent framework for JSON processing -- Template-based query and response transformation -- OpenSearch security framework for credential encryption - -**Integration with OpenSearch Core:** -- Utilizes OpenSearch's index management for data storage -- Leverages OpenSearch security for authentication and authorization -- Integrates with OpenSearch's async framework for non-blocking operations - -**Interaction with Existing Search-Relevance Features:** -- Seamless integration with existing experiment workflows -- Reuses judgment sets and evaluation metrics -- Extends search configuration concept to include remote systems -- Compatible with existing query set and result analysis features - -## Solution Design - -### Proposed Solution - -The solution introduces five core components that work together to provide remote search capabilities: - -#### Architecture Overview - -``` -┌──────────────────────────────────────────────────────────────┐ -│ Search Relevance Plugin │ -├──────────────────────────────────────────────────────────────┤ -│ Experiment Management │ Query Sets │ Judgment Management │ -├──────────────────────────────────────────────────────────────┤ -│ Search Execution Layer │ -├──────────────────────────────────────────────────────────────┤ -│ 
Local Search Executor │ Remote Query Executor │ -│ │ ┌──────────────────────────────────┤ -│ │ │ Configuration Management │ -│ │ │ Connection Pooling │ -│ │ │ Rate Limiting │ -│ │ │ Query Template Processing │ -│ │ │ Response Mapping │ -│ │ │ Caching Layer │ -│ │ │ Error Handling & Retry │ -│ │ │ Failure Tracking │ -└────────────────────────┴──┴──────────────────────────────────┘ -``` - -#### Core Components - -**1. RemoteSearchConfiguration** -- Manages connection details and search engine-specific settings -- Stores encrypted credentials and endpoint information -- Configures rate limiting and caching behavior -- Supports flexible query and response templates - -**2. RemoteSearchExecutor** -- Handles HTTP communication with remote search engines -- Implements rate limiting and concurrent request management -- Provides asynchronous execution with timeout handling -- Integrates caching and error handling - -**3. RemoteResponseMapper** -- Transforms remote search responses to standardized formats -- Supports template-based field mapping and extraction -- Handles various response formats and structures - -**4. RemoteSearchCache** -- Provides intelligent caching for remote search results -- Implements TTL-based expiration and manual invalidation -- Optimizes performance and reduces remote system load - -**5. 
RemoteSearchFailure** -- Tracks and categorizes remote search failures -- Supports debugging and monitoring requirements -- Enables circuit breaker and retry logic - -#### Data Models - -**RemoteSearchConfiguration Schema:** -```json -{ - "id": "unique_config_id", - "name": "Human readable name", - "description": "Configuration description", - "connectionUrl": "https://remote.search.engine:9200", - "username": "auth_username", - "password": "encrypted_password", - "queryTemplate": "{ \"query\": { \"match\": { \"content\": \"${queryText}\" } } }", - "responseTemplate": "{ \"hits\": \"${response.hits.hits}\" }", - "maxRequestsPerSecond": 10, - "maxConcurrentRequests": 5, - "cacheDurationMinutes": 60, - "refreshCache": false, - "metadata": { "searchEngine": "elasticsearch", "version": "7.x" }, - "timestamp": "2024-01-01T00:00:00Z" -} -``` - -**Integration with Experiments:** -```json -{ - "experimentId": "cross_platform_comparison", - "searchConfigurations": [ - { - "id": "local_opensearch", - "type": "local", - "name": "Local OpenSearch" - }, - { - "id": "remote_elasticsearch", - "type": "remote", - "name": "Production Elasticsearch", - "remoteConfigId": "prod_es_config" - } - ] -} -``` - -#### Query Execution Flow - -1. **Experiment Initialization**: Load experiment configuration and validate remote connections -2. **Query Processing**: For each query in the query set: - - Execute against local OpenSearch (existing flow) - - Execute against configured remote systems (new flow) - - Apply rate limiting and caching as configured -3. **Response Normalization**: Transform all responses to consistent format using templates -4. **Evaluation**: Apply judgment sets and calculate metrics across all systems -5. 
**Result Aggregation**: Generate comparative analysis and reports - -### Alternative Solutions Considered - -**Alternative 1: External Integration Service** -- **Approach**: Separate microservice handling remote search integration -- **Pros**: Technology flexibility, independent scaling, reduced plugin complexity -- **Cons**: Additional infrastructure, network latency, operational overhead -- **Decision**: Rejected due to operational complexity and performance concerns - -**Alternative 2: Export/Import Workflow** -- **Approach**: Manual export of queries, external execution, result import -- **Pros**: Simple implementation, no network dependencies during evaluation -- **Cons**: Manual process, no real-time capabilities, poor user experience -- **Decision**: Rejected due to poor automation and user experience - -**Alternative 3: Plugin-per-Search-Engine** -- **Approach**: Separate plugins for each supported search engine -- **Pros**: Optimized integration, native feature support -- **Cons**: Maintenance overhead, inconsistent experience, complex management -- **Decision**: Rejected due to maintenance burden and scalability concerns - -### Key Design Decisions - -**1. HTTP-Only Protocol Support** -- **Rationale**: HTTP/HTTPS covers majority of search engine APIs and reduces complexity -- **Trade-off**: Limited protocol support vs. implementation simplicity -- **Future**: Can be extended to support additional protocols - -**2. Template-Based Transformation** -- **Rationale**: Flexible approach supporting various search engine formats -- **Trade-off**: Limited transformation complexity vs. broad compatibility -- **Impact**: Enables support for diverse search engines with minimal code changes - -**3. Integrated Caching** -- **Rationale**: Reduces load on remote systems and improves performance -- **Trade-off**: Storage requirements vs. performance benefits -- **Impact**: Significant performance improvement for repeated queries - -**4. 
Basic Authentication Only** -- **Rationale**: Covers common authentication scenarios while maintaining security -- **Trade-off**: Limited auth methods vs. implementation complexity -- **Future**: OAuth and certificate-based auth can be added - -## Metrics and Observability - -### New Metrics to be Introduced - -**Remote Search Execution Metrics:** -- `remote_search_requests_total` - Total number of remote search requests -- `remote_search_requests_duration` - Request duration histogram -- `remote_search_failures_total` - Total number of failed requests by error type -- `remote_search_rate_limit_hits_total` - Number of rate limit violations -- `remote_search_cache_hits_total` - Cache hit/miss statistics - -**Configuration Metrics:** -- `remote_search_configurations_total` - Number of active remote configurations -- `remote_search_concurrent_requests` - Current concurrent requests per configuration - -**System Health Metrics:** -- `remote_search_circuit_breaker_state` - Circuit breaker status per configuration -- `remote_search_connection_pool_usage` - HTTP connection pool utilization - -### Search Relevance Specific Metrics - -**Experiment Metrics:** -- Integration with existing experiment result metrics -- Comparative analysis metrics across local and remote systems -- Cross-platform evaluation result tracking - -**Performance Comparison Metrics:** -- Response time comparison between local and remote systems -- Result quality metrics (NDCG, MAP, MRR) across platforms -- Cache effectiveness metrics for remote queries - -### Health and Performance Monitoring - -**Health Checks:** -- Periodic connectivity validation for remote configurations -- Authentication status monitoring -- Circuit breaker state tracking - -**Performance Monitoring:** -- Request latency percentiles (p50, p95, p99) -- Throughput metrics (requests per second) -- Error rate monitoring by configuration and error type - -**Alerting Integration:** -- Integration with OpenSearch alerting for failure 
notifications -- Threshold-based alerts for performance degradation -- Circuit breaker state change notifications - -## Technical Specifications - -### Data Schemas and Index Mappings - -**Remote Search Configuration Index:** -```json -{ - "mappings": { - "properties": { - "id": { "type": "keyword" }, - "name": { "type": "text", "fields": { "keyword": { "type": "keyword" } } }, - "description": { "type": "text" }, - "connectionUrl": { "type": "keyword" }, - "username": { "type": "keyword" }, - "password": { "type": "keyword", "index": false }, - "queryTemplate": { "type": "text", "index": false }, - "responseTemplate": { "type": "text", "index": false }, - "maxRequestsPerSecond": { "type": "integer" }, - "maxConcurrentRequests": { "type": "integer" }, - "cacheDurationMinutes": { "type": "long" }, - "refreshCache": { "type": "boolean" }, - "metadata": { "type": "object", "enabled": false }, - "timestamp": { "type": "date" } - } - } -} -``` - -**Remote Search Cache Index:** -```json -{ - "mappings": { - "properties": { - "id": { "type": "keyword" }, - "configurationId": { "type": "keyword" }, - "queryHash": { "type": "keyword" }, - "queryText": { "type": "text" }, - "response": { "type": "text", "index": false }, - "mappedResponse": { "type": "text", "index": false }, - "timestamp": { "type": "date" }, - "expirationTimestamp": { "type": "date" } - } - } -} -``` - -**Remote Search Failure Index:** -```json -{ - "mappings": { - "properties": { - "id": { "type": "keyword" }, - "configurationId": { "type": "keyword" }, - "experimentId": { "type": "keyword" }, - "query": { "type": "text" }, - "queryText": { "type": "text" }, - "errorType": { "type": "keyword" }, - "errorMessage": { "type": "text" }, - "stackTrace": { "type": "text", "index": false }, - "httpStatusCode": { "type": "integer" }, - "timestamp": { "type": "date" } - } - } -} -``` - -### API Specifications - -**Remote Configuration Management API:** - -```http -POST 
/_plugins/_search_relevance/remote_search_configurations -{ - "name": "Production Elasticsearch", - "description": "Main production ES cluster", - "connectionUrl": "https://prod-es.example.com:9200", - "username": "search_user", - "password": "secure_password", - "queryTemplate": "{ \"query\": { \"multi_match\": { \"query\": \"${queryText}\", \"fields\": [\"title^2\", \"content\"] } } }", - "responseTemplate": "{ \"hits\": \"${response.hits.hits}\", \"total\": \"${response.hits.total.value}\" }", - "maxRequestsPerSecond": 10, - "maxConcurrentRequests": 5, - "cacheDurationMinutes": 60 -} -``` - -```http -GET /_plugins/_search_relevance/remote_search_configurations/{configId} -PUT /_plugins/_search_relevance/remote_search_configurations/{configId} -DELETE /_plugins/_search_relevance/remote_search_configurations/{configId} -``` - -### Integration with Search-Relevance Data Models - -**Enhanced Experiment Configuration:** -- Extended to support remote search configurations alongside local configurations -- Maintains backward compatibility with existing experiment definitions -- Supports mixed local/remote experiment scenarios - -**Search Configuration Extension:** -- Existing SearchConfiguration concept extended to include remote configurations -- Type field distinguishes between "local" and "remote" configurations -- Remote configurations reference RemoteSearchConfiguration entities - -### Class and Sequence Diagrams - -**Remote Search Execution Sequence:** -``` -Client -> ExperimentTaskManager: Execute Experiment -ExperimentTaskManager -> RemoteSearchExecutor: Execute Remote Search -RemoteSearchExecutor -> RemoteSearchCacheDao: Check Cache -RemoteSearchCacheDao -> RemoteSearchExecutor: Cache Result -RemoteSearchExecutor -> HttpClient: HTTP Request (if cache miss) -HttpClient -> RemoteSearchExecutor: HTTP Response -RemoteSearchExecutor -> RemoteResponseMapper: Map Response -RemoteResponseMapper -> RemoteSearchExecutor: Mapped Response -RemoteSearchExecutor -> 
RemoteSearchCacheDao: Store Cache -RemoteSearchExecutor -> ExperimentTaskManager: Search Results -ExperimentTaskManager -> Client: Experiment Results -``` - -## Backward Compatibility - -### Breaking Changes and Migration Strategy - -**No Breaking Changes:** -- All existing APIs remain unchanged -- Existing experiments continue to work without modification -- Current search configurations are fully compatible - -**Additive Changes:** -- New REST endpoints for remote configuration management -- New indices for remote search data storage -- Enhanced experiment configuration schema (backward compatible) - -### Index Mapping Changes - -**New Indices Added:** -- `.opensearch-search-relevance-remote-search-configurations` -- `.opensearch-search-relevance-remote-search-cache` -- `.opensearch-search-relevance-remote-search-failures` - -**Existing Indices:** -- No changes to existing index mappings -- Experiment index may include new optional fields for remote configurations - -### Plugin Upgrade Considerations - -**Upgrade Path:** -1. Install updated plugin version -2. New indices created automatically on first use -3. Existing functionality remains unchanged -4. Remote features available immediately after configuration - -**Rollback Support:** -- Plugin can be downgraded without data loss -- Remote-specific data stored in separate indices -- Existing experiments unaffected by rollback - -## Security Considerations - -### Security Overview - -The Remote Query feature handles sensitive connection information and executes queries against external systems, requiring comprehensive security measures to protect credentials, data, and system integrity. 
- -**Security Context:** -- Remote search configurations contain authentication credentials -- HTTP requests transmitted to external systems -- Cached responses may contain sensitive search results -- API endpoints require proper authorization - -**Sensitive Data:** -- Remote system authentication credentials (username/password) -- Query content and search results -- Connection URLs and system metadata -- Cached response data - -**Trust Boundaries:** -- OpenSearch cluster (trusted) ↔ Remote search systems (untrusted) -- Plugin components (trusted) ↔ External HTTP endpoints (untrusted) -- User requests (authenticated) ↔ Plugin APIs (trusted) - -### Assets and Resources - -**Protected Assets:** -- Remote search configuration credentials -- Cached search results and query data -- Remote system connection information -- Experiment data and evaluation results - -**System Indices:** -- `.opensearch-search-relevance-remote-search-configurations` - Contains encrypted credentials -- `.opensearch-search-relevance-remote-search-cache` - Contains cached search results -- `.opensearch-search-relevance-remote-search-failures` - Contains error logs and stack traces - -**Access Patterns:** -- Configuration management requires admin-level permissions -- Experiment execution requires search-relevance permissions -- Cache access limited to plugin components -- Failure logs accessible for debugging and monitoring - -### API Security - -**Configuration Management Endpoints:** - -| Endpoint | Method | Mutating | Authorization | Input Validation | -|----------|--------|----------|---------------|------------------| -| `/_plugins/_search_relevance/remote_search_configurations` | POST | Yes | Admin role required | URL validation, credential encryption | -| `/_plugins/_search_relevance/remote_search_configurations/{id}` | GET | No | Read permissions | ID format validation | -| `/_plugins/_search_relevance/remote_search_configurations/{id}` | PUT | Yes | Admin role required | Full input 
validation, credential re-encryption | -| `/_plugins/_search_relevance/remote_search_configurations/{id}` | DELETE | Yes | Admin role required | ID validation, dependency checking | - -**Rate Limiting:** -- API endpoints subject to OpenSearch rate limiting -- Per-configuration rate limiting for remote requests -- Circuit breaker protection against abuse - -### Threat Analysis (STRIDE) - -**Spoofing Threats:** -- **Threat**: Attacker impersonates legitimate remote search system -- **Mitigation**: TLS certificate validation, connection URL validation -- **Threat**: Unauthorized access to configuration APIs -- **Mitigation**: OpenSearch role-based authentication and authorization - -**Tampering Threats:** -- **Threat**: Man-in-the-middle attacks on remote connections -- **Mitigation**: Mandatory HTTPS for remote connections, certificate pinning option -- **Threat**: Malicious modification of cached responses -- **Mitigation**: Cache integrity checks, encrypted storage - -**Repudiation Threats:** -- **Threat**: Denial of remote search activities -- **Mitigation**: Comprehensive audit logging, request/response tracking -- **Threat**: Unauthorized configuration changes -- **Mitigation**: Change logging, user attribution in audit logs - -**Information Disclosure Threats:** -- **Threat**: Credential exposure in logs or error messages -- **Mitigation**: Credential masking in logs, encrypted storage -- **Threat**: Sensitive query content in cache or logs -- **Mitigation**: Configurable logging levels, encrypted cache storage - -**Denial of Service Threats:** -- **Threat**: Resource exhaustion through excessive remote requests -- **Mitigation**: Rate limiting, concurrent request limits, circuit breakers -- **Threat**: Cache storage exhaustion -- **Mitigation**: TTL-based expiration, storage limits, cache cleanup - -**Elevation of Privilege Threats:** -- **Threat**: Plugin vulnerabilities leading to system compromise -- **Mitigation**: Input validation, secure coding 
practices, dependency scanning -- **Threat**: Remote system compromise affecting local system -- **Mitigation**: Network isolation, minimal required permissions - -### Attack Vectors - -**Unauthorized Users (No Cluster Access):** -- **Vector**: Direct API access attempts -- **Mitigation**: OpenSearch authentication required for all endpoints -- **Vector**: Network-level attacks on remote connections -- **Mitigation**: VPC/network security, firewall rules - -**Authorized Users with Limited Permissions:** -- **Vector**: Attempting to access configuration management APIs -- **Mitigation**: Role-based access control, admin-only configuration access -- **Vector**: Attempting to view sensitive configuration data -- **Mitigation**: Credential masking, field-level security - -**Read-Only Users Attempting Modifications:** -- **Vector**: POST/PUT/DELETE requests to configuration APIs -- **Mitigation**: HTTP method validation, permission checking -- **Vector**: Cache manipulation attempts -- **Mitigation**: Internal API access only, no external cache modification - -**Malicious Input Attacks:** -- **Vector**: SQL injection in query templates -- **Mitigation**: Template validation, parameterized queries -- **Vector**: Script injection in response templates -- **Mitigation**: Safe template processing, input sanitization -- **Vector**: XXE attacks in XML responses -- **Mitigation**: Secure XML parsing, external entity disabling - -### Security Mitigations - -**Credential Protection:** -- All passwords encrypted at rest using OpenSearch security framework -- Credentials never logged or exposed in error messages -- Secure credential rotation support -- Memory protection for credential handling - -**Input Validation and Sanitization:** -- URL format validation for connection endpoints -- Template syntax validation for query/response templates -- JSON schema validation for all API inputs -- Rate limit parameter bounds checking - -**Authentication and Authorization:** -- Integration 
with OpenSearch security plugin -- Role-based access control for all endpoints -- Admin-level permissions required for configuration management -- Audit logging for all security-relevant operations - -**Encryption Requirements:** -- Mandatory HTTPS for all remote connections -- TLS 1.2+ required for remote communication -- Encrypted storage for cached responses containing sensitive data -- Optional certificate pinning for high-security environments - -**Audit Logging and Monitoring:** -- Comprehensive logging of all remote search activities -- Security event logging (authentication failures, permission denials) -- Performance and error monitoring with alerting -- Configurable log retention and rotation - -### Security Testing Requirements - -**Security-Specific Test Cases:** -- Authentication bypass attempts -- Authorization boundary testing -- Credential encryption/decryption validation -- TLS connection security verification - -**Input Validation Testing:** -- Malformed URL handling -- Invalid template syntax processing -- Boundary value testing for rate limits -- SQL/script injection attempt handling - -**Authorization Boundary Testing:** -- Role-based access control validation -- Cross-tenant access prevention -- API endpoint permission verification -- Resource access control testing - -**Performance Testing for DoS Prevention:** -- Rate limiting effectiveness testing -- Resource exhaustion protection validation -- Circuit breaker functionality verification -- Concurrent request limit enforcement - -## Testing Strategy - -### Unit and Integration Testing - -**Unit Testing Coverage:** -- RemoteSearchConfiguration model validation and serialization -- RemoteSearchExecutor HTTP client functionality and error handling -- RemoteResponseMapper template processing and transformation -- Rate limiting and caching logic validation -- Security credential handling and encryption - -**Integration Testing:** -- End-to-end remote search execution workflows -- Cache 
integration with DAO layer -- Error handling and failure tracking -- Authentication and authorization integration -- Experiment workflow integration with remote configurations - -**Mock Testing:** -- HTTP client mocking for various response scenarios -- Remote system failure simulation -- Network timeout and connectivity testing -- Authentication failure scenarios - -### Performance Testing - -**Load Testing:** -- Concurrent remote search execution under various loads -- Rate limiting effectiveness under high request volumes -- Cache performance with large result sets -- Memory usage and garbage collection impact - -**Stress Testing:** -- System behavior under remote system failures -- Resource exhaustion scenarios -- Network partition and recovery testing -- Circuit breaker activation and recovery - -### Compatibility Testing - -**OpenSearch Version Compatibility:** -- Testing across supported OpenSearch versions (2.x+) -- Plugin upgrade and downgrade scenarios -- Index mapping compatibility validation - -**Search Engine Compatibility:** -- Elasticsearch compatibility testing -- Solr integration validation -- Custom search engine API testing -- Response format variation handling - -**Network Environment Testing:** -- Various network configurations and firewalls -- Proxy and load balancer compatibility -- TLS/SSL configuration variations -- IPv4/IPv6 dual-stack environments - -## Performance and Benchmarking - -### Key Performance Indicators - -**Response Time Metrics:** -- Remote search request latency (p50, p95, p99) -- Cache hit/miss response times -- End-to-end experiment execution time -- Template processing overhead - -**Throughput Metrics:** -- Requests per second per remote configuration -- Concurrent request handling capacity -- Cache storage and retrieval throughput -- Overall experiment processing rate - -**Resource Utilization:** -- Memory usage for caching and connection pooling -- CPU utilization for template processing -- Network bandwidth 
consumption -- Storage requirements for cache and failure data - -### Resource Utilization Targets - -**Memory Usage:** -- Maximum 100MB additional heap usage for remote search components -- Cache size limits configurable per deployment -- Connection pool memory overhead < 10MB per configuration - -**CPU Utilization:** -- Template processing overhead < 5% of total CPU -- HTTP client processing < 10% additional CPU load -- Minimal impact on existing search relevance operations - -**Network Bandwidth:** -- Configurable rate limiting to control bandwidth usage -- Efficient connection reuse to minimize overhead -- Compression support for large responses - -**Storage Requirements:** -- Cache storage configurable with automatic cleanup -- Failure tracking with configurable retention -- Index storage optimization for remote configuration data - -### Benchmark Methodology - -**Test Scenarios:** -1. **Single Remote Configuration**: Baseline performance with one remote system -2. **Multiple Remote Configurations**: Scalability testing with 5-10 remote systems -3. **High Query Volume**: 1000+ queries across multiple remote systems -4. **Cache Effectiveness**: Performance comparison with/without caching -5. 
**Failure Recovery**: Performance during and after remote system failures - -**Test Environment:** -- OpenSearch cluster with 3 nodes (4 CPU, 16GB RAM each) -- Simulated remote search engines with controlled latency -- Network simulation for various connectivity scenarios -- Load generation tools for concurrent request testing - -**Performance Baselines:** -- Existing search relevance experiment execution time -- Local search performance benchmarks -- Memory and CPU usage without remote search features -- Network utilization baselines - -**Success Criteria:** -- < 20% increase in experiment execution time with remote searches -- Cache hit ratio > 80% for repeated queries -- Rate limiting effectiveness > 95% accuracy -- Zero memory leaks during extended testing -- Graceful degradation during remote system failures - ---- - -## Additional Resources - -- [OpenSearch RFC Process](https://github.com/opensearch-project/OpenSearch/blob/main/DEVELOPER_GUIDE.md#submitting-changes) -- [Plugin Development Guide](https://opensearch.org/docs/latest/developers/plugins/) -- [Contributing Guidelines](../CONTRIBUTING.md) -- [Remote Search Querying RFC](../RFC-Remote-Search-Querying.md) -- [Search Relevance Plugin Documentation](https://opensearch.org/docs/latest/search-plugins/search-relevance/) From 7edc33e74c1a2a1f83ad194c3817b8c47946a779 Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Fri, 10 Oct 2025 19:19:40 -0400 Subject: [PATCH 7/9] Simplify the parsing of query DSL extensions through XContent parsing Signed-off-by: Scott Stults --- .../model/builder/SearchRequestBuilder.java | 171 +++++------------- .../plugin/SearchRelevancePlugin.java | 3 + .../integration/LtrSltrRescoreIT.java | 92 ++++++++++ .../metrics/MetricsHelperTests.java | 10 + .../builder/SearchRequestBuilderTests.java | 86 +++++---- 5 files changed, 204 insertions(+), 158 deletions(-) create mode 100644 src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java diff --git 
a/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java b/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java index 1099e4a4..fc5458c1 100644 --- a/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java +++ b/src/main/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilder.java @@ -11,21 +11,16 @@ import static org.opensearch.searchrelevance.experiment.QuerySourceUtil.validateHybridQuery; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Base64; -import java.util.Collections; import java.util.Map; import java.util.Objects; import org.opensearch.action.search.SearchRequest; -import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.SearchModule; import org.opensearch.search.builder.SearchSourceBuilder; import lombok.extern.log4j.Log4j2; @@ -33,17 +28,34 @@ @Log4j2 /** * Common Search Request Builder for Search Configuration with placeholder with QueryText filled. + * + * Everything in the source except the top-level 'query' is parsed with the real NamedXContentRegistry + * provided by the node/plugin wiring, so plugin-registered query types (e.g. inside rescore_query) parse + * without the former Base64 wrapper preprocessing; the top-level 'query' is still passed as a WrapperQuery.
*/ public class SearchRequestBuilder { - private static final NamedXContentRegistry NAMED_CONTENT_REGISTRY; - private static final SearchModule SEARCH_MODULE; - private static final String QUERY_FIELD_NAME = "query"; + private static volatile NamedXContentRegistry NAMED_XCONTENT_REGISTRY; private static final String SIZE_FIELD_NAME = "size"; + private static final String QUERY_FIELD_NAME = "query"; - static { - SEARCH_MODULE = new SearchModule(Settings.EMPTY, Collections.emptyList()); - NAMED_CONTENT_REGISTRY = new NamedXContentRegistry(SEARCH_MODULE.getNamedXContents()); + /** + * Initialize the builder with the cluster's NamedXContentRegistry so that + * SearchSourceBuilder can parse all plugin-registered query types. + */ + public static void initialize(NamedXContentRegistry registry) { + NAMED_XCONTENT_REGISTRY = registry; + log.debug("SearchRequestBuilder initialized with NamedXContentRegistry"); + } + + private static XContentParser newParserWithRegistry(String json) throws IOException { + if (NAMED_XCONTENT_REGISTRY == null) { + throw new IllegalStateException( + "SearchRequestBuilder is not initialized with NamedXContentRegistry. " + + "Ensure SearchRelevancePlugin.createComponents calls SearchRequestBuilder.initialize(xContentRegistry)." 
+ ); + } + return JsonXContent.jsonXContent.createParser(NAMED_XCONTENT_REGISTRY, DeprecationHandler.IGNORE_DEPRECATIONS, json); } /** @@ -62,67 +74,23 @@ public static SearchRequest buildSearchRequest(String index, String query, Strin // Replace placeholder with actual query text String processedQuery = query.replace(WILDCARD_QUERY_TEXT, queryText); - // Parse the full query into a map - XContentParser parser = JsonXContent.jsonXContent.createParser( + // Parse to a generic map (using EMPTY registry) to split off 'query' and pre-check 'size' + XContentParser tempParser = JsonXContent.jsonXContent.createParser( NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, processedQuery ); - Map fullQueryMap = parser.map(); - // Preprocess rescore_query to wrap unknown queries (e.g., LTR/sltr) using wrapper query to avoid NamedXContent parsing issues - try { - Object rescore = fullQueryMap.get("rescore"); - if (rescore instanceof Map) { - Map rescoreEntry = (Map) rescore; - Object queryObjInner = rescoreEntry.get("query"); - if (queryObjInner instanceof Map) { - Map queryMap = (Map) queryObjInner; - Object rescoreQuery = queryMap.get("rescore_query"); - if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { - XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); - tmpBuilder.value(rescoreQuery); - String raw = tmpBuilder.toString(); - String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); - Map wrapper = Map.of("wrapper", Map.of("query", base64)); - queryMap.put("rescore_query", wrapper); - } - } - } else if (rescore instanceof java.util.List) { - for (Object entry : (java.util.List) rescore) { - if (entry instanceof Map) { - Map rescoreEntry = (Map) entry; - Object queryObjInner = rescoreEntry.get("query"); - if (queryObjInner instanceof Map) { - Map queryMap = (Map) queryObjInner; - Object rescoreQuery = queryMap.get("rescore_query"); - if ((rescoreQuery instanceof Map) && !((Map)
rescoreQuery).containsKey("wrapper")) { - XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); - tmpBuilder.value(rescoreQuery); - String raw = tmpBuilder.toString(); - String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); - Map wrapper = Map.of("wrapper", Map.of("query", base64)); - queryMap.put("rescore_query", wrapper); - } - } - } - } - } - } catch (Exception e) { - log.debug("Skipping rescore_query wrapper preprocessing: {}", e.getMessage()); - } + Map fullQueryMap = tempParser.map(); - // This implementation handles the 'query' field separately from other fields because: - // 1. Custom query types (like hybrid, neural) are not registered in the default QueryBuilders - // 2. Using WrapperQuery allows passing through any query structure without parsing - // 3. All other fields (aggregations, source filtering, etc.) can be parsed normally by SearchSourceBuilder + // Handle 'query' separately using WrapperQuery to support custom/unregistered query types Object queryObject = fullQueryMap.remove(QUERY_FIELD_NAME); - // Parse everything except query using SearchSourceBuilder.fromXContent + // Parse everything except query using SearchSourceBuilder.fromXContent with real registry XContentBuilder builder = JsonXContent.contentBuilder(); builder.map(fullQueryMap); - parser = JsonXContent.jsonXContent.createParser( - NAMED_CONTENT_REGISTRY, + XContentParser parser = JsonXContent.jsonXContent.createParser( + NAMED_XCONTENT_REGISTRY, DeprecationHandler.IGNORE_DEPRECATIONS, builder.toString() ); @@ -148,7 +116,7 @@ public static SearchRequest buildSearchRequest(String index, String query, Strin ); } } - // Set size + // Set size override from configuration input sourceBuilder.size(size); // Set search pipeline if provided @@ -177,75 +145,40 @@ public static SearchRequest buildRequestForHybridSearch( // Replace placeholder with actual query text String processedQuery = query.replace(WILDCARD_QUERY_TEXT, queryText); - // Parse the 
full query into a map - XContentParser parser = JsonXContent.jsonXContent.createParser( + // Parse to a generic map (using EMPTY registry) for hybrid validation, splitting off 'query', and the 'size' pre-check + XContentParser tempParser = JsonXContent.jsonXContent.createParser( NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, processedQuery ); - Map fullQueryMap = parser.map(); - // Preprocess rescore_query to wrap unknown queries (e.g., LTR/sltr) using wrapper query to avoid NamedXContent parsing issues - try { - Object rescore = fullQueryMap.get("rescore"); - if (rescore instanceof Map) { - Map rescoreEntry = (Map) rescore; - Object queryObjInner = rescoreEntry.get("query"); - if (queryObjInner instanceof Map) { - Map queryMap = (Map) queryObjInner; - Object rescoreQuery = queryMap.get("rescore_query"); - if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { - XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); - tmpBuilder.value(rescoreQuery); - String raw = tmpBuilder.toString(); - String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); - Map wrapper = Map.of("wrapper", Map.of("query", base64)); - queryMap.put("rescore_query", wrapper); - } - } - } else if (rescore instanceof java.util.List) { - for (Object entry : (java.util.List) rescore) { - if (entry instanceof Map) { - Map rescoreEntry = (Map) entry; - Object queryObjInner = rescoreEntry.get("query"); - if (queryObjInner instanceof Map) { - Map queryMap = (Map) queryObjInner; - Object rescoreQuery = queryMap.get("rescore_query"); - if ((rescoreQuery instanceof Map) && !((Map) rescoreQuery).containsKey("wrapper")) { - XContentBuilder tmpBuilder = JsonXContent.contentBuilder(); - tmpBuilder.value(rescoreQuery); - String raw = tmpBuilder.toString(); - String base64 = Base64.getEncoder().encodeToString(raw.getBytes(StandardCharsets.UTF_8)); - Map wrapper = Map.of("wrapper", Map.of("query", base64)); - queryMap.put("rescore_query",
wrapper); - } - } - } - } - } - } catch (Exception e) { - log.debug("Skipping rescore_query wrapper preprocessing: {}", e.getMessage()); - } + Map fullQueryMap = tempParser.map(); + // Validate hybrid query validateHybridQuery(fullQueryMap); - // This implementation handles the 'query' field separately from other fields because: - // 1. Custom query types (like hybrid, neural) are not registered in the default QueryBuilders - // 2. Using WrapperQuery allows passing through any query structure without parsing - // 3. All other fields (aggregations, source filtering, etc.) can be parsed normally by SearchSourceBuilder + // Handle 'query' separately using WrapperQuery to support custom/unregistered query types Object queryObject = fullQueryMap.remove(QUERY_FIELD_NAME); - // Parse everything except query using SearchSourceBuilder.fromXContent + // Parse everything except query using SearchSourceBuilder.fromXContent with real registry XContentBuilder builder = JsonXContent.contentBuilder(); builder.map(fullQueryMap); - parser = JsonXContent.jsonXContent.createParser( - NAMED_CONTENT_REGISTRY, + XContentParser parser = JsonXContent.jsonXContent.createParser( + NAMED_XCONTENT_REGISTRY, DeprecationHandler.IGNORE_DEPRECATIONS, builder.toString() ); SearchSourceBuilder sourceBuilder = SearchSourceBuilder.fromXContent(parser); + // Handle query separately using WrapperQuery + if (queryObject != null) { + builder = JsonXContent.contentBuilder(); + builder.value(queryObject); + String queryBody = builder.toString(); + sourceBuilder.query(QueryBuilders.wrapperQuery(queryBody)); + } + // validate that query does not have internal temporary pipeline definition if (Objects.nonNull(sourceBuilder.searchPipelineSource()) && !sourceBuilder.searchPipelineSource().isEmpty()) { log.error("query in search configuration does have temporary search pipeline in its source"); @@ -258,14 +191,6 @@ public static SearchRequest buildRequestForHybridSearch( log.debug("no temporary search pipeline"); 
} - // Handle query separately using WrapperQuery - if (queryObject != null) { - builder = JsonXContent.contentBuilder(); - builder.value(queryObject); - String queryBody = builder.toString(); - sourceBuilder.query(QueryBuilders.wrapperQuery(queryBody)); - } - // Precheck if query contains a different size value if (fullQueryMap.containsKey(SIZE_FIELD_NAME)) { int querySize = ((Number) fullQueryMap.get(SIZE_FIELD_NAME)).intValue(); @@ -277,7 +202,7 @@ public static SearchRequest buildRequestForHybridSearch( ); } } - // Set size + // Set size override from configuration input sourceBuilder.size(size); searchRequest.source(sourceBuilder); diff --git a/src/main/java/org/opensearch/searchrelevance/plugin/SearchRelevancePlugin.java b/src/main/java/org/opensearch/searchrelevance/plugin/SearchRelevancePlugin.java index 7d884941..eb33d747 100644 --- a/src/main/java/org/opensearch/searchrelevance/plugin/SearchRelevancePlugin.java +++ b/src/main/java/org/opensearch/searchrelevance/plugin/SearchRelevancePlugin.java @@ -54,6 +54,7 @@ import org.opensearch.searchrelevance.indices.SearchRelevanceIndicesManager; import org.opensearch.searchrelevance.metrics.MetricsHelper; import org.opensearch.searchrelevance.ml.MLAccessor; +import org.opensearch.searchrelevance.model.builder.SearchRequestBuilder; import org.opensearch.searchrelevance.rest.RestCreateQuerySetAction; import org.opensearch.searchrelevance.rest.RestDeleteExperimentAction; import org.opensearch.searchrelevance.rest.RestDeleteJudgmentAction; @@ -172,6 +173,8 @@ public Collection createComponents( this.clusterUtil = new ClusterUtil(clusterService); this.infoStatsManager = new InfoStatsManager(settingsAccessor); EventStatsManager.instance().initialize(settingsAccessor); + // Initialize SearchRequestBuilder with the real NamedXContentRegistry so it can parse all plugin-registered queries + SearchRequestBuilder.initialize(xContentRegistry); return List.of( searchRelevanceIndicesManager, diff --git 
a/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java b/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java new file mode 100644 index 00000000..2dd9dfcc --- /dev/null +++ b/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java @@ -0,0 +1,92 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.searchrelevance.integration; + +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.client.ResponseException; +import org.opensearch.searchrelevance.BaseSearchRelevanceIT; + +/** + * Integration test that verifies SearchSource parsing of rescore with the `sltr` clause. + * This ensures that when the LTR module/plugin is present, the NamedXContentRegistry + * includes the LTR parsers and the request does not fail with an "unknown [sltr]" parsing error. + *

+ * Note: + * - This test does NOT require a fully trained LTR model. It is valid for the request to fail + * with model-not-found or runtime execution errors. The key validation is that parsing + * recognizes the "sltr" rescorer rather than failing with an unknown query/rescorer error. + */ +public class LtrSltrRescoreIT extends BaseSearchRelevanceIT { + + @Test + public void testRescoreParsingWithSltr() throws Exception { + final String index = "ltr-sltr-it"; + + final String indexConfig = "{" + + " \"settings\": {" + + " \"number_of_shards\": 1," + + " \"number_of_replicas\": 0" + + " }," + + " \"mappings\": {" + + " \"properties\": {" + + " \"title\": {\"type\": \"text\"}," + + " \"body\": {\"type\": \"text\"}" + + " }" + + " }" + + "}"; + + createIndexWithConfiguration(index, indexConfig); + + final String bulk = "{ \"index\": {\"_index\":\"" + + index + + "\", \"_id\":\"1\"} }\n" + + "{ \"title\":\"alpha\", \"body\":\"foo\" }\n" + + "{ \"index\": {\"_index\":\"" + + index + + "\", \"_id\":\"2\"} }\n" + + "{ \"title\":\"beta\", \"body\":\"bar\" }\n"; + + bulkIngest(index, bulk); + + // Build a search request that includes rescore with sltr. We do not require the model to exist; + // we only validate that the parser recognizes "sltr" and does not throw "unknown [sltr]" errors. 
+ final String searchBody = "{" + + " \"query\": {\"match_all\": {}}," + + " \"rescore\": [" + + " {" + + " \"window_size\": 10," + + " \"rescore_query\": {" + + " \"sltr\": {" + + " \"params\": {\"keywords\": \"foo\"}," + + " \"model\": \"my_test_model\"" + + " }" + + " }" + + " }" + + " ]" + + "}"; + + final Request search = new Request("GET", "/" + index + "/_search"); + search.setJsonEntity(searchBody); + + try { + final Response ok = client().performRequest(search); + assertEquals(200, ok.getStatusLine().getStatusCode()); + } catch (ResponseException re) { + final Response r = re.getResponse(); + final int code = r.getStatusLine().getStatusCode(); + // Accept any response; the important part is that the error is NOT "unknown [sltr]". + assertTrue("Expected an HTTP status (>=200). Got: " + code, code >= 200 && code < 600); + final String msg = EntityUtils.toString(r.getEntity()); + final boolean unknownSltr = msg != null && msg.toLowerCase().contains("unknown") && msg.toLowerCase().contains("sltr"); + assertFalse("Cluster did not recognize 'sltr' rescore; LTR module may not be loaded. 
Message: " + msg, unknownSltr); + } + } +} diff --git a/src/test/java/org/opensearch/searchrelevance/metrics/MetricsHelperTests.java b/src/test/java/org/opensearch/searchrelevance/metrics/MetricsHelperTests.java index ca1e499a..17b8deb8 100644 --- a/src/test/java/org/opensearch/searchrelevance/metrics/MetricsHelperTests.java +++ b/src/test/java/org/opensearch/searchrelevance/metrics/MetricsHelperTests.java @@ -25,16 +25,20 @@ import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.search.SearchModule; import org.opensearch.searchrelevance.dao.EvaluationResultDao; import org.opensearch.searchrelevance.dao.ExperimentVariantDao; import org.opensearch.searchrelevance.dao.JudgmentDao; import org.opensearch.searchrelevance.model.SearchConfigurationDetails; +import org.opensearch.searchrelevance.model.builder.SearchRequestBuilder; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.transport.client.Client; @@ -56,6 +60,12 @@ public void setUp() throws Exception { evaluationResultDao = mock(EvaluationResultDao.class); experimentVariantDao = mock(ExperimentVariantDao.class); metricsHelper = new MetricsHelper(clusterService, client, judgmentDao, evaluationResultDao, experimentVariantDao); + + // Initialize SearchRequestBuilder NamedXContentRegistry for tests + NamedXContentRegistry reg = new NamedXContentRegistry( + new SearchModule(Settings.EMPTY, java.util.Collections.emptyList()).getNamedXContents() + ); + 
SearchRequestBuilder.initialize(reg); } public void testProcessPairwiseMetricsWithPipeline() { diff --git a/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java b/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java index 5a86d357..9a130790 100644 --- a/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java +++ b/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java @@ -9,13 +9,15 @@ import static org.opensearch.searchrelevance.common.PluginConstants.WILDCARD_QUERY_TEXT; -import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.junit.Before; import org.opensearch.action.search.SearchRequest; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.search.SearchModule; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.test.OpenSearchTestCase; @@ -24,6 +26,14 @@ public class SearchRequestBuilderTests extends OpenSearchTestCase { + @Before + public void setup() { + NamedXContentRegistry reg = new NamedXContentRegistry( + new SearchModule(Settings.EMPTY, java.util.Collections.emptyList()).getNamedXContents() + ); + SearchRequestBuilder.initialize(reg); + } + private static final String TEST_INDEX = "test_index"; private static final String TEST_QUERY_TEXT = "test_query"; private static final String TEST_PIPELINE = "test_pipeline"; @@ -194,16 +204,12 @@ public void testHybridQuerySearchConfiguration_whenLessThenTwoSubQueries_thenFai assertEquals("invalid hybrid query: expected exactly [2] sub-queries but found [1]", exception.getMessage()); } - // ------------------------- - // Contract tests for rescore.rescore_query wrapper preprocessing - // ------------------------- - public void 
testBuildSearchRequest_whenRescoreQueryIsObject_thenWrappedWithBase64() throws Exception { String query = "{" + "\"query\":{\"match\":{\"title\":\"" + WILDCARD_QUERY_TEXT + "\"}}," - + "\"rescore\":{\"query\":{\"rescore_query\":{\"sltr\":{\"model\":\"m1\",\"params\":{\"keywords\":\"abc\"}}}}}" + + "\"rescore\":{\"query\":{\"rescore_query\":{\"match\":{\"title\":\"abc\"}}}}" + "}"; SearchRequest searchRequest = SearchRequestBuilder.buildSearchRequest(TEST_INDEX, query, TEST_QUERY_TEXT, null, TEST_SIZE); @@ -212,15 +218,19 @@ public void testBuildSearchRequest_whenRescoreQueryIsObject_thenWrappedWithBase6 assertNotNull(sourceBuilder); Map sourceMap = parseJsonToMap(sourceBuilder.toString()); - String base64 = extractWrappedRescoreQueryBase64(sourceMap); - assertNotNull("rescore.rescore_query should be wrapped with base64 payload", base64); - - String decoded = new String(Base64.getDecoder().decode(base64), StandardCharsets.UTF_8); - Map decodedMap = parseJsonToMap(decoded); - assertTrue("Decoded rescore_query should contain sltr", decodedMap.containsKey("sltr")); + Object rescoreObj = sourceMap.get("rescore"); + @SuppressWarnings("unchecked") + Map rescoreMap = (rescoreObj instanceof java.util.List) + ? 
(Map) ((java.util.List) rescoreObj).get(0) + : (Map) rescoreObj; + assertNotNull(rescoreMap); + @SuppressWarnings("unchecked") + Map queryObj = (Map) rescoreMap.get("query"); + assertNotNull(queryObj); @SuppressWarnings("unchecked") - Map sltr = (Map) decodedMap.get("sltr"); - assertEquals("m1", sltr.get("model")); + Map rescoreQuery = (Map) queryObj.get("rescore_query"); + assertNotNull(rescoreQuery); + assertTrue(rescoreQuery.containsKey("match")); } public void testBuildSearchRequest_whenRescoreIsArray_thenFirstEntryRescoreQueryWrapped() throws Exception { @@ -228,7 +238,7 @@ public void testBuildSearchRequest_whenRescoreIsArray_thenFirstEntryRescoreQuery + "\"query\":{\"match\":{\"title\":\"" + WILDCARD_QUERY_TEXT + "\"}}," - + "\"rescore\":[{\"query\":{\"rescore_query\":{\"sltr\":{\"model\":\"m2\",\"params\":{\"keywords\":\"xyz\"}}}}}]" + + "\"rescore\":[{\"query\":{\"rescore_query\":{\"match\":{\"title\":\"xyz\"}}}}]" + "}"; SearchRequest searchRequest = SearchRequestBuilder.buildSearchRequest(TEST_INDEX, query, TEST_QUERY_TEXT, null, TEST_SIZE); @@ -237,19 +247,21 @@ public void testBuildSearchRequest_whenRescoreIsArray_thenFirstEntryRescoreQuery assertNotNull(sourceBuilder); Map sourceMap = parseJsonToMap(sourceBuilder.toString()); - String base64 = extractWrappedRescoreQueryBase64(sourceMap); - assertNotNull("rescore[0].rescore_query should be wrapped with base64 payload", base64); - - String decoded = new String(Base64.getDecoder().decode(base64), StandardCharsets.UTF_8); - Map decodedMap = parseJsonToMap(decoded); - assertTrue(decodedMap.containsKey("sltr")); @SuppressWarnings("unchecked") - Map sltr = (Map) decodedMap.get("sltr"); - assertEquals("m2", sltr.get("model")); + java.util.List rescoreList = (java.util.List) sourceMap.get("rescore"); + assertNotNull(rescoreList); + @SuppressWarnings("unchecked") + Map first = (Map) rescoreList.get(0); + @SuppressWarnings("unchecked") + Map q = (Map) first.get("query"); + @SuppressWarnings("unchecked") + Map rq 
= (Map) q.get("rescore_query"); + assertNotNull(rq); + assertTrue(rq.containsKey("match")); } - public void testBuildRequestForHybridSearch_whenRescoreQueryPresent_thenWrappedWithBase64() throws Exception { - // Minimal valid hybrid query (2 sub-queries) plus rescore with sltr + public void testBuildRequestForHybridSearch_whenRescoreQueryPresent() throws Exception { + // Minimal valid hybrid query (2 sub-queries) plus rescore with a standard query String hybridQuery = "{" + "\"_source\":{\"exclude\":[\"passage_embedding\"]}," + "\"query\":{\"hybrid\":{\"queries\":[" @@ -260,7 +272,7 @@ public void testBuildRequestForHybridSearch_whenRescoreQueryPresent_thenWrappedW + WILDCARD_QUERY_TEXT + "\"}}}" + "]}}," - + "\"rescore\":{\"query\":{\"rescore_query\":{\"sltr\":{\"model\":\"m3\",\"params\":{\"keywords\":\"hyb\"}}}}}" + + "\"rescore\":{\"query\":{\"rescore_query\":{\"match\":{\"name\":\"hyb\"}}}}" + "}"; Map temporarySearchPipeline = Map.of(); // allowed empty @@ -276,14 +288,18 @@ public void testBuildRequestForHybridSearch_whenRescoreQueryPresent_thenWrappedW assertNotNull(sourceBuilder); Map sourceMap = parseJsonToMap(sourceBuilder.toString()); - String base64 = extractWrappedRescoreQueryBase64(sourceMap); - assertNotNull("hybrid path should wrap rescore.rescore_query with base64 payload", base64); - - String decoded = new String(Base64.getDecoder().decode(base64), StandardCharsets.UTF_8); - Map decodedMap = parseJsonToMap(decoded); - assertTrue(decodedMap.containsKey("sltr")); + Object rescoreObj = sourceMap.get("rescore"); + @SuppressWarnings("unchecked") + Map rescoreMap = (rescoreObj instanceof java.util.List) + ? 
(Map) ((java.util.List) rescoreObj).get(0) + : (Map) rescoreObj; + assertNotNull(rescoreMap); + @SuppressWarnings("unchecked") + Map queryObj = (Map) rescoreMap.get("query"); + assertNotNull(queryObj); @SuppressWarnings("unchecked") - Map sltr = (Map) decodedMap.get("sltr"); - assertEquals("m3", sltr.get("model")); + Map rescoreQuery = (Map) queryObj.get("rescore_query"); + assertNotNull(rescoreQuery); + assertTrue(rescoreQuery.containsKey("match")); } } From 678b6d86cd86d23ace00a523960de0ad82449f9a Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Fri, 10 Oct 2025 19:50:10 -0400 Subject: [PATCH 8/9] fix test Signed-off-by: Scott Stults --- .../searchrelevance/integration/LtrSltrRescoreIT.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java b/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java index 2dd9dfcc..cac5acdb 100644 --- a/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java +++ b/src/test/java/org/opensearch/searchrelevance/integration/LtrSltrRescoreIT.java @@ -7,8 +7,9 @@ */ package org.opensearch.searchrelevance.integration; +import java.util.Locale; + import org.apache.hc.core5.http.io.entity.EntityUtils; -import org.junit.Test; import org.opensearch.client.Request; import org.opensearch.client.Response; import org.opensearch.client.ResponseException; @@ -26,7 +27,6 @@ */ public class LtrSltrRescoreIT extends BaseSearchRelevanceIT { - @Test public void testRescoreParsingWithSltr() throws Exception { final String index = "ltr-sltr-it"; @@ -85,7 +85,9 @@ public void testRescoreParsingWithSltr() throws Exception { // Accept any response; the important part is that the error is NOT "unknown [sltr]". assertTrue("Expected an HTTP status (>=200). 
Got: " + code, code >= 200 && code < 600); final String msg = EntityUtils.toString(r.getEntity()); - final boolean unknownSltr = msg != null && msg.toLowerCase().contains("unknown") && msg.toLowerCase().contains("sltr"); + final boolean unknownSltr = msg != null + && msg.toLowerCase(Locale.ROOT).contains("unknown") + && msg.toLowerCase(Locale.ROOT).contains("sltr"); assertFalse("Cluster did not recognize 'sltr' rescore; LTR module may not be loaded. Message: " + msg, unknownSltr); } } From 8ef750652382f8c0b4444e8d8d557b39585d9b38 Mon Sep 17 00:00:00 2001 From: Scott Stults Date: Fri, 10 Oct 2025 20:10:02 -0400 Subject: [PATCH 9/9] Slightly better test method names Signed-off-by: Scott Stults --- .../model/builder/SearchRequestBuilderTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java b/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java index 9a130790..3f90916d 100644 --- a/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java +++ b/src/test/java/org/opensearch/searchrelevance/model/builder/SearchRequestBuilderTests.java @@ -204,7 +204,7 @@ public void testHybridQuerySearchConfiguration_whenLessThenTwoSubQueries_thenFai assertEquals("invalid hybrid query: expected exactly [2] sub-queries but found [1]", exception.getMessage()); } - public void testBuildSearchRequest_whenRescoreQueryIsObject_thenWrappedWithBase64() throws Exception { + public void testParseRescoreQuery_whenRescoreIsObject() throws Exception { String query = "{" + "\"query\":{\"match\":{\"title\":\"" + WILDCARD_QUERY_TEXT @@ -233,7 +233,7 @@ public void testBuildSearchRequest_whenRescoreQueryIsObject_thenWrappedWithBase6 assertTrue(rescoreQuery.containsKey("match")); } - public void testBuildSearchRequest_whenRescoreIsArray_thenFirstEntryRescoreQueryWrapped() throws Exception { + public void 
testParseRescoreQuery_whenRescoreIsArrayFirstEntry() throws Exception { String query = "{" + "\"query\":{\"match\":{\"title\":\"" + WILDCARD_QUERY_TEXT @@ -260,7 +260,7 @@ public void testBuildSearchRequest_whenRescoreIsArray_thenFirstEntryRescoreQuery assertTrue(rq.containsKey("match")); } - public void testBuildRequestForHybridSearch_whenRescoreQueryPresent() throws Exception { + public void testParseRescoreQuery_inHybridRequest() throws Exception { // Minimal valid hybrid query (2 sub-queries) plus rescore with a standard query String hybridQuery = "{" + "\"_source\":{\"exclude\":[\"passage_embedding\"]},"