diff --git a/src/store/src/Bridge/Cache/Store.php b/src/store/src/Bridge/Cache/Store.php index e9010a1a97..4ae7dd253b 100644 --- a/src/store/src/Bridge/Cache/Store.php +++ b/src/store/src/Bridge/Cache/Store.php @@ -17,7 +17,13 @@ use Symfony\AI\Store\Document\Metadata; use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\Query\Filter\EqualFilter; +use Symfony\AI\Store\Query\HybridQuery; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\TextQuery; +use Symfony\AI\Store\Query\VectorQuery; use Symfony\AI\Store\StoreInterface; use Symfony\Contracts\Cache\CacheInterface; @@ -96,6 +102,15 @@ public function remove(string|array $ids, array $options = []): void $this->cache->save($cacheItem); } + public function supports(string $queryClass): bool + { + return \in_array($queryClass, [ + VectorQuery::class, + TextQuery::class, + HybridQuery::class, + ], true); + } + /** * @param array{ * maxItems?: positive-int, @@ -103,7 +118,28 @@ public function remove(string|array $ids, array $options = []): void * } $options If maxItems is provided, only the top N results will be returned. * If filter is provided, only documents matching the filter will be considered. 
*/ - public function query(Vector $vector, array $options = []): iterable + public function query(QueryInterface $query, array $options = []): iterable + { + return match (true) { + $query instanceof VectorQuery => $this->queryVector($query, $options), + $query instanceof TextQuery => $this->queryText($query, $options), + $query instanceof HybridQuery => $this->queryHybrid($query, $options), + default => throw new UnsupportedQueryTypeException($query->getType(), $this), + }; + } + + public function drop(array $options = []): void + { + $this->cache->clear(); + } + + /** + * @param array{ + * maxItems?: positive-int, + * filter?: callable(VectorDocument): bool, + * } $options + */ + private function queryVector(VectorQuery $query, array $options): iterable { $documents = $this->cache->get($this->cacheKey, static fn (): array => []); @@ -117,15 +153,134 @@ public function query(Vector $vector, array $options = []): iterable metadata: new Metadata($document['metadata']), ), $documents); + $vectorDocuments = $this->applyFilter($vectorDocuments, $query->getFilter()); + if (isset($options['filter'])) { $vectorDocuments = array_values(array_filter($vectorDocuments, $options['filter'])); } - yield from $this->distanceCalculator->calculate($vectorDocuments, $vector, $options['maxItems'] ?? null); + yield from $this->distanceCalculator->calculate($vectorDocuments, $query->getVector(), $options['maxItems'] ?? 
null); } - public function drop(array $options = []): void + /** + * @param array{ + * maxItems?: positive-int, + * filter?: callable(VectorDocument): bool, + * } $options + */ + private function queryText(TextQuery $query, array $options): iterable { - $this->cache->clear(); + $documents = $this->cache->get($this->cacheKey, static fn (): array => []); + + if ([] === $documents) { + return; + } + + $vectorDocuments = array_map(static fn (array $document): VectorDocument => new VectorDocument( + id: $document['id'], + vector: new Vector($document['vector']), + metadata: new Metadata($document['metadata']), + ), $documents); + + if (isset($options['filter'])) { + $vectorDocuments = array_values(array_filter($vectorDocuments, $options['filter'])); + } + + $filteredDocuments = array_filter($vectorDocuments, function (VectorDocument $doc) use ($query) { + $text = $doc->metadata->getText() ?? ''; + + return str_contains(strtolower($text), strtolower($query->getText())); + }); + + $filteredDocuments = $this->applyFilter($filteredDocuments, $query->getFilter()); + + $maxItems = $options['maxItems'] ?? 
null; + $count = 0; + + foreach ($filteredDocuments as $document) { + if (null !== $maxItems && $count >= $maxItems) { + break; + } + + yield $document; + ++$count; + } + } + + /** + * @param array{ + * maxItems?: positive-int, + * filter?: callable(VectorDocument): bool, + * } $options + */ + private function queryHybrid(HybridQuery $query, array $options): iterable + { + $vectorResults = iterator_to_array($this->queryVector( + new VectorQuery($query->getVector(), $query->getFilter()), + $options + )); + + $textResults = iterator_to_array($this->queryText( + new TextQuery($query->getText(), $query->getFilter()), + $options + )); + + $mergedResults = []; + $seenIds = []; + + foreach ($vectorResults as $doc) { + $id = $doc->id->toRfc4122(); + if (!isset($seenIds[$id])) { + $mergedResults[] = new VectorDocument( + id: $doc->id, + vector: $doc->vector, + metadata: $doc->metadata, + score: null !== $doc->score ? $doc->score * $query->getSemanticRatio() : null, + ); + $seenIds[$id] = true; + } + } + + foreach ($textResults as $doc) { + $id = $doc->id->toRfc4122(); + if (!isset($seenIds[$id])) { + $mergedResults[] = $doc; + $seenIds[$id] = true; + } + } + + if (isset($options['filter'])) { + $mergedResults = array_values(array_filter($mergedResults, $options['filter'])); + } + + $maxItems = $options['maxItems'] ?? 
null; + $count = 0; + + foreach ($mergedResults as $document) { + if (null !== $maxItems && $count >= $maxItems) { + break; + } + + yield $document; + ++$count; + } + } + + /** + * @param VectorDocument[] $documents + * + * @return VectorDocument[] + */ + private function applyFilter(array $documents, $filter): array + { + if (!$filter instanceof EqualFilter) { + return $documents; + } + + return array_values(array_filter($documents, function (VectorDocument $doc) use ($filter) { + $metadata = $doc->metadata->getArrayCopy(); + + return isset($metadata[$filter->getField()]) && $metadata[$filter->getField()] === $filter->getValue(); + })); } } diff --git a/src/store/src/Bridge/ChromaDb/Store.php b/src/store/src/Bridge/ChromaDb/Store.php index e70e09c9c3..a48128c321 100644 --- a/src/store/src/Bridge/ChromaDb/Store.php +++ b/src/store/src/Bridge/ChromaDb/Store.php @@ -15,6 +15,11 @@ use Symfony\AI\Platform\Vector\Vector; use Symfony\AI\Store\Document\Metadata; use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; +use Symfony\AI\Store\Query\Filter\EqualFilter; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\TextQuery; +use Symfony\AI\Store\Query\VectorQuery; use Symfony\AI\Store\StoreInterface; /** @@ -67,30 +72,94 @@ public function remove(string|array $ids, array $options = []): void $collection->delete(ids: $ids); } + public function supports(string $queryClass): bool + { + return \in_array($queryClass, [ + VectorQuery::class, + TextQuery::class, + ], true); + } + /** - * @param array{where?: array, whereDocument?: array, include?: array, queryTexts?: array} $options + * @param array{where?: array, whereDocument?: array, include?: array} $options */ - public function query(Vector $vector, array $options = []): iterable + public function query(QueryInterface $query, array $options = []): iterable { - $include = null; - if ([] !== ($options['include'] ?? 
[])) { - $include = array_values( - array_unique( - array_merge(['embeddings', 'metadatas', 'distances'], $options['include']) - ) - ); + if (!$this->supports($query::class)) { + throw new UnsupportedQueryTypeException($query->getType(), $this); } + return match (true) { + $query instanceof VectorQuery => $this->queryVector($query, $options), + $query instanceof TextQuery => $this->queryText($query, $options), + default => throw new UnsupportedQueryTypeException($query->getType(), $this), + }; + } + + /** + * @param array{where?: array, whereDocument?: array, include?: array, limit?: positive-int} $options + */ + private function queryVector(VectorQuery $query, array $options): iterable + { + $include = $this->buildInclude($options); + $where = $this->buildWhere($query->getFilter(), $options); + + $collection = $this->client->getOrCreateCollection($this->collectionName); + $queryResponse = $collection->query( + queryEmbeddings: [$query->getVector()->getData()], + nResults: $options['limit'] ?? 4, + where: $where, + whereDocument: $options['whereDocument'] ?? null, + include: $include, + ); + + yield from $this->transformResponse($queryResponse); + } + + /** + * @param array{where?: array, whereDocument?: array, include?: array, limit?: positive-int} $options + */ + private function queryText(TextQuery $query, array $options): iterable + { + $include = $this->buildInclude($options); + $where = $this->buildWhere($query->getFilter(), $options); + $collection = $this->client->getOrCreateCollection($this->collectionName); $queryResponse = $collection->query( - queryEmbeddings: [$vector->getData()], - queryTexts: $options['queryTexts'] ?? null, - nResults: 4, - where: $options['where'] ?? null, + queryTexts: [$query->getText()], + nResults: $options['limit'] ?? 4, + where: $where, whereDocument: $options['whereDocument'] ?? 
null, include: $include, ); + yield from $this->transformResponse($queryResponse); + } + + private function buildInclude(array $options): ?array + { + if ([] === ($options['include'] ?? [])) { + return null; + } + + return array_values( + array_unique( + array_merge(['embeddings', 'metadatas', 'distances'], $options['include']) + ) + ); + } +
+    /**
+     * Builds the "where" clause passed to the collection query.
+     *
+     * The caller-supplied "where" option is kept; when the query carries an
+     * EqualFilter as well, both constraints are joined with "$and".
+     *
+     * @param mixed                $filter  Filter taken from the query; only EqualFilter is translated
+     * @param array{where?: array} $options Query options
+     *
+     * @return array|null The clause to send, or null when neither a filter nor a "where" option is given
+     */
+    private function buildWhere($filter, array $options): ?array
+    {
+        // query() documents a "where" option, so it must not be dropped silently.
+        $where = $options['where'] ?? null;
+
+        if (!$filter instanceof EqualFilter) {
+            return $where;
+        }
+
+        $condition = [$filter->getField() => ['$eq' => $filter->getValue()]];
+
+        if (null === $where || [] === $where) {
+            return $condition;
+        }
+
+        // Both constraints have to hold at the same time.
+        return ['$and' => [$where, $condition]];
+    }
+ + private function transformResponse(object $queryResponse): iterable + { $metaCount = \count($queryResponse->metadatas[0]); for ($i = 0; $i < $metaCount; ++$i) { diff --git a/src/store/src/Bridge/Pinecone/Store.php b/src/store/src/Bridge/Pinecone/Store.php index 5d0b0de600..c1245aee6f 100644 --- a/src/store/src/Bridge/Pinecone/Store.php +++ b/src/store/src/Bridge/Pinecone/Store.php @@ -17,7 +17,11 @@ use Symfony\AI\Store\Document\Metadata; use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\Query\Filter\EqualFilter; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\VectorQuery; use Symfony\AI\Store\StoreInterface; /** @@ -112,13 +116,24 @@ public function remove(string|array $ids, array $options = []): void } } - public function query(Vector $vector, array $options = []): iterable + public function supports(string $queryClass): bool { + return VectorQuery::class === $queryClass; + } + + public function query(QueryInterface $query, array $options = []): iterable + { + if (!$query instanceof VectorQuery) { + throw new UnsupportedQueryTypeException($query->getType(), $this); + } + + $filter = $this->buildFilter($query->getFilter(), $options); + $result = $this->getVectors()->query( - vector: $vector->getData(), + vector: 
$query->getVector()->getData(), namespace: $options['namespace'] ?? $this->namespace, - filter: $options['filter'] ?? $this->filter, - topK: $options['topK'] ?? $this->topK, + filter: $filter, + topK: $options['topK'] ?? $options['limit'] ?? $this->topK, includeValues: true, ); @@ -140,6 +155,23 @@ public function drop(array $options = []): void ->delete(); }
+    /**
+     * Builds the metadata filter sent with the vector query.
+     *
+     * The base filter is the "filter" option when given, otherwise the filter
+     * configured on the store. An EqualFilter carried by the query is added on
+     * top of it with "$and".
+     *
+     * @param mixed                 $queryFilter Filter taken from the query; only EqualFilter is translated
+     * @param array{filter?: array} $options     Query options
+     *
+     * @return array The filter to send; the base filter unchanged when the query has no EqualFilter
+     */
+    private function buildFilter($queryFilter, array $options): array
+    {
+        // Same precedence the removed "filter: $options['filter'] ?? $this->filter" argument had.
+        $filter = $options['filter'] ?? $this->filter;
+
+        if (!$queryFilter instanceof EqualFilter) {
+            return $filter;
+        }
+
+        $filterCondition = [$queryFilter->getField() => ['$eq' => $queryFilter->getValue()]];
+
+        return [] === $filter ? $filterCondition : ['$and' => [$filter, $filterCondition]];
+    }
+ private function getVectors(): VectorResource { return $this->pinecone->data()->vectors(); diff --git a/src/store/src/Bridge/Postgres/Store.php b/src/store/src/Bridge/Postgres/Store.php index d5dde25192..3ffb09689c 100644 --- a/src/store/src/Bridge/Postgres/Store.php +++ b/src/store/src/Bridge/Postgres/Store.php @@ -17,7 +17,13 @@ use Symfony\AI\Store\Document\Metadata; use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Exception\InvalidArgumentException; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\Query\Filter\EqualFilter; +use Symfony\AI\Store\Query\HybridQuery; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\TextQuery; +use Symfony\AI\Store\Query\VectorQuery; use Symfony\AI\Store\StoreInterface; /** @@ -150,22 +156,28 @@ public function remove(string|array $ids, array $options = []): void $statement->execute(); } - public function query(Vector $vector, array $options = []): iterable + public function supports(string $queryClass): bool { - $where = null; + return \in_array($queryClass, [ + VectorQuery::class, + TextQuery::class, + HybridQuery::class, + ], true); + } - $maxScore = $options['maxScore'] ?? 
null; - if ($maxScore) { - $where = "WHERE ({$this->vectorFieldName} {$this->distance->getComparisonSign()} :embedding) <= :maxScore"; - } + public function query(QueryInterface $query, array $options = []): iterable + { + return match (true) { + $query instanceof VectorQuery => $this->queryVector($query, $options), + $query instanceof TextQuery => $this->queryText($query, $options), + $query instanceof HybridQuery => $this->queryHybrid($query, $options), + default => throw new UnsupportedQueryTypeException($query->getType(), $this), + }; + } - if ($options['where'] ?? false) { - if ($where) { - $where .= ' AND ('.$options['where'].')'; - } else { - $where = 'WHERE '.$options['where']; - } - } + private function queryVector(VectorQuery $query, array $options): iterable + { + $where = $this->buildWhereClause($query->getFilter(), $options); $sql = \sprintf(<<vectorFieldName, $this->distance->getComparisonSign(), $this->tableName, - $where ?? '', + $where, $options['limit'] ?? 5, ); + $statement = $this->connection->prepare($sql); + $params = $this->buildParams($query->getFilter(), ['embedding' => $this->toPgvector($query->getVector())], $options); - $params = [ - 'embedding' => $this->toPgvector($vector), - ...$options['params'] ?? [], - ]; - if (null !== $maxScore) { - $params['maxScore'] = $maxScore; + foreach ($params as $key => $value) { + $statement->bindValue(':'.$key, $value); } + $statement->execute(); + + foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) { + yield new VectorDocument( + id: $result['id'], + vector: new Vector($this->fromPgvector($result['embedding'])), + metadata: new Metadata(json_decode($result['metadata'] ?? 
'{}', true, 512, \JSON_THROW_ON_ERROR)), + score: $result['score'], + ); + } + } + + private function queryText(TextQuery $query, array $options): iterable + { + $where = $this->buildWhereClause($query->getFilter(), $options); + + $sql = \sprintf(<<>'text'), plainto_tsquery('english', :search_text)) AS score + FROM %s + %s + AND to_tsvector('english', metadata->>'text') @@ plainto_tsquery('english', :search_text) + ORDER BY score DESC + LIMIT %d + SQL, + $this->vectorFieldName, + $this->tableName, + $where, + $options['limit'] ?? 5, + ); + + $statement = $this->connection->prepare($sql); + $params = $this->buildParams($query->getFilter(), ['search_text' => $query->getText()], $options); + foreach ($params as $key => $value) { $statement->bindValue(':'.$key, $value); } @@ -207,6 +251,82 @@ public function query(Vector $vector, array $options = []): iterable } } + private function queryHybrid(HybridQuery $query, array $options): iterable + { + $where = $this->buildWhereClause($query->getFilter(), $options); + + $sql = \sprintf(<<>'text'), plainto_tsquery('english', :search_text)))) AS score + FROM %s + %s + AND to_tsvector('english', metadata->>'text') @@ plainto_tsquery('english', :search_text) + ORDER BY score DESC + LIMIT %d + SQL, + $this->vectorFieldName, + $this->vectorFieldName, + $this->distance->getComparisonSign(), + $this->tableName, + $where, + $options['limit'] ?? 
5, + ); + + $statement = $this->connection->prepare($sql); + $params = $this->buildParams($query->getFilter(), [ + 'embedding' => $this->toPgvector($query->getVector()), + 'search_text' => $query->getText(), + 'semantic_ratio' => $query->getSemanticRatio(), + 'keyword_ratio' => $query->getKeywordRatio(), + ], $options); + + foreach ($params as $key => $value) { + $statement->bindValue(':'.$key, $value); + } + + $statement->execute(); + + foreach ($statement->fetchAll(\PDO::FETCH_ASSOC) as $result) { + yield new VectorDocument( + id: $result['id'], + vector: new Vector($this->fromPgvector($result['embedding'])), + metadata: new Metadata(json_decode($result['metadata'] ?? '{}', true, 512, \JSON_THROW_ON_ERROR)), + score: $result['score'], + ); + } + } +
+    /**
+     * Builds the WHERE clause shared by the vector, text and hybrid queries.
+     *
+     * The clause always opens with "WHERE": queryText() and queryHybrid()
+     * append "AND <full-text predicate>" right after it, which is invalid SQL
+     * when nothing precedes the "AND".
+     *
+     * NOTE(review): the "maxScore" condition references :embedding, which
+     * queryText() does not bind - confirm whether "maxScore" should be ignored
+     * or rejected for text-only queries.
+     *
+     * @param mixed $filter  Filter taken from the query; only EqualFilter is translated
+     * @param array $options Query options; "maxScore" and "where" each add a condition
+     *
+     * @return string SQL fragment starting with "WHERE"
+     */
+    private function buildWhereClause($filter, array $options): string
+    {
+        $conditions = [];
+
+        if ($filter instanceof EqualFilter) {
+            // Field name and value are both bound (see buildParams()), never interpolated:
+            // a field such as "a.b" or "x' OR '1'='1" would otherwise break or inject SQL.
+            $conditions[] = 'metadata->>CAST(:filter_field AS text) = :filter_value';
+        }
+
+        if (isset($options['maxScore'])) {
+            $conditions[] = \sprintf('(%s %s :embedding) <= :maxScore', $this->vectorFieldName, $this->distance->getComparisonSign());
+        }
+
+        if (\is_string($options['where'] ?? null) && '' !== $options['where']) {
+            // Raw SQL fragment from the caller; its placeholders come from the "params" option.
+            $conditions[] = '('.$options['where'].')';
+        }
+
+        return 'WHERE '.([] === $conditions ? 'TRUE' : implode(' AND ', $conditions));
+    }
+
+    /**
+     * Collects the values bound to the placeholders of the generated SQL.
+     *
+     * @param mixed                $filter     Filter taken from the query; only EqualFilter adds parameters
+     * @param array<string, mixed> $baseParams Parameters specific to the query type
+     * @param array                $options    Query options; "params" and "maxScore" add parameters
+     *
+     * @return array<string, mixed> Values keyed by placeholder name, without the leading colon
+     */
+    private function buildParams($filter, array $baseParams, array $options): array
+    {
+        // Caller-supplied parameters accompany the raw "where" option.
+        $params = [...$baseParams, ...($options['params'] ?? [])];
+
+        if ($filter instanceof EqualFilter) {
+            $params['filter_field'] = $filter->getField();
+            $params['filter_value'] = $filter->getValue();
+        }
+
+        if (isset($options['maxScore'])) {
+            $params['maxScore'] = $options['maxScore'];
+        }
+
+        return $params;
+    }
+ private function toPgvector(VectorInterface $vector): string { return '['.implode(',', $vector->getData()).']'; diff --git a/src/store/src/Bridge/Qdrant/Store.php b/src/store/src/Bridge/Qdrant/Store.php index 27dee35f2a..f1bd8acfac 100644 --- a/src/store/src/Bridge/Qdrant/Store.php +++ b/src/store/src/Bridge/Qdrant/Store.php @@ -17,7 +17,11 @@ use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Exception\InvalidArgumentException; use Symfony\AI\Store\Exception\LogicException; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\Query\Filter\EqualFilter; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\VectorQuery; use Symfony\AI\Store\StoreInterface; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -78,6 +82,11 @@ public function remove(string|array $ids, array $options = []): void throw new LogicException('Method not implemented yet.'); } + public function supports(string $queryClass): bool + { + return VectorQuery::class === $queryClass; + } + /** * @param array{ * filter?: array, @@ -85,16 +94,21 @@ public function remove(string|array $ids, array $options = []): void * offset?: positive-int * } $options */ - public function query(Vector $vector, array $options = []): iterable + public function query(QueryInterface $query, array $options = []): iterable { + if (!$query instanceof VectorQuery) { + throw new UnsupportedQueryTypeException($query->getType(), $this); + } + $payload = [ - 'query' => $vector->getData(), + 'query' => $query->getVector()->getData(), 
'with_payload' => true, 'with_vector' => true, ]; - if (\array_key_exists('filter', $options)) { - $payload['filter'] = $options['filter']; + $filter = $this->buildFilter($query->getFilter(), $options); + if (null !== $filter) { + $payload['filter'] = $filter; } if (\array_key_exists('limit', $options)) { @@ -117,6 +131,22 @@ public function drop(array $options = []): void $this->request('DELETE', \sprintf('collections/%s', $this->collectionName)); }
+    /**
+     * Builds the "filter" entry of the search payload.
+     *
+     * The caller-supplied "filter" option is kept; when the query carries an
+     * EqualFilter as well, both are placed in one "must" clause so that each
+     * of them has to match.
+     *
+     * @param mixed                 $queryFilter Filter taken from the query; only EqualFilter is translated
+     * @param array{filter?: array} $options     Query options
+     *
+     * @return array|null The filter to send, or null when neither a query filter nor a "filter" option is given
+     */
+    private function buildFilter($queryFilter, array $options): ?array
+    {
+        // query() documents a "filter" option, so it must not be dropped silently.
+        $filter = $options['filter'] ?? null;
+
+        if (!$queryFilter instanceof EqualFilter) {
+            return $filter;
+        }
+
+        $condition = [
+            'key' => $queryFilter->getField(),
+            'match' => ['value' => $queryFilter->getValue()],
+        ];
+
+        if (null === $filter || [] === $filter) {
+            return ['must' => [$condition]];
+        }
+
+        // The caller's filter is nested as one condition next to the equality match.
+        return ['must' => [$filter, $condition]];
+    }
+ /** * @param array $payload * @param array $queryParameters diff --git a/src/store/src/Bridge/Redis/Store.php b/src/store/src/Bridge/Redis/Store.php index 6453010160..187279a595 100644 --- a/src/store/src/Bridge/Redis/Store.php +++ b/src/store/src/Bridge/Redis/Store.php @@ -17,7 +17,11 @@ use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Exception\LogicException; use Symfony\AI\Store\Exception\RuntimeException; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; use Symfony\AI\Store\ManagedStoreInterface; +use Symfony\AI\Store\Query\Filter\EqualFilter; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\VectorQuery; use Symfony\AI\Store\StoreInterface; /** @@ -112,25 +116,34 @@ public function remove(string|array $ids, array $options = []): void throw new LogicException('Method not implemented yet.'); } + public function supports(string $queryClass): bool + { + return VectorQuery::class === $queryClass; + } + /** * @param array{limit?: positive-int, maxScore?: float, where?: string} $options * * @return VectorDocument[] */ - public function query(Vector $vector, array $options = []): iterable + public function query(QueryInterface $query, array $options = []): iterable { + if (!$query instanceof 
VectorQuery) { + throw new UnsupportedQueryTypeException($query->getType(), $this); + } + $limit = $options['limit'] ?? 5; $maxScore = $options['maxScore'] ?? null; - $whereFilter = $options['where'] ?? '*'; + $whereFilter = $this->buildWhereFilter($query->getFilter(), $options); - $query = "({$whereFilter}) => [KNN {$limit} @embedding \$query_vector AS vector_score]"; + $queryString = "({$whereFilter}) => [KNN {$limit} @embedding \$query_vector AS vector_score]"; try { $results = $this->redis->rawCommand( 'FT.SEARCH', $this->indexName, - $query, - 'PARAMS', 2, 'query_vector', $this->toRedisVector($vector), + $queryString, + 'PARAMS', 2, 'query_vector', $this->toRedisVector($query->getVector()), 'RETURN', 4, '$.id', '$.metadata', '$.embedding', 'vector_score', 'SORTBY', 'vector_score', 'ASC', 'LIMIT', 0, $limit, @@ -194,4 +207,13 @@ private function toRedisVector(VectorInterface $vector): string return $bytes; } +
+    /**
+     * Builds the pre-filter expression placed in front of the KNN clause.
+     *
+     * The caller-supplied "where" option is kept (default "*"); when the query
+     * carries an EqualFilter as well, its tag condition is appended so that
+     * both have to match.
+     *
+     * @param mixed                 $queryFilter Filter taken from the query; only EqualFilter is translated
+     * @param array{where?: string} $options     Query options
+     *
+     * @return string The filter expression; query() wraps it in parentheses
+     */
+    private function buildWhereFilter($queryFilter, array $options): string
+    {
+        // query() documents a "where" option, so it must not be dropped silently.
+        $where = $options['where'] ?? '*';
+
+        if (!$queryFilter instanceof EqualFilter) {
+            return $where;
+        }
+
+        // Backslash-escape everything but letters, digits and "_" so that spaces or
+        // punctuation in the value cannot change the structure of the query string.
+        $rawValue = (string) $queryFilter->getValue();
+        $value = preg_replace('/([^\p{L}\p{N}_])/u', '\\\\$1', $rawValue) ?? $rawValue;
+
+        $condition = \sprintf('@%s:{%s}', $queryFilter->getField(), $value);
+
+        // "*" matches every document, so it adds nothing next to a concrete condition.
+        return '*' === $where ? $condition : \sprintf('(%s) %s', $where, $condition);
+    }
} diff --git a/src/store/src/Exception/UnsupportedQueryTypeException.php b/src/store/src/Exception/UnsupportedQueryTypeException.php new file mode 100644 index 0000000000..c381c97c4f --- /dev/null +++ b/src/store/src/Exception/UnsupportedQueryTypeException.php @@ -0,0 +1,32 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Exception; + +use Symfony\AI\Store\Query\QueryType; +use Symfony\AI\Store\StoreInterface; + +/** + * Exception thrown when a store does not support a specific query type. 
+ * + * @author Johannes Wachter + */ +final class UnsupportedQueryTypeException extends \RuntimeException +{ + public function __construct(QueryType $type, StoreInterface $store) + { + parent::__construct(\sprintf( + 'Query type "%s" is not supported by store "%s"', + $type->value, + $store::class + )); + } +} diff --git a/src/store/src/Query/Filter/EqualFilter.php b/src/store/src/Query/Filter/EqualFilter.php new file mode 100644 index 0000000000..f6898972cf --- /dev/null +++ b/src/store/src/Query/Filter/EqualFilter.php @@ -0,0 +1,55 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Query\Filter; + +/** + * Filter that matches documents where a field equals a specific value. + * + * Common use cases: + * - Filtering by locale: new EqualFilter('locale', 'en') + * - Filtering by category: new EqualFilter('category', 'news') + * - Filtering by status: new EqualFilter('status', 'published') + * + * @author Johannes Wachter + */ +final class EqualFilter implements FilterInterface +{ + public function __construct( + private readonly string $field, + private readonly mixed $value, + ) { + } + + public function getField(): string + { + return $this->field; + } + + public function getValue(): mixed + { + return $this->value; + } + + public function getType(): string + { + return 'equal'; + } + + public function toArray(): array + { + return [ + 'type' => $this->getType(), + 'field' => $this->field, + 'value' => $this->value, + ]; + } +} diff --git a/src/store/src/Query/Filter/FilterInterface.php b/src/store/src/Query/Filter/FilterInterface.php new file mode 100644 index 0000000000..303279f2f4 --- /dev/null +++ b/src/store/src/Query/Filter/FilterInterface.php @@ -0,0 +1,29 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Store\Query\Filter; + +/** + * Base interface for query filters. + * + * Filters define constraints on document metadata to narrow search results. + * + * @author Johannes Wachter + */ +interface FilterInterface +{ + public function getType(): string; + + /** + * @return array + */ + public function toArray(): array; +} diff --git a/src/store/src/Query/HybridQuery.php b/src/store/src/Query/HybridQuery.php new file mode 100644 index 0000000000..4394f6c446 --- /dev/null +++ b/src/store/src/Query/HybridQuery.php @@ -0,0 +1,67 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Query; + +use Symfony\AI\Platform\Vector\Vector; +use Symfony\AI\Store\Query\Filter\FilterInterface; + +/** + * Combined vector + text search query for hybrid retrieval. + * + * Hybrid queries combine semantic similarity (vector search) with keyword + * matching (full-text search) to provide more accurate and relevant results. 
+ * + * @author Johannes Wachter + */ +final class HybridQuery implements QueryInterface +{ + public function __construct( + private readonly Vector $vector, + private readonly string $text, + private readonly float $semanticRatio = 0.5, + private readonly ?FilterInterface $filter = null, + ) { + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new \InvalidArgumentException(\sprintf('Semantic ratio must be between 0.0 and 1.0, got %.2f', $semanticRatio)); + } + } + + public function getVector(): Vector + { + return $this->vector; + } + + public function getText(): string + { + return $this->text; + } + + public function getSemanticRatio(): float + { + return $this->semanticRatio; + } + + public function getKeywordRatio(): float + { + return 1.0 - $this->semanticRatio; + } + + public function getFilter(): ?FilterInterface + { + return $this->filter; + } + + public function getType(): QueryType + { + return QueryType::Hybrid; + } +} diff --git a/src/store/src/Query/QueryInterface.php b/src/store/src/Query/QueryInterface.php new file mode 100644 index 0000000000..1afae893c4 --- /dev/null +++ b/src/store/src/Query/QueryInterface.php @@ -0,0 +1,29 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Query; + +use Symfony\AI\Store\Query\Filter\FilterInterface; + +/** + * Base interface for all query types in the Store component. + * + * Queries represent search intent and are passed to Store::query() along with + * execution options (limit, etc.) in the $options array. 
+ * + * @author Johannes Wachter + */ +interface QueryInterface +{ + public function getType(): QueryType; + + public function getFilter(): ?FilterInterface; +} diff --git a/src/store/src/Query/QueryType.php b/src/store/src/Query/QueryType.php new file mode 100644 index 0000000000..18765be005 --- /dev/null +++ b/src/store/src/Query/QueryType.php @@ -0,0 +1,24 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Query; + +/** + * Defines the types of queries supported by the Store component. + * + * @author Johannes Wachter + */ +enum QueryType: string +{ + case Vector = 'vector'; + case Text = 'text'; + case Hybrid = 'hybrid'; +} diff --git a/src/store/src/Query/TextQuery.php b/src/store/src/Query/TextQuery.php new file mode 100644 index 0000000000..77c0d7bd5a --- /dev/null +++ b/src/store/src/Query/TextQuery.php @@ -0,0 +1,48 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Query; + +use Symfony\AI\Store\Query\Filter\FilterInterface; + +/** + * Full-text search query using textual keywords. 
+ * + * Used for: + * - FTS-only backends that don't support vectors + * - Internal vectorization (e.g., ChromaDB's queryTexts) + * - Retriever pre-extensions that transform to HybridQuery + * + * @author Johannes Wachter + */ +final class TextQuery implements QueryInterface +{ + public function __construct( + private readonly string $text, + private readonly ?FilterInterface $filter = null, + ) { + } + + public function getText(): string + { + return $this->text; + } + + public function getFilter(): ?FilterInterface + { + return $this->filter; + } + + public function getType(): QueryType + { + return QueryType::Text; + } +} diff --git a/src/store/src/Query/VectorQuery.php b/src/store/src/Query/VectorQuery.php new file mode 100644 index 0000000000..5a60c1146d --- /dev/null +++ b/src/store/src/Query/VectorQuery.php @@ -0,0 +1,48 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Store\Query; + +use Symfony\AI\Platform\Vector\Vector; +use Symfony\AI\Store\Query\Filter\FilterInterface; + +/** + * Classic vector search query using semantic similarity. + * + * Supported by all existing vector stores. This is the traditional approach + * where a query vector is compared against stored document vectors using + * distance metrics (cosine, euclidean, etc.). 
+ * + * @author Johannes Wachter + */ +final class VectorQuery implements QueryInterface +{ + public function __construct( + private readonly Vector $vector, + private readonly ?FilterInterface $filter = null, + ) { + } + + public function getVector(): Vector + { + return $this->vector; + } + + public function getFilter(): ?FilterInterface + { + return $this->filter; + } + + public function getType(): QueryType + { + return QueryType::Vector; + } +} diff --git a/src/store/src/Retriever.php b/src/store/src/Retriever.php index 79e91297cd..45c056d17f 100644 --- a/src/store/src/Retriever.php +++ b/src/store/src/Retriever.php @@ -15,6 +15,10 @@ use Psr\Log\NullLogger; use Symfony\AI\Store\Document\VectorDocument; use Symfony\AI\Store\Document\VectorizerInterface; +use Symfony\AI\Store\Query\HybridQuery; +use Symfony\AI\Store\Query\QueryInterface; +use Symfony\AI\Store\Query\TextQuery; +use Symfony\AI\Store\Query\VectorQuery; /** * @author Oskar Stark @@ -22,8 +26,8 @@ final class Retriever implements RetrieverInterface { public function __construct( - private readonly VectorizerInterface $vectorizer, private readonly StoreInterface $store, + private readonly ?VectorizerInterface $vectorizer = null, private readonly LoggerInterface $logger = new NullLogger(), ) { } @@ -35,11 +39,11 @@ public function retrieve(string $query, array $options = []): iterable { $this->logger->debug('Starting document retrieval', ['query' => $query, 'options' => $options]); - $vector = $this->vectorizer->vectorize($query); + $queryObject = $this->createQuery($query, $options); - $this->logger->debug('Query vectorized, searching store'); + $this->logger->debug('Searching store', ['query_type' => $queryObject->getType()->value]); - $documents = $this->store->query($vector, $options); + $documents = $this->store->query($queryObject, $options); $count = 0; foreach ($documents as $document) { @@ -49,4 +53,29 @@ public function retrieve(string $query, array $options = []): iterable 
$this->logger->debug('Document retrieval completed', ['retrieved_count' => $count]); } + + private function createQuery(string $query, array $options): QueryInterface + { + if (null === $this->vectorizer) { + $this->logger->debug('No vectorizer configured, using TextQuery if supported'); + + return new TextQuery($query); + } + + if (!$this->store->supports(VectorQuery::class)) { + $this->logger->debug('Store does not support vector queries, falling back to TextQuery'); + + return new TextQuery($query); + } + + if ($this->store->supports(HybridQuery::class)) { + $this->logger->debug('Store supports hybrid queries, using HybridQuery with semantic ratio', ['semanticRatio' => $options['semanticRatio'] ?? 0.5]); + + return new HybridQuery($this->vectorizer->vectorize($query), $query, $options['semanticRatio'] ?? 0.5); + } + + $this->logger->debug('Store supports vector queries, using VectorQuery'); + + return new VectorQuery($this->vectorizer->vectorize($query)); + } } diff --git a/src/store/src/StoreInterface.php b/src/store/src/StoreInterface.php index 9eb129edc3..7fc45e35ec 100644 --- a/src/store/src/StoreInterface.php +++ b/src/store/src/StoreInterface.php @@ -11,8 +11,9 @@ namespace Symfony\AI\Store; -use Symfony\AI\Platform\Vector\Vector; use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\UnsupportedQueryTypeException; +use Symfony\AI\Store\Query\QueryInterface; /** * @author Christopher Hertel @@ -34,6 +35,13 @@ public function remove(string|array $ids, array $options = []): void; * @param array $options * * @return iterable + * + * @throws UnsupportedQueryTypeException if query type not supported + */ + public function query(QueryInterface $query, array $options = []): iterable; + + /** + * @param class-string $queryClass The query class to check */ - public function query(Vector $vector, array $options = []): iterable; + public function supports(string $queryClass): bool; }