diff --git a/CHANGELOG.md b/CHANGELOG.md index e186ae29e..7211b282a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Backward Compatibility Breaks ### Added +* Added support for "search after" based pagination [#1645](https://github.com/ruflin/Elastica/issues/1645) + ### Changed ### Deprecated ### Removed diff --git a/Makefile b/Makefile index e5ce16618..93c1c90b1 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ tools/phive.phar: vendor/autoload.php: # Installing Symfony Flex: parallel download of dependency libs + composer global config --no-plugins allow-plugins.symfony/flex true composer global require --no-progress --no-scripts --no-plugins symfony/flex composer install --prefer-dist --no-interaction ${COMPOSER_FLAGS} diff --git a/src/Document.php b/src/Document.php index 82a5ad28b..674b6de39 100644 --- a/src/Document.php +++ b/src/Document.php @@ -234,6 +234,18 @@ public function hasPipeline(): bool return $this->hasParam('pipeline'); } + /** + * @see \Elastica\Query::setSearchAfter() + * + * Returns the sort position for this document, which can be used as starting offset to search next hits page. + * + * @return array + */ + public function getSort(): array + { + return $this->getParam('sort'); + } + /** * Returns the document as an array. */ diff --git a/src/Index.php b/src/Index.php index c146edceb..afe1b78f3 100644 --- a/src/Index.php +++ b/src/Index.php @@ -539,6 +539,82 @@ public function count($query = '', string $method = Request::POST): int return $search->count('', false, $method); } + /** + * Iterates over all documents, matching given query, using "search after" based pagination. + * + * @see \Elastica\Query::setSearchAfter() + * + * @param mixed $query search query with sort by unique document field. + * @param int $batchSize the number of rows to be returned in each batch (e.g. each query size). + * @param array|null $options + * @param string $method request method. + * @return \Generator|\Elastica\Document[] list of all documents matched the given query as iterator. + */ + public function each($query = '', int $batchSize = 100, ?array $options = null, string $method = Request::POST): \Generator + { + $query = Query::create($query); + + if (!$query->hasParam('sort')) { + throw new \LogicException('Query must have "sort" parameter in order to use "search after" based iteration.'); + } + + $query->setSize($batchSize); + + while (true) { + $resultSet = $this->search($query, $options, $method); + foreach ($resultSet->getDocuments() as $document) { + yield $document; + } + + if (count($resultSet->getDocuments()) < $batchSize) { + break; + } + + $query->setSearchAfter($document->getSort()); + } + } + + /** + * Iterates over all documents in batches, matching given query, using "search after" based pagination. + * + * @see \Elastica\Query::setSearchAfter() + * + * @param mixed $query search query with sort by unique document field. + * @param int $batchSize the number of rows to be returned in each batch (e.g. each query size). + * @param array|null $options + * @param string $method request method. + * @return \Generator|\Elastica\Document[][] list of document batches matched the given query as iterator. + */ + public function batch($query = '', int $batchSize = 100, ?array $options = null, string $method = Request::POST): \Generator + { + $query = Query::create($query); + + if (!$query->hasParam('sort')) { + throw new \LogicException('Query must have "sort" parameter in order to use "search after" based iteration.'); + } + + $query->setSize($batchSize); + + while (true) { + $resultSet = $this->search($query, $options, $method); + + $documents = $resultSet->getDocuments(); + if (empty($documents)) { + break; + } + + yield $documents; + + if (count($documents) < $batchSize) { + break; + } + + $lastDocument = array_pop($documents); + + $query->setSearchAfter($lastDocument->getSort()); + } + } + /** * Opens an index. * diff --git a/src/Query.php b/src/Query.php index fe1e975e6..c2abbaed7 100644 --- a/src/Query.php +++ b/src/Query.php @@ -476,4 +476,22 @@ public function setTrackTotalHits($trackTotalHits = true): self return $this->setParam('track_total_hits', $trackTotalHits); } + + /** + * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#search-after + * + * Allows retrieval of the next page of hits using sort values from previous page. + * The value of {@see \Elastica\Document::getSort()} should be passed as argument here. + * + * @param array $searchAfter the sort of the last document from previous search result set. + * @return static self reference. + */ + public function setSearchAfter(array $searchAfter): self + { + foreach ($searchAfter as $value) { + $this->addParam('search_after', $value); + } + + return $this; + } } diff --git a/tests/IndexTest.php b/tests/IndexTest.php index 11c0f8bf5..8a5d2bff6 100644 --- a/tests/IndexTest.php +++ b/tests/IndexTest.php @@ -9,6 +9,7 @@ use Elastica\Document; use Elastica\Index; use Elastica\Mapping; +use Elastica\Query; use Elastica\Query\QueryString; use Elastica\Query\SimpleQueryString; use Elastica\Query\Term; @@ -926,4 +927,70 @@ public function testGetEmptyAliases(): void $this->assertEquals([], $index->getAliases()); } + + /** + * @group functional + */ + public function testIterateEach(): void + { + $index = $this->_createIndex(); + $index->setMapping(new Mapping([ + 'country_id' => ['type' => 'integer'], + 'region_id' => ['type' => 'integer'], + ])); + + $index->addDocuments([ + new Document('1', ['country_id' => 1, 'region_id' => 1]), + new Document('2', ['country_id' => 1, 'region_id' => 2]), + new Document('3', ['country_id' => 2, 'region_id' => 3]), + ]); + $index->refresh(); + + $query = new Query(); + $query->setSize(2); + $query->setSort([ + 'region_id' => 'desc', + ]); + + $documentIds = []; + foreach ($index->each($query, 2) as $document) { + $documentIds[] = $document->getId(); + } + + $this->assertEquals(['3', '2', '1'], $documentIds); + } + + /** + * @group functional + */ + public function testIterateBatch(): void + { + $index = $this->_createIndex(); + $index->setMapping(new Mapping([ + 'country_id' => ['type' => 'integer'], + 'region_id' => ['type' => 'integer'], + ])); + + $index->addDocuments([ + new Document('1', ['country_id' => 1, 'region_id' => 1]), + new Document('2', ['country_id' => 1, 'region_id' => 2]), + new Document('3', ['country_id' => 2, 'region_id' => 3]), + ]); + $index->refresh(); + + $query = new Query(); + $query->setSize(2); + $query->setSort([ + 'region_id' => 'desc', + ]); + + $documentIds = []; + foreach ($index->batch($query, 2) as $documents) { + foreach ($documents as $document) { + $documentIds[] = $document->getId(); + } + } + + $this->assertEquals(['3', '2', '1'], $documentIds); + } } diff --git a/tests/QueryTest.php b/tests/QueryTest.php index 328bf5b19..529a06464 100644 --- a/tests/QueryTest.php +++ b/tests/QueryTest.php @@ -680,4 +680,46 @@ public function testSetTrackTotalHits(): void $this->assertEquals(25, $resultSet->getTotalHits()); $this->assertEquals('gte', $resultSet->getTotalHitsRelation()); } + + /** + * @group functional + */ + public function testSearchAfter(): void + { + $index = $this->_createIndex(); + $index->setMapping(new Mapping([ + 'country_id' => ['type' => 'integer'], + 'region_id' => ['type' => 'integer'], + ])); + + $index->addDocuments([ + new Document('1', ['country_id' => 1, 'region_id' => 1]), + new Document('2', ['country_id' => 1, 'region_id' => 2]), + new Document('3', ['country_id' => 2, 'region_id' => 3]), + ]); + $index->refresh(); + + $query = new Query(); + $query->setSize(2); + $query->setSort([ + 'country_id' => 'desc', + 'region_id' => 'asc', + ]); + $firstPageResultSet = $index->search($query); + + $documents = $firstPageResultSet->getDocuments(); + $this->assertCount(2, $documents); + + /** @var Document $lastDocument */ + $lastDocument = array_pop($documents); + $lastDocument->getParam('sort'); + + $this->assertNotEmpty($lastDocument->getSort()); + + $query->setSearchAfter($lastDocument->getSort()); + + $secondPageResultSet = $index->search($query); + $documents = $secondPageResultSet->getDocuments(); + $this->assertCount(1, $documents); + } }