Skip to content

Commit e9623a7

Browse files
committed
check reindexing status + better reindexing logic
- A GET request to /fulltext/reindex returns JSON with reindexingStatus, which can be 'none', 'requested', 'in_progress', or 'complete'. If reindexing is complete but ES is empty, the returned reindexingStatus is 'none'. - A POST request to /fulltext/reindex only works if ES has 0 indexed records for that library and the current reindexing status is either 'none' or 'complete'. The reindexing status file is deleted before a new reindexing request is sent to SQS. In both cases, the reindexing status file can exist while ES is empty if a library was deleted from ES, reindexed, then deleted again, and another reindexing request is submitted. Unlikely, but not impossible over the span of multiple years.
1 parent 18dcb79 commit e9623a7

1 file changed

Lines changed: 44 additions & 20 deletions

File tree

controllers/FullTextController.php

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -159,42 +159,66 @@ public function itemContent() {
159159
}
160160

161161
public function reindex() {
162-
$this->allowMethods(['POST']);
162+
// POST = request to reindex a library removed from ES
163+
// GET = fetch the status of reindexing
164+
$this->allowMethods(['POST', 'GET']);
163165

164-
$redis = Z_Redis::get('request-limiter');
165-
166-
// Only allow requests to this endpoint to come in once every $REINDEX_WAIT_MINUTES minutes
167-
if ($redis->get("reindex_$this->objectLibraryID")) {
168-
$this->e429("The request to reindex this library has already been submitted.");
169-
}
170-
else {
171-
$redis->setex("reindex_$this->objectLibraryID", Z_CONFIG::$REINDEX_WAIT_MINUTES * 60, 1);
172-
}
173-
174166
// Check for general library access
175167
if (!$this->permissions->canAccess($this->objectLibraryID)) {
176168
$this->e403();
177169
}
178170

179-
// Check how many records we have in Elasticsearch and how many attachments we have.
180-
$esCount = Zotero_FullText::countInLibrary($this->objectLibraryID);
181-
$expectedCount = Zotero_Libraries::countAttachments($this->objectLibraryID);
182-
// If they're equal, everything is indexed.
183-
if ($esCount == $expectedCount) {
184-
$this->e400("The library has been indexed.");
171+
// Ensure that if multiple requests arrive at roughly the same time, only one goes through
172+
if ($this->method == 'POST') {
173+
$redis = Z_Redis::get('request-limiter');
174+
175+
if ($redis->get("reindex_$this->objectLibraryID")) {
176+
$this->e429("The request to reindex this library has already been submitted.");
177+
}
178+
else {
179+
$redis->setex("reindex_$this->objectLibraryID", Z_CONFIG::$REINDEX_WAIT_MINUTES * 60, 1);
180+
}
185181
}
186182

187-
// If there exists _reindex_status file, reindexing is already in progress - do nothing
183+
$esCount = Zotero_FullText::countInLibrary($this->objectLibraryID);
184+
$status = "";
188185
$s3Client = Z_Core::$AWS->createS3();
189186
try {
190-
$result = $s3Client->headObject([
187+
// Try to fetch the current reindexing status
188+
$result = $s3Client->getObject([
191189
'Bucket' => Z_CONFIG::$S3_BUCKET_FULLTEXT,
192190
'Key' => $this->objectLibraryID . "/" . "_reindex_status"
193191
]);
194-
$this->e400("The library is being indexed.");
192+
$json = json_decode($result['Body']);
193+
$status = $json->status;
195194
}
196195
catch (\Aws\S3\Exception\S3Exception $e) { }
197196

197+
// GET = return reindexing status
198+
if ($this->method == "GET") {
199+
if (($esCount == 0 && $status == 'complete') || $status == "") {
200+
$status = 'none';
201+
}
202+
echo Zotero_Utilities::formatJSON(["reindexingStatus" => $status]);
203+
$this->end();
204+
return;
205+
}
206+
207+
// Reindexing is only possible when ES has no records for this library
208+
if ($esCount !== 0) {
209+
$this->e400("Reindexing is only possible when the library has no indexed records.");
210+
}
211+
// Cannot start reindexing if it has already been requested
212+
if (in_array($status, ['requested', 'in_progress'])) {
213+
$this->e400("The library is being indexed.");
214+
}
215+
// If the reindex status file exists, delete it before a fresh reindex run
216+
if ($status !== "") {
217+
$s3Client->deleteObject([
218+
'Bucket' => Z_CONFIG::$S3_BUCKET_FULLTEXT,
219+
'Key' => $this->objectLibraryID . "/" . "_reindex_status"
220+
]);
221+
}
198222
// Request reindexing
199223
Z_SQS::send(Z_CONFIG::$REINDEX_QUEUE_URL, json_encode(['libraryID' => $this->objectLibraryID]));
200224
$this->end();

0 commit comments

Comments
 (0)