|
12 | 12 | import os
|
13 | 13 | from abc import ABC, abstractmethod
|
14 | 14 |
|
15 |
| -from opensearchpy import OpenSearchException |
| 15 | +import opensearchpy.exceptions |
16 | 16 |
|
| 17 | +from osbenchmark import exceptions |
17 | 18 | from osbenchmark.utils import console
|
18 | 19 | from osbenchmark.workload_generator.config import CustomWorkload
|
19 | 20 |
|
@@ -41,8 +42,10 @@ def extract_indices(self, workload_path):
|
41 | 42 | try:
|
42 | 43 | for index in self.custom_workload.indices:
|
43 | 44 | extracted_indices += self.extract(workload_path, index.name)
|
44 |
| - except OpenSearchException: |
45 |
| - self.logger("Failed at extracting index [%s]", index) |
| 45 | + except opensearchpy.exceptions.NotFoundError: |
| 46 | + raise exceptions.SystemSetupError(f"Index [{index.name}] does not exist.") |
| 47 | + except opensearchpy.OpenSearchException: |
| 48 | + self.logger.error("Failed at extracting index [%s]", index) |
46 | 49 | failed_indices += index
|
47 | 50 |
|
48 | 51 | return extracted_indices, failed_indices
|
@@ -138,6 +141,9 @@ def extract_documents(self, index, documents_limit=None):
|
138 | 141 |
|
139 | 142 |
|
140 | 143 | class SequentialCorpusExtractor(CorpusExtractor):
|
| 144 | + DEFAULT_TEST_MODE_DOC_COUNT = 1000 |
| 145 | + DEFAULT_TEST_MODE_SUFFIX = "-1k" |
| 146 | + |
141 | 147 | def __init__(self, custom_workload, client):
|
142 | 148 | self.custom_workload: CustomWorkload = custom_workload
|
143 | 149 | self.client = client
|
@@ -173,15 +179,30 @@ def extract_documents(self, index, documents_limit=None):
|
173 | 179 |
|
174 | 180 | documents_to_extract = total_documents if not documents_limit else min(total_documents, documents_limit)
|
175 | 181 |
|
| 182 | + if documents_limit: |
| 183 | + # The only time documents-1k.json will contain fewer than 1K documents is |
| 184 | + # when documents_limit is < 1k or the source index has fewer than 1k documents |
| 185 | + if documents_limit < self.DEFAULT_TEST_MODE_DOC_COUNT: |
| 186 | + test_mode_warning_msg = "Due to --number-of-docs set by user, " + \ |
| 187 | + f"test-mode docs will be less than the default {self.DEFAULT_TEST_MODE_DOC_COUNT} documents." |
| 188 | + console.warn(test_mode_warning_msg) |
| 189 | + |
| 190 | + # Notify users when they specified more documents than available in index |
| 191 | + if documents_limit > total_documents: |
| 192 | + documents_to_extract_warning_msg = f"User requested extraction of {documents_limit} documents " + \ |
| 193 | + f"but there are only {total_documents} documents in {index}. " + \ |
| 194 | + f"Will only extract {total_documents} documents from {index}." |
| 195 | + console.warn(documents_to_extract_warning_msg) |
| 196 | + |
176 | 197 | if documents_to_extract > 0:
|
177 | 198 | logger.info("[%d] total docs in index [%s]. Extracting [%s] docs.", total_documents, index, documents_to_extract)
|
178 | 199 | docs_path = self._get_doc_outpath(self.custom_workload.workload_path, index)
|
179 | 200 | # Create test mode corpora
|
180 | 201 | self.dump_documents(
|
181 | 202 | self.client,
|
182 | 203 | index,
|
183 |
| - self._get_doc_outpath(self.custom_workload.workload_path, index, "-1k"), |
184 |
| - min(documents_to_extract, 1000), |
| 204 | + self._get_doc_outpath(self.custom_workload.workload_path, index, self.DEFAULT_TEST_MODE_SUFFIX), |
| 205 | + min(documents_to_extract, self.DEFAULT_TEST_MODE_DOC_COUNT), |
185 | 206 | " for test mode")
|
186 | 207 | # Create full corpora
|
187 | 208 | self.dump_documents(self.client, index, docs_path, documents_to_extract)
|
|
0 commit comments