Skip to content

Commit c5b24d1

Browse files
committed
Support corpus_name
1 parent f35c24b commit c5b24d1

1 file changed

Lines changed: 2 additions & 0 deletions

File tree

welearn_datastack/nodes_workflow/DocumentClassifier/generate_to_classify_batch.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ def main() -> None:
33 33
parallelism_threshold: int = int(os.getenv("PARALLELISM_THRESHOLD", 100))
34 34
parallelism_max: int = int(os.getenv("PARALLELISM_URL_MAX", 15))
35 35
batch_urls_directory: str = os.getenv("BATCH_URLS_DIRECTORY", "batch_urls")
36 +
corpus_name: str = os.getenv("PICK_CORPUS_NAME", "*")
36 37
qty_max_str: str | None = os.getenv("PICK_QTY_MAX", None)
37 38

38 39
qty_max: int | None = None
@@ -61,6 +62,7 @@ def main() -> None:
61 62
qty_max=qty_max,
62 63
process_titles=[Step.DOCUMENT_VECTORIZED],
63 64
weighed_scope=WeighedScope.DOCUMENT,
65 +
corpus_name=corpus_name,
64 66
)
65 67
)
66 68
logger.info("'%s' Docsids were retrieved", len(ids_to_batch))

0 commit comments

Comments
 (0)