We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent f35c24b, commit c5b24d1 (Copy full SHA for c5b24d1)
1 file changed
welearn_datastack/nodes_workflow/DocumentClassifier/generate_to_classify_batch.py
@@ -33,6 +33,7 @@ def main() -> None:
33
parallelism_threshold: int = int(os.getenv("PARALLELISM_THRESHOLD", 100))
34
parallelism_max: int = int(os.getenv("PARALLELISM_URL_MAX", 15))
35
batch_urls_directory: str = os.getenv("BATCH_URLS_DIRECTORY", "batch_urls")
36
+ corpus_name: str = os.getenv("PICK_CORPUS_NAME", "*")
37
qty_max_str: str | None = os.getenv("PICK_QTY_MAX", None)
38
39
qty_max: int | None = None
@@ -61,6 +62,7 @@ def main() -> None:
61
62
qty_max=qty_max,
63
process_titles=[Step.DOCUMENT_VECTORIZED],
64
weighed_scope=WeighedScope.DOCUMENT,
65
+ corpus_name=corpus_name,
66
)
67
68
logger.info("'%s' Docsids were retrieved", len(ids_to_batch))
0 commit comments