@@ -696,10 +696,11 @@ def __init__(self, workload, params, **kwargs):
696696 raise exceptions .InvalidSyntax ("'batch-size' must be numeric" )
697697
698698 self .ingest_percentage = self .float_param (params , name = "ingest-percentage" , default_value = 100 , min_value = 0 , max_value = 100 )
699+ self .looped = params .get ("looped" , False )
699700 self .param_source = PartitionBulkIndexParamSource (self .corpora , self .batch_size , self .bulk_size ,
700701 self .ingest_percentage , self .id_conflicts ,
701702 self .conflict_probability , self .on_conflict ,
702- self .recency , self .pipeline , self ._params )
703+ self .recency , self .pipeline , self .looped , self . _params )
703704
704705 def float_param (self , params , name , default_value , min_value , max_value , min_operator = operator .le ):
705706 try :
@@ -745,7 +746,7 @@ def params(self):
745746
746747class PartitionBulkIndexParamSource :
747748 def __init__ (self , corpora , batch_size , bulk_size , ingest_percentage , id_conflicts , conflict_probability ,
748- on_conflict , recency , pipeline = None , original_params = None ):
749+ on_conflict , recency , pipeline = None , looped = False , original_params = None ):
749750 """
750751
751752 :param corpora: Specification of affected document corpora.
@@ -758,6 +759,7 @@ def __init__(self, corpora, batch_size, bulk_size, ingest_percentage, id_conflic
758759 :param recency: A number between [0.0, 1.0] indicating whether to bias generation of conflicting ids towards more recent ones.
759760 May be None.
760761 :param pipeline: The name of the ingest pipeline to run.
762+ :param looped: Set to True for looped mode, in which bulk requests are repeated from the beginning once the entire corpus has been ingested.
761763 :param original_params: The original dict passed to the parent parameter source.
762764 """
763765 self .corpora = corpora
@@ -771,6 +773,7 @@ def __init__(self, corpora, batch_size, bulk_size, ingest_percentage, id_conflic
771773 self .on_conflict = on_conflict
772774 self .recency = recency
773775 self .pipeline = pipeline
776+ self .looped = looped
774777 self .original_params = original_params
775778 # this is only intended for unit-testing
776779 self .create_reader = original_params .pop ("__create_reader" , create_default_reader )
@@ -793,7 +796,12 @@ def params(self):
793796 # self.internal_params always reads all files. This is necessary to ensure we terminate early in case
794797 # the user has specified ingest percentage.
795798 if self .current_bulk == self .total_bulks :
796- raise StopIteration ()
799+ if self .looped :
800+ self .current_bulk = 0
801+ self ._init_internal_params ()
802+ else :
803+ raise StopIteration ()
804+
797805 self .current_bulk += 1
798806 return next (self .internal_params )
799807
0 commit comments