11import logging
22import time
3- from concurrent . futures import ThreadPoolExecutor , as_completed
3+ from queue import Queue
44
55from astrometry_net_client .session import Session
66from astrometry_net_client .settings import Settings
@@ -62,9 +62,7 @@ def __init__(self, session=None, settings=None, **kwargs):
6262 def upload_files_gen (
6363 self ,
6464 files_iter ,
65- filter_func = None ,
66- filter_args = None ,
67- workers = MAX_WORKERS ,
65+ queue_size = MAX_WORKERS ,
6866 ):
6967 """
7068 Generator which uploads a number of files concurrently, yielding the
@@ -79,15 +77,11 @@ def upload_files_gen(
7977 files_iter: iterable
8078 Some iterable containing paths to the files which will be uploaded.
8179 Is fully consumed before any result is yielded.
82- workers: int, optional
83- A positive integer, controlling the amount of workers to use for
84- the processing. Will not exceed the value of
80+ queue_size: int, optional
81+ A positive integer, controlling the size of the queue. This will
82+ determine the maximum number of simultaneous submissions. Must be
83+ greater than 0 and lower than :py:const:`MAX_WORKERS`. Default is
8584 :py:const:`MAX_WORKERS`.
86- filter_func: Callable
87- Predicate filter function which takes in the `filename` and
88- optionally some argument (`filter_args`).
89- filter_args: List
90- Arguments which are to be passed to the filter function.
9185
9286 Yields
9387 ------
@@ -97,75 +91,80 @@ def upload_files_gen(
9791 corresponding filename. Yields when the Job is finished.
9892 NOTE: Order of yielded filenames can (and likely will) be different
9993 from the given ``files_iter``
94+
95+ Raises
96+ ------
97+ ValueError
98+ When the queue_size is invalid.
10099 """
101- workers = min (MAX_WORKERS , workers )
102- with ThreadPoolExecutor (max_workers = workers ) as executor :
103- log .info ("Spawned executor {}" .format (executor ))
104-
105- # submit the files & save which future corresponds to which
106- # filename
107- future_to_file = {
108- executor .submit (
109- self .filtered_upload_wrapper ,
110- filename ,
111- filter_func = filter_func ,
112- filter_args = filter_args ,
113- ): filename
114- for filename in files_iter
115- }
100+ SLEEP_TIME = 0.3 # seconds
101+
102+ files_iter = iter (files_iter )
103+ if queue_size < 1 or queue_size > MAX_WORKERS :
104+ raise ValueError (
105+ "queue_size must be greater than 0 and less or equal to " ,
106+ f"{ MAX_WORKERS } , was: { queue_size } " ,
107+ )
108+ processing_queue = Queue (maxsize = queue_size )
109+
110+ # Populate queue initially
111+ for _ , filename in zip (range (queue_size ), files_iter ):
112+ self ._insert_submission (filename , processing_queue )
113+
114+ while not processing_queue .empty ():
115+ filename , submission , job = processing_queue .get ()
116+ log_msg = "Checking file {}, job exists: {}"
117+ log .debug (log_msg .format (filename , job is not None ))
118+ # The item in the queue has 2 states; if it is still only a
119+ # submission job will be None and we have to create a job out of
120+ # it. When the job is made, we can check if the job is done. When
121+ # the job is finished return (yield) the value, otherwise put it
122+ # back in the queue.
123+
124+ if job is None :
125+ submission .status ()
126+ if submission .done ():
127+ job = submission .jobs [0 ]
128+ else :
129+ processing_queue .put ((filename , submission , job ))
130+ continue
116131
117- # iterate over the results once they are completed.
118- for future in as_completed (future_to_file ):
119- result_filename = future_to_file [future ]
132+ job .status ()
133+ if job .done ():
120134 try :
121- res_job = future .result ()
122- except Exception as e :
123- # This exception is thrown inside the computed function.
124- err_msg = "File {} stopped with exception {}"
125- log .error (err_msg .format (result_filename , e ))
135+ filename = next (files_iter )
136+ except StopIteration :
137+ pass
126138 else :
127- if res_job is not None : # ignore if file was filtered out
128- yield res_job , result_filename
139+ self ._insert_submission (filename , processing_queue )
140+ log_msg = "FINISHED submission {}, yielding..."
141+ log .info (log_msg .format (filename ))
142+ yield (job , filename )
143+ else :
144+ processing_queue .put ((filename , submission , job ))
129145
130- def filtered_upload_wrapper (
131- self , filename , filter_func = None , filter_args = None , * args , ** kwargs
132- ):
146+ time . sleep ( SLEEP_TIME )
147+
148+ def _insert_submission ( self , filename , queue ):
133149 """
134- Wrapper around :py:func:`upload_file` which filters the given file
135- based on a specified filter function. Main use for this is a
136- computationally heavy filter function, like counting number of sources
137- locally, and only uploading if not enough are detected.
150+ Helper function which creates an upload for the given filename, and
151+ inserts the submission in a queue. This is not intended to be used
152+ by a user.
138153
139154 Parameters
140155 ----------
141156 filename: str
142- File to be uploaded. See :py:func:`upload_file`.
143- filter_func: Callable
144- Predicate filter function which takes in the `filename` and
145- optionally some argument (`filter_args`).
146- filter_args: List
147- Arguments which are to be passed to the filter function.
148- args: other arguments
149- Directly passed to :py:func:`upload_file`
150- kwargs: keyword arguments
151- Directly passed to :py:func:`upload_file`
152-
153- Returns
154- -------
155- Job or None: :py:class:`astrometry_net_client.statusables.Job`, `None`
156- Will be the job of the resulting upload (see
157- :py:func:`upload_file`), or `None` when `filter_func` evaluated to
158- `False`.
157+ The filename of the file to be submitted into the queue.
158+ queue: Queue
159+ The queue in which to insert the submission.
159160 """
160- if filter_args is None :
161- # allow arguments to be unpackable if it is not specified
162- filter_args = []
163-
164- if filter_func is not None and not filter_func (filename , * filter_args ):
165- log .info ("Filter function failed, skipping upload" )
166- return None
167-
168- return self .upload_file (filename , * args , ** kwargs )
161+ log_msg = "Submitting file {}"
162+ log .info (log_msg .format (filename ))
163+ upl = FileUpload (
164+ filename , session = self .session , settings = self .settings
165+ )
166+ submission = upl .submit ()
167+ queue .put ((filename , submission , None ))
169168
170169 def upload_file (self , filename , settings = None ):
171170 """
0 commit comments