-
Notifications
You must be signed in to change notification settings - Fork 234
Fix race condition in JobsList
#7061
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -57,10 +57,11 @@ def __init__(self, authinfo: AuthInfo, transport_queue: 'TransportQueue', last_u | |||||||||
| self._loop = transport_queue.loop | ||||||||||
| self._logger = logging.getLogger(__name__) | ||||||||||
|
|
||||||||||
| self._jobs_cache: Dict[Hashable, 'JobInfo'] = {} | ||||||||||
| self._job_update_requests: Dict[Hashable, asyncio.Future] = {} # Mapping: {job_id: Future} | ||||||||||
| self._jobs_cache: Dict[str, 'JobInfo'] = {} | ||||||||||
| self._job_update_requests: Dict[str, asyncio.Future] = {} # Mapping: {job_id: Future} | ||||||||||
| self._last_updated = last_updated | ||||||||||
| self._update_handle: Optional[asyncio.TimerHandle] = None | ||||||||||
| self._polling_jobs: List[str] = [] | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's use the `list` builtin.
Suggested change
|
||||||||||
|
|
||||||||||
| @property | ||||||||||
| def logger(self) -> logging.Logger: | ||||||||||
|
|
@@ -87,7 +88,7 @@ def last_updated(self) -> Optional[float]: | |||||||||
| """ | ||||||||||
| return self._last_updated | ||||||||||
|
|
||||||||||
| async def _get_jobs_from_scheduler(self) -> Dict[Hashable, 'JobInfo']: | ||||||||||
| async def _get_jobs_from_scheduler(self) -> Dict[str, 'JobInfo']: | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Optional, since you're already touching the type:
Suggested change
|
||||||||||
| """Get the current jobs list from the scheduler. | ||||||||||
|
|
||||||||||
| :return: a mapping of job ids to :py:class:`~aiida.schedulers.datastructures.JobInfo` instances | ||||||||||
|
|
@@ -100,11 +101,13 @@ async def _get_jobs_from_scheduler(self) -> Dict[Hashable, 'JobInfo']: | |||||||||
| scheduler = self._authinfo.computer.get_scheduler() | ||||||||||
| scheduler.set_transport(transport) | ||||||||||
|
|
||||||||||
| self._polling_jobs = [str(job_id) for job_id, _ in self._job_update_requests.items()] | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could be slightly simpler:
Suggested change
|
||||||||||
|
|
||||||||||
| kwargs: Dict[str, Any] = {'as_dict': True} | ||||||||||
| if scheduler.get_feature('can_query_by_user'): | ||||||||||
| kwargs['user'] = '$USER' | ||||||||||
| else: | ||||||||||
| kwargs['jobs'] = self._get_jobs_with_scheduler() | ||||||||||
| kwargs['jobs'] = self._polling_jobs | ||||||||||
|
|
||||||||||
| scheduler_response = scheduler.get_jobs(**kwargs) | ||||||||||
|
|
||||||||||
|
|
@@ -119,11 +122,14 @@ async def _get_jobs_from_scheduler(self) -> Dict[Hashable, 'JobInfo']: | |||||||||
| return jobs_cache | ||||||||||
|
|
||||||||||
| async def _update_job_info(self) -> None: | ||||||||||
| """Update all of the job information objects. | ||||||||||
| """Update job information and resolve pending requests. | ||||||||||
|
|
||||||||||
| This will set the futures for all pending update requests where the corresponding job has a new status compared | ||||||||||
| to the last update. | ||||||||||
| Note, _job_update_requests is dynamic, and might get new entries while polling from scheduler. | ||||||||||
| Therefore we only update the jobs actually polled, and the new entries will be handled in the next update. | ||||||||||
| """ | ||||||||||
|
|
||||||||||
| try: | ||||||||||
| if not self._update_requests_outstanding(): | ||||||||||
| return | ||||||||||
|
|
@@ -141,14 +147,15 @@ async def _update_job_info(self) -> None: | |||||||||
| # `_ensure_updating` will falsely conclude we are still updating, since the handle is not `None` and so it | ||||||||||
| # will not schedule the next update, causing the job update futures to never be resolved. | ||||||||||
| self._update_handle = None | ||||||||||
| self._job_update_requests = {} | ||||||||||
|
|
||||||||||
| raise | ||||||||||
| else: | ||||||||||
| for job_id, future in self._job_update_requests.items(): | ||||||||||
| if not future.done(): | ||||||||||
| future.set_result(self._jobs_cache.get(job_id, None)) | ||||||||||
| finally: | ||||||||||
| self._job_update_requests = {} | ||||||||||
| for job_id in self._polling_jobs: | ||||||||||
| future = self._job_update_requests.pop(job_id) | ||||||||||
| if future.done(): | ||||||||||
|
Comment on lines
+155
to
+156
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Despite the discussion below, I'd still do this a bit more defensively:
Suggested change
|
||||||||||
| continue | ||||||||||
| future.set_result(self._jobs_cache.get(job_id, None)) | ||||||||||
danielhollas marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
|
|
||||||||||
| @contextlib.contextmanager | ||||||||||
| def request_job_info_update(self, authinfo: AuthInfo, job_id: Hashable) -> Iterator['asyncio.Future[JobInfo]']: | ||||||||||
|
|
@@ -161,7 +168,7 @@ def request_job_info_update(self, authinfo: AuthInfo, job_id: Hashable) -> Itera | |||||||||
| """ | ||||||||||
| self._authinfo = authinfo | ||||||||||
| # Get or create the future | ||||||||||
| request = self._job_update_requests.setdefault(job_id, asyncio.Future()) | ||||||||||
| request = self._job_update_requests.setdefault(str(job_id), asyncio.Future()) | ||||||||||
| assert not request.done(), 'Expected pending job info future, found in done state.' | ||||||||||
|
|
||||||||||
| try: | ||||||||||
|
|
@@ -235,14 +242,6 @@ def _get_next_update_delay(self) -> float: | |||||||||
| def _update_requests_outstanding(self) -> bool: | ||||||||||
| return any(not request.done() for request in self._job_update_requests.values()) | ||||||||||
|
|
||||||||||
| def _get_jobs_with_scheduler(self) -> List[str]: | ||||||||||
| """Get all the jobs that are currently with scheduler. | ||||||||||
|
|
||||||||||
| :return: the list of jobs with the scheduler | ||||||||||
| :rtype: list | ||||||||||
| """ | ||||||||||
| return [str(job_id) for job_id, _ in self._job_update_requests.items()] | ||||||||||
|
|
||||||||||
|
|
||||||||||
| class JobManager: | ||||||||||
| """A manager for :py:class:`~aiida.engine.processes.calcjobs.calcjob.CalcJob` submitted to ``Computer`` instances. | ||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
optional: