-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Id not set in checkpoint2 #4468
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,9 +98,6 @@ def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> b | |
if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE: | ||
return False | ||
|
||
if cc_pair.status == ConnectorCredentialPairStatus.DELETING: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why remove this check? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This condition was never True: the check on the lines above already returns False for any status other than ACTIVE, so a DELETING cc_pair never reaches this branch. |
||
return False | ||
|
||
# If the last sync is None, it has never been run so we run the sync | ||
last_perm_sync = cc_pair.last_time_perm_sync | ||
if last_perm_sync is None: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,14 +122,17 @@ def crawl_folders_for_files( | |
start=start, | ||
end=end, | ||
): | ||
found_files = True | ||
logger.info(f"Found file: {file['name']}, user email: {user_email}") | ||
found_files = True | ||
yield RetrievedDriveFile( | ||
drive_file=file, | ||
user_email=user_email, | ||
parent_id=parent_id, | ||
completion_stage=DriveRetrievalStage.FOLDER_FILES, | ||
) | ||
# Only mark a folder as done if it was fully traversed without errors | ||
if found_files: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why the move? If it's important, I would prefer to add a comment explaining why. If not (i.e. it's purely stylistic), ignore this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Previously we were marking a folder as traversed if at least one file from the folder was retrieved without an error; now a folder will only be marked as done if ALL files from it are retrieved. With the new system for tracking folder completion (sorting and continuing from the last SEEN folder rather than the last retrieved one), this shouldn't cause us to get stuck, and it should let us handle pathological cases such as a bunch of different users having individual access to files in a "shared folder" that isn't actually fully shared because permissions were revoked. |
||
update_traversed_ids_func(parent_id) | ||
except Exception as e: | ||
logger.error(f"Error getting files in parent {parent_id}: {e}") | ||
yield RetrievedDriveFile( | ||
|
@@ -139,8 +142,6 @@ def crawl_folders_for_files( | |
completion_stage=DriveRetrievalStage.FOLDER_FILES, | ||
error=e, | ||
) | ||
if found_files: | ||
update_traversed_ids_func(parent_id) | ||
else: | ||
logger.info(f"Skipping subfolder files since already traversed: {parent_id}") | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what's going on here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typing fixes