Skip to content

Commit 9a1e9cf

Browse files
jkhall06donadviser
andauthored
working-with-duplicates (#158)
* adding first files * renaming * linking to window section of book * ammending ToC * ammending re feedback * amending r code * convert working with duplicates ipynb to md format * moving file to correct folder --------- Co-authored-by: Derrick Njobuenwu <donadviser@gmail.com>
1 parent 4f44916 commit 9a1e9cf

8 files changed

Lines changed: 2730 additions & 2 deletions

File tree

ons-spark/_toc.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ parts:
5959
- file: spark-analysis/flags
6060
- file: spark-analysis/bin-continuous-variable
6161
- file: spark-analysis/cramer_v
62+
- file: spark-analysis/working-with-duplicates
6263
- caption: Testing and Debugging
6364
chapters:
6465
- file: testing-debugging/spark-errors
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"name":"org.apache.hadoop.fs.s3a.commit.files.SuccessData/1","success":true,"timestamp":1750410871834,"date":"Fri Jun 20 09:14:31 GMT 2025","hostname":"wkb9feizl30pkz3z","committer":"magic","description":"Task committer attempt_202506200914273954636045344143310_0000_m_000000_0","jobId":"b670fb62-9f5d-4274-ae70-5dc85cdefe7b","jobIdSource":"spark.sql.sources.writeJobUUID","metrics":{"op_glob_status.failures":0,"files_created":0,"op_get_content_summary":0,"stream_write_block_uploads_aborted":0,"audit_span_creation":40,"client_side_encryption_enabled":0,"store_exists_probe":0,"committer_commits_reverted":0,"action_http_get_request.failures":0,"committer_load_single_pending_file":0,"op_rename.failures":0,"committer_magic_files_created":0,"object_copy_requests":0,"stream_read_close_operations":1,"committer_load_single_pending_file.failures":0,"store_io_retry":0,"stream_write_block_uploads_committed":0,"committer_stage_file_upload.failures":0,"op_mkdirs.failures":0,"delegation_token_issued":0,"stream_read_vectored_operations":0,"action_http_head_request":30,"op_create":0,"stream_read_fully_operations":0,"committer_commits_completed":1,"audit_request_execution":75,"stream_read_remote_stream_drain":1,"op_xattr_list.failures":0,"op_is_file.failures":0,"multipart_upload_list":0,"op_get_file_checksum.failures":0,"stream_read_seek_policy_changed":1,"committer_commits_created":0,"audit_access_check_failure":0,"audit_failure":0,"op_get_delegation_token":0,"op_access":0,"stream_write_exceptions":0,"directories_created":2,"op_xattr_get_named.failures":0,"files_delete_rejected":0,"stream_write_total_data":0,"op_hsync":0,"object_multipart_aborted.failures":0,"Store_io_throttle_rate99thPercentileFrequency (Hz)":0,"action_http_get_request":1,"files_copied_bytes":0,"stream_write_block_uploads_data_pending":0,"op_list_located_status":0,"object_bulk_delete_request":1,"committer_commits_aborted":0,"object_multipart_initiated.failures":0,"action_executor_acquired.failures":0,"committer_stage_file_upload":0,"action_http_head_request.failures":0,"stream_read_opened":1,"op_hflush":0,"op_list_status":24,"op_xattr_get_named_map":0,"stream_write_queue_duration.failures":0,"op_get_file_checksum":0,"op_xattr_get_map.failures":0,"ignored_errors":2,"committer_bytes_uploaded":0,"op_list_files":1,"files_deleted":0,"op_copy_from_local_file.failures":0,"action_file_opened.failures":0,"op_is_directory":3,"stream_read_seek_backward_operations":0,"multipart_upload_started":0,"stream_write_total_time":0,"object_delete_request.failures":0,"op_get_delegation_token.failures":0,"fake_directories_created":0,"stream_read_seek_operations":0,"stream_read_seek_forward_operations":0,"object_put_bytes":0,"op_is_file":0,"op_list_status.failures":0,"op_list_files.failures":0,"store_io_request":75,"stream_write_block_uploads":0,"committer_commits.failures":0,"object_delete_objects":2,"op_xattr_get_named_map.failures":0,"committer_commit_job":0,"multipart_upload_part_put":0,"op_open":1,"stream_read_remote_stream_drain.failures":0,"multipart_upload_abort_under_path_invoked":0,"object_put_request":2,"committer_commit_job.failures":0,"stream_read_bytes_backwards_on_seek":0,"multipart_upload_list.failures":0,"stream_read_seek_bytes_discarded":0,"multipart_upload_part_put_bytes":0,"object_put_request.failures":0,"Store_io_throttle_rate95thPercentileFrequency (Hz)":0,"op_abort.failures":0,"action_file_opened":1,"multipart_upload_aborted":0,"committer_bytes_committed":3543,"op_createfile":0,"committer_materialize_file":0,"op_createfile.failures":0,"object_metadata_request":30,"stream_write_block_uploads_active":0,"object_put_request_completed":2,"op_create_non_recursive":0,"stream_write_queue_duration":0,"Store_io_throttle_rate90thPercentileFrequency (Hz)":0,"object_put_request_active":0,"stream_read_vectored_incoming_ranges":0,"op_abort":0,"committer_jobs_completed":1,"multipart_instantiated":0,"Store_io_throttle_rate75thPercentileFrequency (Hz)":0,"stream_read_operations":1,"fake_directories_deleted":2,"object_bulk_delete_request.failures":0,"committer_magic_marker_put":0,"stream_aborted":0,"op_rename":0,"object_multipart_aborted":0,"op_get_file_status":0,"op_access.failures":0,"stream_read_total_bytes":4045,"committer_materialize_file.failures":0,"op_glob_status":0,"stream_read_exceptions":0,"delegation_token_issued.failures":0,"stream_read_version_mismatches":0,"stream_write_bytes":0,"action_executor_acquired":0,"op_exists":5,"Store_io_throttle_rateNumEvents":0,"op_openfile":0,"stream_write_exceptions_completing_upload":0,"op_xattr_list":0,"object_select_requests":0,"store_exists_probe.failures":0,"stream_write_block_uploads_pending":0,"op_is_directory.failures":1,"object_delete_request":0,"stream_read_unbuffered":0,"object_multipart_initiated":0,"stream_read_vectored_combined_ranges":0,"op_xattr_get_named":0,"committer_magic_marker_put.failures":0,"committer_jobs_failed":0,"stream_read_operations_incomplete":1,"object_put_bytes_pending":0,"op_delete":0,"stream_read_bytes":4045,"object_list_request.failures":0,"object_continue_list_request.failures":0,"stream_read_bytes_discarded_in_abort":0,"committer_tasks_completed":0,"op_get_file_status.failures":0,"stream_read_remote_stream_aborted.failures":0,"op_delete.failures":0,"object_list_request":38,"store_io_throttled":0,"stream_read_vectored_read_bytes_discarded":0,"files_copied":0,"committer_tasks_failed":0,"op_exists.failures":3,"stream_read_seek_bytes_skipped":0,"multipart_upload_completed":0,"Store_io_throttle_rate50thPercentileFrequency (Hz)":0,"object_continue_list_request":0,"stream_read_remote_stream_aborted":0,"op_mkdirs":2,"op_copy_from_local_file":0,"op_get_content_summary.failures":0,"stream_read_closed":1,"directories_deleted":0,"op_xattr_get_map":0,"op_create.failures":0,"stream_read_bytes_discarded_in_close":0},"diagnostics":{"fs.s3a.max.total.tasks":"32","fs.s3a.connection.maximum":"96","fs.s3a.committer.threads":"8"},"filenames":["/bat/dapcats/mot_duplicate_sample.parquet/part-00000-456a5519-8614-41b4-b994-f0b5b5d8bdde-c000.snappy.parquet"],"state":null,"stage":null,"iostatistics":{"counters":{"stream_write_exceptions_completing_upload":0,"stream_write_block_uploads":1,"committer_commits.failures":0,"committer_commit_job":1,"stream_write_exceptions":0,"committer_commits_reverted":0,"object_multipart_initiated":1,"committer_load_single_pending_file":0,"stream_write_total_data":3543,"committer_magic_marker_put.failures":0,"op_hsync":0,"object_multipart_aborted.failures":0,"committer_jobs_failed":0,"object_put_request":0,"committer_load_single_pending_file.failures":0,"committer_commit_job.failures":0,"object_put_request.failures":0,"committer_tasks_completed":0,"object_multipart_initiated.failures":0,"op_abort.failures":0,"committer_commits_aborted":0,"action_executor_acquired.failures":0,"committer_stage_file_upload.failures":0,"committer_stage_file_upload":0,"committer_bytes_committed":3543,"op_hflush":0,"committer_materialize_file":1,"committer_commits_completed":1,"committer_bytes_uploaded":0,"committer_tasks_failed":0,"multipart_upload_completed":0,"stream_write_queue_duration":0,"op_abort":0,"committer_jobs_completed":1,"committer_magic_marker_put":1,"object_multipart_aborted":0,"committer_commits_created":0,"stream_write_total_time":0,"multipart_upload_completed.failures":0,"committer_materialize_file.failures":0,"stream_write_bytes":3543,"action_executor_acquired":0},"gauges":{"stream_write_block_uploads_data_pending":0,"stream_write_block_uploads_pending":0},"minimums":{"op_abort.failures.min":-1,"committer_magic_marker_put.failures.min":-1,"object_multipart_aborted.failures.min":-1,"action_executor_acquired.min":8,"object_multipart_aborted.min":-1,"committer_commit_job.min":246,"committer_magic_marker_put.min":31,"committer_stage_file_upload.min":-1,"object_put_request.min":-1,"multipart_upload_completed.min":-1,"committer_commit_job.failures.min":-1,"committer_load_single_pending_file.min":-1,"committer_materialize_file.min":124,"committer_stage_file_upload.failures.min":-1,"committer_materialize_file.failures.min":-1,"multipart_upload_completed.failures.min":-1,"action_executor_acquired.failures.min":-1,"committer_load_single_pending_file.failures.min":-1,"object_put_request.failures.min":-1,"object_multipart_initiated.failures.min":-1,"op_abort.min":-1,"object_multipart_initiated.min":39},"maximums":{"action_executor_acquired.failures.max":-1,"committer_stage_file_upload.failures.max":-1,"committer_materialize_file.failures.max":-1,"multipart_upload_completed.failures.max":-1,"op_abort.max":-1,"committer_commit_job.max":246,"committer_load_single_pending_file.max":-1,"object_multipart_aborted.failures.max":-1,"object_multipart_initiated.max":39,"committer_load_single_pending_file.failures.max":-1,"object_put_request.failures.max":-1,"committer_materialize_file.max":124,"object_multipart_initiated.failures.max":-1,"op_abort.failures.max":-1,"committer_magic_marker_put.failures.max":-1,"action_executor_acquired.max":8,"object_multipart_aborted.max":-1,"committer_stage_file_upload.max":-1,"object_put_request.max":-1,"multipart_upload_completed.max":-1,"committer_commit_job.failures.max":-1,"committer_magic_marker_put.max":31},"meanstatistics":{"committer_stage_file_upload.mean":{"samples":0,"sum":0},"op_abort.mean":{"samples":0,"sum":0},"committer_materialize_file.mean":{"samples":1,"sum":124},"action_executor_acquired.mean":{"samples":1,"sum":8},"committer_commit_job.failures.mean":{"samples":0,"sum":0},"committer_materialize_file.failures.mean":{"samples":0,"sum":0},"committer_stage_file_upload.failures.mean":{"samples":0,"sum":0},"object_multipart_aborted.failures.mean":{"samples":0,"sum":0},"op_abort.failures.mean":{"samples":0,"sum":0},"multipart_upload_completed.failures.mean":{"samples":0,"sum":0},"object_put_request.mean":{"samples":0,"sum":0},"committer_magic_marker_put.mean":{"samples":1,"sum":31},"multipart_upload_completed.mean":{"samples":0,"sum":0},"object_multipart_initiated.failures.mean":{"samples":0,"sum":0},"committer_magic_marker_put.failures.mean":{"samples":0,"sum":0},"committer_load_single_pending_file.failures.mean":{"samples":0,"sum":0},"object_multipart_initiated.mean":{"samples":1,"sum":39},"object_multipart_aborted.mean":{"samples":0,"sum":0},"action_executor_acquired.failures.mean":{"samples":0,"sum":0},"committer_load_single_pending_file.mean":{"samples":0,"sum":0},"object_put_request.failures.mean":{"samples":0,"sum":0},"committer_commit_job.mean":{"samples":1,"sum":246}}}}

0 commit comments

Comments
 (0)