Skip to content

Commit 43d6d65

Browse files
feat: google drive internal formats support (#466)
Reworked the Google Drive downloader into a link generation -> link download operation. This allows for fluent integration with cloud-native file formats.
1 parent 282c68a commit 43d6d65

File tree

9 files changed

+765
-71
lines changed

9 files changed

+765
-71
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## 1.0.12-dev0
2+
3+
### Fixes
4+
5+
* **Replaced google drive connector's mechanism for file downloads.**
6+
17
## 1.0.11
28

39
### Fixes
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"directory_structure": [
3+
"fake.docx",
4+
"nested/fake.docx",
5+
"recalibrating-risk-report.pdf",
6+
"test-drive-doc.docx"
7+
]
8+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
{
2+
"identifier": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8",
3+
"connector_type": "google_drive",
4+
"source_identifiers": {
5+
"filename": "test-drive-doc.docx",
6+
"fullpath": "utic-test-ingest-fixtures/test-drive-doc.docx",
7+
"rel_path": "test-drive-doc.docx"
8+
},
9+
"metadata": {
10+
"url": "https://drive.google.com/uc?id=117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8&export=download",
11+
"version": "34",
12+
"record_locator": {
13+
"file_id": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8"
14+
},
15+
"date_created": "1686809758.931",
16+
"date_modified": "1686809744.0",
17+
"date_processed": null,
18+
"permissions_data": [
19+
{
20+
"read": {
21+
"users": [
22+
"03887347926440898356",
23+
"04774006893477068632",
24+
"09147371668407854156",
25+
"13662041828528429192",
26+
"18298851591250030956"
27+
],
28+
"groups": [
29+
"10619079449796831495"
30+
]
31+
}
32+
},
33+
{
34+
"update": {
35+
"users": [
36+
"03887347926440898356",
37+
"04774006893477068632",
38+
"09147371668407854156",
39+
"13662041828528429192",
40+
"18298851591250030956"
41+
],
42+
"groups": [
43+
"10619079449796831495"
44+
]
45+
}
46+
},
47+
{
48+
"delete": {
49+
"users": [
50+
"04774006893477068632"
51+
],
52+
"groups": []
53+
}
54+
}
55+
],
56+
"filesize_bytes": null
57+
},
58+
"additional_metadata": {
59+
"fileExtension": "docx",
60+
"md5Checksum": "b6bd26fa317493cf447882754dac5e9d",
61+
"mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
62+
"headRevisionId": "0B5wM1A2PIvPWOVFKa0U4MHQ1T2E0b0wzSWtGSkRZR1hwRUpzPQ",
63+
"webViewLink": "https://docs.google.com/document/d/117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8/edit?usp=drivesdk&ouid=116756094827511368604&rtpof=true&sd=true",
64+
"id": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8",
65+
"originalFilename": "test-drive-doc.docx",
66+
"capabilities": {
67+
"canChangeViewersCanCopyContent": false,
68+
"canEdit": true,
69+
"canCopy": true,
70+
"canComment": true,
71+
"canAddChildren": false,
72+
"canDelete": false,
73+
"canDownload": true,
74+
"canListChildren": false,
75+
"canRemoveChildren": false,
76+
"canRename": true,
77+
"canTrash": false,
78+
"canReadRevisions": true,
79+
"canChangeCopyRequiresWriterPermission": false,
80+
"canMoveItemIntoTeamDrive": false,
81+
"canUntrash": false,
82+
"canModifyContent": true,
83+
"canMoveItemOutOfDrive": false,
84+
"canAddMyDriveParent": false,
85+
"canRemoveMyDriveParent": true,
86+
"canMoveItemWithinDrive": true,
87+
"canShare": true,
88+
"canMoveChildrenWithinDrive": false,
89+
"canModifyContentRestriction": true,
90+
"canChangeSecurityUpdateEnabled": false,
91+
"canAcceptOwnership": false,
92+
"canReadLabels": false,
93+
"canModifyLabels": false,
94+
"canModifyEditorContentRestriction": true,
95+
"canModifyOwnerContentRestriction": false,
96+
"canRemoveContentRestriction": false,
97+
"canDisableInheritedPermissions": false,
98+
"canEnableInheritedPermissions": true
99+
},
100+
"permissionIds": [
101+
"anyoneWithLink",
102+
"10619079449796831495",
103+
"03887347926440898356",
104+
"13662041828528429192",
105+
"18298851591250030956",
106+
"04774006893477068632",
107+
"09147371668407854156"
108+
],
109+
"sha1Checksum": "70daaa25dd03bc56192ccc18fc25f53aa06b7b36",
110+
"sha256Checksum": "9dd205eea9d2fa9f4e7a993697c8d5b6c4ec7a92ccd12fff772b1dff084deca8",
111+
"download_method": "web_content_link",
112+
"download_url_used": "https://drive.google.com/uc?id=117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8&export=download"
113+
},
114+
"reprocess": false,
115+
"local_download_path": "/tmp/tmpd1hbhlea/test-drive-doc.docx",
116+
"display_name": null
117+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
{
2+
"identifier": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o",
3+
"connector_type": "google_drive",
4+
"source_identifiers": {
5+
"filename": "fake.docx",
6+
"fullpath": "utic-test-ingest-fixtures/fake.docx",
7+
"rel_path": "fake.docx"
8+
},
9+
"metadata": {
10+
"url": "https://drive.google.com/uc?id=1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o&export=download",
11+
"version": "39",
12+
"record_locator": {
13+
"file_id": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o"
14+
},
15+
"date_created": "1686809759.687",
16+
"date_modified": "1686809743.0",
17+
"date_processed": null,
18+
"permissions_data": [
19+
{
20+
"read": {
21+
"users": [
22+
"03887347926440898356",
23+
"04774006893477068632",
24+
"09147371668407854156",
25+
"13662041828528429192",
26+
"18298851591250030956"
27+
],
28+
"groups": [
29+
"10619079449796831495"
30+
]
31+
}
32+
},
33+
{
34+
"update": {
35+
"users": [
36+
"03887347926440898356",
37+
"04774006893477068632",
38+
"09147371668407854156",
39+
"13662041828528429192",
40+
"18298851591250030956"
41+
],
42+
"groups": [
43+
"10619079449796831495"
44+
]
45+
}
46+
},
47+
{
48+
"delete": {
49+
"users": [
50+
"04774006893477068632"
51+
],
52+
"groups": []
53+
}
54+
}
55+
],
56+
"filesize_bytes": null
57+
},
58+
"additional_metadata": {
59+
"fileExtension": "docx",
60+
"md5Checksum": "5ddd25c0e5d5f6996a93c154830dc7aa",
61+
"mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
62+
"headRevisionId": "0B5wM1A2PIvPWOEoxZ2FjcDRsRUJsb2sveEQzTmJZZ1luY3JVPQ",
63+
"webViewLink": "https://docs.google.com/document/d/1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o/edit?usp=drivesdk&ouid=116756094827511368604&rtpof=true&sd=true",
64+
"id": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o",
65+
"originalFilename": "fake.docx",
66+
"capabilities": {
67+
"canChangeViewersCanCopyContent": false,
68+
"canEdit": true,
69+
"canCopy": true,
70+
"canComment": true,
71+
"canAddChildren": false,
72+
"canDelete": false,
73+
"canDownload": true,
74+
"canListChildren": false,
75+
"canRemoveChildren": false,
76+
"canRename": true,
77+
"canTrash": false,
78+
"canReadRevisions": true,
79+
"canChangeCopyRequiresWriterPermission": false,
80+
"canMoveItemIntoTeamDrive": false,
81+
"canUntrash": false,
82+
"canModifyContent": true,
83+
"canMoveItemOutOfDrive": false,
84+
"canAddMyDriveParent": false,
85+
"canRemoveMyDriveParent": true,
86+
"canMoveItemWithinDrive": true,
87+
"canShare": true,
88+
"canMoveChildrenWithinDrive": false,
89+
"canModifyContentRestriction": true,
90+
"canChangeSecurityUpdateEnabled": false,
91+
"canAcceptOwnership": false,
92+
"canReadLabels": false,
93+
"canModifyLabels": false,
94+
"canModifyEditorContentRestriction": true,
95+
"canModifyOwnerContentRestriction": false,
96+
"canRemoveContentRestriction": false,
97+
"canDisableInheritedPermissions": false,
98+
"canEnableInheritedPermissions": true
99+
},
100+
"permissionIds": [
101+
"anyoneWithLink",
102+
"10619079449796831495",
103+
"03887347926440898356",
104+
"13662041828528429192",
105+
"18298851591250030956",
106+
"04774006893477068632",
107+
"09147371668407854156"
108+
],
109+
"sha1Checksum": "07efd2a6bd828801d8d1d4bd77fcb15120bbad94",
110+
"sha256Checksum": "690b185fb657a76dfe5d304911e720f8998b328edb6677cbf2c846152284dfb3",
111+
"download_method": "web_content_link",
112+
"download_url_used": "https://drive.google.com/uc?id=1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o&export=download"
113+
},
114+
"reprocess": false,
115+
"local_download_path": "/tmp/tmpd1hbhlea/fake.docx",
116+
"display_name": null
117+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
{
2+
"identifier": "1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV",
3+
"connector_type": "google_drive",
4+
"source_identifiers": {
5+
"filename": "recalibrating-risk-report.pdf",
6+
"fullpath": "utic-test-ingest-fixtures/recalibrating-risk-report.pdf",
7+
"rel_path": "recalibrating-risk-report.pdf"
8+
},
9+
"metadata": {
10+
"url": "https://drive.google.com/uc?id=1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV&export=download",
11+
"version": "10",
12+
"record_locator": {
13+
"file_id": "1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV"
14+
},
15+
"date_created": "1718723636.34",
16+
"date_modified": "1676196572.0",
17+
"date_processed": null,
18+
"permissions_data": [
19+
{
20+
"read": {
21+
"users": [
22+
"03887347926440898356",
23+
"04774006893477068632",
24+
"09147371668407854156",
25+
"13662041828528429192",
26+
"18298851591250030956"
27+
],
28+
"groups": [
29+
"10619079449796831495"
30+
]
31+
}
32+
},
33+
{
34+
"update": {
35+
"users": [
36+
"03887347926440898356",
37+
"04774006893477068632",
38+
"09147371668407854156",
39+
"13662041828528429192",
40+
"18298851591250030956"
41+
],
42+
"groups": [
43+
"10619079449796831495"
44+
]
45+
}
46+
},
47+
{
48+
"delete": {
49+
"users": [
50+
"09147371668407854156"
51+
],
52+
"groups": []
53+
}
54+
}
55+
],
56+
"filesize_bytes": null
57+
},
58+
"additional_metadata": {
59+
"fileExtension": "pdf",
60+
"md5Checksum": "e690f37ef36368a509d150f373a0bbe0",
61+
"mimeType": "application/pdf",
62+
"headRevisionId": "0B8fLPtk3k4KOdlRMRDkvM0JCQkdtTU1NdjdpK0ZlSTJadXNjPQ",
63+
"webViewLink": "https://drive.google.com/file/d/1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV/view?usp=drivesdk",
64+
"id": "1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV",
65+
"originalFilename": "recalibrating-risk-report.pdf",
66+
"capabilities": {
67+
"canChangeViewersCanCopyContent": false,
68+
"canEdit": true,
69+
"canCopy": true,
70+
"canComment": true,
71+
"canAddChildren": false,
72+
"canDelete": false,
73+
"canDownload": true,
74+
"canListChildren": false,
75+
"canRemoveChildren": false,
76+
"canRename": true,
77+
"canTrash": false,
78+
"canReadRevisions": true,
79+
"canChangeCopyRequiresWriterPermission": false,
80+
"canMoveItemIntoTeamDrive": false,
81+
"canUntrash": false,
82+
"canModifyContent": true,
83+
"canMoveItemOutOfDrive": false,
84+
"canAddMyDriveParent": false,
85+
"canRemoveMyDriveParent": true,
86+
"canMoveItemWithinDrive": true,
87+
"canShare": true,
88+
"canMoveChildrenWithinDrive": false,
89+
"canModifyContentRestriction": true,
90+
"canChangeSecurityUpdateEnabled": false,
91+
"canAcceptOwnership": false,
92+
"canReadLabels": false,
93+
"canModifyLabels": false,
94+
"canModifyEditorContentRestriction": true,
95+
"canModifyOwnerContentRestriction": false,
96+
"canRemoveContentRestriction": false,
97+
"canDisableInheritedPermissions": false,
98+
"canEnableInheritedPermissions": true
99+
},
100+
"permissionIds": [
101+
"anyoneWithLink",
102+
"10619079449796831495",
103+
"03887347926440898356",
104+
"13662041828528429192",
105+
"18298851591250030956",
106+
"04774006893477068632",
107+
"09147371668407854156"
108+
],
109+
"sha1Checksum": "57e64b7be1e099ecf9dcd9832bc09c81ffc5fc44",
110+
"sha256Checksum": "076d8d243312e0b03a0fe2191e924c37fe159dd01d6b6ce3a3b2b7c642c36245",
111+
"download_method": "web_content_link",
112+
"download_url_used": "https://drive.google.com/uc?id=1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV&export=download"
113+
},
114+
"reprocess": false,
115+
"local_download_path": "/tmp/tmpd1hbhlea/recalibrating-risk-report.pdf",
116+
"display_name": null
117+
}

0 commit comments

Comments
 (0)