Skip to content

Commit 137b149

Browse files
authored
Bugfix/ingest pipeline check (#3303)
### Description Using an `isinstance` check against the destination registry mapping breaks when inheritance is used for the associated uploader types. This adds a `connector_type` field to all uploaders so that the registry entry can be fetched deterministically when checking for an associated stager in the pipeline.
1 parent 087adb2 commit 137b149

File tree

16 files changed

+25
-19
lines changed

16 files changed

+25
-19
lines changed

Diff for: CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.14.9-dev7
1+
## 0.14.9-dev8
22

33
### Enhancements
44

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.14.9-dev7" # pragma: no cover
1+
__version__ = "0.14.9-dev8" # pragma: no cover

Diff for: unstructured/ingest/v2/interfaces/uploader.py

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class UploadContent:
2626
@dataclass
2727
class Uploader(BaseProcess, BaseConnector, ABC):
2828
upload_config: UploaderConfigT
29+
connector_type: str
2930

3031
def is_async(self) -> bool:
3132
return False

Diff for: unstructured/ingest/v2/pipeline/pipeline.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,8 @@ def check_destination_connector(self):
8787
# Make sure that if the set destination connector expects a stager, one is also set
8888
if not self.uploader_step:
8989
return
90-
matching_registry_entry = [
91-
v
92-
for v in destination_registry.values()
93-
if isinstance(self.uploader_step.process, v.uploader)
94-
]
95-
if len(matching_registry_entry) > 1:
96-
raise ValueError(
97-
f"More than one entry found in destination registry "
98-
f"for uploader type: {self.uploader_step.process}"
99-
)
100-
registry_entry = matching_registry_entry[0]
90+
uploader_connector_type = self.uploader_step.process.connector_type
91+
registry_entry = destination_registry[uploader_connector_type]
10192
if registry_entry.upload_stager and self.stager_step is None:
10293
raise ValueError(
10394
f"pipeline with uploader type {self.uploader_step.process.__class__.__name__} "

Diff for: unstructured/ingest/v2/processes/connectors/astra.py

+1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class AstraUploaderConfig(UploaderConfig):
9292
class AstraUploader(Uploader):
9393
connection_config: AstraConnectionConfig
9494
upload_config: AstraUploaderConfig
95+
connector_type: str = CONNECTOR_TYPE
9596

9697
@requires_dependencies(["astrapy"], extras="astra")
9798
def get_collection(self) -> "AstraDBCollection":

Diff for: unstructured/ingest/v2/processes/connectors/chroma.py

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class ChromaUploaderConfig(UploaderConfig):
114114

115115
@dataclass
116116
class ChromaUploader(Uploader):
117+
connector_type: str = CONNECTOR_TYPE
117118
upload_config: ChromaUploaderConfig
118119
connection_config: ChromaConnectionConfig
119120
client: Optional["Client"] = field(init=False)

Diff for: unstructured/ingest/v2/processes/connectors/elasticsearch.py

+1
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ class ElasticsearchUploaderConfig(UploaderConfig):
327327

328328
@dataclass
329329
class ElasticsearchUploader(Uploader):
330+
connector_type: str = CONNECTOR_TYPE
330331
upload_config: ElasticsearchUploaderConfig
331332
connection_config: ElasticsearchConnectionConfig
332333

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/azure.py

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class AzureUploaderConfig(FsspecUploaderConfig):
114114

115115
@dataclass
116116
class AzureUploader(FsspecUploader):
117+
connector_type: str = CONNECTOR_TYPE
117118
connection_config: AzureConnectionConfig
118119
upload_config: AzureUploaderConfig = field(default=None)
119120

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/box.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ class BoxUploaderConfig(FsspecUploaderConfig):
100100

101101

102102
@dataclass
103-
class BoxUpload(FsspecUploader):
103+
class BoxUploader(FsspecUploader):
104+
connector_type: str = CONNECTOR_TYPE
104105
connection_config: BoxConnectionConfig
105106
upload_config: BoxUploaderConfig = field(default=None)
106107

@@ -131,6 +132,8 @@ async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> Non
131132
add_destination_entry(
132133
destination_type=CONNECTOR_TYPE,
133134
entry=DestinationRegistryEntry(
134-
uploader=BoxUpload, uploader_config=BoxUploaderConfig, connection_config=BoxConnectionConfig
135+
uploader=BoxUploader,
136+
uploader_config=BoxUploaderConfig,
137+
connection_config=BoxConnectionConfig,
135138
),
136139
)

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ class DropboxUploaderConfig(FsspecUploaderConfig):
9999

100100

101101
@dataclass
102-
class DropboxUpload(FsspecUploader):
102+
class DropboxUploader(FsspecUploader):
103+
connector_type: str = CONNECTOR_TYPE
103104
connection_config: DropboxConnectionConfig
104105
upload_config: DropboxUploaderConfig = field(default=None)
105106

@@ -130,7 +131,7 @@ async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> Non
130131
add_destination_entry(
131132
destination_type=CONNECTOR_TYPE,
132133
entry=DestinationRegistryEntry(
133-
uploader=DropboxUpload,
134+
uploader=DropboxUploader,
134135
uploader_config=DropboxUploaderConfig,
135136
connection_config=DropboxConnectionConfig,
136137
),

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py

+1
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ class FsspecUploaderConfig(FileConfig, UploaderConfig):
304304

305305
@dataclass
306306
class FsspecUploader(Uploader):
307+
connector_type: str = CONNECTOR_TYPE
307308
upload_config: FsspecUploaderConfigT = field(default=None)
308309

309310
@property

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/gcs.py

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class GcsUploaderConfig(FsspecUploaderConfig):
111111

112112
@dataclass
113113
class GcsUploader(FsspecUploader):
114+
connector_type: str = CONNECTOR_TYPE
114115
connection_config: GcsConnectionConfig
115116
upload_config: GcsUploaderConfig = field(default=None)
116117

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/s3.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ class S3UploaderConfig(FsspecUploaderConfig):
125125

126126

127127
@dataclass
128-
class S3Upload(FsspecUploader):
128+
class S3Uploader(FsspecUploader):
129+
connector_type: str = CONNECTOR_TYPE
129130
connection_config: S3ConnectionConfig
130131
upload_config: S3UploaderConfig = field(default=None)
131132

@@ -156,7 +157,7 @@ async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> Non
156157
add_destination_entry(
157158
destination_type=CONNECTOR_TYPE,
158159
entry=DestinationRegistryEntry(
159-
uploader=S3Upload,
160+
uploader=S3Uploader,
160161
uploader_config=S3UploaderConfig,
161162
connection_config=S3ConnectionConfig,
162163
),

Diff for: unstructured/ingest/v2/processes/connectors/fsspec/sftp.py

+1
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class SftpUploaderConfig(FsspecUploaderConfig):
136136

137137
@dataclass
138138
class SftpUploader(FsspecUploader):
139+
connector_type: str = CONNECTOR_TYPE
139140
connection_config: SftpConnectionConfig
140141
upload_config: SftpUploaderConfig = field(default=None)
141142

Diff for: unstructured/ingest/v2/processes/connectors/local.py

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ def __post_init__(self):
160160

161161
@dataclass
162162
class LocalUploader(Uploader):
163+
connector_type: str = CONNECTOR_TYPE
163164
upload_config: LocalUploaderConfig = field(default_factory=lambda: LocalUploaderConfig())
164165
connection_config: LocalConnectionConfig = field(
165166
default_factory=lambda: LocalConnectionConfig()

Diff for: unstructured/ingest/v2/processes/connectors/weaviate.py

+1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ class WeaviateUploaderConfig(UploaderConfig):
154154

155155
@dataclass
156156
class WeaviateUploader(Uploader):
157+
connector_type: str = CONNECTOR_TYPE
157158
upload_config: WeaviateUploaderConfig
158159
connection_config: WeaviateConnectionConfig
159160
client: Optional["Client"] = field(init=False)

0 commit comments

Comments
 (0)