Skip to content

Commit bac1b2e

Browse files
authored
Merge branch 'main' into sgarg/add-ybdb-connector-to-unstructured
2 parents 1896c7e + 236c3bf commit bac1b2e

File tree

6 files changed

+78
-7
lines changed

6 files changed

+78
-7
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
## 1.2.14
2+
3+
* **Fix: IBM watsonx.data S3 bucket authentication**
4+
5+
## 1.2.13
6+
7+
* **Feat: Make Bedrock embedding credentials optional and add IAM support**
8+
19
## 1.2.12
210

311
* **Fix: retry with wait when throttling error happens in Sharepoint connector**

test/integration/connectors/test_s3.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,18 @@ def anon_connection_config() -> S3ConnectionConfig:
4848
return S3ConnectionConfig(access_config=S3AccessConfig(), anonymous=True)
4949

5050

51+
@pytest.fixture
52+
def ambient_credentials_config() -> S3ConnectionConfig:
53+
"""Test fixture for ambient credentials with mock values."""
54+
access_config = S3AccessConfig(
55+
use_ambient_credentials=True,
56+
presigned_url="https://example.com/mock-presigned-url",
57+
role_arn="arn:aws:iam::123456789012:role/test-role"
58+
)
59+
return S3ConnectionConfig(access_config=access_config)
60+
61+
62+
5163
@pytest.mark.asyncio
5264
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
5365
async def test_s3_source(anon_connection_config: S3ConnectionConfig):

test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def test_ibm_watsonx_connection_config_get_catalog_success(
243243
"s3.access-key-id": "test_access_key_id",
244244
"s3.secret-access-key": "test_secret_access_key",
245245
"s3.region": "test_region",
246+
"header.X-Iceberg-Access-Delegation": None,
246247
}
247248
)
248249

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.2.12" # pragma: no cover
1+
__version__ = "1.2.14" # pragma: no cover

unstructured_ingest/embed/bedrock.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,22 @@ def conform_query(query: str, provider: str) -> dict:
5858

5959

6060
class BedrockEmbeddingConfig(EmbeddingConfig):
61-
aws_access_key_id: SecretStr = Field(description="aws access key id")
62-
aws_secret_access_key: SecretStr = Field(description="aws secret access key")
63-
region_name: str = Field(description="aws region name", default="us-west-2")
61+
aws_access_key_id: SecretStr | None = Field(description="aws access key id", default=None)
62+
aws_secret_access_key: SecretStr | None = Field(
63+
description="aws secret access key", default=None
64+
)
65+
region_name: str = Field(
66+
description="aws region name",
67+
default_factory=lambda: (
68+
os.getenv("BEDROCK_REGION_NAME") or
69+
os.getenv("AWS_DEFAULT_REGION") or
70+
"us-west-2"
71+
)
72+
)
73+
endpoint_url: str | None = Field(description="custom bedrock endpoint url", default=None)
74+
access_method: str = Field(
75+
description="authentication method", default="credentials"
76+
) # "credentials" or "iam"
6477
embedder_model_name: str = Field(
6578
default="amazon.titan-embed-text-v1",
6679
alias="model_name",
@@ -96,6 +109,20 @@ def wrap_error(self, e: Exception) -> Exception:
96109
return e
97110

98111
def run_precheck(self) -> None:
112+
# Validate access method and credentials configuration
113+
if self.access_method == "credentials":
114+
if not (self.aws_access_key_id and self.aws_secret_access_key):
115+
raise ValueError(
116+
"Credentials access method requires aws_access_key_id and aws_secret_access_key"
117+
)
118+
elif self.access_method == "iam":
119+
# For IAM, credentials are handled by AWS SDK
120+
pass
121+
else:
122+
raise ValueError(
123+
f"Invalid access_method: {self.access_method}. Must be 'credentials' or 'iam'"
124+
)
125+
99126
client = self.get_bedrock_client()
100127
try:
101128
model_info = client.list_foundation_models(byOutputModality="EMBEDDING")
@@ -113,11 +140,30 @@ def run_precheck(self) -> None:
113140
raise self.wrap_error(e=e)
114141

115142
def get_client_kwargs(self) -> dict:
116-
return {
117-
"aws_access_key_id": self.aws_access_key_id.get_secret_value(),
118-
"aws_secret_access_key": self.aws_secret_access_key.get_secret_value(),
143+
kwargs = {
119144
"region_name": self.region_name,
120145
}
146+
147+
if self.endpoint_url:
148+
kwargs["endpoint_url"] = self.endpoint_url
149+
150+
if self.access_method == "credentials":
151+
if self.aws_access_key_id and self.aws_secret_access_key:
152+
kwargs["aws_access_key_id"] = self.aws_access_key_id.get_secret_value()
153+
kwargs["aws_secret_access_key"] = self.aws_secret_access_key.get_secret_value()
154+
else:
155+
raise ValueError(
156+
"Credentials access method requires aws_access_key_id and aws_secret_access_key"
157+
)
158+
elif self.access_method == "iam":
159+
# For IAM, boto3 will use default credential chain (IAM roles, environment, etc.)
160+
pass
161+
else:
162+
raise ValueError(
163+
f"Invalid access_method: {self.access_method}. Must be 'credentials' or 'iam'"
164+
)
165+
166+
return kwargs
121167

122168
@requires_dependencies(
123169
["boto3"],

unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@ def get_catalog_config(self) -> dict[str, Any]:
147147
"s3.access-key-id": self.access_config.get_secret_value().access_key_id,
148148
"s3.secret-access-key": self.access_config.get_secret_value().secret_access_key,
149149
"s3.region": self.object_storage_region,
150+
# By default this header is set to `vended-credentials`, but the default bucket
151+
# configuration doesn't allow vending credentials. We set it to `None` so that
152+
# the user-provided S3 credentials are used instead.
153+
"header.X-Iceberg-Access-Delegation": None,
150154
}
151155

152156
@requires_dependencies(["pyiceberg"], extras="ibm-watsonx-s3")

0 commit comments

Comments
 (0)