Skip to content

Commit fecc402

Browse files
committed
feat(dynamodb): add key_schema parameter to read_items to bypass DescribeTable (#2396)
Signed-off-by: Jagan Nalla <jagannalla1@gmail.com>
1 parent 2234f5e commit fecc402

3 files changed

Lines changed: 57 additions & 2 deletions

File tree

awswrangler/dynamodb/_read.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def _convert_condition_base_to_expression(
456456
@_utils.validate_distributed_kwargs(
457457
unsupported_kwargs=["boto3_session", "dtype_backend"],
458458
)
459-
def read_items( # noqa: PLR0912
459+
def read_items( # noqa: PLR0912, PLR0915
460460
table_name: str,
461461
index_name: str | None = None,
462462
partition_values: Sequence[Any] | None = None,
@@ -475,6 +475,7 @@ def read_items( # noqa: PLR0912
475475
use_threads: bool | int = True,
476476
boto3_session: boto3.Session | None = None,
477477
pyarrow_additional_kwargs: dict[str, Any] | None = None,
478+
key_schema: list[dict[str, str]] | None = None,
478479
) -> pd.DataFrame | Iterator[pd.DataFrame] | _ItemsListType | Iterator[_ItemsListType]:
479480
"""Read items from given DynamoDB table.
480481
@@ -657,7 +658,10 @@ def read_items( # noqa: PLR0912
657658
# Extract key schema
658659
dynamodb_client = _utils.client(service_name="dynamodb", session=boto3_session)
659660
serializer = TypeSerializer()
660-
table_key_schema = dynamodb_client.describe_table(TableName=table_name)["Table"]["KeySchema"]
661+
if key_schema:
662+
table_key_schema = key_schema
663+
else:
664+
table_key_schema = dynamodb_client.describe_table(TableName=table_name)["Table"]["KeySchema"]
661665

662666
# Detect sort key, if any
663667
if len(table_key_schema) == 1:

awswrangler/dynamodb/_read.pyi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def read_items(
6868
use_threads: bool | int = ...,
6969
boto3_session: boto3.Session | None = ...,
7070
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
71+
key_schema: list[dict[str, str]] | None = ...,
7172
) -> pd.DataFrame: ...
7273
@overload
7374
def read_items(
@@ -90,6 +91,7 @@ def read_items(
9091
use_threads: bool | int = ...,
9192
boto3_session: boto3.Session | None = ...,
9293
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
94+
key_schema: list[dict[str, str]] | None = ...,
9395
) -> Iterator[pd.DataFrame]: ...
9496
@overload
9597
def read_items(
@@ -112,6 +114,7 @@ def read_items(
112114
use_threads: bool | int = ...,
113115
boto3_session: boto3.Session | None = ...,
114116
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
117+
key_schema: list[dict[str, str]] | None = ...,
115118
) -> _ItemsListType: ...
116119
@overload
117120
def read_items(
@@ -134,6 +137,7 @@ def read_items(
134137
use_threads: bool | int = ...,
135138
boto3_session: boto3.Session | None = ...,
136139
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
140+
key_schema: list[dict[str, str]] | None = ...,
137141
) -> Iterator[_ItemsListType]: ...
138142
@overload
139143
def read_items(
@@ -156,6 +160,7 @@ def read_items(
156160
use_threads: bool | int = ...,
157161
boto3_session: boto3.Session | None = ...,
158162
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
163+
key_schema: list[dict[str, str]] | None = ...,
159164
) -> pd.DataFrame | _ItemsListType: ...
160165
@overload
161166
def read_items(
@@ -178,6 +183,7 @@ def read_items(
178183
use_threads: bool | int = ...,
179184
boto3_session: boto3.Session | None = ...,
180185
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
186+
key_schema: list[dict[str, str]] | None = ...,
181187
) -> Iterator[pd.DataFrame] | Iterator[_ItemsListType]: ...
182188
@overload
183189
def read_items(
@@ -200,6 +206,7 @@ def read_items(
200206
use_threads: bool | int = ...,
201207
boto3_session: boto3.Session | None = ...,
202208
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
209+
key_schema: list[dict[str, str]] | None = ...,
203210
) -> pd.DataFrame | Iterator[pd.DataFrame]: ...
204211
@overload
205212
def read_items(
@@ -222,6 +229,7 @@ def read_items(
222229
use_threads: bool | int = ...,
223230
boto3_session: boto3.Session | None = ...,
224231
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
232+
key_schema: list[dict[str, str]] | None = ...,
225233
) -> _ItemsListType | Iterator[_ItemsListType]: ...
226234
@overload
227235
def read_items(
@@ -244,4 +252,5 @@ def read_items(
244252
use_threads: bool | int = ...,
245253
boto3_session: boto3.Session | None = ...,
246254
pyarrow_additional_kwargs: dict[str, Any] | None = ...,
255+
key_schema: list[dict[str, str]] | None = ...,
247256
) -> pd.DataFrame | Iterator[pd.DataFrame] | _ItemsListType | Iterator[_ItemsListType]: ...

tests/unit/test_moto.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,3 +760,45 @@ def test_extract_ctas_manifest_paths_cross_bucket_raises(moto_s3_client: "S3Clie
760760

761761
with pytest.raises(InvalidArgumentValue, match="unexpected bucket"):
762762
_extract_ctas_manifest_paths(path=f"s3://bucket/{manifest_key}")
763+
764+
765+
def test_dynamodb_read_items_with_key_schema(moto_dynamodb_client, moto_dynamodb_table) -> None:
766+
# Insert items
767+
items = [{"key": 1, "value": "A"}, {"key": 2, "value": "B"}]
768+
wr.dynamodb.put_items(items=items, table_name=moto_dynamodb_table)
769+
770+
# 1. Verify read_items works with key_schema passed
771+
key_schema = [{"AttributeName": "key", "KeyType": "HASH"}]
772+
df = wr.dynamodb.read_items(
773+
table_name=moto_dynamodb_table,
774+
key_schema=key_schema,
775+
allow_full_scan=True,
776+
)
777+
assert len(df) == 2
778+
assert set(df["value"]) == {"A", "B"}
779+
780+
# 2. Assert DescribeTable is NOT called when key_schema is provided
781+
call = botocore.client.BaseClient._make_api_call
782+
describe_table_calls = 0
783+
784+
def mock_make_api_call(self, operation_name, kwarg):
785+
nonlocal describe_table_calls
786+
if operation_name == "DescribeTable":
787+
describe_table_calls += 1
788+
return call(self, operation_name, kwarg)
789+
790+
with mock.patch("botocore.client.BaseClient._make_api_call", new=mock_make_api_call):
791+
# Call read_items with key_schema
792+
wr.dynamodb.read_items(
793+
table_name=moto_dynamodb_table,
794+
key_schema=key_schema,
795+
allow_full_scan=True,
796+
)
797+
assert describe_table_calls == 0
798+
799+
# Call read_items WITHOUT key_schema
800+
wr.dynamodb.read_items(
801+
table_name=moto_dynamodb_table,
802+
allow_full_scan=True,
803+
)
804+
assert describe_table_calls == 1

0 commit comments

Comments
 (0)