Skip to content

Commit 8ee3763

Browse files
authored
more tests on read_batch() (#1987)
#### Reference Issues/PRs <!--Example: Fixes #1234. See also #3456.--> #### What does this implement or fix? Several additional tests on the read_batch() method. The primary focus of the tests is to include requests for reading from more symbols and/or snapshots, with different combinations of parameters for the read - using the query builder etc. - on more diverse dataframe types. - Added more dataframe utility methods - added a special assert_frame_equals method for frames that have a row-range index or another index type other than the natively supported datetime/timestamp - as of the previous PR, assert_frames_equal will now log the dataframes' structure in CSV format, which will be logged only in case of an assertion failure (later this CSV could be imported back if needed to reproduce the problem) #### Any other comments? #### Checklist <details> <summary> Checklist for code changes... </summary> - [ ] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes? </details> <!-- Thanks for contributing a Pull Request to ArcticDB! Please ensure you have taken a look at: - ArcticDB's Code of Conduct: https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md - ArcticDB's Contribution Licensing: https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing --> --------- Co-authored-by: Georgi Rusev <Georgi Rusev>
1 parent fd0ec5d commit 8ee3763

File tree

3 files changed

+545
-33
lines changed

3 files changed

+545
-33
lines changed

python/arcticdb/util/test.py

+36-3
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88

99
import os
1010
from contextlib import contextmanager
11-
from typing import Mapping, Any, Optional, Iterable, NamedTuple, List, AnyStr
11+
from typing import Mapping, Any, Optional, Iterable, NamedTuple, List, AnyStr, Sequence
1212
import numpy as np
1313
import pandas as pd
14+
from pandas.core.series import Series
15+
from pandas import Index
16+
from pandas._typing import Scalar
1417
import datetime as dt
1518
import string
1619
import random
@@ -35,7 +38,6 @@
3538

3639
from arcticdb import log
3740

38-
3941
def create_df(start=0, columns=1) -> pd.DataFrame:
4042
data = {}
4143
for i in range(columns):
@@ -153,6 +155,26 @@ def dataframe_simulate_arcticdb_update_static(existing_df: pd.DataFrame, update_
153155
result_df = pd.concat(chunks)
154156
return result_df
155157

158+
def dataframe_single_column_string(length=1000, column_label='string_short', seed=0, string_len=1):
    """
    Create a dataframe with a single column of random strings.

    Useful for combining this dataframe with another dataframe.

    Parameters
    ----------
    length
        Number of rows to generate.
    column_label
        Label of the single column.
    seed
        Seed applied to both the stdlib ``random`` module and ``numpy.random``
        before the strings are generated.
    string_len
        Number of characters in every generated string.

    Returns
    -------
    pd.DataFrame
        A frame with ``length`` rows and one column named ``column_label``.
    """
    # random_string() draws from the stdlib ``random`` module, so that is the
    # generator that has to be seeded for ``seed`` to make the output reproducible.
    random.seed(seed)
    # Kept so callers that rely on numpy's global generator being seeded as a
    # side effect of this call continue to see the same behaviour.
    np.random.seed(seed)
    return pd.DataFrame({column_label: [random_string(string_len) for _ in range(length)]})
165+
166+
def dataframe_filter_with_datetime_index(df: pd.DataFrame, start_timestamp: Scalar, end_timestamp: Scalar, inclusive='both') -> pd.DataFrame:
    """
    Filter a dataframe that has a datetime index, keeping the rows whose index
    value lies between ``start_timestamp`` and ``end_timestamp``.

    Parameters
    ----------
    df
        Dataframe to filter.
    start_timestamp, end_timestamp
        Range boundaries; can be pandas.Timestamp, datetime or a datetime string.
    inclusive
        Which boundaries are included in the result: one of
        'both', 'left', 'right' or 'neither'.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` that fall inside the requested range.
    """
    # Forward the caller's ``inclusive`` choice; it was previously ignored and
    # the range was always treated as closed on both sides.
    in_range = df.index.to_series().between(start_timestamp, end_timestamp, inclusive=inclusive)
    return df[in_range]
177+
156178
def maybe_not_check_freq(f):
157179
"""Ignore frequency when pandas is newer as starts to check frequency which it did not previously do."""
158180

@@ -178,10 +200,21 @@ def wrapper(*args, **kwargs):
178200

179201
return wrapper
180202

181-
182203
assert_frame_equal = maybe_not_check_freq(pd.testing.assert_frame_equal)
183204
assert_series_equal = maybe_not_check_freq(pd.testing.assert_series_equal)
184205

206+
def assert_frame_equal_rebuild_index_first(expected: pd.DataFrame, actual: pd.DataFrame) -> None:
    """
    Assert that two dataframes are equal after rebuilding their indexes.

    Use for dataframes that have a row-range index and whose data was
    obtained from arctic with QueryBuilder.

    Both frames get a fresh default index before the comparison, so that they
    have the same index when a row-range index is used. The frames passed in
    are left untouched: the comparison runs on re-indexed copies.

    Raises
    ------
    AssertionError
        If the re-indexed frames differ.
    """
    # reset_index() without inplace returns a new frame, so the caller's
    # objects keep their original index after the assertion.
    expected_rebuilt = expected.reset_index(drop=True)
    actual_rebuilt = actual.reset_index(drop=True)
    assert_frame_equal(left=expected_rebuilt, right=actual_rebuilt)
217+
185218
def random_string(length: int):
    """Return a string of ``length`` characters picked from A-Z and 0-9 using the ``random`` module."""
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)
187220

python/tests/conftest.py

+31-30
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from arcticdb.storage_fixtures.in_memory import InMemoryStorageFixture
3939
from arcticdb.version_store._normalization import MsgPackNormalizer
4040
from arcticdb.util.test import create_df
41+
from arcticdb.arctic import Arctic
4142
from .util.mark import (
4243
AZURE_TESTS_MARK,
4344
MONGO_TESTS_MARK,
@@ -281,7 +282,7 @@ def mem_storage() -> Iterator[InMemoryStorageFixture]:
281282
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
282283
],
283284
)
284-
def arctic_client(request, encoding_version):
285+
def arctic_client(request, encoding_version) -> Arctic:
285286
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
286287
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
287288
assert not ac.list_libraries()
@@ -298,15 +299,15 @@ def arctic_client(request, encoding_version):
298299
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
299300
],
300301
)
301-
def arctic_client_no_lmdb(request, encoding_version):
302+
def arctic_client_no_lmdb(request, encoding_version) -> Arctic:
302303
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
303304
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
304305
assert not ac.list_libraries()
305306
return ac
306307

307308

308309
@pytest.fixture
def arctic_library(arctic_client, lib_name) -> "Library":
    """
    Create a library named ``lib_name`` on the ``arctic_client`` fixture and return it.

    The value returned is whatever ``arctic_client.create_library`` returns, i.e. the
    library object rather than the ``Arctic`` client itself.
    """
    # NOTE(review): the annotation is a string so this block does not require
    # ``Library`` to be imported here - confirm the import and drop the quotes if desired.
    return arctic_client.create_library(lib_name)
311312

312313

@@ -318,15 +319,15 @@ def arctic_library(arctic_client, lib_name):
318319
pytest.param("real_s3", marks=REAL_S3_TESTS_MARK),
319320
],
320321
)
321-
def basic_arctic_client(request, encoding_version):
322+
def basic_arctic_client(request, encoding_version) -> Arctic:
322323
storage_fixture: StorageFixture = request.getfixturevalue(request.param + "_storage")
323324
ac = storage_fixture.create_arctic(encoding_version=encoding_version)
324325
assert not ac.list_libraries()
325326
return ac
326327

327328

328329
@pytest.fixture
def basic_arctic_library(basic_arctic_client, lib_name) -> "Library":
    """
    Create a library named ``lib_name`` on the ``basic_arctic_client`` fixture and return it.

    The value returned is whatever ``basic_arctic_client.create_library`` returns, i.e. the
    library object rather than the ``Arctic`` client itself.
    """
    # NOTE(review): the annotation is a string so this block does not require
    # ``Library`` to be imported here - confirm the import and drop the quotes if desired.
    return basic_arctic_client.create_library(lib_name)
331332

332333

@@ -554,17 +555,17 @@ def azure_version_store_dynamic_schema(azure_store_factory):
554555

555556

556557
@pytest.fixture
557-
def lmdb_version_store_string_coercion(version_store_factory):
558+
def lmdb_version_store_string_coercion(version_store_factory) ->NativeVersionStore:
558559
return version_store_factory()
559560

560561

561562
@pytest.fixture
562-
def lmdb_version_store_v1(version_store_factory):
563+
def lmdb_version_store_v1(version_store_factory) -> NativeVersionStore:
563564
return version_store_factory(dynamic_strings=True)
564565

565566

566567
@pytest.fixture
567-
def lmdb_version_store_v2(version_store_factory, lib_name):
568+
def lmdb_version_store_v2(version_store_factory, lib_name) -> NativeVersionStore:
568569
library_name = lib_name + "_v2"
569570
return version_store_factory(dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name)
570571

@@ -575,31 +576,31 @@ def lmdb_version_store(request):
575576

576577

577578
@pytest.fixture
578-
def lmdb_version_store_prune_previous(version_store_factory):
579+
def lmdb_version_store_prune_previous(version_store_factory) -> NativeVersionStore:
579580
return version_store_factory(dynamic_strings=True, prune_previous_version=True, use_tombstones=True)
580581

581582

582583
@pytest.fixture
583-
def lmdb_version_store_big_map(version_store_factory):
584+
def lmdb_version_store_big_map(version_store_factory) -> NativeVersionStore:
584585
return version_store_factory(lmdb_config={"map_size": 2**30})
585586

586587

587588
@pytest.fixture
588-
def lmdb_version_store_very_big_map(version_store_factory):
589+
def lmdb_version_store_very_big_map(version_store_factory) -> NativeVersionStore:
589590
return version_store_factory(lmdb_config={"map_size": 2**35})
590591

591592
@pytest.fixture
592-
def lmdb_version_store_column_buckets(version_store_factory):
593+
def lmdb_version_store_column_buckets(version_store_factory) -> NativeVersionStore:
593594
return version_store_factory(dynamic_schema=True, column_group_size=3, segment_row_size=2, bucketize_dynamic=True)
594595

595596

596597
@pytest.fixture
597-
def lmdb_version_store_dynamic_schema_v1(version_store_factory, lib_name):
598+
def lmdb_version_store_dynamic_schema_v1(version_store_factory, lib_name) -> NativeVersionStore:
598599
return version_store_factory(dynamic_schema=True, dynamic_strings=True)
599600

600601

601602
@pytest.fixture
602-
def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name):
603+
def lmdb_version_store_dynamic_schema_v2(version_store_factory, lib_name) -> NativeVersionStore:
603604
library_name = lib_name + "_v2"
604605
return version_store_factory(
605606
dynamic_schema=True, dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name
@@ -619,27 +620,27 @@ def lmdb_version_store_dynamic_schema(
619620

620621

621622
@pytest.fixture
622-
def lmdb_version_store_empty_types_v1(version_store_factory, lib_name):
623+
def lmdb_version_store_empty_types_v1(version_store_factory, lib_name) -> NativeVersionStore:
623624
library_name = lib_name + "_v1"
624625
return version_store_factory(dynamic_strings=True, empty_types=True, name=library_name)
625626

626627

627628
@pytest.fixture
628-
def lmdb_version_store_empty_types_v2(version_store_factory, lib_name):
629+
def lmdb_version_store_empty_types_v2(version_store_factory, lib_name) -> NativeVersionStore:
629630
library_name = lib_name + "_v2"
630631
return version_store_factory(
631632
dynamic_strings=True, empty_types=True, encoding_version=int(EncodingVersion.V2), name=library_name
632633
)
633634

634635

635636
@pytest.fixture
636-
def lmdb_version_store_empty_types_dynamic_schema_v1(version_store_factory, lib_name):
637+
def lmdb_version_store_empty_types_dynamic_schema_v1(version_store_factory, lib_name) -> NativeVersionStore:
637638
library_name = lib_name + "_v1"
638639
return version_store_factory(dynamic_strings=True, empty_types=True, dynamic_schema=True, name=library_name)
639640

640641

641642
@pytest.fixture
642-
def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_name):
643+
def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_name) -> NativeVersionStore:
643644
library_name = lib_name + "_v2"
644645
return version_store_factory(
645646
dynamic_strings=True,
@@ -651,14 +652,14 @@ def lmdb_version_store_empty_types_dynamic_schema_v2(version_store_factory, lib_
651652

652653

653654
@pytest.fixture
654-
def lmdb_version_store_delayed_deletes_v1(version_store_factory):
655+
def lmdb_version_store_delayed_deletes_v1(version_store_factory) -> NativeVersionStore:
655656
return version_store_factory(
656657
delayed_deletes=True, dynamic_strings=True, empty_types=True, prune_previous_version=True
657658
)
658659

659660

660661
@pytest.fixture
661-
def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name):
662+
def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name) -> NativeVersionStore:
662663
library_name = lib_name + "_v2"
663664
return version_store_factory(
664665
dynamic_strings=True,
@@ -670,52 +671,52 @@ def lmdb_version_store_delayed_deletes_v2(version_store_factory, lib_name):
670671

671672

672673
@pytest.fixture
673-
def lmdb_version_store_tombstones_no_symbol_list(version_store_factory):
674+
def lmdb_version_store_tombstones_no_symbol_list(version_store_factory) -> NativeVersionStore:
674675
return version_store_factory(use_tombstones=True, dynamic_schema=True, symbol_list=False, dynamic_strings=True)
675676

676677

677678
@pytest.fixture
678-
def lmdb_version_store_allows_pickling(version_store_factory, lib_name):
679+
def lmdb_version_store_allows_pickling(version_store_factory, lib_name) -> NativeVersionStore:
679680
return version_store_factory(use_norm_failure_handler_known_types=True, dynamic_strings=True)
680681

681682

682683
@pytest.fixture
683-
def lmdb_version_store_no_symbol_list(version_store_factory):
684+
def lmdb_version_store_no_symbol_list(version_store_factory) -> NativeVersionStore:
684685
return version_store_factory(col_per_group=None, row_per_segment=None, symbol_list=False)
685686

686687

687688
@pytest.fixture
688-
def lmdb_version_store_tombstone_and_pruning(version_store_factory):
689+
def lmdb_version_store_tombstone_and_pruning(version_store_factory) -> NativeVersionStore:
689690
return version_store_factory(use_tombstones=True, prune_previous_version=True)
690691

691692

692693
@pytest.fixture
693-
def lmdb_version_store_tombstone(version_store_factory):
694+
def lmdb_version_store_tombstone(version_store_factory) -> NativeVersionStore:
694695
return version_store_factory(use_tombstones=True)
695696

696697

697698
@pytest.fixture
698-
def lmdb_version_store_tombstone_and_sync_passive(version_store_factory):
699+
def lmdb_version_store_tombstone_and_sync_passive(version_store_factory) -> NativeVersionStore:
699700
return version_store_factory(use_tombstones=True, sync_passive=True)
700701

701702

702703
@pytest.fixture
703-
def lmdb_version_store_ignore_order(version_store_factory):
704+
def lmdb_version_store_ignore_order(version_store_factory) -> NativeVersionStore:
704705
return version_store_factory(ignore_sort_order=True)
705706

706707

707708
@pytest.fixture
708-
def lmdb_version_store_small_segment(version_store_factory):
709+
def lmdb_version_store_small_segment(version_store_factory) -> NativeVersionStore:
709710
return version_store_factory(column_group_size=1000, segment_row_size=1000, lmdb_config={"map_size": 2**30})
710711

711712

712713
@pytest.fixture
713-
def lmdb_version_store_tiny_segment(version_store_factory):
714+
def lmdb_version_store_tiny_segment(version_store_factory) -> NativeVersionStore:
714715
return version_store_factory(column_group_size=2, segment_row_size=2, lmdb_config={"map_size": 2**30})
715716

716717

717718
@pytest.fixture
718-
def lmdb_version_store_tiny_segment_dynamic(version_store_factory):
719+
def lmdb_version_store_tiny_segment_dynamic(version_store_factory) -> NativeVersionStore:
719720
return version_store_factory(column_group_size=2, segment_row_size=2, dynamic_schema=True)
720721

721722

0 commit comments

Comments
 (0)