|
17 | 17 | """ |
18 | 18 | import logging |
19 | 19 | import time |
| 20 | +import random |
20 | 21 | from datetime import date |
21 | 22 | from decimal import Decimal |
22 | 23 | from unittest.mock import Mock |
@@ -143,7 +144,9 @@ def test_full_data_types(self): |
143 | 144 | ('f10', pa.decimal128(10, 2)), |
144 | 145 | ('f11', pa.date32()), |
145 | 146 | ]) |
146 | | - schema = Schema.from_pyarrow_schema(simple_pa_schema) |
| 147 | + stats_enabled = random.random() < 0.5 |
| 148 | + options = {'metadata.stats-mode': 'full'} if stats_enabled else {} |
| 149 | + schema = Schema.from_pyarrow_schema(simple_pa_schema, options=options) |
147 | 150 | self.rest_catalog.create_table('default.test_full_data_types', schema, False) |
148 | 151 | table = self.rest_catalog.get_table('default.test_full_data_types') |
149 | 152 |
|
@@ -183,14 +186,25 @@ def test_full_data_types(self): |
183 | 186 | manifest_files[0].file_name, |
184 | 187 | lambda row: table_scan.starting_scanner._filter_manifest_entry(row), |
185 | 188 | drop_stats=False) |
186 | | - min_value_stats = GenericRowDeserializer.from_bytes(manifest_entries[0].file.value_stats.min_values.data, |
187 | | - table.fields).values |
188 | | - max_value_stats = GenericRowDeserializer.from_bytes(manifest_entries[0].file.value_stats.max_values.data, |
189 | | - table.fields).values |
190 | | - expected_min_values = [col[0].as_py() for col in expect_data] |
191 | | - expected_max_values = [col[1].as_py() for col in expect_data] |
192 | | - self.assertEqual(min_value_stats, expected_min_values) |
193 | | - self.assertEqual(max_value_stats, expected_max_values) |
| 189 | + # Python write does not produce value stats |
| 190 | + if stats_enabled: |
| 191 | + self.assertEqual(manifest_entries[0].file.value_stats_cols, None) |
| 192 | + min_value_stats = GenericRowDeserializer.from_bytes(manifest_entries[0].file.value_stats.min_values.data, |
| 193 | + table.fields).values |
| 194 | + max_value_stats = GenericRowDeserializer.from_bytes(manifest_entries[0].file.value_stats.max_values.data, |
| 195 | + table.fields).values |
| 196 | + expected_min_values = [col[0].as_py() for col in expect_data] |
| 197 | + expected_max_values = [col[1].as_py() for col in expect_data] |
| 198 | + self.assertEqual(min_value_stats, expected_min_values) |
| 199 | + self.assertEqual(max_value_stats, expected_max_values) |
| 200 | + else: |
| 201 | + self.assertEqual(manifest_entries[0].file.value_stats_cols, []) |
| 202 | + min_value_stats = GenericRowDeserializer.from_bytes(manifest_entries[0].file.value_stats.min_values.data, |
| 203 | + []).values |
| 204 | + max_value_stats = GenericRowDeserializer.from_bytes(manifest_entries[0].file.value_stats.max_values.data, |
| 205 | + []).values |
| 206 | + self.assertEqual(min_value_stats, []) |
| 207 | + self.assertEqual(max_value_stats, []) |
194 | 208 |
|
195 | 209 | def test_mixed_add_and_delete_entries_same_partition(self): |
196 | 210 | """Test record_count calculation with mixed ADD/DELETE entries in same partition.""" |
|
0 commit comments