11import unittest
2- import ray
2+ import pytest
33from deltacat .types .media import ContentEncoding , ContentType
44from deltacat .utils .daft import (
55 daft_file_to_pyarrow_table ,
@@ -162,12 +162,11 @@ def test_read_from_local_single_column_with_row_groups(self):
162162 self .assertEqual (table .num_rows , 10 )
163163
164164
165+ @pytest .mark .integration
165166class TestFilesToDataFrame (unittest .TestCase ):
166167 MVP_PATH = "deltacat/tests/utils/data/mvp.parquet"
167168
168169 def test_read_local_files_all_columns (self ):
169- if not ray .is_initialized ():
170- ray .init ()
171170 df = files_to_dataframe (
172171 uris = [self .MVP_PATH ],
173172 content_encoding = ContentEncoding .IDENTITY .value ,
@@ -180,8 +179,6 @@ def test_read_local_files_all_columns(self):
180179 self .assertEqual (table .num_rows , 100 )
181180
182181 def test_read_local_files_with_column_selection (self ):
183- if not ray .is_initialized ():
184- ray .init ()
185182 df = files_to_dataframe (
186183 uris = [self .MVP_PATH ],
187184 content_encoding = ContentEncoding .IDENTITY .value ,
@@ -195,8 +192,6 @@ def test_read_local_files_with_column_selection(self):
195192 self .assertEqual (table .num_rows , 100 )
196193
197194 def test_read_local_files_does_not_materialize_by_default (self ):
198- if not ray .is_initialized ():
199- ray .init ()
200195 df = files_to_dataframe (
201196 uris = [self .MVP_PATH ],
202197 content_encoding = ContentEncoding .IDENTITY .value ,
@@ -212,8 +207,6 @@ def test_read_local_files_does_not_materialize_by_default(self):
212207 self .assertEqual (len (df ), 100 )
213208
214209 def test_supports_unescaped_tsv_content_type (self ):
215- if not ray .is_initialized ():
216- ray .init ()
217210 # Test that UNESCAPED_TSV is now supported (was previously unsupported)
218211 # Use a CSV file since we're testing TSV reader functionality
219212 csv_path = "deltacat/tests/utils/data/non_empty_valid.csv"
@@ -230,8 +223,6 @@ def test_supports_unescaped_tsv_content_type(self):
230223 self .assertGreater (len (table .schema .names ), 0 )
231224
232225 def test_supports_gzip_content_encoding (self ):
233- if not ray .is_initialized ():
234- ray .init ()
235226 # Test that GZIP encoding is now supported (was previously unsupported)
236227 df = files_to_dataframe (
237228 uris = [self .MVP_PATH ],
@@ -245,8 +236,6 @@ def test_supports_gzip_content_encoding(self):
245236 self .assertEqual (table .num_rows , 100 )
246237
247238 def test_raises_error_if_not_supported_content_type (self ):
248- if not ray .is_initialized ():
249- ray .init ()
250239 # Test that truly unsupported content types raise NotImplementedError
251240 self .assertRaises (
252241 NotImplementedError ,
@@ -259,8 +248,6 @@ def test_raises_error_if_not_supported_content_type(self):
259248 )
260249
261250 def test_raises_error_if_not_supported_content_encoding (self ):
262- if not ray .is_initialized ():
263- ray .init ()
264251 # Test that truly unsupported content encodings raise NotImplementedError
265252 self .assertRaises (
266253 NotImplementedError ,
@@ -273,8 +260,6 @@ def test_raises_error_if_not_supported_content_encoding(self):
273260 )
274261
275262 def test_accepts_custom_kwargs (self ):
276- if not ray .is_initialized ():
277- ray .init ()
278263 # Test that custom kwargs are passed through to daft.read_parquet
279264 df = files_to_dataframe (
280265 uris = [self .MVP_PATH ],
@@ -290,8 +275,6 @@ def test_accepts_custom_kwargs(self):
290275 self .assertEqual (table .num_rows , 100 )
291276
292277 def test_accepts_io_config (self ):
293- if not ray .is_initialized ():
294- ray .init ()
295278 # Test that io_config parameter is accepted and passed correctly
296279 df = files_to_dataframe (
297280 uris = [self .MVP_PATH ],
0 commit comments