Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Supporting pathlib's Path objects in FileDataStream #377

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/python/nimbusml/internal/utils/data_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import tempfile
from shutil import copyfile
from pathlib import Path

from .data_roles import DataRoles
from .data_schema import DataSchema
Expand Down Expand Up @@ -229,6 +230,10 @@ def __init__(self, filename, schema, roles=None):
:param schema: filename schema
"""
super(FileDataStream, self).__init__(schema, roles)

if isinstance(filename, Path):
filename = str(filename.resolve())

self._filename = filename

def __repr__(self):
Expand Down
12 changes: 12 additions & 0 deletions src/python/nimbusml/tests/test_data_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pandas
from nimbusml import DataSchema
from nimbusml import FileDataStream
from pathlib import Path

try:
from pandas.testing import assert_frame_equal
Expand All @@ -30,6 +31,17 @@ def test_data_stream(self):
assert repr(fi) == repr(fi2)
os.remove(f.name)

def test_data_stream_path_object(self):
df = pandas.DataFrame(dict(a=[0, 1], b=[0.1, 0.2]))
with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
df.to_csv(f, sep=',', index=False)

fi = FileDataStream.read_csv(Path(f.name), sep=',')
Copy link
Collaborator

@pieths pieths Dec 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unit test is failing here. It looks like a similar change may also be required in src/python/nimbusml/internal/utils/data_schema.py, in read_schema(*data, **options).

I haven't tested it, but updating the code around line 844 might fix the issue:

elif hasattr(X, 'read') or isinstance(X, str) or (
                    six.PY2 and isinstance(X, (str, text_type))):

fi2 = fi.clone()
assert repr(fi) == repr(fi2)
os.remove(f.name)


def test_data_header_no_dataframe(self):
li = [1.0, 1.0, 2.0]
df = pandas.DataFrame(li)
Expand Down