@@ -116,7 +116,7 @@ def _convert_name(filenames, shuffle=False):
116116 return base + ".hdf5"
117117
118118
119- def open (path , convert = False , shuffle = False , * args , ** kwargs ):
119+ def open (path , convert = False , shuffle = False , copy_index = True , * args , ** kwargs ):
120120 """Open a dataset from file given by path
121121
122122 Example:
@@ -129,6 +129,7 @@ def open(path, convert=False, shuffle=False, *args, **kwargs):
129129 :param bool shuffle: shuffle converted dataset or not
130130 :param args: extra arguments for file readers that need it
131131 :param kwargs: extra keyword arguments
132+ :param bool copy_index: copy index when source is read via pandas
132133 :return: return dataset if file is supported, otherwise None
133134 :rtype: Dataset
134135
@@ -167,15 +168,18 @@ def open(path, convert=False, shuffle=False, *args, **kwargs):
167168 path = filenames [0 ]
168169 ext = os .path .splitext (path )[1 ]
169170 if os .path .exists (filename_hdf5 ) and convert : # also check mtime?
170- ds = vaex .file .open (filename_hdf5 , * args , ** kwargs )
171+ if convert :
172+ ds = vaex .file .open (filename_hdf5 )
173+ else :
174+ ds = vaex .file .open (filename_hdf5 , * args , ** kwargs )
171175 else :
172176 if ext == '.csv' : # special support for csv.. should probably approach it a different way
173- ds = from_csv (path , ** kwargs )
177+ ds = from_csv (path , copy_index = copy_index , ** kwargs )
174178 else :
175179 ds = vaex .file .open (path , * args , ** kwargs )
176180 if convert :
177181 ds .export_hdf5 (filename_hdf5 , shuffle = shuffle )
178- ds = vaex .file .open (filename_hdf5 , * args , ** kwargs )
182+ ds = vaex .file .open (filename_hdf5 ) # arguments were meant for pandas?
179183 if ds is None :
180184 if os .path .exists (path ):
181185 raise IOError ('Could not open file: {}, did you install vaex-hdf5?' .format (path ))
@@ -348,10 +352,10 @@ def from_ascii(path, seperator=None, names=True, skip_lines=0, skip_after=0, **k
348352 return ds
349353
350354
351- def from_csv (filename_or_buffer , ** kwargs ):
355+ def from_csv (filename_or_buffer , copy_index = True , ** kwargs ):
352356 """Shortcut to read a csv file using pandas and convert to a dataset directly"""
353357 import pandas as pd
354- return from_pandas (pd .read_csv (filename_or_buffer , ** kwargs ))
358+ return from_pandas (pd .read_csv (filename_or_buffer , ** kwargs ), copy_index = copy_index )
355359
356360
357361def read_csv (filepath_or_buffer , ** kwargs ):
0 commit comments