Skip to content

Commit e241c09

Browse files
authored
Merge pull request #129 from TyShkan/fix_recursive_folder_scan
fix: recursive folder scan
2 parents c2d02b7 + 0f4c8b0 commit e241c09

File tree

5 files changed

+48
-6
lines changed

5 files changed

+48
-6
lines changed

meltano.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ plugins:
2222
add_metadata_columns: false
2323
settings:
2424
- name: files
25-
description: Array of objects containing keys - `entity`, `file`, `keys`, `encoding` (Optional), `delimiter` (Optional), `doublequote` (Optional), `escapechar` (Optional), `quotechar` (Optional), `skipinitialspace` (Optional), `strict` (Optional)
25+
description: Array of objects containing keys - `entity`, `path`, `keys`, `encoding` (Optional), `delimiter` (Optional), `doublequote` (Optional), `escapechar` (Optional), `quotechar` (Optional), `skipinitialspace` (Optional), `strict` (Optional)
2626
kind: array
2727
- name: csv_files_definition
28-
description: "Project-relative path to JSON file holding array of objects with keys: `entity`, `file`, `keys`, and `encoding` (Optional)."
28+
description: "Project-relative path to JSON file holding array of objects with keys: `entity`, `path`, `keys`, and `encoding` (Optional)."
2929
documentation: https://gitlab.com/meltano/tap-csv#run
3030
label: CSV Files Definition
3131
placeholder: Ex. files-def.json

tap_csv/client.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ def get_records(self, context: Optional[dict]) -> Iterable[dict]:
5050

5151
yield dict(zip(self.header, row))
5252

53+
def _get_recursive_file_paths(self, file_path: str) -> list:
    """Return every valid file found under ``file_path``, subfolders included.

    The directory tree is traversed with ``os.walk`` and a file is kept only
    when ``self.is_valid_filename`` accepts its full path.

    Args:
        file_path: Directory to scan recursively.

    Returns:
        The accepted file paths in a deterministic order: the files of a
        directory (sorted by name) come first, followed by the files of its
        subdirectories, which are visited in sorted order.
    """
    file_paths = []

    for dirpath, dirnames, filenames in os.walk(file_path):
        # os.walk reports entries in arbitrary, filesystem-dependent order.
        # Sorting ``dirnames`` in place (allowed in top-down mode) fixes the
        # order in which subdirectories are visited.
        dirnames.sort()
        for filename in sorted(filenames):
            # Use a separate name so the ``file_path`` argument is not rebound.
            candidate = os.path.join(dirpath, filename)
            if self.is_valid_filename(candidate):
                file_paths.append(candidate)

    return file_paths
63+
5364
def get_file_paths(self) -> list:
5465
"""Return a list of file paths to read.
5566
@@ -67,10 +78,7 @@ def get_file_paths(self) -> list:
6778
file_paths = []
6879
if os.path.isdir(file_path):
6980
clean_file_path = os.path.normpath(file_path) + os.sep
70-
for filename in os.listdir(clean_file_path):
71-
file_path = clean_file_path + filename
72-
if self.is_valid_filename(file_path):
73-
file_paths.append(file_path)
81+
file_paths = self._get_recursive_file_paths(clean_file_path)
7482
else:
7583
if self.is_valid_filename(file_path):
7684
file_paths.append(file_path)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
col1,col2,col3
2+
a,b,c
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
col1,col2,col3
2+
d,e,f

tap_csv/tests/test_client.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Tests client methods."""
2+
3+
import os
4+
5+
from tap_csv.tap import CSVStream, TapCSV
6+
7+
8+
def test_get_file_paths_recursively():
    """Check that ``get_file_paths`` also returns files from nested folders.

    The stream is pointed at ``data/subfolder1/`` next to this test module and
    is expected to report ``alphabet.csv`` both from that folder and from its
    ``subfolder2`` child.
    """
    test_data_dir = os.path.dirname(os.path.abspath(__file__))
    scanned_dir = os.path.join(test_data_dir, "data", "subfolder1")

    sample_config = {
        "files": [
            {
                "entity": "test",
                "path": f"{test_data_dir}/data/subfolder1/",
                "keys": [],
            }
        ]
    }

    stream = CSVStream(
        tap=TapCSV(config=sample_config, catalog={}, state={}),
        name="test_recursive",
        file_config=sample_config["files"][0],
    )

    # Build the expected paths with os.path.join so the separators match the
    # ones produced by the client on every platform.
    expected = [
        os.path.join(scanned_dir, "alphabet.csv"),
        os.path.join(scanned_dir, "subfolder2", "alphabet.csv"),
    ]
    # Directory traversal order is filesystem-dependent, so compare the
    # contents without relying on the order in which files were discovered.
    assert sorted(stream.get_file_paths()) == sorted(expected)

0 commit comments

Comments
 (0)