Skip to content

Commit f78646d

Browse files
committed
file-based: fix get_nltk_temp_folder method name and make constants for paths
1 parent eb47618 commit f78646d

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

airbyte_cdk/sources/file_based/file_types/unstructured_parser.py

+7 -4
Original file line number | Diff line number | Diff line change
@@ -43,23 +43,26 @@
4343
unstructured_partition_docx = None
4444
unstructured_partition_pptx = None
4545

46+
AIRBYTE_NLTK_DATA_DIR = "/airbyte/nltk_data"
47+
TMP_NLTK_DATA_DIR = "/tmp/nltk_data"
4648

47-
def get_ntlk_temp_folder() -> str:
49+
50+
def get_nltk_temp_folder() -> str:
4851
"""
4952
For non-root connectors /tmp is not currently writable, but we should allow it in the future.
5053
It's safe to use /airbyte for now. Fallback to /tmp for local development.
5154
"""
5255
try:
53-
nltk_data_dir = "/airbyte/nltk_data"
56+
nltk_data_dir = AIRBYTE_NLTK_DATA_DIR
5457
os.makedirs(nltk_data_dir, exist_ok=True)
5558
except OSError:
56-
nltk_data_dir = "/tmp/nltk_data"
59+
nltk_data_dir = TMP_NLTK_DATA_DIR
5760
os.makedirs(nltk_data_dir, exist_ok=True)
5861
return nltk_data_dir
5962

6063

6164
try:
62-
nltk_data_dir = get_ntlk_temp_folder()
65+
nltk_data_dir = get_nltk_temp_folder()
6366
nltk.data.path.append(nltk_data_dir)
6467
nltk.data.find("tokenizers/punkt.zip")
6568
nltk.data.find("tokenizers/punkt_tab.zip")

0 commit comments

Comments
 (0)